{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.2 Human GEX Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clustergrammer2 backend version 0.2.9\n"
     ]
    }
   ],
   "source": [
    "from clustergrammer2 import net\n",
    "df = {}\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from copy import deepcopy\n",
    "from scipy.spatial.distance import pdist\n",
    "import itertools as it"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20400, 7339)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# read the gzipped human RNA UMI table; the first CSV column becomes the row index\n",
    "filename = '../data/CITE-seq_data/GSE100866_CBMC_8K_13AB_10X-RNA_umi_HUMAN.csv.gz'\n",
    "df['gex-ini'] = pd.read_csv(\n",
    "    filename,\n",
    "    index_col=0,\n",
    "    compression='gzip',\n",
    ")\n",
    "df['gex-ini'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10, 7265)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# load the ADT matrix through clustergrammer2 and keep the exported DataFrame\n",
    "adt_filename = '../data/CITE-seq_data/adt_ashz_trim_cats.txt'\n",
    "net.load_file(adt_filename)\n",
    "df['adt-ini'] = net.export_df()\n",
    "df['adt-ini'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7265\n"
     ]
    }
   ],
   "source": [
    "# element 0 of each ADT column label is the cell identifier used below to\n",
    "# select the matching GEX columns\n",
    "cols = df['adt-ini'].columns.tolist()\n",
    "keep_cells = [inst_col[0] for inst_col in cols]\n",
    "print(len(keep_cells))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Remove HUMAN prefix from genes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# strip only a leading 'HUMAN_'; str.replace would also remove it mid-name\n",
    "prefix = 'HUMAN_'\n",
    "rows = df['gex-ini'].index.tolist()\n",
    "new_rows = [x[len(prefix):] if x.startswith(prefix) else x for x in rows]\n",
    "df['gex-ini'].index = new_rows"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Filter for trimmed cells only and arcsinh transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# keep only the cells listed in keep_cells, then apply arcsinh(x / 5)\n",
    "df['gex-trim'] = np.arcsinh(df['gex-ini'][keep_cells] / 5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Drop ribosomal and mitochondrial genes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20400, 7265)\n",
      "20400\n",
      "20223\n",
      "['MT-ATP6', 'MT-ATP8', 'MT-CO1', 'MT-CO2', 'MT-CO3', 'MT-CYB', 'MT-ND1', 'MT-ND2', 'MT-ND3', 'MT-ND4', 'MT-ND4L', 'MT-ND5', 'MT-ND6', 'MT-RNR1', 'MT-RNR2', 'MT-TD', 'MT-TF', 'MT-TG', 'MT-TH', 'MT-TI', 'MT-TL1', 'MT-TP', 'MT-TT', 'MT-TW', 'MTRF1', 'MTRF1L', 'MTRNR2L1', 'MTRNR2L10', 'MTRNR2L11', 'MTRNR2L12', 'MTRNR2L3', 'MTRNR2L4', 'MTRNR2L5', 'MTRNR2L6', 'MTRNR2L7', 'MTRNR2L8']\n",
      "(20187, 7265)\n"
     ]
    }
   ],
   "source": [
    "print(df['gex-trim'].shape)\n",
    "all_genes = df['gex-trim'].index.tolist()\n",
    "print(len(all_genes))\n",
    "\n",
    "# Removing ribosomal genes: this is a substring match, so every gene whose\n",
    "# name contains 'RPL' or 'RPS' anywhere is dropped\n",
    "keep_genes = [x for x in all_genes if 'RPL' not in x and 'RPS' not in x]\n",
    "print(len(keep_genes))\n",
    "\n",
    "# .loc with a list of labels returns a new DataFrame, so 'gex-trim' is left\n",
    "# untouched without needing a deepcopy first\n",
    "df['gex'] = df['gex-trim'].loc[keep_genes]\n",
    "\n",
    "# Removing Mitochondrial Genes\n",
    "list_mito_genes = ['MTRNR2L11', 'MTRF1', 'MTRNR2L12', 'MTRNR2L13', 'MTRF1L', 'MTRNR2L6', 'MTRNR2L7',\n",
    "                'MTRNR2L10', 'MTRNR2L8', 'MTRNR2L5', 'MTRNR2L1', 'MTRNR2L3', 'MTRNR2L4']\n",
    "\n",
    "all_genes = df['gex'].index.tolist()\n",
    "# a gene counts as mitochondrial if it starts with 'MT-' or if the part of its\n",
    "# name before the first '_' is in list_mito_genes\n",
    "mito_genes = [x for x in all_genes if x.startswith('MT-') or\n",
    "             x.split('_')[0] in list_mito_genes]\n",
    "\n",
    "print(mito_genes)\n",
    "\n",
    "mito_gene_set = set(mito_genes)\n",
    "keep_genes = [x for x in all_genes if x not in mito_gene_set]\n",
    "df['gex'] = df['gex'].loc[keep_genes]\n",
    "print(df['gex'].shape)\n",
    "\n",
    "# transfer categories: map element 0 of each ADT column label to element 1\n",
    "cols = df['adt-ini'].columns.tolist()\n",
    "ct_dict = {inst_col[0]: inst_col[1] for inst_col in cols}\n",
    "\n",
    "cols = df['gex'].columns.tolist()\n",
    "new_cols = [(x, 'Cell Type: ' + ct_dict[x]) for x in cols]\n",
    "df['gex'].columns = new_cols\n",
    "\n",
    "# normalize every column by its column sum\n",
    "# NOTE(review): 'gex-trim' was already arcsinh-transformed, so this sum is over\n",
    "# transformed values rather than raw UMI counts -- confirm this is intended\n",
    "barcode_umi_sum = df['gex'].sum()\n",
    "# DataFrame.div already returns a new frame, so no deepcopy is needed\n",
    "df['gex-umi'] = df['gex'].div(barcode_umi_sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(TTGGAACCATGTCTCC, Cell Type: Unknown_5)            1.0\n",
       "(AACTCTTCAACTGGCC, Cell Type: Unknown_5)            1.0\n",
       "(ACAGCTAAGCGATGAC, Cell Type: CD4 T cell)           1.0\n",
       "(CATCGAAGTTCCACGG, Cell Type: Unknown_3)            1.0\n",
       "(CCGGGATGTAACGACG, Cell Type: Unknown_3)            1.0\n",
       "(GCTTGAAGTACCGTAT, Cell Type: CD4 T cell)           1.0\n",
       "(GGCGACTGTCAGAATA, Cell Type: Unknown_2)            1.0\n",
       "(CGGACGTGTCGAACAG, Cell Type: CD14+ Mono cell_1)    1.0\n",
       "(CTCGTACAGCGGATCA, Cell Type: CD14+ Mono cell_1)    1.0\n",
       "(TCGGTAACATGTTGAC, Cell Type: Unknown_5)            1.0\n",
       "(TCTTTCCTCATTCACT, Cell Type: CD8 T cell)           1.0\n",
       "(CTAGTGAAGCCTATGT, Cell Type: CD4 T cell)           1.0\n",
       "(ACATCAGCAATGGACG, Cell Type: Unknown_5)            1.0\n",
       "(GGCCGATTCACAGTAC, Cell Type: Unknown_3)            1.0\n",
       "(TACAGTGAGAACAACT, Cell Type: CD4 T cell)           1.0\n",
       "(CTACATTTCAGTCAGT, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(CTGCCTAGTGTTGGGA, Cell Type: CD4 T cell)           1.0\n",
       "(ATTTCTGCATGCAACT, Cell Type: CD4 T cell)           1.0\n",
       "(TTTATGCAGGAGTTGC, Cell Type: Unknown_3)            1.0\n",
       "(CTCGTCACACATAACC, Cell Type: CD16+ Mono cell)      1.0\n",
       "(AGGTCATCAGTGACAG, Cell Type: Unknown_3)            1.0\n",
       "(GATGAAAAGCCCAGCT, Cell Type: Unknown_2)            1.0\n",
       "(TAAGTGCCAACACCTA, Cell Type: Unknown_1)            1.0\n",
       "(GTCGGGTTCTTCCTTC, Cell Type: CD34+ cell)           1.0\n",
       "(GATCGATAGACAGAGA, Cell Type: CD4 T cell)           1.0\n",
       "(GCTGCAGAGCTAGCCC, Cell Type: Unknown_2)            1.0\n",
       "(TCGCGTTTCGTAGATC, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(CTTTGCGCAATCTGCA, Cell Type: CD14+ Mono cell_1)    1.0\n",
       "(CCGTTCATCCAAACTG, Cell Type: CD4 T cell)           1.0\n",
       "(ACGGCCAGTGGTGTAG, Cell Type: Unknown_5)            1.0\n",
       "                                                   ... \n",
       "(AGCGTCGTCCCAAGTA, Cell Type: NK cell)              1.0\n",
       "(GACGCGTGTAGAAAGG, Cell Type: Unknown_1)            1.0\n",
       "(CGTGTAAGTGGCTCCA, Cell Type: pDC_1)                1.0\n",
       "(TGCACCTGTTCGTTGA, Cell Type: NK cell)              1.0\n",
       "(CGTGTAACACACATGT, Cell Type: NK cell)              1.0\n",
       "(CGTCAGGAGTGGTCCC, Cell Type: CD14+ Mono cell_1)    1.0\n",
       "(TCTCTAATCGCCTGAG, Cell Type: NK cell)              1.0\n",
       "(CTGGTCTGTAAAGGAG, Cell Type: NK cell)              1.0\n",
       "(AGGGTGAGTGATGCCC, Cell Type: CD4 T cell)           1.0\n",
       "(GTCGGGTAGTTCGCAT, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(TTGCGTCGTGACTCAT, Cell Type: CD4 T cell)           1.0\n",
       "(GTTACAGAGCGTCTAT, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(CGTGTAACAGGAATCG, Cell Type: NK cell)              1.0\n",
       "(TTCGGTCCACTTACGA, Cell Type: NK cell)              1.0\n",
       "(AGTCTTTAGCCAACAG, Cell Type: pDC_1)                1.0\n",
       "(GGTGCGTAGCGATGAC, Cell Type: NK cell)              1.0\n",
       "(GTCGGGTAGTACGACG, Cell Type: CD4 T cell)           1.0\n",
       "(GGCGTGTAGGATTCGG, Cell Type: Unknown_1)            1.0\n",
       "(GTCGGGTAGTGAATTG, Cell Type: CD4 T cell)           1.0\n",
       "(GCGACCAGTCACTTCC, Cell Type: pDC_1)                1.0\n",
       "(GCATACAAGCTGAACG, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(CGATCGGAGCCGTCGT, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(ATCTGCCTCTGACCTC, Cell Type: Unknown_1)            1.0\n",
       "(CTGAAGTAGGGATCTG, Cell Type: NK cell)              1.0\n",
       "(GGCGTGTAGAGTGAGA, Cell Type: CD14+ Mono cell_2)    1.0\n",
       "(AGCGTCGAGTCAAGGC, Cell Type: Unknown_1)            1.0\n",
       "(AGCGTCGAGTTACGGG, Cell Type: CD4 T cell)           1.0\n",
       "(TCGCGAGGTAGCCTAT, Cell Type: Unknown_1)            1.0\n",
       "(GTCGGGTAGTAGCCGA, Cell Type: Unknown_1)            1.0\n",
       "(TTGCCGTGTAGATTAG, Cell Type: Unknown_1)            1.0\n",
       "Length: 7265, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['gex-umi'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Keep top 5K genes by sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5000, 7265)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# rank genes (rows) by their sum across all columns and keep the first 5000\n",
    "gene_sums = df['gex'].sum(axis=1)\n",
    "ser_sum = gene_sums.sort_values(ascending=False)\n",
    "keep_genes = ser_sum.index[:5000].tolist()\n",
    "df['gex-filt'] = df['gex'].loc[keep_genes]\n",
    "df['gex-filt'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5000, 7265)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# same ranking as for 'gex', applied to the column-normalized data\n",
    "gene_sums = df['gex-umi'].sum(axis=1)\n",
    "ser_sum = gene_sums.sort_values(ascending=False)\n",
    "keep_genes = ser_sum.index[:5000].tolist()\n",
    "df['gex-umi-filt'] = df['gex-umi'].loc[keep_genes]\n",
    "df['gex-umi-filt'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10, 7265)\n",
      "(5000, 7265)\n",
      "(5000, 7265)\n"
     ]
    }
   ],
   "source": [
    "print(df['adt-ini'].shape)\n",
    "print(df['gex-filt'].shape)\n",
    "print(df['gex-umi-filt'].shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compare Sample-Sample Similarity Across Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def corr_datasets(name_1, name_2):\n",
    "    \"\"\"Print how similar the sample-sample distances of two datasets are.\n",
    "\n",
    "    For each dataset, the cosine distance between every pair of columns is\n",
    "    computed with pdist. The two condensed distance vectors are then compared\n",
    "    using 1 - correlation distance, and the value is printed as\n",
    "    '<name_1> vs <name_2> <correlation>'.\n",
    "\n",
    "    Columns are paired by position, so both datasets are assumed to list\n",
    "    their samples in the same order -- this is not checked here.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name_1, name_2 : str\n",
    "        Keys into the notebook-level dict `df`.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the two datasets do not have the same number of columns.\n",
    "    \"\"\"\n",
    "    num_cols_1 = df[name_1].shape[1]\n",
    "    num_cols_2 = df[name_2].shape[1]\n",
    "    if num_cols_1 != num_cols_2:\n",
    "        # distance vectors of different lengths would be NaN-padded by\n",
    "        # pd.concat, so the printed value would not be a meaningful correlation\n",
    "        raise ValueError(\n",
    "            'datasets must have the same number of columns: '\n",
    "            '{} has {}, {} has {}'.format(name_1, num_cols_1, name_2, num_cols_2))\n",
    "\n",
    "    ser_dists = []\n",
    "    for inst_name in (name_1, name_2):\n",
    "        dist_arr = pdist(df[inst_name].transpose(), metric='cosine')\n",
    "        ser_dists.append(pd.Series(data=dist_arr, name=inst_name))\n",
    "    df_dist = pd.concat(ser_dists, axis=1)\n",
    "\n",
    "    inst_corr = 1 - pdist(df_dist.transpose(), metric='correlation')\n",
    "    print(name_1, 'vs', name_2, inst_corr[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### ADT vs GEX"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-ini vs gex-filt 0.670118808363\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-ini', 'gex-filt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### ADT vs UMI-normalized GEX"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-ini vs gex-umi-filt 0.669929648801\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-ini', 'gex-umi-filt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-ini vs gex-umi-filt 0.669929648801\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-ini', 'gex-umi-filt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Make Z-scored versions of the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ADT is used without z-scoring here: 'adt-z' refers to the same DataFrame\n",
    "# as 'adt-ini'\n",
    "df['adt-z'] = df['adt-ini']\n",
    "\n",
    "\n",
    "def zscore_rows(name, n_top=None):\n",
    "    \"\"\"Return df[name] with its rows z-scored via clustergrammer2's `net`.\n",
    "\n",
    "    If n_top is given, net.filter_N_top(rank_type='var') first keeps only\n",
    "    the top n_top rows; otherwise all rows are z-scored.\n",
    "    \"\"\"\n",
    "    net.load_df(df[name])\n",
    "    if n_top is not None:\n",
    "        net.filter_N_top(inst_rc='row', N_top=n_top, rank_type='var')\n",
    "    net.normalize(axis='row', norm_type='zscore')\n",
    "    return net.export_df()\n",
    "\n",
    "\n",
    "# Z-scored 5K gex and 5K UMI-normalized gex (all rows)\n",
    "df['gex-5K-z'] = zscore_rows('gex-filt')\n",
    "df['gex-5K-umi-z'] = zscore_rows('gex-umi-filt')\n",
    "\n",
    "# Z-scored subsets of the 5K genes: top 1000 (1K), 100 (1H) and 50 rows\n",
    "for inst_label, inst_n_top in [('1K', 1000), ('1H', 100), ('50', 50)]:\n",
    "    df['gex-5K-' + inst_label + '-z'] = zscore_rows('gex-filt', inst_n_top)\n",
    "    df['gex-5K-' + inst_label + '-umi-z'] = zscore_rows('gex-umi-filt', inst_n_top)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-z vs gex-5K-z 0.577517466014\n",
      "adt-z vs gex-5K-umi-z 0.661076306163\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-z', 'gex-5K-z')\n",
    "corr_datasets('adt-z', 'gex-5K-umi-z')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-z vs gex-5K-1K-z 0.630417530262\n",
      "adt-z vs gex-5K-1K-umi-z 0.706515334859\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-z', 'gex-5K-1K-z')\n",
    "corr_datasets('adt-z', 'gex-5K-1K-umi-z')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-z vs gex-5K-1H-z 0.685659326402\n",
      "adt-z vs gex-5K-1H-umi-z 0.729042130795\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-z', 'gex-5K-1H-z')\n",
    "corr_datasets('adt-z', 'gex-5K-1H-umi-z')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "adt-z vs gex-5K-50-z 0.680044573427\n",
      "adt-z vs gex-5K-50-umi-z 0.72393620348\n"
     ]
    }
   ],
   "source": [
    "corr_datasets('adt-z', 'gex-5K-50-z')\n",
    "corr_datasets('adt-z', 'gex-5K-50-umi-z')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50, 7265)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['gex-5K-50-umi-z'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bc753af0a0e24c698d7b164d7f32afd1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "ExampleWidget(network='{\"row_nodes\": [{\"name\": \"S100A8\", \"ini\": 50, \"clust\": 22, \"rank\": 20, \"rankvar\": 4, \"gr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "net.load_df(df['gex-5K-50-umi-z'])\n",
    "net.widget()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Cluster CD14+ monocytes and NK cells"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'gex-filt-umi'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-27-e9822880376f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'gex-filt-umi'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m: 'gex-filt-umi'"
     ]
    }
   ],
   "source": [
    "df['gex-filt-umi'].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the UMI-normalized top-5K frame is stored under 'gex-umi-filt'\n",
    "cols = df['gex-umi-filt'].columns.tolist()\n",
    "# substring match on the category label, so every 'CD14+ Mono ...' group is kept\n",
    "keep_cols = [x for x in cols if 'CD14+ Mono' in x[1]]\n",
    "print(len(keep_cols))\n",
    "net.load_df(df['gex-umi-filt'][keep_cols])\n",
    "net.filter_N_top(inst_rc='row', N_top=100, rank_type='var')\n",
    "net.normalize(axis='row', norm_type='zscore')\n",
    "net.widget()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the UMI-normalized top-5K frame is stored under 'gex-umi-filt'\n",
    "net.load_df(df['gex-umi-filt'])\n",
    "# keep only the columns whose category at index 1 is 'Cell Type: NK cell'\n",
    "net.filter_cat(axis='col', cat_index=1, cat_name='Cell Type: NK cell')\n",
    "net.filter_N_top(inst_rc='row', N_top=50, rank_type='var')\n",
    "net.normalize(axis='row', norm_type='zscore')\n",
    "net.widget()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "net.load_df(df['adt-ini'])\n",
    "# net.filter_N_top(inst_rc='row', N_top=100, rank_type='var')\n",
    "# net.normalize(axis='row', norm_type='zscore')\n",
    "net.widget()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# net.load_df(df['gex-cat-filt'])\n",
    "# net.filter_N_top(inst_rc='row', N_top=100, rank_type='var')\n",
    "# net.normalize(axis='row', norm_type='zscore')\n",
    "# net.widget()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}