{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 3.1 CITE-seq GEX" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "clustergrammer2 backend version 0.2.7\n" ] } ], "source": [ "from clustergrammer2 import net\n", "# one dict holds every table of this notebook under a string key\n", "df = {}" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import json_scripts" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import gene_exp_10x" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# gzip-compressed CSV; its first column becomes the row index\n", "filename = '../data/CITE-seq_data/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz'\n", "df['gex-ini'] = pd.read_csv(filename, index_col=0, compression='gzip')" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(36280, 8617)\n" ] } ], "source": [ "print(df['gex-ini'].shape)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "36280" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rows = df['gex-ini'].index.tolist()\n", "len(rows)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15879" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# row labels that contain the MOUSE_ tag\n", "mouse_genes = [label for label in rows if 'MOUSE_' in label]\n", "len(mouse_genes)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "20400" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# row labels that contain the HUMAN_ tag\n", "human_genes = [label for label in rows if 'HUMAN_' in label]\n", "len(human_genes)" ] }, { "cell_type": "code", "execution_count": 12, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "36279" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# combined size of the two species lists, for comparison with len(rows)\n", "sum(len(genes) for genes in (mouse_genes, human_genes))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "15879\n", "15879\n" ] } ], "source": [ "# equal numbers mean no mouse label is repeated\n", "print(len(mouse_genes))\n", "print(len(set(mouse_genes)))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(15879, 8617)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['gex-mouse'] = df['gex-ini'].loc[mouse_genes, :]\n", "df['gex-mouse'].shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filtering out mouse cells" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAADuCAYAAAA3IMxxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGExJREFUeJzt3Xt0nPV95/H3d26SRndbsmRLlm8yNgYbg4Up2UKgWxKT1IXTNglOtjSnNC7bZfdsNns2Ibt7dtvTS7qb7mnT5JT1JpQmbQ0E0j0x9QYIJNBwnGLZXIzxFYNt+SbJlkfXkTTSb/+YkRldPZZm9GgefV7ncGAejZ7n6zn4o5++z+/3e8w5h4iI+FfA6wJERCS3FPQiIj6noBcR8TkFvYiIzynoRUR8TkEvIuJzCnoREZ9T0IuI+JyCXkTE50JeFwBQVVXlli9f7nUZIiJ5Zd++fe3Oueqrvc/ToDezrcDWxsZGmpubvSxFRCTvmNnJTN7naevGObfLObe9vLzcyzJERHxNPXoREZ/zNOjNbKuZ7YjFYl6WISLia2rdiIj4nFo3IiI+p9aNiIjP5X3rpjM+mMWKRET8J69bN3/x42P8wh+/xODQsNeliIjMWXkd9KtrSugdGOLAGbV+REQmk9c9+l9YuZBIKMCz+1qyXJmIiH/kdY9+QXGEX71pCc/ub+FcrC/L1YmI+ENet24AHv7oKoaGHX/58nGvSxERmZPyPugbF5WwdcMSntp7mtffv+R1OSIic05e9+hH/P59N1BXUcQXn3qT9u7+LFUnIuIPed2jH1FaGOYvt91MW1c/D39vHz39iSxVKCKS//K+dTPipqUV/M9PbWD/qQ7+7c43GBp2XpckIjIn+CboAe7bWMeXPraGlw+38u+fepOEFlKJiMyNRwlm0+/dtYru/gR/9dP3CAeMr3/qJgIB87osERHPzJlHCWbxnHx5y1rCAeMbLx/nUu8A3/zsLZQU+O5nmohIRnxxM3YiX7znOr76ibW8erSNTz+2h/OxeNavISKSD3zVo09nZmy/cxXf+fytnLzYw/3feo0Tbd1elyUiMut8G/Qj7l6ziO8//BF6+hN89R8O6AatiMw7vg96gHVLyvjPn7yen5+4xL978g36E0NelyQiMmvmzR3KBzY30NLRxzd/cpyAvcU3P3uL1yWJiMyKeRP0AP/x42soCAX4sxePsnnFBzx4+3KvSxIRyTlf7HVzLX7v7kb+5dpF/OFzhzjeqpuzIuJ/vp1eOZlgwPjT39hAKGj81U/fm7Xrioh4ZV7cjB2rqqSAX76+hj3vtXtdiohIzs3LoAdYU1vK2Viczvig16WIiOTUvA36jUsrANirh5WIiM/N26C/paGSYMD40TvnvS5FRCSn5m3QF0WCfGL9Yn5ypNXrUkREcmreBj3ATfXltHcP0Nalxw+KiH/N66BfWV0MwKlLvR5XIiKSO/NuwVS6+sooAKcu9XhyfRGR2TDvFkylW1FVTDhoHDmvFbIi4l/zunUTDgZYVV3CkfOdXpciIpIz8zroIblw6sj5Lq/LEBHJGQV9aoVsrE8rZEXEn+Z90K+tLQXg2AWN6kXEn+Z90K+sKgHQlsUi4lvzPuiXLkhOsTwXi3tciYhIbsz7oA8GjGgkSHd/wutSRERyYt4HPUBJQYjuuIJeRPxJQQ+UFYXp6tesGxHxJwU9UF4U5mL3gNdliIjkRE6C3syKzazZzH4lF+fPtmULo9rYTER8K6OgN7PHzazVzN4Zc3yLmR0xs+Nm9pW0L30ZeDqbhebSsgXFnIvF6R1Qn15E/CfTEf0TwJb0A2YWBL4F3AusA7aZ2Tozuwd4F8ibJ3rUVxYB0NqpfelFxH9CmbzJOfeqmS0fc3gzcNw5dwLAzJ4E7gNKgGKS4d9nZrudc8NZqzgHIqHkz7vBoTldpojItGQU9JOoA06nvW4BbnPOPQJgZp8H2icLeTPbDmwHaGhomEE
ZMxcOJoN+QEEvIj6Us1k3zrknnHPPTfH1Hc65JudcU3V1da7KyEgkZAAMDjlP6xARyYWZBP0ZYGna6/rUsYx5/YSpESMjerVuRMSPZhL0e4HVZrbCzCLAA8APr+UEXj9haoSCXkT8LNPplTuBPcAaM2sxs4eccwngEeB54BDwtHPuYO5KzZ2icBCAnv4hjysREcm+TGfdbJvk+G5g93QvbmZbga2NjY3TPUVWVEYjAFzu1epYEfGfef1w8BFFkeSIvm9QI3oR8R/tdQNEU0HfO6CgFxH/8TTo58qsm5EevYJeRPxIrRsgEDAKwwHiat2IiA+pdZNSHAlpUzMR8SW1blKKIkG1bkTEl9S6SYlGgvQp6EXEh9S6SSkvCtOhefQi4kMK+pTyojCdferRi4j/qEefUlIQortfQS8i/qMefUppYZiu+KDXZYiIZJ1aNyklhSG64gmc0570IuIvCvqUBdEIiWFHZ1ztGxHxFwV9SmVxcgfLWK/aNyLiLwr6lNLC5I7NnerTi4jPaNZNioJeRPxKs25SiiPJoNfqWBHxG7VuUrQnvYj4lYI+pTC1J71G9CLiNwr6lJERfY+2KhYRn1HQp1RGIxSGA5zp6PO6FBGRrNKsm5RAwKiMRoj1adaNiPiLZt2kKS8KK+hFxHfUuklTVhjWPHoR8R0FfZqyopD2pBcR31HQpykrVOtGRPxHQZ+mrEitGxHxHwV9Gj0gXET8SEGfJhoJkhh2DCSGvS5FRCRrFPRpilIbm/VqdayI+IgWTKUp1sZmIuJDWjCVprggOaLv0uMERcRH1LpJU11aAEBbV7/HlYiIZI+CPk1NWSEArV1xjysREckeBX2aRakR/flOBb2I+IeCPk1xQYiKaJgP2nu8LkVEJGsU9GOsrCrm9CXtSS8i/qGgH6O0MKx59CLiKwr6MYrCQc2jFxFfUdCPEY0E6RtU0IuIfyjoxyjSxmYi4jMK+jGKwhrRi4i/KOjHiEaSPXrnnNeliIhkRdaD3syuN7PHzOwZM/vX2T5/ro3sYNnVr5k3IuIPGQW9mT1uZq1m9s6Y41vM7IiZHTezrwA45w455x4GPg38i+yXnFulhcmgj6tPLyI+kemI/glgS/oBMwsC3wLuBdYB28xsXeprvwr8I7A7a5XOkoJQ8iPp18NHRMQnMgp659yrwKUxhzcDx51zJ5xzA8CTwH2p9//QOXcv8LnJzmlm282s2cya29rapld9DhSEk3vS9yc0ohcRfwjN4HvrgNNpr1uA28zsLuDXgAKmGNE753YAOwCamprmzJ1PjehFxG9mEvQTcs79FPhpts87WxT0IuI3M5l1cwZYmva6PnUsY3PtUYIAkZGgH1TQi4g/zCTo9wKrzWyFmUWAB4AfXssJ5tqjBAEKQske/cCQgl5E/CHT6ZU7gT3AGjNrMbOHnHMJ4BHgeeAQ8LRz7uC1XHwujuivtG60OlZEfCKjHr1zbtskx3czgymUzrldwK6mpqYvTPcc2aYevYj4jbZAGONK60ZBLyI+4WnQz8nWTVgjehHxF0+Dfm7ejB0JevXoRcQf1LoZI6IevYj4jIJ+jJEevR4nKCJ+oR79GMGAUVYYItY74HUpIiJZoR79BArCQS2YEhHfUOtmApFgQD16EfENBf0ECsIKehHxD/XoJxAJBrRgSkR8Qz36CRSEgxrRi4hvqHUzgYJggAEtmBIRn1DQTyBaEKSzL+F1GSIiWaGgn8CCaITO+KDXZYiIZIVuxk6grChMZ5+CXkT8QTdjJ1BaGKIzntDGZiLiC2rdTGBldTEAR893e1yJiMjMKegnsLQyCsAl7XcjIj6goJ/A4ooiAN5v04heRPKfgn4CdRVFLCyO8HbL3LpJLCIyHQr6SWyoL+etlstelyEiMmOaXjmJ62pKOd3Rx6C2KxaRPKfplZNYUVXMQGKY87G416WIiMyIWjeTWFldAsDPjrd7XImIyMwo6Cdx6/JKCkIBDp3r9LoUEZEZUdBPwsy
4YUmZgl5E8p6Cfgq3NFTy1umYtkIQkbymoJ/CxoYKBoaGOXZBC6dEJH8p6Kewvi45G+jlw60eVyIiMn0K+iksW1jMnddV872fn2R42HldjojItGjB1FXcd9MS2rr6eVc3ZUUkT2nB1FXccV0VAM8fPO9xJSIi06PWzVUsKi1kQ305j73yHhe7+70uR0TkminoM/BfPrmOwSHHztdPeV2KiMg1U9BnYPOKBdxYV8bXXziqOfUikncU9BnatrkBgK/9v8MeVyIicm0U9Bn67OYG1teV89evfcALujErInlEQZ8hM2PHg5sA2P69fbxytM3jikREMqOgvwaLy4t46Usfpbq0gN96/HX+/p91c1ZE5j4F/TVaVV3CMw/fTmU0zFf/4QCP/uAACT2FSkTmMAX9NCxbWMxLX7qLO1ZXsfP1U9zxP37CU3s1uheRuUlBP00LiiN876Hb+N07V3IuFufLzx7ggR17OHh27m7nICLzkznn/WZdTU1Nrrm52esypi0+OMR/ePpNdh9IzsbZtKyS371zJR+7odbjykTEz8xsn3Ou6arvy0XQm9n9wCeBMuA7zrkXpnp/vgf9iFMXe/nzl47yg/1nAKiIhrl/Yx0P/eIKli6IelydiPhN1oPezB4HfgVodc7dmHZ8C/AXQBD4tnPua2lfqwS+7px7aKpz+yXoR8T6Bvnr197n+80tnLncB8Da2lLuv7mOz93WQGlh2OMKRcQPchH0dwLdwHdHgt7MgsBR4B6gBdgLbHPOvZv6+p8Bf+ec2z/Vuf0W9OneONXBk6+f5pn9LQyl9rRfVV3M1puWcNuKhdxYV6bgF5FpyUnrxsyWA8+lBf3twH93zn089frR1Fu/lvrnRefcjyc513ZgO0BDQ8OmkydPZlxHPhoedrx46AIvHLzAPx44S3zwwymZyxZGuam+glsaKrh1xQLWLS7DzDysVkTywWwF/W8AW5xzv5N6/ZvAbSRH+b9FcoT/pnPusanO6+cR/USccxxv7ab5ZAdvt8TYf7KDIxe6rnw9GDBuXlrBpuWVXF9bxvr6chaVFmjkLyKjZBr0oVxc3Dn3DeAbV3ufmW0FtjY2NuaijDnLzFhdU8rqmlK2bU4e6+lPcKy1m1eOtPHG6Q6aP+ig+WTHqO+rLStkSUUh6+vK2VBfwaKyAtbUllIZjRAOaqasiExspkF/Blia9ro+dSwjzrldwK6mpqYvzLCOvFdcEGLj0go2Lq0AkqP+tq5+Dp7t5HRHL0fOd9HS0ceBMzH2n7oMjG51ra0tpa6iiJXVxSxdEGVFVTGLywtpXFTqwZ9GROaSmQb9XmC1ma0gGfAPAJ+dcVWCmbGorJBFZYXjvtbaGed8Z5wDZ2KcaOuhpaOXg2c72XPiIi8dbh1zHgiYsbSyiBVVxZgZa2pLWVxeSDBg3NFYjRmUFoaoiEZm648nIrMo46A3s53AXUCVmbUA/8059x0zewR4nuT0ysedcwev4ZzzsnUzUyM/ADbUV4z7WkfPAO+1ddPRO8jbLZcBePP0ZWJ9g1zsGeDtlhgvj/lhMGJlVTHBQPIm8KZllVREI5jBR6+rJpQ6vqa2VPcKRPKMVsbOM/2JIbrjCRzw2vF2BhLDxPoG2XeyAzNIDDleOdqGAwYSE2/WVhlNBn1NWfJ5ugB1FVFurCsDoLa8kBuWzN0Hvov4hacrY6+Vgn7u2neyg96BBAAHz3ZyNrUA7MCZGOdjcQDOpf6dLhgwAgaGcf3iUhoWFl/5mgHr68qpryy6cmxFdTFra8ty+CcR8Z+8CPq01s0Xjh075lkdMjM9/Qnea+sG4HLvID8/cREAB/zkcOuo3wwc8H57z4TnCaQtHQgFA3xsXc2o9QSbVyygprRg1PdsqK+gtnz8fQyR+SAvgn6ERvTzS3t3P+3d/Vden4vFeeNkByP/J56+1MvbZ2KMHBhyjpMXeyc9X1E4OOp1ZTTMR9dUjzpWV1HErcsXjDoWDgXYWF9BIKDFaZKfPJ1
HLzKVqpICqko+HJmvrS3j7jWLpvyec7E+LnYPjDp2+HwXR9MWmgEcaIlxor2blw59eMO5taufyZjBkvKiUcecc2y5cTElheP/etx7Yy0lBaOPLyiOUFygv0oyd2lEL77XFR/kQMvo5wQ44Lm3zzI4NPr//8u9g/z40IVrvsbtKxeOO+ZwfGL9YsqLPpylVFIQ4pfWLtIWF5IVedG6UY9e8snPjrVzvnP0jefD5zpHtZlGdMYHOXx+9G8b6aKR4LhjtWWF/NLa8b/ZlBWFuW/jkgnPs7i8iEhIq6Lnq7wI+hEa0YsftXbG6e5PXHmdGHY8u7+F4eHxf+deO36RkxfH36TuGRia8hoBg19cXT3uuHPJ3yZqJ1hw17ioRM9H8AkFvYgP9CeGeOHgBQYneAD9Px1rn3AGU3xwaMrfJgCWLxwf9OFggAdvX5a8cZFmVVUxH2msusbKZTYo6EXmsdOXernYMzDu+MGzMfa+f2lsp+nKdhqTKQwHMD78AWAGN9aV85FVo+9NREIBPnfbsnEzodReyo28CHr16EXmjo6eARJj2koXOuPseuvsuB8MT75+is54gkytqSkdN+W1YUGU+2+uG3XMQDOYrkFeBP0IjehF8stkufHMvpZx01n//p9PcWnMbxd9g5Pfe2hcVMLdE/xQ+FTT0lHHQgEjNM+351bQi8icFesd5AdvtJAYM731b/Z8MO6HQu8UN6Q//5HllKXWO6yuKWXrTRPPTvIrBb2I+EKsb5DvN58etebhg/Yenmo+DSTvF4zEWChgBMxwOIYd/Ju7VrEwbXHejXXlbFpWOav155KCXkTmjdauOH+75ySDqXsMLR197Hrr7ITvTZ9xtLi8iIfvWgVAcSTIpmWVebWYLS+CXjdjRSRXevoT9KdtqHfwbIxn97VcubH843cvjFunsKq6mFXVJQTM+J07Vlz57/Lo3HwGQ14E/QiN6EVktvUnhnj3bCfDDvoGhvij3YdwzuEcHBmzh9L6unLu27iEssIwn2qqnzOjfgW9iMg0/dOxNt5rTW69/bUfHSY++OFvBhXRMPWVRYSDAf7o/vVEI0EWlRUQjcz+tFAFvYhIFgwNO3oHEvQODPEHz71LfGCIY63dnLo0euvsR+9dS2E4yGduXUphePxeRrmgoBcRyRHnHC++e4GegQT/942zvHK07crXCkIBPrF+MX/ya+uJBAM5fd6Bgl5EZJbEB4cYHBrm0R8c4EfvnL+ywrisMMRj/2oTTcsX5GQbiLwIes26ERG/6elPsPP1U+x8/RTvpfYPWlldzJ9/ZiMb6iuyeq28CPoRGtGLiN8MDg1z8Gwnv/3E3iurff/Pg03cs64ma9fINOjn90YRIiI5Eg4G2Li0gv3/9R7+9NfXA/CF7zbz+M/en/VaFPQiIjn2mVsb+PaDyYH3Hzz3LgfPxq7yHdmloBcRmQW/vK6G//2bmwD49GN7JnzSWK4o6EVEZsnHb6hlQ305PQNDfPtnJ2btugp6EZFZ9N3f3gzA/3rx6KxdU0EvIjKLKqIR7l5TTXxwmLdbLs/KNRX0IiKz7D9tWQskn741GxT0IiKz7PrFZQAMpG2jnEueBr2ZbTWzHbHY7E41EhHx2vq6cl491j4r1/I06J1zu5xz28vLy70sQ0Rk1hVFgrR391/9jVmg1o2IiAfW1JQC0DuQyPm1FPQiIh64rqYEgO5+Bb2IiC+VFCafSNXTP3SVd86cgl5ExAPFqUcPXu4dyPm1FPQiIh4IpB4w3qGgFxHxp4UlEQDMcveowREKehERD4QCyfgdGsr9LpYKehERDwRTDw1PzMJ2xQp6EREPjAT9kIJeRMSfrgT9LDy3O+tBb2Yrzew7ZvZMts8tIuIXoSsj+txvbJZR0JvZ42bWambvjDm+xcyOmNlxM/sKgHPuhHPuoVwUKyLiF1d69HPoZuwTwJb0A2YWBL4F3AusA7aZ2bqsVici4lM1ZYW88MU7+dgNtTm/VkZB75x7Fbg05vB
m4HhqBD8APAncl+X6RER8KRIKcF1NKeVF4ZxfayY9+jrgdNrrFqDOzBaa2WPAzWb26GTfbGbbzazZzJrb2tpmUIaIiEwllO0TOucuAg9n8L4dwA6Apqam3DepRETmqZmM6M8AS9Ne16eOZUxPmBIRyb2ZBP1eYLWZrTCzCPAA8MNrOYGeMCUiknuZTq/cCewB1phZi5k95JxLAI8AzwOHgKedcwdzV6qIiExHRj1659y2SY7vBnZP9+JmthXY2tjYON1TiIjIVejh4CIiPmduFvZZuGoRZm3AyWl+exXQnsVy/ESfzeT02UxMn8vk5uJns8w5V321N3ka9COtG2CXc27XNM/R7Jxrym5l/qDPZnL6bCamz2Vy+fzZZH0e/bVIhfu0Al5ERDKjbYpFRHzOD0G/w+sC5jB9NpPTZzMxfS6Ty9vPZk7cjBURkdzxw4heRESmoKAXEfE5Bb2IiM8p6EVEfE5BLyLic/8f7Ir1rvF0xBAAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# summed mouse-gene counts per column of df['gex-mouse'], largest first\n", "ser_bc_mouse_sum = df['gex-mouse'].sum(axis=0).sort_values(ascending=False)\n", "# `kind` must be passed by keyword: recent pandas versions reject positional Series.plot() arguments\n", "ser_bc_mouse_sum.plot(kind='line', logy=True)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7688,)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser_bc_mouse_sum[ser_bc_mouse_sum<125].shape" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7339,)" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser_bc_mouse_sum[ser_bc_mouse_sum<100].shape" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2317,)" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser_bc_mouse_sum[ser_bc_mouse_sum<50].shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Human cells appear to have <100 mouse gene counts" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "# columns at or above this summed mouse count are labelled mouse, all others human\n", "MOUSE_COUNT_CUTOFF = 100\n", "mouse_cells = ser_bc_mouse_sum[ser_bc_mouse_sum >= MOUSE_COUNT_CUTOFF].index.tolist()\n", "human_cells = ser_bc_mouse_sum[ser_bc_mouse_sum < MOUSE_COUNT_CUTOFF].index.tolist()" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number of mouse cells 1278\n", "number of human cells 7339\n" ] } ], "source": [ "print('number of mouse cells', len(mouse_cells))\n", "print('number of human cells', len(human_cells))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### ~15% Mouse Cells" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.14831147731228966" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# mouse fraction derived from the two lists, so it follows the cutoff instead of hard-coded totals\n", "len(mouse_cells) / (len(mouse_cells) + len(human_cells))" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Make DataFrame with Only Human Cells and Human Genes" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20400, 7339)" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# one label-based lookup (rows = human_genes, columns = human_cells) instead of chained indexing\n", "df['gex-human'] = df['gex-ini'].loc[human_genes, human_cells]\n", "df['gex-human'].shape" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "# the path is defined once so this write and the read-back in the next cell cannot drift apart\n", "filename = '../data/CITE-seq_data/GSE100866_CBMC_8K_13AB_10X-RNA_umi_HUMAN.csv.gz'\n", "df['gex-human'].to_csv(filename, compression='gzip')" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "# reload the file that was just written to check the round trip\n", "df['test'] = pd.read_csv(filename, compression='gzip', index_col=0)" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20400, 7339)" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "assert df['test'].shape == df['gex-human'].shape, 'round-tripped table changed shape'\n", "df['test'].shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Save dictionary of cell types" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "# column labels assigned to each species earlier in the notebook\n", "cell_dict = {\n", "    'mouse-cells': mouse_cells,\n", "    'human-cells': human_cells,\n", "}" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "json_scripts.save_to_json(filename='../data/CITE-seq_data/human_mouse_cell_dictionary.json', inst_dict=cell_dict)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }