{ "cells": [ { "cell_type": "markdown", "id": "7ce520f5", "metadata": {}, "source": [ "Accession: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE133344" ] }, { "cell_type": "code", "execution_count": 1, "id": "a0446d55", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/icb/yuge.ji/miniconda3/envs/py37/lib/python3.7/site-packages/scanpy/_settings.py:447: DeprecationWarning: `set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`\n", " IPython.display.set_matplotlib_formats(*ipython_format)\n", "2022-02-10 04:13:15.865398: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", "2022-02-10 04:13:15.865497: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "scanpy==1.8.2 anndata==0.7.6 umap==0.5.2 numpy==1.20.3 scipy==1.5.3 pandas==1.3.4 scikit-learn==1.0.2 statsmodels==0.11.1 python-igraph==0.8.3 leidenalg==0.8.3 pynndescent==0.5.5\n" ] }, { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 111445 × 33694\n", " obs: 'guide_identity', 'read_count', 'UMI_count', 'coverage', 'gemgroup', 'good_coverage', 'number_of_cells', 'guide_AHR', 'guide_ARID1A', 'guide_ARRDC3', 'guide_ATL1', 'guide_BAK1', 'guide_BCL2L11', 'guide_BCORL1', 'guide_BPGM', 'guide_C19orf26', 'guide_C3orf72', 'guide_CBFA2T3', 'guide_CBL', 'guide_CDKN1A', 'guide_CDKN1B', 'guide_CDKN1C', 'guide_CEBPA', 'guide_CEBPB', 'guide_CEBPE', 'guide_CELF2', 'guide_CITED1', 'guide_CKS1B', 'guide_CLDN6', 'guide_CNN1', 'guide_CNNM4', 'guide_COL1A1', 'guide_COL2A1', 'guide_CSRNP1', 'guide_DLX2', 'guide_DUSP9', 'guide_EGR1', 'guide_ELMSAN1', 'guide_ETS2', 'guide_FEV', 'guide_FOSB', 'guide_FOXA1', 
'guide_FOXA3', 'guide_FOXF1', 'guide_FOXL2', 'guide_FOXO4', 'guide_GLB1L2', 'guide_HES7', 'guide_HK2', 'guide_HNF4A', 'guide_HOXA13', 'guide_HOXB9', 'guide_HOXC13', 'guide_IER5L', 'guide_IGDCC3', 'guide_IKZF3', 'guide_IRF1', 'guide_ISL2', 'guide_JUN', 'guide_KIAA1804', 'guide_KIF18B', 'guide_KIF2C', 'guide_KLF1', 'guide_KMT2A', 'guide_LHX1', 'guide_LYL1', 'guide_MAML2', 'guide_MAP2K3', 'guide_MAP2K6', 'guide_MAP4K3', 'guide_MAP4K5', 'guide_MAP7D1', 'guide_MAPK1', 'guide_MEIS1', 'guide_MIDN', 'guide_NCL', 'guide_NIT1', 'guide_OSR2', 'guide_PLK4', 'guide_POU3F2', 'guide_PRDM1', 'guide_PRTG', 'guide_PTPN1', 'guide_PTPN12', 'guide_PTPN13', 'guide_PTPN9', 'guide_RHOXF2', 'guide_RREB1', 'guide_RUNX1T1', 'guide_S1PR2', 'guide_SAMD1', 'guide_SET', 'guide_SGK1', 'guide_SLC38A2', 'guide_SLC4A1', 'guide_SLC6A9', 'guide_SNAI1', 'guide_SPI1', 'guide_STIL', 'guide_TBX2', 'guide_TBX3', 'guide_TGFBR2', 'guide_TMSB4X', 'guide_TP73', 'guide_TSC22D1', 'guide_UBASH3A', 'guide_UBASH3B', 'guide_ZBTB1', 'guide_ZBTB10', 'guide_ZBTB25', 'guide_ZC3HAV1', 'guide_ZNF318', 'guide_ids'\n", " var: 'gene_symbols'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# %load block0_load.py\n", "# Imports first, then dataset settings, then the registry check and the load.\n", "import numpy as np\n", "import pandas as pd\n", "import scanpy as sc\n", "\n", "# Dataset identifiers and flags.\n", "author_year = 'Norman_2019'\n", "is_counts = True\n", "var_genes = 'gene_symbols'\n", "doi = '10.1126/science.aax4438'\n", "\n", "sc.set_figure_params(dpi=100, frameon=False)\n", "sc.logging.print_header()\n", "\n", "# verify: fail with a readable message if the DOI is not in the DOI column of ../personal.csv\n", "assert doi in pd.read_csv('../personal.csv').DOI.values, f'DOI {doi} is not listed in ../personal.csv'\n", "\n", "# Read the raw AnnData file; the bare name on the last line displays its summary.\n", "adata = sc.read(f'{author_year}_raw.h5ad')\n", "adata" ] }, { "cell_type": "code", "execution_count": 2, "id": "3db2a34c", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
| \n", " | count | \n", "unique | \n", "top | \n", "freq | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| guide_identity | \n", "111445 | \n", "290 | \n", "NegCtrl10_NegCtrl0__NegCtrl10_NegCtrl0 | \n", "3532 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| read_count | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1186.607268 | \n", "858.953144 | \n", "1.0 | \n", "576.0 | \n", "1074.0 | \n", "1622.0 | \n", "28684.0 | \n", "
| UMI_count | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "56.654036 | \n", "37.853848 | \n", "1.0 | \n", "29.0 | \n", "54.0 | \n", "78.0 | \n", "1809.0 | \n", "
| coverage | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "20.349891 | \n", "6.730287 | \n", "1.0 | \n", "17.066667 | \n", "19.839506 | \n", "22.76 | \n", "114.0 | \n", "
| gemgroup | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.477733 | \n", "2.326137 | \n", "1.0 | \n", "2.0 | \n", "4.0 | \n", "7.0 | \n", "8.0 | \n", "
| good_coverage | \n", "111445 | \n", "2 | \n", "True | \n", "104507 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| number_of_cells | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.072242 | \n", "0.474838 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "10.0 | \n", "
| guide_AHR | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0118 | \n", "0.107983 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_ARID1A | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.002082 | \n", "0.045579 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_ARRDC3 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.004442 | \n", "0.066498 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_ATL1 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.003401 | \n", "0.058217 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_BAK1 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.02114 | \n", "0.143853 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_BCL2L11 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.010947 | \n", "0.104055 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_BCORL1 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.004783 | \n", "0.068991 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_BPGM | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.010077 | \n", "0.099876 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_C19orf26 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.010741 | \n", "0.10308 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_C3orf72 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.002898 | \n", "0.053758 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_CBFA2T3 | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.01022 | \n", "0.100578 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_CBL | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.020764 | \n", "0.142593 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
| guide_CDKN1A | \n", "111445.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.005563 | \n", "0.07438 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "