{ "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import scanpy as sc\n", "import ToppCellPy as tp\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. load data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This PBMC dataset is from [Wilk et al. 2020](https://www.nature.com/articles/s41591-020-0944-y), which includes two conditions (COVID-19 vs. healthy) and 20 cell types. It can be downloaded from [here](https://cellxgene.cziscience.com/collections/a72afd53-ab92-4511-88da-252fb0e26b9a)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "output_dir = \"/Users/jinmr2/Dropbox/Code/data/toppcell_test/\" # define output folder\n", "adata = sc.read(\"/Users/jinmr2/Dropbox/Code/data/COVID-19_data_normalized_Blish.h5ad\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 44721 × 26361\n", " obs: 'Admission', 'ClusterID', 'DPS', 'DTF', 'Donor_full', 'HLA1', 'IFN1', 'Sex', 'Status', 'Ventilated', 'cell_type_coarse', 'cell_type_fine', 'nCount_RNA', 'nCount_SCT', 'nFeature_RNA', 'nFeature_SCT', 'percent_mt', 'percent_rpl', 'percent_rps', 'percent_rrna', 'seurat_clusters', 'singler'\n", " var: 'Selected', 'sct_detection_rate', 'sct_gmean', 'sct_residual_mean', 'sct_residual_variance', 'sct_variable', 'sct_variance'\n", " uns: 'assay', 'authors', 'disease', 'log1p', 'organism', 'preprint', 'short_name', 'tissue'\n", " obsm: 'X_pca', 'X_umap'\n", " varm: 'pca_feature_loadings'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Admission | \n", "ClusterID | \n", "DPS | \n", "DTF | \n", "Donor_full | \n", "HLA1 | \n", "IFN1 | \n", "Sex | \n", "Status | \n", "Ventilated | \n", "... | \n", "nCount_RNA | \n", "nCount_SCT | \n", "nFeature_RNA | \n", "nFeature_SCT | \n", "percent_mt | \n", "percent_rpl | \n", "percent_rps | \n", "percent_rrna | \n", "seurat_clusters | \n", "singler | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
covid_555_1.1 | \n", "ICU | \n", "13 | \n", "9 | \n", "9 | \n", "C1 A | \n", "-0.026053 | \n", "0.015625 | \n", "M | \n", "COVID | \n", "NonVent | \n", "... | \n", "1222.0 | \n", "1682.0 | \n", "125 | \n", "126 | \n", "1.309329 | \n", "0.245499 | \n", "0.081833 | \n", "46.644844 | \n", "12 | \n", "Erythroblast | \n", "
covid_555_1.2 | \n", "ICU | \n", "10 | \n", "9 | \n", "9 | \n", "C1 A | \n", "0.023525 | \n", "-0.000322 | \n", "M | \n", "COVID | \n", "NonVent | \n", "... | \n", "1099.0 | \n", "1700.0 | \n", "160 | \n", "160 | \n", "13.102820 | \n", "0.363967 | \n", "0.363967 | \n", "58.780708 | \n", "9 | \n", "B_cell | \n", "
covid_555_1.3 | \n", "ICU | \n", "19 | \n", "9 | \n", "9 | \n", "C1 A | \n", "-0.044271 | \n", "0.086385 | \n", "M | \n", "COVID | \n", "NonVent | \n", "... | \n", "1055.0 | \n", "1661.0 | \n", "212 | \n", "213 | \n", "2.938389 | \n", "0.947867 | \n", "0.663507 | \n", "55.829384 | \n", "18 | \n", "B_cell | \n", "
covid_555_1.7 | \n", "ICU | \n", "10 | \n", "9 | \n", "9 | \n", "C1 A | \n", "-0.038040 | \n", "0.022590 | \n", "M | \n", "COVID | \n", "NonVent | \n", "... | \n", "2411.0 | \n", "1971.0 | \n", "312 | \n", "312 | \n", "10.908337 | \n", "0.165906 | \n", "0.041477 | \n", "67.399422 | \n", "9 | \n", "B_cell | \n", "
covid_555_1.8 | \n", "ICU | \n", "22 | \n", "9 | \n", "9 | \n", "C1 A | \n", "-0.043605 | \n", "0.010739 | \n", "M | \n", "COVID | \n", "NonVent | \n", "... | \n", "2276.0 | \n", "1948.0 | \n", "336 | \n", "336 | \n", "11.203866 | \n", "0.307557 | \n", "0.263620 | \n", "67.355011 | \n", "21 | \n", "B_cell | \n", "
5 rows × 22 columns
\n", "