{ "cells": [ { "cell_type": "markdown", "id": "8c81ef75-6ba8-4ad7-b023-defcd7b22887", "metadata": {}, "source": [ "# Pseudobulk data with fraction and median non-zero\n", "\n", "For use in plotting, we'll compute \"pseudobulk\" summary statistics for gene expression based on cell metadata grouping.\n", "\n", "Note that this approach doesn't use the careful pseudobulk approaches implemented in packages like `scran` - we're instead taking the fraction of cells in which gene expression was detected (`pct_exp`) and the median of expression in those non-zero cells (`median_expression`).\n", "\n", "For use in our visualization tools, we'll group by each of our cell type labeling levels (AIFI_L1, _L2, and _L3), as well as by both cell type and the originating, age-related cohort for these cells." ] }, { "cell_type": "markdown", "id": "397d8491-1006-400e-b28a-f78b98f9a632", "metadata": {}, "source": [ "## Load packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "2b94a539-8d6b-483b-93c1-d6e5ac4893f1", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "warnings.simplefilter(action='ignore', category=RuntimeWarning)\n", "\n", "from datetime import date\n", "import hisepy\n", "import numpy as np\n", "import os\n", "import pandas as pd\n", "import pickle\n", "import re\n", "import scanpy as sc\n", "import scipy.sparse as scs" ] }, { "cell_type": "code", "execution_count": 2, "id": "3f700a4a-402c-483f-a13d-7376b31f29a7", "metadata": {}, "outputs": [], "source": [ "if not os.path.exists('output'):\n", " os.mkdir('output')" ] }, { "cell_type": "code", "execution_count": 3, "id": "c5fdbbf7-7c03-468e-b443-dde31833164e", "metadata": {}, "outputs": [], "source": [ "out_files = []" ] }, { "cell_type": "markdown", "id": "61b7c6d3-fbd8-4e52-b53b-cc06ebb0f175", "metadata": {}, "source": [ "## Helper functions" ] }, { "cell_type": "code", "execution_count": 4, "id": "b1863da8-23d2-4b2f-80aa-fa5948276a23", "metadata": {}, "outputs": [], "source": [ "def read_adata_uuid(h5ad_uuid):\n", " h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)\n", " if not os.path.isdir(h5ad_path):\n", " hise_res = hisepy.cache_files([h5ad_uuid])\n", " h5ad_filename = os.listdir(h5ad_path)[0]\n", " h5ad_file = '{p}/{f}'.format(p = h5ad_path, f = h5ad_filename)\n", " adata = sc.read_h5ad(h5ad_file)\n", " return adata" ] }, { "cell_type": "code", "execution_count": 5, "id": "90ce8067-33c3-4ce2-81a3-9bd69acf4566", "metadata": {}, "outputs": [], "source": [ "def sparse_nz_median(sparse_mat, feat_names):\n", " # transpose the matrix so values for each gene are together in memory\n", " sparse_mat = sparse_mat.transpose().tocsr()\n", " \n", " # get dimensions for calculations\n", " n_cells = sparse_mat.shape[1]\n", " \n", " # compute the fraction non-zero per gene (row = axis 1)\n", " pct_exp = sparse_mat.getnnz(axis = 1) / n_cells\n", " \n", " # split non-zero values from the matrix directly\n", " split_values = np.split(sparse_mat.data, sparse_mat.indptr)\n", " \n", " # remove first and last entry in split - indptr starts with 0 and ends with the last value\n", " # so the first and last split entries are not meaningful\n", " split_values = split_values[1:-1].copy()\n", " \n", " # compute medians for every split\n", " median_expression = [np.median(x) for x in split_values]\n", "\n", " # assemble a DataFrame with results\n", " res_df = pd.DataFrame({\n", " 'gene': feat_names,\n", " 'pct_exp': pct_exp,\n", " 'median_expression': median_expression\n", " })\n", " \n", " # replace missing values from splits without any values\n", " # these were not detected so median expression is 0\n", " res_df['median_expression'] = res_df['median_expression'].fillna(0)\n", " \n", " return res_df" ] }, { "cell_type": "markdown", "id": "3e82c8a8-1ea0-404d-af6b-daaadf67a5c2", "metadata": {}, "source": [ "## Retrieve atlas data from HISE" ] }, { "cell_type": "code", "execution_count": 6, "id": "ea3fa84a-b9df-412d-895c-aad9023fb147", "metadata": {}, "outputs": [], "source": [ "h5ad_uuid = '5b3a0cc8-1811-4126-90c7-e9cdd41459fd'" ] }, { "cell_type": "code", "execution_count": 7, "id": "afab8559-2f7d-4757-a1c3-7ae0c57a5589", "metadata": {}, "outputs": [], "source": [ "adata = read_adata_uuid(h5ad_uuid)" ] }, { "cell_type": "code", "execution_count": 8, "id": "7d596218-3a23-4075-82c7-d3426d7c7327", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1823666, 33538)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata.shape" ] }, { "cell_type": "code", "execution_count": 9, "id": "2e62ac5a-0bc9-4dee-a6f3-e006418e54e7", "metadata": {}, "outputs": [], "source": [ "obs = adata.obs" ] }, { "cell_type": "markdown", "id": "fd3a8bc6-2f61-4362-9ef7-7f2aee408b59", "metadata": {}, "source": [ "### Summarized expression by cell type\n", "\n", "For our UMAP viewer, we plot plot expression at each level of cell type resolution" ] }, { "cell_type": "code", "execution_count": 10, "id": "3e7d51f6-a36e-49e4-85bf-f49ae89b5f29", "metadata": {}, "outputs": [], "source": [ "levels = ['AIFI_L1', 'AIFI_L2', 'AIFI_L3']" ] }, { "cell_type": "code", "execution_count": 11, "id": "b8948d6b-c3fb-406c-b492-d34698d9a6c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">>> AIFI_L1\n", ">>> AIFI_L2\n", ">>> AIFI_L3\n", "CPU times: user 7min 2s, sys: 1min 29s, total: 8min 31s\n", "Wall time: 8min 31s\n" ] } ], "source": [ "%%time\n", "level_results = {}\n", "\n", "for level in levels:\n", " print('>>> {l}'.format(l = level))\n", " \n", " level_obs = obs.groupby(level)\n", " \n", " level_dict = {}\n", " for cell_type, type_obs in level_obs:\n", " type_bc = type_obs['barcodes']\n", " type_adata = adata[adata.obs['barcodes'].isin(type_bc)]\n", " \n", " type_res = sparse_nz_median(type_adata.X, type_adata.var_names.tolist())\n", " \n", " level_dict[cell_type] = type_res\n", " \n", " level_results[level] = level_dict" ] }, { "cell_type": "markdown", "id": "fc4f03d7-c118-41e3-bc33-f5dd9e3913e6", "metadata": {}, "source": [ "### Save assembled results for each level" ] }, { "cell_type": "code", "execution_count": 12, "id": "10e79d57-408b-4fc4-b704-bca00e342c23", "metadata": {}, "outputs": [], "source": [ "level_combined = {}\n", "for level in levels:\n", " all_level = pd.concat(level_results[level])\n", " all_level = all_level.reset_index(drop = False, names = [level, 'row_num'])\n", " all_level = all_level.drop('row_num', axis = 1)\n", " level_combined[level] = all_level" ] }, { "cell_type": "code", "execution_count": 13, "id": "719f8a2c-4a65-4aa5-a29a-8534ea025405", "metadata": {}, "outputs": [], "source": [ "for level, df in level_combined.items():\n", " out_csv = 'output/pbmc-ref_{l}_nz-pct-median_pseudobulk_{d}.csv'.format(l = level, d = date.today())\n", " df.to_csv(out_csv)\n", " out_files.append(out_csv)\n", " out_parquet = 'output/pbmc-ref_{l}_nz-pct-median_pseudobulk_{d}.parquet'.format(l = level, d = date.today())\n", " df.to_parquet(out_parquet)\n", " out_files.append(out_parquet)" ] }, { "cell_type": "markdown", "id": "51da7b67-9f3e-45c8-b2de-eb0368452ec6", "metadata": {}, "source": [ "### Restructure for use in visualization apps" ] }, { "cell_type": "code", "execution_count": 14, "id": "b6231894-f906-4a06-a059-e3022bdca9cd", "metadata": {}, "outputs": [], "source": [ "gene_results = {}\n", "for level in levels:\n", " all_res = pd.concat(level_results[level])\n", " split_res = all_res.groupby('gene')\n", " split_dict = {}\n", " for gene, df in split_res:\n", " df = df.reset_index(drop = False)\n", " df = df.drop('level_1', axis = 1)\n", " df = df.rename({'level_0': 'obs_group'}, axis = 1)\n", " split_dict[gene] = df\n", " \n", " gene_results[level] = split_dict" ] }, { "cell_type": "markdown", "id": "b2b6f5a3-754b-49fc-8ce5-3764482f5cba", "metadata": {}, "source": [ "Check that a gene or two look accurate" ] }, { "cell_type": "code", "execution_count": 15, "id": "52457d5c-cd46-4903-b399-64dfbe73868b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
obs_groupgenepct_expmedian_expression
0B cellCD79A0.9883652.726919
1DCCD79A0.0799590.725910
2ErythrocyteCD79A0.0139261.230366
3ILCCD79A0.1208531.526564
4MonocyteCD79A0.0173430.818575
5NK cellCD79A0.0198091.260133
6PlateletCD79A0.0111352.423751
7Progenitor cellCD79A0.1507210.677979
8T cellCD79A0.0344701.093967
\n", "
" ], "text/plain": [ " obs_group gene pct_exp median_expression\n", "0 B cell CD79A 0.988365 2.726919\n", "1 DC CD79A 0.079959 0.725910\n", "2 Erythrocyte CD79A 0.013926 1.230366\n", "3 ILC CD79A 0.120853 1.526564\n", "4 Monocyte CD79A 0.017343 0.818575\n", "5 NK cell CD79A 0.019809 1.260133\n", "6 Platelet CD79A 0.011135 2.423751\n", "7 Progenitor cell CD79A 0.150721 0.677979\n", "8 T cell CD79A 0.034470 1.093967" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gene_results['AIFI_L1']['CD79A']" ] }, { "cell_type": "code", "execution_count": 16, "id": "90062555-5530-415a-9027-13a7b251246f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
obs_groupgenepct_expmedian_expression
0ASDCFCN10.0555560.595097
1CD8aaFCN10.0352101.298121
2CD14 monocyteFCN10.9962543.177890
3CD16 monocyteFCN10.9339072.601019
4CD56bright NK cellFCN10.0488471.143188
5CD56dim NK cellFCN10.0491481.298217
6DN T cellFCN10.0540661.043778
7Effector B cellFCN10.0522550.973340
8ErythrocyteFCN10.0709551.265223
9ILCFCN10.0462091.412349
10Intermediate monocyteFCN10.9971593.025013
11MAITFCN10.0485191.132813
12Memory B cellFCN10.0482811.045450
13Memory CD4 T cellFCN10.0497161.050017
14Memory CD8 T cellFCN10.0484061.210995
15Naive B cellFCN10.0410791.271097
16Naive CD4 T cellFCN10.0478381.111647
17Naive CD8 T cellFCN10.0440301.023475
18Plasma cellFCN10.1032080.351336
19PlateletFCN10.0536512.532171
20Progenitor cellFCN10.0537350.564411
21Proliferating NK cellFCN10.0541590.986173
22Proliferating T cellFCN10.0646550.571081
23Transitional B cellFCN10.0426921.196304
24TregFCN10.0486861.184835
25cDC1FCN10.0837750.453340
26cDC2FCN10.7964882.183593
27gdTFCN10.0464151.194536
28pDCFCN10.0761830.794052
\n", "
" ], "text/plain": [ " obs_group gene pct_exp median_expression\n", "0 ASDC FCN1 0.055556 0.595097\n", "1 CD8aa FCN1 0.035210 1.298121\n", "2 CD14 monocyte FCN1 0.996254 3.177890\n", "3 CD16 monocyte FCN1 0.933907 2.601019\n", "4 CD56bright NK cell FCN1 0.048847 1.143188\n", "5 CD56dim NK cell FCN1 0.049148 1.298217\n", "6 DN T cell FCN1 0.054066 1.043778\n", "7 Effector B cell FCN1 0.052255 0.973340\n", "8 Erythrocyte FCN1 0.070955 1.265223\n", "9 ILC FCN1 0.046209 1.412349\n", "10 Intermediate monocyte FCN1 0.997159 3.025013\n", "11 MAIT FCN1 0.048519 1.132813\n", "12 Memory B cell FCN1 0.048281 1.045450\n", "13 Memory CD4 T cell FCN1 0.049716 1.050017\n", "14 Memory CD8 T cell FCN1 0.048406 1.210995\n", "15 Naive B cell FCN1 0.041079 1.271097\n", "16 Naive CD4 T cell FCN1 0.047838 1.111647\n", "17 Naive CD8 T cell FCN1 0.044030 1.023475\n", "18 Plasma cell FCN1 0.103208 0.351336\n", "19 Platelet FCN1 0.053651 2.532171\n", "20 Progenitor cell FCN1 0.053735 0.564411\n", "21 Proliferating NK cell FCN1 0.054159 0.986173\n", "22 Proliferating T cell FCN1 0.064655 0.571081\n", "23 Transitional B cell FCN1 0.042692 1.196304\n", "24 Treg FCN1 0.048686 1.184835\n", "25 cDC1 FCN1 0.083775 0.453340\n", "26 cDC2 FCN1 0.796488 2.183593\n", "27 gdT FCN1 0.046415 1.194536\n", "28 pDC FCN1 0.076183 0.794052" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gene_results['AIFI_L2']['FCN1']" ] }, { "cell_type": "code", "execution_count": 17, "id": "e7922203-d2b9-4cda-973d-19b784ec82db", "metadata": {}, "outputs": [], "source": [ "type_pkl = 'output/pbmc-ref_type_nz-pct-median_pseudobulk_{d}.pkl'.format(d = date.today())\n", "with open(type_pkl, 'wb') as out_file:\n", " pickle.dump(gene_results, out_file)\n", "out_files.append(type_pkl)" ] }, { "cell_type": "markdown", "id": "966f7334-61fd-44f5-a4ea-0a1c8b00dda5", "metadata": {}, "source": [ "### Summarized expression by cell type and cohort\n", "\n", "For our Gene Expression Summary viewer, we plot plot expression at each level of cell type resolution partitioned by each cohort in the reference." ] }, { "cell_type": "code", "execution_count": 18, "id": "d75604e4-1e6e-4cc3-8cb6-87d10ba420e7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">>> AIFI_L1\n", ">>> AIFI_L2\n", ">>> AIFI_L3\n", "CPU times: user 10min 21s, sys: 1min 33s, total: 11min 55s\n", "Wall time: 11min 55s\n" ] } ], "source": [ "%%time\n", "level_results = {}\n", "\n", "for level in levels:\n", " print('>>> {l}'.format(l = level))\n", " \n", " level_obs = obs.groupby(['cohort.cohortGuid', level])\n", " \n", " level_dict = {}\n", " for group_tuple, type_obs in level_obs:\n", " cohort = group_tuple[0]\n", " cell_type = group_tuple[1]\n", " c_t = '{c}_{t}'.format(c = cohort, t = cell_type)\n", " \n", " type_bc = type_obs['barcodes']\n", " type_adata = adata[adata.obs['barcodes'].isin(type_bc)]\n", " \n", " type_res = sparse_nz_median(type_adata.X, type_adata.var_names.tolist())\n", " \n", " type_res['cohort'] = [cohort] * type_res.shape[0]\n", " level_dict[c_t] = type_res\n", " \n", " level_results[level] = level_dict" ] }, { "cell_type": "markdown", "id": "51c7c28a-edac-4156-b9c5-91c113f7785f", "metadata": {}, "source": [ "### Save assembled results for each level" ] }, { "cell_type": "code", "execution_count": 19, "id": "95e62b72-bf5c-45d9-9050-8e6a583d775f", "metadata": {}, "outputs": [], "source": [ "level_combined = {}\n", "for level in levels:\n", " all_level = pd.concat(level_results[level])\n", " all_level = all_level.reset_index(drop = False, names = ['group', 'row_num'])\n", " all_level[level] = [re.sub('.+_', '', x) for x in all_level['group']]\n", " all_level = all_level.drop(['group','row_num'], axis = 1)\n", " all_level = all_level[['cohort', level, 'gene', 'pct_exp', 'median_expression']]\n", " level_combined[level] = all_level" ] }, { "cell_type": "code", "execution_count": 20, "id": "7a667426-fdf0-4491-b29b-30857455606b", "metadata": {}, "outputs": [], "source": [ "for level, df in level_combined.items():\n", " out_csv = 'output/pbmc-ref_cohort-{l}_nz-pct-median_pseudobulk_{d}.csv'.format(l = level, d = date.today())\n", " df.to_csv(out_csv)\n", " out_files.append(out_csv)\n", " out_parquet = 'output/pbmc-ref_cohort-{l}_nz-pct-median_pseudobulk_{d}.parquet'.format(l = level, d = date.today())\n", " df.to_parquet(out_parquet)\n", " out_files.append(out_parquet)" ] }, { "cell_type": "markdown", "id": "4ffa5da8-6c12-40d9-86e8-99483e2af242", "metadata": {}, "source": [ "### Restructure for use in visualization apps" ] }, { "cell_type": "code", "execution_count": 21, "id": "63db080d-2695-454b-a814-b4c348c3e657", "metadata": {}, "outputs": [], "source": [ "gene_results = {}\n", "for level in levels:\n", " all_res = pd.concat(level_results[level])\n", " split_res = all_res.groupby('gene')\n", " \n", " split_dict = {}\n", " for gene, df in split_res:\n", " df = df.reset_index(drop = False)\n", " df = df.drop('level_1', axis = 1)\n", " df['obs_group'] = [re.sub('.+_', '', x) for x in df['level_0']]\n", " df = df.drop('level_0', axis = 1)\n", " df = df[['obs_group', 'gene', 'pct_exp', 'median_expression', 'cohort']]\n", " split_dict[gene] = df\n", " gene_results[level] = split_dict" ] }, { "cell_type": "markdown", "id": "5780d759-12e8-476b-8663-2ece3ef0de69", "metadata": {}, "source": [ "Check that a gene or two look accurate" ] }, { "cell_type": "code", "execution_count": 22, "id": "f5837cd7-20c3-4faa-ac69-8783c9811f80", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
obs_groupgenepct_expmedian_expressioncohort
0B cellCD79A0.9908222.746202BR1
1DCCD79A0.0877090.714458BR1
2ErythrocyteCD79A0.0200002.958432BR1
3ILCCD79A0.1194441.399105BR1
4MonocyteCD79A0.0183080.798966BR1
5NK cellCD79A0.0206231.232686BR1
6PlateletCD79A0.0115202.419073BR1
7Progenitor cellCD79A0.1578950.659168BR1
8T cellCD79A0.0416341.068730BR1
9B cellCD79A0.9880872.716038BR2
10DCCD79A0.0728280.710909BR2
11ErythrocyteCD79A0.0079371.125242BR2
12ILCCD79A0.1131391.515654BR2
13MonocyteCD79A0.0166890.844489BR2
14NK cellCD79A0.0192501.271189BR2
15PlateletCD79A0.0095802.481542BR2
16Progenitor cellCD79A0.1286640.651074BR2
17T cellCD79A0.0235651.097746BR2
18B cellCD79A0.9834922.706248UP1
19DCCD79A0.0771810.821561UP1
20ErythrocyteCD79A0.0156251.007309UP1
21ILCCD79A0.1333331.595389UP1
22MonocyteCD79A0.0158710.794617UP1
23NK cellCD79A0.0188341.324052UP1
24PlateletCD79A0.0156862.211980UP1
25Progenitor cellCD79A0.1988300.790927UP1
26T cellCD79A0.0422971.163522UP1
\n", "
" ], "text/plain": [ " obs_group gene pct_exp median_expression cohort\n", "0 B cell CD79A 0.990822 2.746202 BR1\n", "1 DC CD79A 0.087709 0.714458 BR1\n", "2 Erythrocyte CD79A 0.020000 2.958432 BR1\n", "3 ILC CD79A 0.119444 1.399105 BR1\n", "4 Monocyte CD79A 0.018308 0.798966 BR1\n", "5 NK cell CD79A 0.020623 1.232686 BR1\n", "6 Platelet CD79A 0.011520 2.419073 BR1\n", "7 Progenitor cell CD79A 0.157895 0.659168 BR1\n", "8 T cell CD79A 0.041634 1.068730 BR1\n", "9 B cell CD79A 0.988087 2.716038 BR2\n", "10 DC CD79A 0.072828 0.710909 BR2\n", "11 Erythrocyte CD79A 0.007937 1.125242 BR2\n", "12 ILC CD79A 0.113139 1.515654 BR2\n", "13 Monocyte CD79A 0.016689 0.844489 BR2\n", "14 NK cell CD79A 0.019250 1.271189 BR2\n", "15 Platelet CD79A 0.009580 2.481542 BR2\n", "16 Progenitor cell CD79A 0.128664 0.651074 BR2\n", "17 T cell CD79A 0.023565 1.097746 BR2\n", "18 B cell CD79A 0.983492 2.706248 UP1\n", "19 DC CD79A 0.077181 0.821561 UP1\n", "20 Erythrocyte CD79A 0.015625 1.007309 UP1\n", "21 ILC CD79A 0.133333 1.595389 UP1\n", "22 Monocyte CD79A 0.015871 0.794617 UP1\n", "23 NK cell CD79A 0.018834 1.324052 UP1\n", "24 Platelet CD79A 0.015686 2.211980 UP1\n", "25 Progenitor cell CD79A 0.198830 0.790927 UP1\n", "26 T cell CD79A 0.042297 1.163522 UP1" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gene_results['AIFI_L1']['CD79A']" ] }, { "cell_type": "code", "execution_count": 23, "id": "3940006b-a4f2-4649-8473-796df0869020", "metadata": {}, "outputs": [], "source": [ "cohort_type_pkl = 'output/pbmc-ref_cohort-type_nz-pct-median_pseudobulk_{d}.pkl'.format(d = date.today())\n", "with open(cohort_type_pkl, 'wb') as out_file:\n", " pickle.dump(gene_results, out_file)\n", "out_files.append(cohort_type_pkl)" ] }, { "cell_type": "markdown", "id": "b8f709bf-9afb-41e2-8b92-a329673b17ea", "metadata": {}, "source": [ "## Upload Results to HISE\n", "\n", "Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE to use for downstream analysis steps." ] }, { "cell_type": "code", "execution_count": 24, "id": "061fa34e-ac82-4057-bf67-9e0e88e73b34", "metadata": {}, "outputs": [], "source": [ "study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'\n", "title = 'PBMC Reference Pseudobulk Frac Medians {d}'.format(d = date.today())" ] }, { "cell_type": "code", "execution_count": 25, "id": "3dc6573c-64e4-4db4-82f1-465e540c35ed", "metadata": {}, "outputs": [], "source": [ "in_files = [h5ad_uuid]" ] }, { "cell_type": "code", "execution_count": 26, "id": "42e769f2-9eec-44e4-8f14-9cd46f10d6a8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['5b3a0cc8-1811-4126-90c7-e9cdd41459fd']" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "in_files" ] }, { "cell_type": "code", "execution_count": 27, "id": "b57ec44f-0376-49d4-80ae-be37f9900195", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_type_nz-pct-median_pseudobulk_2024-03-27.pkl',\n", " 'output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_cohort-type_nz-pct-median_pseudobulk_2024-03-27.pkl']" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out_files" ] }, { "cell_type": "code", "execution_count": 28, "id": "a0034b02-9445-424e-9d68-997d1405aeae", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv\n", "output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet\n", "output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv\n", "output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet\n", "output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv\n", "output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet\n", "output/pbmc-ref_type_nz-pct-median_pseudobulk_2024-03-27.pkl\n", "output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv\n", "output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet\n", "output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv\n", "output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet\n", "output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv\n", "output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet\n", "output/pbmc-ref_cohort-type_nz-pct-median_pseudobulk_2024-03-27.pkl\n", "you are trying to upload file_ids... ['output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv', 'output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet', 'output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv', 'output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet', 'output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv', 'output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet', 'output/pbmc-ref_type_nz-pct-median_pseudobulk_2024-03-27.pkl', 'output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv', 'output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet', 'output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv', 'output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet', 'output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv', 'output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet', 'output/pbmc-ref_cohort-type_nz-pct-median_pseudobulk_2024-03-27.pkl']. Do you truly want to proceed?\n" ] }, { "name": "stdin", "output_type": "stream", "text": [ "(y/n) y\n" ] }, { "data": { "text/plain": [ "{'trace_id': '78f8b150-b1a4-48af-a87c-648dcd2d7ad5',\n", " 'files': ['output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_type_nz-pct-median_pseudobulk_2024-03-27.pkl',\n", " 'output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_cohort-AIFI_L1_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_cohort-AIFI_L2_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.csv',\n", " 'output/pbmc-ref_cohort-AIFI_L3_nz-pct-median_pseudobulk_2024-03-27.parquet',\n", " 'output/pbmc-ref_cohort-type_nz-pct-median_pseudobulk_2024-03-27.pkl']}" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hisepy.upload.upload_files(\n", " files = out_files,\n", " study_space_id = study_space_uuid,\n", " title = title,\n", " input_file_ids = in_files\n", ")" ] }, { "cell_type": "code", "execution_count": 29, "id": "ed42395a-e9b8-4987-b0fa-b953afbb3b5e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "Click to view session information\n", "
\n",
       "-----\n",
       "anndata             0.10.3\n",
       "hisepy              0.3.0\n",
       "numpy               1.24.0\n",
       "pandas              2.1.4\n",
       "scanpy              1.9.6\n",
       "scipy               1.11.4\n",
       "session_info        1.0.0\n",
       "-----\n",
       "
\n", "
\n", "Click to view modules imported as dependencies\n", "
\n",
       "PIL                         10.0.1\n",
       "anyio                       NA\n",
       "arrow                       1.3.0\n",
       "asttokens                   NA\n",
       "attr                        23.2.0\n",
       "attrs                       23.2.0\n",
       "babel                       2.14.0\n",
       "beatrix_jupyterlab          NA\n",
       "brotli                      NA\n",
       "cachetools                  5.3.1\n",
       "certifi                     2024.02.02\n",
       "cffi                        1.16.0\n",
       "charset_normalizer          3.3.2\n",
       "cloudpickle                 2.2.1\n",
       "colorama                    0.4.6\n",
       "comm                        0.1.4\n",
       "cryptography                41.0.7\n",
       "cycler                      0.10.0\n",
       "cython_runtime              NA\n",
       "dateutil                    2.8.2\n",
       "db_dtypes                   1.1.1\n",
       "debugpy                     1.8.0\n",
       "decorator                   5.1.1\n",
       "defusedxml                  0.7.1\n",
       "deprecated                  1.2.14\n",
       "exceptiongroup              1.2.0\n",
       "executing                   2.0.1\n",
       "fastjsonschema              NA\n",
       "fqdn                        NA\n",
       "google                      NA\n",
       "greenlet                    2.0.2\n",
       "grpc                        1.58.0\n",
       "grpc_status                 NA\n",
       "h5py                        3.10.0\n",
       "idna                        3.6\n",
       "igraph                      0.10.8\n",
       "importlib_metadata          NA\n",
       "ipykernel                   6.28.0\n",
       "ipython_genutils            0.2.0\n",
       "ipywidgets                  8.1.1\n",
       "isoduration                 NA\n",
       "jedi                        0.19.1\n",
       "jinja2                      3.1.2\n",
       "joblib                      1.3.2\n",
       "json5                       NA\n",
       "jsonpointer                 2.4\n",
       "jsonschema                  4.20.0\n",
       "jsonschema_specifications   NA\n",
       "jupyter_events              0.9.0\n",
       "jupyter_server              2.12.1\n",
       "jupyterlab_server           2.25.2\n",
       "jwt                         2.8.0\n",
       "kiwisolver                  1.4.5\n",
       "leidenalg                   0.10.1\n",
       "llvmlite                    0.41.0\n",
       "lz4                         4.3.2\n",
       "markupsafe                  2.1.3\n",
       "matplotlib                  3.8.0\n",
       "matplotlib_inline           0.1.6\n",
       "mpl_toolkits                NA\n",
       "mpmath                      1.3.0\n",
       "natsort                     8.4.0\n",
       "nbformat                    5.9.2\n",
       "numba                       0.58.0\n",
       "opentelemetry               NA\n",
       "overrides                   NA\n",
       "packaging                   23.2\n",
       "parso                       0.8.3\n",
       "pexpect                     4.8.0\n",
       "pickleshare                 0.7.5\n",
       "pkg_resources               NA\n",
       "platformdirs                4.1.0\n",
       "plotly                      5.18.0\n",
       "prettytable                 3.9.0\n",
       "prometheus_client           NA\n",
       "prompt_toolkit              3.0.42\n",
       "proto                       NA\n",
       "psutil                      NA\n",
       "ptyprocess                  0.7.0\n",
       "pure_eval                   0.2.2\n",
       "pyarrow                     13.0.0\n",
       "pydev_ipython               NA\n",
       "pydevconsole                NA\n",
       "pydevd                      2.9.5\n",
       "pydevd_file_utils           NA\n",
       "pydevd_plugins              NA\n",
       "pydevd_tracing              NA\n",
       "pygments                    2.17.2\n",
       "pynvml                      NA\n",
       "pyparsing                   3.1.1\n",
       "pyreadr                     0.5.0\n",
       "pythonjsonlogger            NA\n",
       "pytz                        2023.3.post1\n",
       "referencing                 NA\n",
       "requests                    2.31.0\n",
       "rfc3339_validator           0.1.4\n",
       "rfc3986_validator           0.1.1\n",
       "rpds                        NA\n",
       "send2trash                  NA\n",
       "shapely                     1.8.5.post1\n",
       "six                         1.16.0\n",
       "sklearn                     1.3.2\n",
       "sniffio                     1.3.0\n",
       "socks                       1.7.1\n",
       "sql                         NA\n",
       "sqlalchemy                  2.0.21\n",
       "sqlparse                    0.4.4\n",
       "stack_data                  0.6.2\n",
       "sympy                       1.12\n",
       "termcolor                   NA\n",
       "texttable                   1.7.0\n",
       "threadpoolctl               3.2.0\n",
       "torch                       2.1.2+cu121\n",
       "torchgen                    NA\n",
       "tornado                     6.3.3\n",
       "tqdm                        4.66.1\n",
       "traitlets                   5.9.0\n",
       "typing_extensions           NA\n",
       "uri_template                NA\n",
       "urllib3                     1.26.18\n",
       "wcwidth                     0.2.12\n",
       "webcolors                   1.13\n",
       "websocket                   1.7.0\n",
       "wrapt                       1.15.0\n",
       "xarray                      2023.12.0\n",
       "yaml                        6.0.1\n",
       "zipp                        NA\n",
       "zmq                         25.1.2\n",
       "zoneinfo                    NA\n",
       "zstandard                   0.22.0\n",
       "
\n", "
\n", "
\n",
       "-----\n",
       "IPython             8.19.0\n",
       "jupyter_client      8.6.0\n",
       "jupyter_core        5.6.1\n",
       "jupyterlab          4.1.2\n",
       "notebook            6.5.4\n",
       "-----\n",
       "Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]\n",
       "Linux-5.15.0-1054-gcp-x86_64-with-glibc2.31\n",
       "-----\n",
       "Session information updated at 2024-03-27 23:45\n",
       "
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import session_info\n", "session_info.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "738c6b8e-2739-42aa-8eed-131f090f06f0", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }