{ "cells": [ { "cell_type": "markdown", "id": "415ef5ba-6365-45dd-a773-2533147a5292", "metadata": {}, "source": [ "# Assign Other annotations\n", "\n", "To assemble our annotations, we'll read our Other cell data and assign our expert annotations to those clusters. We'll then inspect the annotations in our UMAP projections, and output final labels for these cells." ] }, { "cell_type": "code", "execution_count": 1, "id": "495a9db1-a4d7-4e43-8ea6-ad084a053ead", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "warnings.simplefilter(action='ignore', category=RuntimeWarning)\n", "\n", "from datetime import date\n", "import hisepy\n", "import os\n", "import pandas as pd\n", "import scanpy as sc" ] }, { "cell_type": "markdown", "id": "8598de84-b0ac-4aab-9c95-96212b047b0a", "metadata": {}, "source": [ "### Helper function\n", "\n", "This function makes it easy to pull csv files stored in HISE as a pandas DataFrame" ] }, { "cell_type": "code", "execution_count": 2, "id": "d3d4e8b3-a84d-41d0-8c96-e2f5ffd8b49e", "metadata": {}, "outputs": [], "source": [ "def read_csv_uuid(csv_uuid):\n", "    \"\"\"Read a CSV file stored in HISE into a pandas DataFrame.\n", "\n", "    The file is looked up in /home/jupyter/cache/<csv_uuid>; if that directory\n", "    does not exist, the file is first retrieved with hisepy.reader.cache_files().\n", "\n", "    Parameters\n", "    ----------\n", "    csv_uuid : str\n", "        HISE file UUID of the CSV file.\n", "\n", "    Returns\n", "    -------\n", "    pandas.DataFrame\n", "        Contents of the CSV file, with its first column used as the index.\n", "\n", "    Raises\n", "    ------\n", "    FileNotFoundError\n", "        If the cache directory contains no files.\n", "    \"\"\"\n", "    csv_path = '/home/jupyter/cache/{u}'.format(u = csv_uuid)\n", "    if not os.path.isdir(csv_path):\n", "        hisepy.reader.cache_files([csv_uuid])\n", "    # os.listdir() returns entries in arbitrary order; sort so the pick is reproducible\n", "    csv_filenames = sorted(os.listdir(csv_path))\n", "    if not csv_filenames:\n", "        raise FileNotFoundError('No files found in cache directory {p}'.format(p = csv_path))\n", "    csv_file = os.path.join(csv_path, csv_filenames[0])\n", "    return pd.read_csv(csv_file, index_col = 0)" ] }, { "cell_type": "markdown", "id": "dabe8bed-84ac-40fd-abc3-d14ab50976f1", "metadata": {}, "source": [ "## Read subclustering results from HISE" ] }, { "cell_type": "code", "execution_count": 3, "id": "1ac32f7d-7976-4047-a801-7aaa5411d531", "metadata": {}, "outputs": [], "source": [ "cell_class = 'other'" ] }, { "cell_type": "code", "execution_count": 4, "id": "07502d9a-52d4-43d9-aa0e-8fcdddf368c0", "metadata": {}, "outputs": [], "source": [ "h5ad_uuid = 
'1eb6ca8c-b8ed-4968-b515-c954497441dc'\n", "h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)" ] }, { "cell_type": "code", "execution_count": 5, "id": "ff125aad-00e8-4023-89b9-d193b9bd641f", "metadata": {}, "outputs": [], "source": [ "if not os.path.isdir(h5ad_path):\n", "    hise_res = hisepy.reader.cache_files([h5ad_uuid])" ] }, { "cell_type": "code", "execution_count": 6, "id": "91498e93-dbf8-45ca-83b8-519ac3f2dcd1", "metadata": {}, "outputs": [], "source": [ "# os.listdir() order is arbitrary; sort so the same file is picked on every run\n", "h5ad_filename = sorted(os.listdir(h5ad_path))[0]\n", "h5ad_file = os.path.join(h5ad_path, h5ad_filename)" ] }, { "cell_type": "code", "execution_count": 7, "id": "e034e17a-1fc9-437a-ba03-45bebad9f408", "metadata": { "tags": [] }, "outputs": [], "source": [ "adata = sc.read_h5ad(h5ad_file)" ] }, { "cell_type": "code", "execution_count": 8, "id": "d6f953b5-bc12-4c4b-85c1-25b65b7a2328", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 24603 × 4305\n", " obs: 'barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id', 'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads', 'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id', 'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'subject.race', 'subject.ethnicity', 'subject.birthYear', 'sample.visitName', 'sample.drawDate', 'file.id', 'subject.cmv', 'subject.bmi', 'celltypist.low', 'seurat.l1', 'seurat.l1.score', 'seurat.l2', 'seurat.l2.score', 'seurat.l2.5', 'seurat.l2.5.score', 'seurat.l3', 'seurat.l3.score', 'predicted_doublet', 'doublet_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'leiden', 'leiden_resolution_1', 'leiden_resolution_1.5', 'leiden_resolution_2'\n", " var: 'mito', 'n_cells_by_counts', 
'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'\n", " uns: 'celltypist.low_colors', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'seurat.l2.5_colors', 'umap'\n", " obsm: 'X_pca', 'X_pca_harmony', 'X_umap'\n", " varm: 'PCs'\n", " obsp: 'connectivities', 'distances'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata" ] }, { "cell_type": "markdown", "id": "dbd08775-b6ee-407d-959f-eef3aecb0a2c", "metadata": {}, "source": [ "## Read annotations" ] }, { "cell_type": "code", "execution_count": 9, "id": "c4c31233-55ab-41c1-9331-341260bad17f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "downloading fileID: 03817547-e2e3-412b-b36e-538d2bc74c87\n", "Files have been successfully downloaded!\n" ] } ], "source": [ "# Expert annotations for the clusters, stored as a CSV file in HISE\n", "anno_uuid = '03817547-e2e3-412b-b36e-538d2bc74c87'\n", "anno = read_csv_uuid(anno_uuid)" ] }, { "cell_type": "code", "execution_count": 10, "id": "60fd9af0-cfcb-4c56-ba29-946b1bbaa57a", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | leiden_resolution_1 | \n", "AIFI_L3 | \n", "AIFI_L1 | \n", "AIFI_L1_Final | \n", "AIFI_L2 | \n", "AIFI_L2_Final | \n", "AIFI_L3_Final | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "Platelet | \n", "Platelet | \n", "Yes | \n", "Platelet | \n", "Yes | \n", "Yes | \n", "
1 | \n", "1 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
2 | \n", "2 | \n", "Platelet | \n", "Platelet | \n", "Yes | \n", "Platelet | \n", "Yes | \n", "Yes | \n", "
3 | \n", "3 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
4 | \n", "4 | \n", "Monocytes+Erythocytes doublet | \n", "Monocytes+Erythocytes doublet | \n", "Yes | \n", "Monocytes+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
\n", " | barcodes | \n", "batch_id | \n", "cell_name | \n", "cell_uuid | \n", "chip_id | \n", "hto_barcode | \n", "hto_category | \n", "n_genes | \n", "n_mito_umis | \n", "n_reads | \n", "... | \n", "leiden | \n", "leiden_resolution_1 | \n", "leiden_resolution_1.5 | \n", "leiden_resolution_2 | \n", "AIFI_L3 | \n", "AIFI_L1 | \n", "AIFI_L1_Final | \n", "AIFI_L2 | \n", "AIFI_L2_Final | \n", "AIFI_L3_Final | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
barcodes | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
cf7341b848b611ea8957bafe6d70929e | \n", "cf7341b848b611ea8957bafe6d70929e | \n", "B001 | \n", "chalky_guileless_waterdogs | \n", "cf7341b848b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1482 | \n", "176 | \n", "17043 | \n", "... | \n", "22 | \n", "3 | \n", "2 | \n", "13 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
cf7400bc48b611ea8957bafe6d70929e | \n", "cf7400bc48b611ea8957bafe6d70929e | \n", "B001 | \n", "illadvised_cogitative_bluejay | \n", "cf7400bc48b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1089 | \n", "62 | \n", "12523 | \n", "... | \n", "22 | \n", "4 | \n", "12 | \n", "14 | \n", "Monocytes+Erythocytes doublet | \n", "Monocytes+Erythocytes doublet | \n", "Yes | \n", "Monocytes+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
cf763b8448b611ea8957bafe6d70929e | \n", "cf763b8448b611ea8957bafe6d70929e | \n", "B001 | \n", "tricksome_sombrous_cats | \n", "cf763b8448b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1146 | \n", "89 | \n", "11185 | \n", "... | \n", "21 | \n", "1 | \n", "1 | \n", "1 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
cf76501a48b611ea8957bafe6d70929e | \n", "cf76501a48b611ea8957bafe6d70929e | \n", "B001 | \n", "pensive_queasy_tadpole | \n", "cf76501a48b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1994 | \n", "108 | \n", "22387 | \n", "... | \n", "29 | \n", "9 | \n", "11 | \n", "8 | \n", "CMP cell | \n", "Progenitor cell | \n", "Yes | \n", "Progenitor cell | \n", "Yes | \n", "Yes | \n", "
cf83c0ba48b611ea8957bafe6d70929e | \n", "cf83c0ba48b611ea8957bafe6d70929e | \n", "B001 | \n", "sodalite_foreign_puffin | \n", "cf83c0ba48b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "841 | \n", "44 | \n", "7236 | \n", "... | \n", "21 | \n", "1 | \n", "1 | \n", "19 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "
5 rows × 59 columns
\n", "\n", " | barcodes | \n", "batch_id | \n", "cell_name | \n", "cell_uuid | \n", "chip_id | \n", "hto_barcode | \n", "hto_category | \n", "n_genes | \n", "n_mito_umis | \n", "n_reads | \n", "... | \n", "leiden_resolution_1.5 | \n", "leiden_resolution_2 | \n", "AIFI_L3 | \n", "AIFI_L1 | \n", "AIFI_L1_Final | \n", "AIFI_L2 | \n", "AIFI_L2_Final | \n", "AIFI_L3_Final | \n", "umap_1 | \n", "umap_2 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "cf7341b848b611ea8957bafe6d70929e | \n", "B001 | \n", "chalky_guileless_waterdogs | \n", "cf7341b848b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1482 | \n", "176 | \n", "17043 | \n", "... | \n", "2 | \n", "13 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "-2.371720 | \n", "1.574610 | \n", "
1 | \n", "cf7400bc48b611ea8957bafe6d70929e | \n", "B001 | \n", "illadvised_cogitative_bluejay | \n", "cf7400bc48b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1089 | \n", "62 | \n", "12523 | \n", "... | \n", "12 | \n", "14 | \n", "Monocytes+Erythocytes doublet | \n", "Monocytes+Erythocytes doublet | \n", "Yes | \n", "Monocytes+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "7.942333 | \n", "-5.333513 | \n", "
2 | \n", "cf763b8448b611ea8957bafe6d70929e | \n", "B001 | \n", "tricksome_sombrous_cats | \n", "cf763b8448b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1146 | \n", "89 | \n", "11185 | \n", "... | \n", "1 | \n", "1 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "-0.506895 | \n", "2.742198 | \n", "
3 | \n", "cf76501a48b611ea8957bafe6d70929e | \n", "B001 | \n", "pensive_queasy_tadpole | \n", "cf76501a48b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "1994 | \n", "108 | \n", "22387 | \n", "... | \n", "11 | \n", "8 | \n", "CMP cell | \n", "Progenitor cell | \n", "Yes | \n", "Progenitor cell | \n", "Yes | \n", "Yes | \n", "12.154108 | \n", "9.127453 | \n", "
4 | \n", "cf83c0ba48b611ea8957bafe6d70929e | \n", "B001 | \n", "sodalite_foreign_puffin | \n", "cf83c0ba48b611ea8957bafe6d70929e | \n", "B001-P1C1 | \n", "TGATGGCCTATTGGG | \n", "singlet | \n", "841 | \n", "44 | \n", "7236 | \n", "... | \n", "1 | \n", "19 | \n", "T+Erythocytes doublet | \n", "T+Erythocytes doublet | \n", "Yes | \n", "T+Erythocytes doublet | \n", "Yes | \n", "Yes | \n", "-0.071208 | \n", "2.975142 | \n", "
5 rows × 61 columns
\n", "\n", "-----\n", "anndata 0.10.3\n", "hisepy 0.3.0\n", "pandas 2.1.4\n", "scanpy 1.9.6\n", "session_info 1.0.0\n", "-----\n", "\n", "
\n", "PIL 10.0.1\n", "anyio NA\n", "arrow 1.3.0\n", "asttokens NA\n", "attr 23.2.0\n", "attrs 23.2.0\n", "babel 2.14.0\n", "beatrix_jupyterlab NA\n", "brotli NA\n", "cachetools 5.3.1\n", "certifi 2023.11.17\n", "cffi 1.16.0\n", "charset_normalizer 3.3.2\n", "cloudpickle 2.2.1\n", "colorama 0.4.6\n", "comm 0.1.4\n", "cryptography 41.0.7\n", "cycler 0.10.0\n", "cython_runtime NA\n", "dateutil 2.8.2\n", "db_dtypes 1.1.1\n", "debugpy 1.8.0\n", "decorator 5.1.1\n", "defusedxml 0.7.1\n", "deprecated 1.2.14\n", "exceptiongroup 1.2.0\n", "executing 2.0.1\n", "fastjsonschema NA\n", "fqdn NA\n", "google NA\n", "greenlet 2.0.2\n", "grpc 1.58.0\n", "grpc_status NA\n", "h5py 3.10.0\n", "idna 3.6\n", "igraph 0.10.8\n", "importlib_metadata NA\n", "ipykernel 6.28.0\n", "ipython_genutils 0.2.0\n", "ipywidgets 8.1.1\n", "isoduration NA\n", "jedi 0.19.1\n", "jinja2 3.1.2\n", "joblib 1.3.2\n", "json5 NA\n", "jsonpointer 2.4\n", "jsonschema 4.20.0\n", "jsonschema_specifications NA\n", "jupyter_events 0.9.0\n", "jupyter_server 2.12.1\n", "jupyterlab_server 2.25.2\n", "jwt 2.8.0\n", "kiwisolver 1.4.5\n", "leidenalg 0.10.1\n", "llvmlite 0.41.0\n", "lz4 4.3.2\n", "markupsafe 2.1.3\n", "matplotlib 3.8.0\n", "matplotlib_inline 0.1.6\n", "mpl_toolkits NA\n", "mpmath 1.3.0\n", "natsort 8.4.0\n", "nbformat 5.9.2\n", "numba 0.58.0\n", "numpy 1.24.0\n", "opentelemetry NA\n", "overrides NA\n", "packaging 23.2\n", "parso 0.8.3\n", "pexpect 4.8.0\n", "pickleshare 0.7.5\n", "pkg_resources NA\n", "platformdirs 4.1.0\n", "plotly 5.18.0\n", "prettytable 3.9.0\n", "prometheus_client NA\n", "prompt_toolkit 3.0.42\n", "proto NA\n", "psutil NA\n", "ptyprocess 0.7.0\n", "pure_eval 0.2.2\n", "pyarrow 13.0.0\n", "pydev_ipython NA\n", "pydevconsole NA\n", "pydevd 2.9.5\n", "pydevd_file_utils NA\n", "pydevd_plugins NA\n", "pydevd_tracing NA\n", "pygments 2.17.2\n", "pynvml NA\n", "pyparsing 3.1.1\n", "pyreadr 0.5.0\n", "pythonjsonlogger NA\n", "pytz 2023.3.post1\n", "referencing NA\n", "requests 2.31.0\n", 
"rfc3339_validator 0.1.4\n", "rfc3986_validator 0.1.1\n", "rpds NA\n", "scipy 1.11.4\n", "send2trash NA\n", "shapely 1.8.5.post1\n", "six 1.16.0\n", "sklearn 1.3.2\n", "sniffio 1.3.0\n", "socks 1.7.1\n", "sql NA\n", "sqlalchemy 2.0.21\n", "sqlparse 0.4.4\n", "stack_data 0.6.2\n", "sympy 1.12\n", "termcolor NA\n", "texttable 1.7.0\n", "threadpoolctl 3.2.0\n", "torch 2.1.2+cu121\n", "torchgen NA\n", "tornado 6.3.3\n", "tqdm 4.66.1\n", "traitlets 5.9.0\n", "typing_extensions NA\n", "uri_template NA\n", "urllib3 1.26.18\n", "wcwidth 0.2.12\n", "webcolors 1.13\n", "websocket 1.7.0\n", "wrapt 1.15.0\n", "xarray 2023.12.0\n", "yaml 6.0.1\n", "zipp NA\n", "zmq 25.1.2\n", "zoneinfo NA\n", "zstandard 0.22.0\n", "\n", "
\n", "-----\n", "IPython 8.19.0\n", "jupyter_client 8.6.0\n", "jupyter_core 5.6.1\n", "jupyterlab 4.0.10\n", "notebook 6.5.4\n", "-----\n", "Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]\n", "Linux-5.15.0-1052-gcp-x86_64-with-glibc2.31\n", "-----\n", "Session information updated at 2024-03-01 01:56\n", "\n", "