{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "58053d5a", "metadata": {}, "outputs": [], "source": [ "from descartes_rpa.convert.loom import loom_to_anndata\n", "from descartes_rpa.analyze.analyze import scanpy_format\n", "from descartes_rpa import get_pathways_for_group\n", "from descartes_rpa.fetch.descartes import fetch_descartes_by_tissue" ] }, { "cell_type": "code", "execution_count": null, "id": "61b65608", "metadata": {}, "outputs": [], "source": [ "# Download into ../../data/input, the directory the loom-loading cell reads from.\n", "fetch_descartes_by_tissue([\"Pancreas\"], out_dir=\"../../data/input\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "2949c4ce", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/joao/miniconda3/envs/descartes-rpa/lib/python3.9/site-packages/anndata/_core/anndata.py:120: ImplicitModificationWarning: Transforming to str index.\n", " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n", "/home/joao/miniconda3/envs/descartes-rpa/lib/python3.9/site-packages/anndata/utils.py:111: UserWarning: Suffix used (-[0-9]+) to deduplicate index values may make index values difficult to interpret. There values with a similar suffixes in the index. Consider using a different delimiter by passing `join={delimiter}`Example key collisions generated by the make_index_unique algorithm: ['SNORD116-1', 'SNORD116-2', 'SNORD116-3', 'SNORD116-5', 'SNORD116-6']\n", " warnings.warn(\n", "... storing 'Assay' as categorical\n", "... storing 'Batch' as categorical\n", "... storing 'Experiment_batch' as categorical\n", "... storing 'Fetus_id' as categorical\n", "... 
storing 'Main_cluster_name' as categorical\n", "... storing 'Organ' as categorical\n", "... storing 'Organ_cell_lineage' as categorical\n", "... storing 'RT_group' as categorical\n", "... storing 'Sex' as categorical\n", "... storing 'exon_intron' as categorical\n", "... storing 'gene_type' as categorical\n" ] } ], "source": [ "# Load the downloaded loom file; the two calls after it discard their return\n", "# values (presumably they update adata in place - confirm).\n", "loom_file = \"../../data/input/Pancreas_data.loom\"\n", "adata = loom_to_anndata(loom_file)\n", "scanpy_format(adata=adata)\n", "get_pathways_for_group(adata)" ] }, { "cell_type": "code", "execution_count": 3, "id": "ae227701", "metadata": {}, "outputs": [], "source": [ "from descartes_rpa.io.save import save_data_with_pathways" ] }, { "cell_type": "code", "execution_count": 4, "id": "a25d728f", "metadata": {}, "outputs": [], "source": [ "# Output directory and base file name handed to save_data_with_pathways below.\n", "dir_path = \"../../data/output/Pancreas\"\n", "file = \"Pancreas\"" ] }, { "cell_type": "code", "execution_count": 5, "id": "22f613b7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Saving AnnData structure to ../../data/output/Pancreas/Pancreas.h5ad\n", "Saving pathway data from Acinar cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Ductal cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Lymphoid cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Smooth muscle cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Erythroblasts clusters to ../../data/output/Pancreas\n", "Saving pathway data from ENS neurons clusters to ../../data/output/Pancreas\n", "Saving pathway data from Islet endocrine cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Myeloid cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Stromal cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Vascular endothelial cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from CCL19_CCL21 positive cells clusters to ../../data/output/Pancreas\n", "Saving 
pathway data from ENS glia clusters to ../../data/output/Pancreas\n", "Saving pathway data from Mesothelial cells clusters to ../../data/output/Pancreas\n", "Saving pathway data from Lymphatic endothelial cells clusters to ../../data/output/Pancreas\n" ] } ], "source": [ "# Write the AnnData file and one pathway table per cluster into dir_path.\n", "save_data_with_pathways(\n", "    adata,\n", "    directory=dir_path,\n", "    file=file,\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "id": "9b5520d1", "metadata": {}, "outputs": [], "source": [ "from descartes_rpa.io.load import load_data_with_pathways" ] }, { "cell_type": "code", "execution_count": 7, "id": "173f1cad", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading ../../data/output/Pancreas/Lymphatic_endothelial_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Smooth_muscle_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/CCL19_CCL21_positive_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Myeloid_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Acinar_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Ductal_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Lymphoid_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/ENS_glia_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Vascular_endothelial_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Islet_endocrine_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Mesothelial_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Erythroblasts_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/ENS_neurons_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Stromal_cells_pathways.csv pathway data.\n", "Loading ../../data/output/Pancreas/Pancreas.h5ad AnnData file.\n" ] } ], "source": [ "# Read the saved .h5ad and *_pathways.csv files back from dir_path.\n", "loaded_adata = load_data_with_pathways(directory=dir_path)" ] }, { 
"cell_type": "code", "execution_count": 8, "id": "a33e0409", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "odict_keys(['Lymphatic_endothelial_cells', 'Smooth_muscle_cells', 'CCL19_CCL21_positive_cells', 'Myeloid_cells', 'Acinar_cells', 'Ductal_cells', 'Lymphoid_cells', 'ENS_glia', 'Vascular_endothelial_cells', 'Islet_endocrine_cells', 'Mesothelial_cells', 'Erythroblasts', 'ENS_neurons', 'Stromal_cells'])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cluster names under which the reloaded pathway tables are stored.\n", "loaded_adata.uns[\"pathways\"].keys()" ] }, { "cell_type": "code", "execution_count": 9, "id": "8011ea31", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | stId | \n", "dbId | \n", "name | \n", "species | \n", "llp | \n", "entities | \n", "reactions | \n", "inDisease | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "R-HSA-186712 | \n", "186712 | \n", "Regulation of beta-cell development | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "False | \n", "{'resource': 'TOTAL', 'total': 67, 'found': 5,... | \n", "{'resource': 'TOTAL', 'total': 26, 'found': 4,... | \n", "False | \n", "
| 1 | \n", "R-HSA-1296052 | \n", "1296052 | \n", "Ca2+ activated K+ channels | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "True | \n", "{'resource': 'TOTAL', 'total': 10, 'found': 3,... | \n", "{'resource': 'TOTAL', 'total': 3, 'found': 1, ... | \n", "False | \n", "
| 2 | \n", "R-HSA-210745 | \n", "210745 | \n", "Regulation of gene expression in beta cells | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "True | \n", "{'resource': 'TOTAL', 'total': 35, 'found': 4,... | \n", "{'resource': 'TOTAL', 'total': 12, 'found': 3,... | \n", "False | \n", "
| 3 | \n", "R-HSA-112308 | \n", "112308 | \n", "Presynaptic depolarization and calcium channel... | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "True | \n", "{'resource': 'TOTAL', 'total': 14, 'found': 3,... | \n", "{'resource': 'TOTAL', 'total': 1, 'found': 1, ... | \n", "False | \n", "
| 4 | \n", "R-HSA-422356 | \n", "422356 | \n", "Regulation of insulin secretion | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "True | \n", "{'resource': 'TOTAL', 'total': 106, 'found': 5... | \n", "{'resource': 'TOTAL', 'total': 34, 'found': 8,... | \n", "False | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 102 | \n", "R-HSA-556833 | \n", "556833 | \n", "Metabolism of lipids | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "False | \n", "{'resource': 'TOTAL', 'total': 1437, 'found': ... | \n", "{'resource': 'TOTAL', 'total': 949, 'found': 1... | \n", "False | \n", "
| 103 | \n", "R-HSA-212436 | \n", "212436 | \n", "Generic Transcription Pathway | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "True | \n", "{'resource': 'TOTAL', 'total': 1555, 'found': ... | \n", "{'resource': 'TOTAL', 'total': 824, 'found': 3... | \n", "False | \n", "
| 104 | \n", "R-HSA-162582 | \n", "162582 | \n", "Signal Transduction | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "False | \n", "{'resource': 'TOTAL', 'total': 2993, 'found': ... | \n", "{'resource': 'TOTAL', 'total': 2445, 'found': ... | \n", "False | \n", "
| 105 | \n", "R-HSA-73857 | \n", "73857 | \n", "RNA Polymerase II Transcription | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "False | \n", "{'resource': 'TOTAL', 'total': 1694, 'found': ... | \n", "{'resource': 'TOTAL', 'total': 885, 'found': 3... | \n", "False | \n", "
| 106 | \n", "R-HSA-74160 | \n", "74160 | \n", "Gene expression (Transcription) | \n", "{'dbId': 48887, 'taxId': '9606', 'name': 'Homo... | \n", "False | \n", "{'resource': 'TOTAL', 'total': 1855, 'found': ... | \n", "{'resource': 'TOTAL', 'total': 1000, 'found': ... | \n", "False | \n", "
107 rows × 8 columns
\n", "