{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Trajectory inference analysis: PAGA\n", "\n", "Partly following tutorial https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as pl\n", "from matplotlib import rcParams\n", "import scanpy as sc\n", "\n", "import scipy\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-----\n", "anndata 0.8.0\n", "scanpy 1.9.1\n", "-----\n", "PIL 9.4.0\n", "appnope 0.1.3\n", "asttokens NA\n", "backcall 0.2.0\n", "beta_ufunc NA\n", "binom_ufunc NA\n", "cffi 1.15.1\n", "cloudpickle 2.2.1\n", "colorama 0.4.6\n", "comm 0.1.2\n", "cycler 0.10.0\n", "cython_runtime NA\n", "cytoolz 0.12.0\n", "dask 2023.1.0\n", "dateutil 2.8.2\n", "debugpy 1.6.6\n", "decorator 5.1.1\n", "defusedxml 0.7.1\n", "entrypoints 0.4\n", "executing 1.2.0\n", "google NA\n", "h5py 3.8.0\n", "hypergeom_ufunc NA\n", "igraph 0.10.3\n", "invgauss_ufunc NA\n", "ipykernel 6.20.2\n", "ipython_genutils 0.2.0\n", "jedi 0.18.2\n", "jinja2 3.1.2\n", "joblib 1.2.0\n", "jupyter_server 2.1.0\n", "kiwisolver 1.4.4\n", "leidenalg 0.9.1\n", "llvmlite 0.39.1\n", "louvain 0.8.0\n", "markupsafe 2.1.2\n", "matplotlib 3.6.3\n", "mpl_toolkits NA\n", "natsort 8.2.0\n", "nbinom_ufunc NA\n", "ncf_ufunc NA\n", "nct_ufunc NA\n", "ncx2_ufunc NA\n", "numba 0.56.4\n", "numpy 1.23.5\n", "packaging 23.0\n", "pandas 1.5.3\n", "parso 0.8.3\n", "pexpect 4.8.0\n", "pickleshare 0.7.5\n", "pkg_resources NA\n", "platformdirs 2.6.2\n", "prompt_toolkit 3.0.36\n", "psutil 5.9.4\n", "ptyprocess 0.7.0\n", "pure_eval 0.2.2\n", "pydev_ipython NA\n", "pydevconsole NA\n", "pydevd 
2.9.5\n", "pydevd_file_utils NA\n", "pydevd_plugins NA\n", "pydevd_tracing NA\n", "pygments 2.14.0\n", "pyparsing 3.0.9\n", "pytz 2022.7.1\n", "scipy 1.10.0\n", "session_info 1.0.0\n", "setuptools 66.1.1\n", "six 1.16.0\n", "skewnorm_ufunc NA\n", "sklearn 1.2.1\n", "stack_data 0.6.2\n", "texttable 1.6.7\n", "threadpoolctl 3.1.0\n", "tlz 0.12.0\n", "toolz 0.12.0\n", "tornado 6.2\n", "traitlets 5.8.1\n", "typing_extensions NA\n", "wcwidth 0.2.6\n", "yaml 6.0\n", "zipp NA\n", "zmq 25.0.0\n", "zoneinfo NA\n", "-----\n", "IPython 8.8.0\n", "jupyter_client 7.4.9\n", "jupyter_core 5.1.5\n", "jupyterlab 3.5.3\n", "notebook 6.5.2\n", "-----\n", "Python 3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:55:37) [Clang 14.0.6 ]\n", "macOS-12.6.2-x86_64-i386-64bit\n", "-----\n", "Session information updated at 2023-01-27 11:04\n" ] } ], "source": [ "sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)\n", "sc.logging.print_versions()\n", "\n", "sc.settings.set_figure_params(dpi=100, frameon=False, figsize=(5, 5), facecolor='white', color_map = 'viridis_r') " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading data\n", "\n", "In order to speed up the computations during the exercises, we will be using a subset of a bone marrow dataset (originally containing about 100K cells). The bone marrow is the source of adult immune cells, and contains virtually all differentiation stages of the cells of the immune system, which later circulate in the blood to all other organs.\n", "\n", "\n", "\n", "All the data has been preprocessed with Seurat. The file `trajectory_scanpy_filtered.h5ad` was converted from the Seurat object using the `SeuratDisk` package. 
For more information on how it was done, have a look at the script: `convert_to_h5ad.R` in the GitHub repository.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "infile = './../data/bone_marrow/trajectory_scanpy_filtered.h5ad'\n", "adata = sc.read_h5ad(infile)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 5828 × 3585\n", " obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'batches', 'dataset', 'nCount', 'nUMI', 'pMito', 'pRibo', 'pHb', 'pChY', 'pChX', 'pnonXY', 'pPCG', 'pNCG', 'pMito_UMIs', 'pRibo_UMIs', 'pHb_UMIs', 'pChY_UMIs', 'pChX_UMIs', 'pnonXY_UMIs', 'pPCG_UMIs', 'pNCG_UMIs', 'SEL_nCount', 'SEL_nUMI', 'SEL_pMito', 'SEL_pRibo', 'SEL_pHb', 'SEL_pChY', 'SEL_pChX', 'SEL_pnonXY', 'SEL_pPCG', 'SEL_pNCG', 'SEL_pMito_UMIs', 'SEL_pRibo_UMIs', 'SEL_pHb_UMIs', 'SEL_pChY_UMIs', 'SEL_pChX_UMIs', 'SEL_pnonXY_UMIs', 'SEL_pPCG_UMIs', 'SEL_pNCG_UMIs', 'S.Score', 'G2M.Score', 'Phase', 'metadata_clusters', 'clusters', 'outlier', 'subgroups', 'clusters_use'\n", " var: 'features'\n", " obsm: 'X_harmony', 'X_harmony_Phase', 'X_pca', 'X_umap', 'X_umap3d'\n", " varm: 'PCs'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check what is in the object\n", "adata" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check that the variable names are correct." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | features | \n", "
---|---|
0610040J01Rik | \n", "0610040J01Rik | \n", "
1190007I07Rik | \n", "1190007I07Rik | \n", "
1500009L16Rik | \n", "1500009L16Rik | \n", "
1700012B09Rik | \n", "1700012B09Rik | \n", "
1700020L24Rik | \n", "1700020L24Rik | \n", "
... | \n", "... | \n", "
Sqor | \n", "Sqor | \n", "
Sting1 | \n", "Sting1 | \n", "
Tent5a | \n", "Tent5a | \n", "
Tlcd4 | \n", "Tlcd4 | \n", "
Znrd2 | \n", "Znrd2 | \n", "
3585 rows × 1 columns
\n", "