{ "cells": [ { "cell_type": "markdown", "id": "426da71f", "metadata": {}, "source": [ "# Clonality analysis in 10X data\n", "\n", "September 2022 draft" ] }, { "cell_type": "markdown", "id": "2e74ece6", "metadata": {}, "source": [ "# Loading packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "f5065f5a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scanpy==1.7.1 anndata==0.7.6 umap==0.5.1 numpy==1.20.1 scipy==1.6.1 pandas==1.2.3 scikit-learn==0.24.1 statsmodels==0.12.2 python-igraph==0.8.3 louvain==0.7.0 leidenalg==0.8.3\n" ] } ], "source": [ "import pandas as pd # Pandas for data analysis.\n", "import numpy as np\n", "import scipy.stats as ss\n", "import matplotlib.pyplot as plt # For basic plotting.\n", "import seaborn as sns # For pretty visualization in Seaborn. See https://seaborn.pydata.org/\n", "from IPython.display import display # Pretty display of data frames.\n", "\n", "from sklearn import base\n", "from sklearn.feature_selection import chi2, f_classif\n", "\n", "import scanpy as sc\n", "sc.settings.verbosity = 1 # verbosity: errors (0), warnings (1), info (2), hints (3)\n", "sc.logging.print_header()\n", "sc.settings.set_figure_params(dpi=80, facecolor='white')\n", "\n", "# Put plots inline rather than in a pop-up.\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "id": "6848eefc", "metadata": {}, "outputs": [], "source": [ "def hrule(repchar='=', length=80):\n", "    '''\n", "    Print a horizontal rule that is `length` characters wide.\n", "\n", "    repchar : str\n", "        Pattern to repeat. A multi-character pattern is tiled and\n", "        truncated to exactly `length` characters; an empty pattern\n", "        prints nothing.\n", "    length : int\n", "        Width of the rule; values below zero are treated as zero.\n", "    '''\n", "    if not repchar:\n", "        return  # nothing to repeat\n", "    width = max(length, 0)\n", "    repeats = -(-width // len(repchar))  # ceiling division\n", "    print((repchar * repeats)[:width])" ] }, { "cell_type": "markdown", "id": "d1ccc687", "metadata": {}, "source": [ "# Loading data\n", "\n", "## Loading a big Loom file" ] }, { "cell_type": "code", "execution_count": 3, "id": "3305ba18", "metadata": {}, "outputs": [], "source": [ "filename = 'Raw/JM_10X_merged.loom'" ] }, { "cell_type": "code", "execution_count": 4, "id": "7f5e84fa", "metadata": {}, "outputs": [], "source": [
"JM_all = sc.read_loom(filename)" ] }, { "cell_type": "code", "execution_count": 5, "id": "c16a8188", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 46913 × 36601\n", " obs: 'clone_id', 'hash.ID', 'nCount_CH', 'nCount_RNA', 'nFeature_CH', 'nFeature_RNA', 'orig.ident', 'percent.mt'\n", " layers: 'counts'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "JM_all" ] }, { "cell_type": "code", "execution_count": 6, "id": "f6154ca9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | clone_id | \n", "hash.ID | \n", "nCount_CH | \n", "nCount_RNA | \n", "nFeature_CH | \n", "nFeature_RNA | \n", "orig.ident | \n", "percent.mt | \n", "
---|---|---|---|---|---|---|---|---|
CellID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
exp1_AAACCTGAGACAGGCT-1 | \n", "exp1_2 | \n", "exp1_Sample 2 | \n", "1028.0 | \n", "6876.0 | \n", "6 | \n", "2669 | \n", "exp1 | \n", "3.955788 | \n", "
exp1_AAACCTGAGCCCAATT-1 | \n", "exp1_non | \n", "exp1_Sample 1 | \n", "2166.0 | \n", "6246.0 | \n", "6 | \n", "2368 | \n", "exp1 | \n", "2.161383 | \n", "
exp1_AAACCTGAGGACAGAA-1 | \n", "exp1_non | \n", "exp1_Sample 6 | \n", "813.0 | \n", "4615.0 | \n", "6 | \n", "1718 | \n", "exp1 | \n", "2.665222 | \n", "
exp1_AAACCTGCAACAACCT-1 | \n", "exp1_5 | \n", "exp1_Sample 5 | \n", "1092.0 | \n", "11565.0 | \n", "6 | \n", "3613 | \n", "exp1 | \n", "2.075227 | \n", "
exp1_AAACCTGCAGACACTT-1 | \n", "exp1_7 | \n", "exp1_Sample 4 | \n", "847.0 | \n", "4440.0 | \n", "6 | \n", "2005 | \n", "exp1 | \n", "0.990991 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
exp3_TTTGTCATCCCACTTG-1 | \n", "exp3_7 | \n", "exp3_Sample 5 | \n", "315.0 | \n", "2752.0 | \n", "6 | \n", "1475 | \n", "exp3 | \n", "0.109012 | \n", "
exp3_TTTGTCATCCTATGTT-1 | \n", "exp3_9 | \n", "exp3_Sample 3 | \n", "379.0 | \n", "2357.0 | \n", "5 | \n", "1369 | \n", "exp3 | \n", "0.551549 | \n", "
exp3_TTTGTCATCCTCAATT-1 | \n", "exp3_4 | \n", "exp3_Sample 3 | \n", "222.0 | \n", "2895.0 | \n", "6 | \n", "1508 | \n", "exp3 | \n", "0.725389 | \n", "
exp3_TTTGTCATCGCGCCAA-1 | \n", "exp3_15 | \n", "exp3_Sample 1 | \n", "1096.0 | \n", "4231.0 | \n", "6 | \n", "2055 | \n", "exp3 | \n", "1.134484 | \n", "
exp3_TTTGTCATCGTTACAG-1 | \n", "exp3_7 | \n", "exp3_Sample 5 | \n", "247.0 | \n", "2795.0 | \n", "6 | \n", "1591 | \n", "exp3 | \n", "1.037567 | \n", "
46913 rows × 8 columns
\n", "\n", " |
---|
Gene | \n", "
MIR1302-2HG | \n", "
FAM138A | \n", "
OR4F5 | \n", "
AL627309.1 | \n", "
AL627309.3 | \n", "
... | \n", "
AC141272.1 | \n", "
AC023491.2 | \n", "
AC007325.1 | \n", "
AC007325.4 | \n", "
AC007325.2 | \n", "
36601 rows × 0 columns
\n", "\n", " | clone_ID | \n", "hash_ID | \n", "nCount_CH | \n", "nCount_RNA | \n", "nFeature_CH | \n", "nFeature_RNA | \n", "orig_ident | \n", "percent_mt | \n", "clone | \n", "well | \n", "experiment | \n", "CD_type | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
CellID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
exp1_AAACCTGAGACAGGCT-1 | \n", "exp1_2 | \n", "exp1_Sample2 | \n", "1028.0 | \n", "6876.0 | \n", "6 | \n", "2669 | \n", "exp1 | \n", "3.955788 | \n", "2 | \n", "Sample2 | \n", "exp1 | \n", "CD4 | \n", "
exp1_AAACCTGAGCCCAATT-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "2166.0 | \n", "6246.0 | \n", "6 | \n", "2368 | \n", "exp1 | \n", "2.161383 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
exp1_AAACCTGAGGACAGAA-1 | \n", "exp1_non | \n", "exp1_Sample6 | \n", "813.0 | \n", "4615.0 | \n", "6 | \n", "1718 | \n", "exp1 | \n", "2.665222 | \n", "non | \n", "Sample6 | \n", "exp1 | \n", "unknown | \n", "
exp1_AAACCTGCAACAACCT-1 | \n", "exp1_5 | \n", "exp1_Sample5 | \n", "1092.0 | \n", "11565.0 | \n", "6 | \n", "3613 | \n", "exp1 | \n", "2.075227 | \n", "5 | \n", "Sample5 | \n", "exp1 | \n", "CD4 | \n", "
exp1_AAACCTGCAGACACTT-1 | \n", "exp1_7 | \n", "exp1_Sample4 | \n", "847.0 | \n", "4440.0 | \n", "6 | \n", "2005 | \n", "exp1 | \n", "0.990991 | \n", "7 | \n", "Sample4 | \n", "exp1 | \n", "CD4 | \n", "
\n", " | clone_ID | \n", "hash_ID | \n", "nCount_CH | \n", "nCount_RNA | \n", "nFeature_CH | \n", "nFeature_RNA | \n", "orig_ident | \n", "percent_mt | \n", "clone | \n", "well | \n", "experiment | \n", "CD_type | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
CellID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
exp1_AAACCTGAGCCCAATT-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "2166.0 | \n", "6246.0 | \n", "6 | \n", "2368 | \n", "exp1 | \n", "2.161383 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
exp1_AAACCTGTCCGCATAA-1 | \n", "exp1_18 | \n", "exp1_Sample1 | \n", "15789.0 | \n", "5096.0 | \n", "6 | \n", "2207 | \n", "exp1 | \n", "2.217425 | \n", "18 | \n", "Sample1 | \n", "exp1 | \n", "CD4 | \n", "
exp1_AAACGGGGTTACGGAG-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "3081.0 | \n", "5905.0 | \n", "6 | \n", "1974 | \n", "exp1 | \n", "3.065199 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
exp1_AAACGGGTCGAATCCA-1 | \n", "exp1_17 | \n", "exp1_Sample1 | \n", "2479.0 | \n", "4264.0 | \n", "6 | \n", "1879 | \n", "exp1 | \n", "1.993433 | \n", "17 | \n", "Sample1 | \n", "exp1 | \n", "CD8 | \n", "
exp1_AAACGGGTCTGTGCAA-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "3424.0 | \n", "1655.0 | \n", "6 | \n", "1167 | \n", "exp1 | \n", "3.746224 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
exp1_TTTGGTTTCACATACG-1 | \n", "exp1_1 | \n", "exp1_Sample1 | \n", "18033.0 | \n", "4451.0 | \n", "6 | \n", "2066 | \n", "exp1 | \n", "2.269153 | \n", "1 | \n", "Sample1 | \n", "exp1 | \n", "CD4 | \n", "
exp1_TTTGTCAAGCTAGTCT-1 | \n", "exp1_1 | \n", "exp1_Sample1 | \n", "3656.0 | \n", "8112.0 | \n", "6 | \n", "2748 | \n", "exp1 | \n", "2.465483 | \n", "1 | \n", "Sample1 | \n", "exp1 | \n", "CD4 | \n", "
exp1_TTTGTCACAAGAAAGG-1 | \n", "exp1_17 | \n", "exp1_Sample1 | \n", "3510.0 | \n", "4699.0 | \n", "6 | \n", "2146 | \n", "exp1 | \n", "2.085550 | \n", "17 | \n", "Sample1 | \n", "exp1 | \n", "CD8 | \n", "
exp1_TTTGTCAGTCTGCCAG-1 | \n", "exp1_1 | \n", "exp1_Sample1 | \n", "4479.0 | \n", "6722.0 | \n", "6 | \n", "2305 | \n", "exp1 | \n", "1.695924 | \n", "1 | \n", "Sample1 | \n", "exp1 | \n", "CD4 | \n", "
exp1_TTTGTCATCTGGTTCC-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "4453.0 | \n", "3705.0 | \n", "6 | \n", "1658 | \n", "exp1 | \n", "2.159244 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
1834 rows × 12 columns
\n", "\n", " | clone_ID | \n", "hash_ID | \n", "nCount_CH | \n", "nCount_RNA | \n", "nFeature_CH | \n", "nFeature_RNA | \n", "orig_ident | \n", "percent_mt | \n", "clone | \n", "well | \n", "experiment | \n", "CD_type | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
CellID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
exp1_AAACCTGAGACAGGCT-1 | \n", "exp1_2 | \n", "exp1_Sample2 | \n", "1028.0 | \n", "6876.0 | \n", "6 | \n", "2669 | \n", "exp1 | \n", "3.955788 | \n", "2 | \n", "Sample2 | \n", "exp1 | \n", "CD4 | \n", "
exp1_AAACCTGAGCCCAATT-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "2166.0 | \n", "6246.0 | \n", "6 | \n", "2368 | \n", "exp1 | \n", "2.161383 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
exp1_AAACCTGAGGACAGAA-1 | \n", "exp1_non | \n", "exp1_Sample6 | \n", "813.0 | \n", "4615.0 | \n", "6 | \n", "1718 | \n", "exp1 | \n", "2.665222 | \n", "non | \n", "Sample6 | \n", "exp1 | \n", "unknown | \n", "
exp1_AAACCTGCAACAACCT-1 | \n", "exp1_5 | \n", "exp1_Sample5 | \n", "1092.0 | \n", "11565.0 | \n", "6 | \n", "3613 | \n", "exp1 | \n", "2.075227 | \n", "5 | \n", "Sample5 | \n", "exp1 | \n", "CD4 | \n", "
exp1_AAACCTGCAGACACTT-1 | \n", "exp1_7 | \n", "exp1_Sample4 | \n", "847.0 | \n", "4440.0 | \n", "6 | \n", "2005 | \n", "exp1 | \n", "0.990991 | \n", "7 | \n", "Sample4 | \n", "exp1 | \n", "CD4 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
exp1_TTTGTCATCGTCACGG-1 | \n", "exp1_non | \n", "exp1_Sample3 | \n", "156.0 | \n", "2753.0 | \n", "6 | \n", "1290 | \n", "exp1 | \n", "2.542681 | \n", "non | \n", "Sample3 | \n", "exp1 | \n", "unknown | \n", "
exp1_TTTGTCATCTCCAGGG-1 | \n", "exp1_5 | \n", "exp1_Sample5 | \n", "301.0 | \n", "5353.0 | \n", "6 | \n", "2198 | \n", "exp1 | \n", "2.764805 | \n", "5 | \n", "Sample5 | \n", "exp1 | \n", "CD4 | \n", "
exp1_TTTGTCATCTGGTTCC-1 | \n", "exp1_non | \n", "exp1_Sample1 | \n", "4453.0 | \n", "3705.0 | \n", "6 | \n", "1658 | \n", "exp1 | \n", "2.159244 | \n", "non | \n", "Sample1 | \n", "exp1 | \n", "unknown | \n", "
exp1_TTTGTCATCTTCCTTC-1 | \n", "exp1_2 | \n", "exp1_Sample2 | \n", "1984.0 | \n", "9842.0 | \n", "6 | \n", "3079 | \n", "exp1 | \n", "4.216623 | \n", "2 | \n", "Sample2 | \n", "exp1 | \n", "CD4 | \n", "
exp1_TTTGTCATCTTGACGA-1 | \n", "exp1_3 | \n", "exp1_Sample3 | \n", "360.0 | \n", "4343.0 | \n", "6 | \n", "1720 | \n", "exp1 | \n", "1.934147 | \n", "3 | \n", "Sample3 | \n", "exp1 | \n", "CD4 | \n", "
13493 rows × 12 columns
\n", "\n", " | CD_type | \n", "clone | \n", "clone_ID | \n", "experiment | \n", "hash_ID | \n", "nCount_CH | \n", "nCount_RNA | \n", "nFeature_CH | \n", "nFeature_RNA | \n", "orig_ident | \n", "percent_mt | \n", "well | \n", "n_genes_by_counts | \n", "log1p_n_genes_by_counts | \n", "total_counts | \n", "log1p_total_counts | \n", "total_counts_mt | \n", "log1p_total_counts_mt | \n", "pct_counts_mt | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CellID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
exp3_AAACCTGAGAGTCGGT-1 | \n", "CD4 | \n", "2 | \n", "exp3_2 | \n", "exp3 | \n", "exp3_Sample4 | \n", "440.0 | \n", "3030.0 | \n", "6 | \n", "1503 | \n", "exp3 | \n", "0.363036 | \n", "Sample4 | \n", "1503 | \n", "7.315884 | \n", "3030.0 | \n", "8.016648 | \n", "11.0 | \n", "2.484907 | \n", "0.363036 | \n", "
exp3_AAACCTGAGCCAGTAG-1 | \n", "CD4 | \n", "1 | \n", "exp3_1 | \n", "exp3 | \n", "exp3_Sample3 | \n", "386.0 | \n", "2754.0 | \n", "6 | \n", "1388 | \n", "exp3 | \n", "0.326797 | \n", "Sample3 | \n", "1388 | \n", "7.236339 | \n", "2754.0 | \n", "7.921173 | \n", "9.0 | \n", "2.302585 | \n", "0.326797 | \n", "
exp3_AAACCTGAGTCGAGTG-1 | \n", "CD4 | \n", "8 | \n", "exp3_8 | \n", "exp3 | \n", "exp3_Sample2 | \n", "2098.0 | \n", "8042.0 | \n", "6 | \n", "2983 | \n", "exp3 | \n", "2.549117 | \n", "Sample2 | \n", "2983 | \n", "8.001020 | \n", "8042.0 | \n", "8.992558 | \n", "205.0 | \n", "5.327876 | \n", "2.549117 | \n", "
exp3_AAACCTGAGTTCGCGC-1 | \n", "CD4 | \n", "40 | \n", "exp3_40 | \n", "exp3 | \n", "exp3_Sample2 | \n", "1709.0 | \n", "5359.0 | \n", "6 | \n", "2273 | \n", "exp3 | \n", "1.436835 | \n", "Sample2 | \n", "2273 | \n", "7.729296 | \n", "5359.0 | \n", "8.586720 | \n", "77.0 | \n", "4.356709 | \n", "1.436835 | \n", "
exp3_AAACCTGCAATCCAAC-1 | \n", "CD4 | \n", "3 | \n", "exp3_3 | \n", "exp3 | \n", "exp3_Sample3 | \n", "465.0 | \n", "3411.0 | \n", "5 | \n", "1791 | \n", "exp3 | \n", "2.140135 | \n", "Sample3 | \n", "1791 | \n", "7.491088 | \n", "3411.0 | \n", "8.135054 | \n", "73.0 | \n", "4.304065 | \n", "2.140135 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
exp3_TTTGTCATCCCACTTG-1 | \n", "CD4 | \n", "7 | \n", "exp3_7 | \n", "exp3 | \n", "exp3_Sample5 | \n", "315.0 | \n", "2752.0 | \n", "6 | \n", "1475 | \n", "exp3 | \n", "0.109012 | \n", "Sample5 | \n", "1475 | \n", "7.297091 | \n", "2752.0 | \n", "7.920446 | \n", "3.0 | \n", "1.386294 | \n", "0.109012 | \n", "
exp3_TTTGTCATCCTATGTT-1 | \n", "CD4 | \n", "9 | \n", "exp3_9 | \n", "exp3 | \n", "exp3_Sample3 | \n", "379.0 | \n", "2357.0 | \n", "5 | \n", "1369 | \n", "exp3 | \n", "0.551549 | \n", "Sample3 | \n", "1369 | \n", "7.222566 | \n", "2357.0 | \n", "7.765569 | \n", "13.0 | \n", "2.639057 | \n", "0.551549 | \n", "
exp3_TTTGTCATCCTCAATT-1 | \n", "CD4 | \n", "4 | \n", "exp3_4 | \n", "exp3 | \n", "exp3_Sample3 | \n", "222.0 | \n", "2895.0 | \n", "6 | \n", "1508 | \n", "exp3 | \n", "0.725389 | \n", "Sample3 | \n", "1508 | \n", "7.319202 | \n", "2895.0 | \n", "7.971086 | \n", "21.0 | \n", "3.091043 | \n", "0.725389 | \n", "
exp3_TTTGTCATCGCGCCAA-1 | \n", "CD4 | \n", "15 | \n", "exp3_15 | \n", "exp3 | \n", "exp3_Sample1 | \n", "1096.0 | \n", "4231.0 | \n", "6 | \n", "2055 | \n", "exp3 | \n", "1.134484 | \n", "Sample1 | \n", "2055 | \n", "7.628518 | \n", "4231.0 | \n", "8.350430 | \n", "48.0 | \n", "3.891820 | \n", "1.134484 | \n", "
exp3_TTTGTCATCGTTACAG-1 | \n", "CD4 | \n", "7 | \n", "exp3_7 | \n", "exp3 | \n", "exp3_Sample5 | \n", "247.0 | \n", "2795.0 | \n", "6 | \n", "1591 | \n", "exp3 | \n", "1.037567 | \n", "Sample5 | \n", "1591 | \n", "7.372746 | \n", "2795.0 | \n", "7.935945 | \n", "29.0 | \n", "3.401197 | \n", "1.037567 | \n", "
12424 rows × 19 columns
\n", "