{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example export to SPRING"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as pl\n",
    "import scanpy.api as sc\n",
    "\n",
    "sc.settings.verbosity = 2  # verbosity: errors (0), warnings (1), info (2), hints (3)\n",
    "sc.settings.set_figure_params(dpi=150)  # low dpi (dots per inch) yields small inline figures"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data prep"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download counts matrices from 10X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -nc (no-clobber): skip the download when the archive is already present,\n",
    "# so re-running the notebook does not fetch a duplicate copy.\n",
    "!wget -nc http://cf.10xgenomics.com/samples/cell-exp/2.1.0/pbmc8k/pbmc8k_filtered_gene_bc_matrices.tar.gz\n",
    "!tar xfz pbmc8k_filtered_gene_bc_matrices.tar.gz"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "... reading from cache file ./cache/filtered_gene_bc_matrices-GRCh38-matrix.h5ad\n",
      "Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n"
     ]
    }
   ],
   "source": [
    "path = 'filtered_gene_bc_matrices/GRCh38/'\n",
    "adata = sc.read(path + 'matrix.mtx', cache=True).T # first time only\n",
    "adata.var_names = pd.read_csv(path + 'genes.tsv', header=None, sep='\\t')[1]\n",
    "adata.var_names_make_unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Store total counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "adata.obs['n_counts'] = adata.X.sum(1).A1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Normalize, then store full normalized counts matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.pp.normalize_per_cell(adata, counts_per_cell_after=adata.obs['n_counts'].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "adata.raw = adata"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Filter genes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.pp.filter_genes(adata, min_cells=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "filter_result = sc.pp.filter_genes_dispersion(\n",
    "    adata.X, min_mean=0.0125, max_mean=3, min_disp=0.5)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1189"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filter_result['gene_subset'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sc.pl.filter_genes_dispersion(filter_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "adata = adata[:, filter_result['gene_subset']]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Z-score normalize and run PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.pp.scale(adata)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.tl.pca(adata)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Build k-nearest-neighbor graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "computing neighbors\n",
      " finished (0:00:05.61)\n"
     ]
    }
   ],
   "source": [
    "sc.pp.neighbors(adata, n_neighbors=5, use_rep='X_pca')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get some 2-D embeddings - UMAP and force-directed graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "computing UMAP\n",
      " finished (0:00:09.11)\n"
     ]
    }
   ],
   "source": [
    "sc.tl.umap(adata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "drawing single-cell graph using layout \"fa\"\n",
      " finished (0:00:45.86)\n"
     ]
    }
   ],
   "source": [
    "sc.tl.draw_graph(adata, layout='fa')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Cluster and run graph abstraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "running Louvain clustering\n",
      " using the \"louvain\" package of Traag (2017)\n",
      " finished (0:00:00.80)\n"
     ]
    }
   ],
   "source": [
    "sc.tl.louvain(adata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "running partition-based graph abstraction (PAGA)\n",
      " finished (0:00:00.07)\n"
     ]
    }
   ],
   "source": [
    "sc.tl.paga(adata, groups='louvain')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Export SPRING plot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, export a subplot using the ForceAtlas2 layout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing subplot to ./10x_pbmc8k/force/\n",
      "41.813865184783936\n"
     ]
    }
   ],
   "source": [
    "t0 = time.time()\n",
    "sc.export_to.spring_project(adata, './10x_pbmc8k', 'draw_graph', subplot_name='force')\n",
    "print(time.time() - t0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Repeat the export, this time using the UMAP embedding.\n",
    "This should be much faster, since the HDF5 counts matrices do not need to be rewritten."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing subplot to ./10x_pbmc8k/umap/\n",
      "./10x_pbmc8k/ is an existing SPRING folder. A new subplot will be created, but you must set `overwrite=True` to overwrite counts matrices.\n",
      "2.749732255935669\n"
     ]
    }
   ],
   "source": [
    "t0 = time.time()\n",
    "sc.export_to.spring_project(adata, './10x_pbmc8k', 'umap', subplot_name='umap')\n",
    "print(time.time() - t0)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}