{ "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "# Data pre-processing\n", "156,049 mouse nuclei from the developing brain and spinal cord of P2 or P11 mice\n", "were profiled by [SPLiT-seq](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE110823),\n", "in which 26,894 genes were detected." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import scipy.io\n", "import loompy\n", "import rpy2.robjects as robjects\n", "from rpy2.robjects import pandas2ri\n", "\n", "# Handle to R's saveRDS(), used below to write the cell-type labels.\n", "saveRDS = robjects.r[\"saveRDS\"]\n", "# Enable rpy2's pandas conversion so pandas objects can be passed to R functions.\n", "pandas2ri.activate()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "pycharm": { "is_executing": false, "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "E:/DISC/reproducibility/data/BRAIN_SPLiT/raw.loom\n", "(26894, 156049)\n" ] } ], "source": [ "# All inputs and outputs for this dataset live under one directory.\n", "DATA_DIR = \"E:/DISC/reproducibility/data/BRAIN_SPLiT\"\n", "data_path = DATA_DIR + \"/original_data/GSM3017261_150000_CNS_nuclei.mat\"\n", "cell_type_path = DATA_DIR + \"/cell_type.rds\"\n", "output_path = DATA_DIR + \"/raw.loom\"\n", "\n", "data = scipy.io.loadmat(data_path)\n", "# Transpose so that rows are genes and columns are cells, matching the loom attributes below.\n", "gene_bc_sparse = data[\"DGE\"].transpose()\n", "gene_name = pd.Series(data['genes']).str.strip(' ').values\n", "sample_type = pd.Series(data['sample_type']).str.strip(' ').values\n", "cell_type = pd.Series(data['cluster_assignment']).str.strip(' ').values\n", "# Use the builtin str: the np.str alias was deprecated in NumPy 1.20 and removed in 1.24.\n", "barcode_str = data[\"barcodes\"].squeeze().astype(str)\n", "cell_id = pd.Series(np.repeat(\"Cell\", barcode_str.size)).str.cat(barcode_str, sep='_').values\n", "\n", "# Fail before anything is written if the annotations do not line up with the matrix.\n", "assert gene_bc_sparse.shape == (gene_name.size, cell_id.size), \"matrix shape does not match gene/cell annotations\"\n", "\n", "# Cell-type labels indexed by cell ID, saved as an R object.\n", "saveRDS(pd.Series(cell_type, index=cell_id), cell_type_path)\n", "\n", "row_attrs = {\"Gene\": gene_name}\n", "col_attrs = {\"CellID\": cell_id, \"SampleID\": sample_type}\n", "loompy.create(output_path, gene_bc_sparse, row_attrs, col_attrs)\n", "print(output_path)\n", "print(gene_bc_sparse.shape)" ] }, { "cell_type": 
"markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "Reference: \n", "\n", "1. Rosenberg, A. B. et al. Single-cell profiling of the developing mouse brain and spinal cord with split-pool barcoding. Science 360, 176-182 (2018)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" }, "pycharm": { "stem_cell": { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [] } } }, "nbformat": 4, "nbformat_minor": 1 }