{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Single-cell cooler format Python API examples" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The here provided examples are using the cooler Python API with version 0.8.9. Earlier versions do not support single-cell cooler files.\n", "The single-cell cooler format uses an HDF5 container and can therefore be accessed with any HDF5 library of any programming language. However, the programmer needs to take care of all the operations on his/her own and make sure the single-cell cooler definitions are met. \n", "Please consider our documentation on https://cooler.readthedocs.io/" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import cooler\n", "import os" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create a single-cell cooler file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Load cooler files" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "path_file1 = 'data/single-cell/GSM2687248_41669_ACAGTG-R1-DpnII.100000.cool'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "path_file2 = 'data/single-cell/GSM2687249_41670_GGCTAC-R1-DpnII.100000.cool'" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "path_file3 = 'data/single-cell/GSM2687250_41671_TTAGGC-R1-DpnII.100000.cool'" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "cooler_file1 = cooler.Cooler(path_file1)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "cooler_file2 = cooler.Cooler(path_file2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "cooler_file3 = cooler.Cooler(path_file3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Load pixels" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "pixel1 = cooler_file1.pixels()[:]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "pixel2 = cooler_file2.pixels()[:]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "pixel3 = cooler_file3.pixels()[:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Load bins" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "bin1 = cooler_file1.bins()[:]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "bin2 = cooler_file2.bins()[:]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "bin3 = cooler_file3.bins()[:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Cell names" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "cell_name_list = [os.path.basename(cooler_file1.filename), os.path.basename(cooler_file2.filename), os.path.basename(cooler_file3.filename)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create input" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Bins either as pandas pixels or as a dict of name:bins (as pandas pixels)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "bins = bin1" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "bins_dictionary = {cell_name_list[0]:bin1, cell_name_list[1]:bin2, cell_name_list[2]:bin3}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pixels as dictionary name:pixels" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "pixels_dictionary = {cell_name_list[0]:pixel1, cell_name_list[1]:pixel2, cell_name_list[2]:pixel3}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create scool with bins as pandas dataframe" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/nezar/miniconda3/lib/python3.7/site-packages/dask/dataframe/utils.py:15: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", " import pandas.util.testing as tm\n" ] } ], "source": [ "cooler.create_scool('outfile_test.scool', bins, pixels_dictionary)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create scool with bins as dictonary" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "cooler.create_scool('outfile_test.scool', bins_dictionary, pixels_dictionary)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read scool" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Get paths of the cells" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "content_of_scool = cooler.fileops.list_scool_cells('outfile_test.scool')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['/cells/GSM2687248_41669_ACAGTG-R1-DpnII.100000.cool',\n", " '/cells/GSM2687249_41670_GGCTAC-R1-DpnII.100000.cool',\n", " '/cells/GSM2687250_41671_TTAGGC-R1-DpnII.100000.cool']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "content_of_scool" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load the individual cells" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "cell1 = cooler.Cooler('outfile_test.scool' + '::' + content_of_scool[0])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "cell2 = cooler.Cooler('outfile_test.scool' + '::' + content_of_scool[1])" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "cell3 = cooler.Cooler('outfile_test.scool' + '::' + content_of_scool[2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Bin content" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The bins are shared and for all cells identical" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chromstartend
010100000
11100000200000
21200000300000
31300000400000
41400000500000
51500000600000
61600000700000
71700000800000
81800000900000
919000001000000
\n", "
" ], "text/plain": [ " chrom start end\n", "0 1 0 100000\n", "1 1 100000 200000\n", "2 1 200000 300000\n", "3 1 300000 400000\n", "4 1 400000 500000\n", "5 1 500000 600000\n", "6 1 600000 700000\n", "7 1 700000 800000\n", "8 1 800000 900000\n", "9 1 900000 1000000" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell1.bins()[:10]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chromstartend
010100000
11100000200000
21200000300000
31300000400000
41400000500000
51500000600000
61600000700000
71700000800000
81800000900000
919000001000000
\n", "
" ], "text/plain": [ " chrom start end\n", "0 1 0 100000\n", "1 1 100000 200000\n", "2 1 200000 300000\n", "3 1 300000 400000\n", "4 1 400000 500000\n", "5 1 500000 600000\n", "6 1 600000 700000\n", "7 1 700000 800000\n", "8 1 800000 900000\n", "9 1 900000 1000000" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell2.bins()[:10]" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chromstartend
010100000
11100000200000
21200000300000
31300000400000
41400000500000
51500000600000
61600000700000
71700000800000
81800000900000
919000001000000
\n", "
" ], "text/plain": [ " chrom start end\n", "0 1 0 100000\n", "1 1 100000 200000\n", "2 1 200000 300000\n", "3 1 300000 400000\n", "4 1 400000 500000\n", "5 1 500000 600000\n", "6 1 600000 700000\n", "7 1 700000 800000\n", "8 1 800000 900000\n", "9 1 900000 1000000" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell3.bins()[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pixel content" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The content of the pixels are in all cells different" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bin1_idbin2_idcount
030301
1309011
230114241
330129231
431312
5323212
632331
732351
832161901
932179391
\n", "
" ], "text/plain": [ " bin1_id bin2_id count\n", "0 30 30 1\n", "1 30 901 1\n", "2 30 11424 1\n", "3 30 12923 1\n", "4 31 31 2\n", "5 32 32 12\n", "6 32 33 1\n", "7 32 35 1\n", "8 32 16190 1\n", "9 32 17939 1" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell1.pixels()[:10]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bin1_idbin2_idcount
0322771
133331
233259642
334341
434372
534492
6353512
735362
83559921
93589141
\n", "
" ], "text/plain": [ " bin1_id bin2_id count\n", "0 32 277 1\n", "1 33 33 1\n", "2 33 25964 2\n", "3 34 34 1\n", "4 34 37 2\n", "5 34 49 2\n", "6 35 35 12\n", "7 35 36 2\n", "8 35 5992 1\n", "9 35 8914 1" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell2.pixels()[:10]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bin1_idbin2_idcount
030304
13044861
23090791
330160011
431314
531322
631536
731545
8311901
931232531
\n", "
" ], "text/plain": [ " bin1_id bin2_id count\n", "0 30 30 4\n", "1 30 4486 1\n", "2 30 9079 1\n", "3 30 16001 1\n", "4 31 31 4\n", "5 31 32 2\n", "6 31 53 6\n", "7 31 54 5\n", "8 31 190 1\n", "9 31 23253 1" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell3.pixels()[:10]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }