{
 "metadata": {
  "name": "",
  "signature": "sha256:385948bcca06a9e75bbf9cbf0d8473fefdb64d718f38a1c81b21ffecf762fa33"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "[``anhima.util``](http://anhima.readthedocs.org/en/latest/util.html) - Miscellaneous utilities"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "This notebook checks that the block-wise helpers ``anhima.util.block_apply`` and ``anhima.util.block_take2d`` give the same results as the equivalent whole-array operations, using a randomly generated genotype array. Every comparison is an ``assert``, so the notebook running to completion is the test."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import print_function, division\n",
      "import sys\n",
      "import numpy as np\n",
      "import random\n",
      "# import anhima\n",
      "# dev imports\n",
      "sys.path.insert(0, '..')\n",
      "%reload_ext autoreload\n",
      "%autoreload 1\n",
      "%aimport anhima.util\n",
      "# seed both random number generators so that the simulated genotypes and\n",
      "# the sampled indices are identical on every Restart & Run All\n",
      "random.seed(42)\n",
      "np.random.seed(42)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# simulate random genotype calls: values are 0 or 1, stored as int8,\n",
      "# with shape (n_variants, n_samples, 2)\n",
      "n_variants = 100000\n",
      "n_samples = 1000\n",
      "genotypes = np.random.randint(0, 2, size=(n_variants, n_samples, 2)).astype('i1')\n",
      "# array shape and memory footprint in megabytes\n",
      "genotypes.shape, genotypes.nbytes / 1e6"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "((100000, 1000, 2), 200.0)"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Block apply"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# reference result: transform the whole array in a single call\n",
      "gn = anhima.gt.as_n_alt(genotypes)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# same function, applied through block_apply with block_size=1000\n",
      "gn2 = anhima.util.block_apply(anhima.gt.as_n_alt, genotypes, block_size=1000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# the block-wise result must match the whole-array result exactly\n",
      "assert np.array_equal(gn, gn2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# reference result: allele counts computed over the whole array\n",
      "ac = anhima.af.allele_counts(genotypes)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# same function, applied through block_apply with block_size=1000\n",
      "ac2 = anhima.util.block_apply(anhima.af.allele_counts, genotypes, block_size=1000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# the block-wise result must match the whole-array result exactly\n",
      "assert np.array_equal(ac, ac2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# reference result: pack the whole genotype array in a single call\n",
      "packed = anhima.gt.pack_diploid(genotypes)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# same function, applied through block_apply with block_size=1000\n",
      "packed2 = anhima.util.block_apply(anhima.gt.pack_diploid, genotypes, block_size=1000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# the block-wise result must match the whole-array result exactly\n",
      "assert np.array_equal(packed, packed2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Block take2D"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# sorted random selection of one tenth of the row and column indices\n",
      "row_indices = sorted(random.sample(range(n_variants), n_variants//10))\n",
      "col_indices = sorted(random.sample(range(n_samples), n_samples//10))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# reference result: two chained numpy takes, rows (axis 0) then columns (axis 1)\n",
      "g = genotypes.take(row_indices, axis=0).take(col_indices, axis=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# same selection made through block_take2d with block_size=1000\n",
      "g2 = anhima.util.block_take2d(genotypes, row_indices, col_indices, block_size=1000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# the block-wise selection must match the chained numpy takes exactly\n",
      "assert np.array_equal(g, g2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    }
   ],
   "metadata": {}
  }
 ]
}