{ "metadata": { "name": "", "signature": "sha256:b0c887c5a5694acb79ce1c574bf670b0ef52058054d0c3dc36edd65b48b81362" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "[``anhima.af``](http://anhima.readthedocs.org/en/latest/af.html) - Allele frequencies" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import division, print_function\n", "import numpy as np\n", "np.random.seed(1)\n", "import scipy.stats\n", "import random\n", "random.seed(1)\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import sys\n", "import anhima\n", "# # dev imports\n", "# sys.path.insert(0, '../src')\n", "# %reload_ext autoreload\n", "# %autoreload 1\n", "# %aimport anhima.sim\n", "# %aimport anhima.gt\n", "# %aimport anhima.af" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# simulate genotypes \n", "n_variants = 1000\n", "n_samples = 1000\n", "ploidy = 2\n", "af_dist = scipy.stats.beta(a=.4, b=.6)\n", "p_missing = .1\n", "genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "anhima.af.count_variant(genotypes)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 3, "text": [ "962" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "anhima.af.count_non_variant(genotypes)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 4, "text": [ "38" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "n_seg = anhima.af.count_segregating(genotypes)\n", "n_seg" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 5, "text": [ "958" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "n_non_seg = anhima.af.count_non_segregating(genotypes)\n", "n_non_seg" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 6, "text": [ "42" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "assert n_seg + n_non_seg == n_variants" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "n_non_seg_ref = anhima.af.count_non_segregating(genotypes, allele=0)\n", "n_non_seg_ref" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 8, "text": [ "38" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "n_non_seg_alt = anhima.af.count_non_segregating(genotypes, allele=1)\n", "n_non_seg_alt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 9, "text": [ "4" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "assert n_non_seg == n_non_seg_ref + n_non_seg_alt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "anhima.af.count_singletons(genotypes)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 11, "text": [ "12" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "anhima.af.count_doubletons(genotypes)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 12, "text": [ "9" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "an, ac, af = anhima.af.allele_frequencies(genotypes)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "# plot actual distribution of alt allele frequencies\n", "plt.hist(af[:, 1], bins=20);" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAEACAYAAABWLgY0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADkhJREFUeJzt3X+MHOV9x/H3go+TCD68J0eOsU2MDBScpsWN4tKgKhdF\nskylYNpKkJ8iCqqQqAClihScP2r/kzStlAhVEajilywlcWoFBQEKET/KKrQqICQDBseJfeCKo2fT\nBhPbUpXY1eaP57nc5jjfze7Mzu59/X5Jo519Znbm8aPzZ555ZmYXJEmSJEmSJEmSJEmSJEmSNMc6\n4BngNeBV4PZcvhOYAvbm6dqOz2wHDgIHgC11VVSS1L0PAFfl+QuAnwNXAjuAv5tn/Y3AS8AIsB44\nBJzT91pKkua1WAAfIYU2wEngZ8Ca/L4xz/rbgN3AKeAwKeQ3l66lJKkn3fSy1wObgOfy+9uAl4H7\ngRW57CLSMM6MKWYPCpKkmhUN+QuAHwJ3kHr09wCXkIZypoFvLfDZdpkKSpJ6t6zAOiPAQ8B3gYdz\n2dsdy+8DHs3zb5Eu1s5Ym8t+z4YNG9qTk5NdV1aSznKTwKXdfGCxnnyDNByzH7iro3x1x/xfAvvy\n/CPAp4HzSD39y4AX3lPLyUna7bZTu82OHTsGXodhmWwL28K2WHgCNnQT8LB4T/4a4PPAK6RbJQG+\nBnyGNFTTBt4AbsnL9gN78utp4FYcrpGkgVks5P+d+Xv7jy/wmW/kSZI0YN7DPmATExODrsLQsC1m\n2RazbIty5rvXvQ7tPL4kSSqo0WhAl7ltT16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPk\nJSkwQ16SAivyVcN9cfHFf9jzZy+88H28+OJPGR0drbBGkhTPwL7WYPbbibs3MvIxjh79L5rNZoVV\nkqTh1svXGgysJw+99+TPOefcCushSXE5Ji9JgRnykhSYIS9JgRnykhSYIS9JgRnykhSYIS9JgRny\nkhSYIS9JgRnykhSYIS9JgRnykhSYIS9JgRnykhSYIS9JgRnykhSYIS9JgRnykhSYIS9JgRnykhSY\nIS9JgRnykhTYYiG/DngGeA14Fbg9l48DTwK/AJ4AVnR8ZjtwEDgAbKmyspKk7iwW8qeALwMfAq4G\n/ha4EriTFPKXA0/n9wAbgRvz61bg7gL7kCT1yWIBfAR4Kc+fBH4GrAGuA3bl8l3A9Xl+G7CbdHA4\nDBwCNldXXUlSN7rpZa8HNgHPA6uAo7n8aH4PcBEw1fGZKdJBQZI0AMsKrncB8BBwB3BizrJ2ns7k\nDMt2dsxP5EmSNKPVatFqtUpto1FgnRHgMeBx4K5cdoCUykeA1aSLs1cwOzb/zfz6E2AHqfffqb3w\ncWFho6NNpqdfp9ls9rwNSVpqGo0GFMvt31lsuKYB3A/sZzbgAR4BbsrzNwEPd5R/GjgPuAS4DHih\nmwpJkqqz2HDNNcDngVeAvblsO6mnvge4mXSB9Ya8bH8u3w+cBm6lTJddklRKV93+CjlcI0ld6sdw\njSRpCTPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16S\nAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPk\nJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAjPkJSkwQ16SAisS8g8AR4F9HWU7gSlg\nb56u7Vi2HTgIHAC2VFJLSVJPioT8g8DWOWVt4NvApjw9nss3Ajfm163A3QX3IUnqgyIB/CxwbJ7y\nxjxl24DdwCngMHAI2Nxr5SRJ5ZTpZd8GvAzcD6zIZReRhnFmTAFrSuxDklRCryF/D3AJcBUwDXxr\ngXXbPe5DklTSsh4/93bH/H3Ao3n+LWBdx7K1uWweOzvmJ/IkSZrRarVotVqltjHfuPp81pOC/MP5\n/WpSDx7gy8BHgc+SLrh+nzQOvwZ4CriU9/bm22U6+KOjTaanX6fZbPa8DUlaahqNBhTPbaBYT343\n8HFgJfAmsIPU7b6KlNRvALfkdfcDe/LraeBWHK6RpIHp6ohQIXvyktSlXnry3sMuSYEZ8pIUmCEv\nSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ\n8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIU\nmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUmCEvSYEZ8pIUWJGQfwA4CuzrKBsHngR+ATwBrOhY\nth04CBwAtlRTTUlSL4qE/IPA1jlld5JC/nLg6fweYCNwY37dCtxdcB+SpD4oEsDPAsfmlF0H7Mrz\nu4Dr8/w2YDdwCjgMHAI2l66lJKknvfayV5GGcMivq/L8RcBUx3pTwJoe9yFJKqmKoZR2nhZaLkka\ngGU9fu4o8AHgCLAaeDuXvwWs61hvbS6bx86O+Yk8SZJmtFotWq1WqW00Cq63HngU+HB+/0/AL4F/\nJF10XZFfNwLfJ43DrwGeAi7lvb35dpkO/uhok+np12k2mz1vQ5KWmkajAcVzGyjWk98NfBxYCbwJ\n/D3wTWAPcDPpAusNed39uXw/cBq4FYdrJGlgujoiVMievCR1qZeevPewS1JghrwkBWbIS1JgS3ZM\nfmQETp58t+dtLF/e5Pjxd3r+vCTVrV931wylFPC9HyhOnBjU8U2S6uNwjSQFZshLUmCGvCQFZshL\nUmCGvCQFZshLUmCGvCQFZshLUmCGvCQFZshLUmCGvCQFZshLUmCGvCQFZshLUmCGvCQFtmS/T16S\nlpKxsXFOnDhW+34NeUmqQQr43n/oKOn+x44crpGkwAx5SQrMkJekwAx5SQrMkJekwAx5SQrMkJek\nwAx5SQrMkJekwAx5SQrMkJekwAx5SQrMkJekwAx5SQqs7FcNHwaOA/8PnAI2A+PAvwIfzMtvAN4t\nuR9JUg/K9uTbwASwiRTwAHcCTwKXA0/n90NoGY1Go9Q0NjY+6H+EJC2oiuGaud9ifx2wK8/vAq6v\nYB99cJp0jOp9quJXXsbGxj3YSOqb7n9m5Pe9DvyKNFzzL8C9wDGg2bH9dzrez2iX+YWU0dEmv/71\nu5T7lZVGyc+nbbTb5bbRaAxHPST1V1X/1+kyt8uOyV8DTAPvJw3RHJizfKbbO4+dHfMTeZIkzWrl\nqXdle/KddgAngb8hJfYRYDXwDHDFnHXtyc9swZ68dFYYVE++zJj8+cDyPP8+YAuwD3gEuCmX3wQ8\nXGIfkqQSygzXrAJ+1LGd7wFPAC8Ce4Cbmb2FUpI0AFUO13TD4ZqZLThcI/XV2Nh46Tvhli9vcvz4\nO6W2sVQvvErSUEsBXy5cT5wYVH+4PL/WQJICM+RDKPf0rg9TSXE5XBPCzNO7vVnKp6KSFmZPXpIC\nsydfyrJ8xVyShpMhX0q5YZLEg4Sk/nG4RpICsycvaahV8TDT2cyQlzpUEygjpB9K610VT1hGUf5h\npiqGRJfu9TdDXlTxBxwllKp4OrKKr8yIcltrnF740r3+ZsiLKv6Ao4TS8Ch34B2Wg251B031ypBX\nRTwbqJYPuKkahrwq4tmANIy8hVKSAjPkNUT8ojWpag7XaIg4Di1VzZBXIEv3XubqeSFciSGvQJbu\nvczV80K4EkNe0hl4ZhSBIS/pDDwzisC7ayQpMENekgIz5CUpMENekgIz5CUpMENekgIz5CUpMENe\nkgIz5CUpMENekgIz5CUpMENekgIz5CUpMENekgLrV8hvBQ4AB4Gv9mkfkqRF9CPkzwW+Qwr6jcBn\ngCv7sB9J0iL6EfKbgUPAYeAU8ANgWx/2I0laRD9Cfg3wZsf7qVwmSapZP37+r9DvhY2NfarnHZw8\nebLnz0rS2aQfIf8WsK7j/TpSb77T5PHjj20ov6uyvx9Zxe9PRtnGMNShim0MQx2GZRvDUIdh2cYw\n1KGSbUxWUInSlpEqsh44D3gJL7xKUijXAj8nXYDdPuC6SJIkSepWkYei/jkvfxnYVFO9BmGxtvgc\nqQ1eAf4D+KP6qlarog/KfRQ4DfxVHZUakCJtMQHsBV4FWrXUajAWa4uVwE9Iw7+vAl+srWb1ewA4\nCuxbYJ2hyM1zScM164ER5h+b/wvgx3n+T4Hn6qpczYq0xZ8BF+b5rcRsiyLtMLPevwGPAX9dV+Vq\nVqQtVgCvAWvz+5V1Va5mRdpiJ/APeX4l8Ev6c+PIMPhzUnCfKeS7ys1+fndNkYeirgN25fnnSX/U\nq/pYp0Ep0hb/Cfwqzz/P7H/sSIo+KHcb8EPgf2qrWf2KtMVngYeYvTvtf+uqXM2KtMU0MJbnx0gh\nf7qm+tXtWeDYAsu7ys1+hnyRh6LmWydiuHX7gNjNzB6pIyn6N7ENuCe/L/TcxRJUpC0uA8aBZ4AX\ngS/UU7XaFWmLe4EPAf9NGqK4o56qDaWucrOfpztF/3POvXE04n/qbv5NnwC+BFzTp7oMUpF2uAu4\nM6/boJqbk4dRkbYYAf4E+CRwPuls7znSWGwkRdria6RhnAlgA/Ak8MfAif5Va6gVzs1+hnyRh6Lm\nrrM2l0VTpC0gXWy9lzQmv9Dp2lJVpB0+QjpdhzT2ei3pFP6RvteuXkXa4k3SEM3/5emnpGCLFvJF\n2uJjwNfz/CTwBvAHpDOcs83Q5GaRh6I6LyBcTcyLjVCsLS4mjUteXWvN6tXtg3IPEvfumiJtcQXw\nFOnC5PmkC3Eb66tibYq0xbeBHXl+FekgMF5T/QZhPcUuvA48N+d7KOqWPM34Tl7+MunUNKrF2uI+\n0sWkvXl6oe4K1qTI38SMyCEPxdriK6Q7bPYBt9dau3ot1hYrgUdJObGPdFE6qt2kaw+/IZ3NfYmz\nNzclSZIkSZIkSZIkSZIkSZIkSZIknS1+C4VDs2Ak2Y8TAAAAAElFTkSuQmCC\n", "text": [ "" ] } ], "prompt_number": 14 } ], "metadata": {} } ] }