{ "metadata": { "name": "", "signature": "sha256:ebbbd1a5dfe53666784a8a535b0c80474c3776c6d3382adde75214c32e804978" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import print_function, division\n", "import numpy as np\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import vcfnp\n", "vcfnp.__version__" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "'2.0.0'" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# VCF file name\n", "filename = 'fixture/sample.vcf'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# load data from fixed fields (including INFO)\n", "v = vcfnp.variants(filename, cache=True).view(np.recarray)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "[vcfnp] 2015-01-23 11:22:59.232507 :: caching is enabled\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "[vcfnp] 2015-01-23 11:22:59.232909 :: cache file available\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "[vcfnp] 2015-01-23 11:22:59.233095 :: loading from cache file fixture/sample.vcf.vcfnp_cache/variants.npy\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "v.dtype" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "dtype([('CHROM', 'S12'), ('POS', '" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# load data from sample columns\n", "c = vcfnp.calldata_2d(filename, cache=True).view(np.recarray)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "[vcfnp] 2015-01-23 11:22:59.494181 :: caching 
is enabled\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "[vcfnp] 2015-01-23 11:22:59.494571 :: cache file available\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "[vcfnp] 2015-01-23 11:22:59.494840 :: loading from cache file fixture/sample.vcf.vcfnp_cache/calldata_2d.npy\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "c.dtype" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "dtype([('is_called', '?'), ('is_phased', '?'), ('genotype', 'i1', (2,)), ('DP', '" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "# count phased, variant and missing genotype calls\n", "count_phased = np.count_nonzero(c.is_phased)\n", "count_variant = np.count_nonzero(np.any(c.genotype > 0, axis=2))\n", "count_missing = np.count_nonzero(~c.is_called)\n", "print('calls (phased, variant, missing): %s (%s, %s, %s)'\n", " % (c.flatten().size, count_phased, count_variant, count_missing))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "calls (phased, variant, missing): 27 (14, 12, 2)\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "# plot a histogram of genotype quality\n", "fig = plt.figure(2)\n", "ax = fig.add_subplot(111)\n", "ax.hist(c.GQ.flatten())\n", "ax.set_title('GQ histogram')\n", "ax.set_xlabel('GQ');" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAXEAAAEZCAYAAABhIBWTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFMdJREFUeJzt3XuUrXV93/H3B45IuAgyNkCB9KAN1hhiwARBSRgupicW\nL6uNBkwIkJY21oi1CRHsMpyspOZi6qVW2tUUWMQorqKGcFZIFC/boFwUPNwOINE45CDhwCigwSro\n+faP/Qxuxjlz2bNn9vzOeb/W2ovn+T3P83u+s8/wmWf/nstOVSFJatNu4y5AkjQ8Q1ySGmaIS1LD\nDHFJapghLkkNM8QlqWGGuJqR5Kwk186z/OokZ6xmTdK4GeJaEUlOS3Jjkn9Msi3JDUleN2udFyf5\nZJJvJHkkyVVJnjfsPqvqZVX1vkXUtj3Js4fdj7SWGOIauSS/AbwL+EPgwKo6EPg14CVJ9ujWOQ74\nKPDnwMHA4cCtwGeTHL4aZa5Ip8nuK9GvtCOGuEYqyX7A7wCvq6qPVNVjAFV1S1X9clU93q36R8Bl\nVfWeqnqsqh6uqrcCNwAbF9jH25N8PcnfJdkw0N5L8m+76X+e5NPdEf5DSS7v2v+mW/3WJN9M8uqu\n/Zwkf5vka0n+IsnBA/3+XJIvdn29t+t3Zj9nJflsknckmQYuTPLs7hPGdLfvP+vel5n+ppL8ZpLb\nuhouTnJgkr9K8miSa5Lsv4x/Bu1CDHGN2nHA04G/2NEKSfbq1rtijsX/Fzhlnv5fBNwNTND/Q3Dx\nwLLqXgC/C/x1Ve0PHAK8B6CqfrZb/hNVtW9VXZHkJOBtwKvpfyq4F/hgV+uzujrfDBwAfLGrffB5\nFccAXwZ+uOsnwH/t+noecBhP/cNUwL8GTgaeC5wK/BVwftfHbsC587wH0pMMcY3as4Dpqto+05Dk\nuiQPJ/lWkuPph+FuwD/Msf0DXR87cm9VXVz9h/78KXBwkh+eY73HgfVJDqmqx6vqunn6/CXg4u7T\nwuPABcBxSf4Z8DLgjqq6sqq2V9V/72ocdH9Vvbdb/u2q+nJVfaKqnqiqaeCdwAmztnlPVT1UVfcD\n1wLXV9WtVfUd+kNMR81Tr/QkQ1yj9jXgWUme/N2qqhdX1TO7ZbsBDwPb6R+pznYwsG2e/p8M0Kr6\nVje5zxzr/Rb9I+LPJbkjydnz9Dlz9D3T72NdrYd0y+6btf7s+a2DM93QyAeT3JfkUeB99D85DBr8\nGf/frPlv7+Bnkn6AIa5Rux74DvCqHa3QheT1wGvmWPwa+ic8l6WqtlXVv6+qQ4D/AFw0zxUp9wPr\nZ2aS7E0/dO+j/2nh0IFlGZyf2d2s+bcB3wN+vKr2A85g4f/XVuREq3Z+hrhGqqoeoX9i86Ik/ybJ\nvkl2S/KTwN4Dq54PnJnkDd06z0zye8BPAb+33DqSvDrJTNg+Qj9oZ4Z4tgHPGVj9cuDsJC9I8nT6\nIXxDVf09cDVwZJJXJlkHvB44aIHd7wM8BnwjySHAecv9eaQdMcQ1clX1duA/0x/SeKB7/a9u/vpu\nnc8C/5L+Cb776Q9fvBZ4cVXdO0e38NQTl4Ntc/kp4IYk36R/kvXcqprqlm0ELuvG6X+hqj4BvBX4\ncFfL4cBpXZ3T9E94/hEwTf9E5U30P23sqKbfAY4GHgU2df0u9OD+mjXtg/61KJnvSyGSXAL8K+DB\nqjpyoP0NwH+k/5HxL6vqzStdqHZuSY4E/gY4q6p2eGXLuHVj/VuB11bVp8ddj7TQkfilwIbBhiQn\nAq+gf4nWjwN/vEK1aRdSVbcDLwd+dK3dMNNdJ75/N9Tylq75hnHWJM1YN9/Cqro2yfpZza8Dfr+q\nnujWeWhlStOupqo+A3xm3HXM4TjgA8AewBbgVd2lgNLYzTucAtCF+KaZ4ZQkm+mPMW6gfynUb1bV\nTStbpiRpLvMeic+zzTOr6tgkP03/DjsfJiRJYzBMiN8HfASgq
j7fPRFuoqq+NrhSEs+uS9IQqmrR\n9w0Mc4nhlcBJAEmOAPaYHeADhTT7uvDCC8deg/WPv45dsf6Wa98Z6l+qeY/Euye/nQBMJNkK/DZw\nCXBJktvpP5/iV5a8V0nSSCx0dcrpO1jkt6dI0hrgHZs7MDk5Oe4SlsX6x6vl+luuHdqvf6kWvMRw\n6I6TWqm+JWlnlYRa4RObkqQ1whCXpIYZ4pLUMENckhpmiEtSwwxxSWqYIS5JDRvmAViLtuee+65k\n9z/g0EN/hC99acuq7lOSxmlFQ/w737l/Jbuf5V4eeeTUVdyfJI3fioY4rOaR+D6ruC9JWhscE5ek\nhhniktQwQ1ySGmaIS1LDDHFJapghLkkNM8QlqWHzhniSS5Js674Uefay30iyPckBK1eeJGk+Cx2J\nXwpsmN2Y5DDgpcC9K1GUJGlx5g3xqroWeHiORe8AfmtFKpIkLdqSx8STvBK4r6puW4F6JElLsKRn\npyTZC3gL/aGUJ5tHWpEkadGW+gCs5wDrgVuTABwK3JzkmKp68AdX3zgwPdm9JEkzer0evV5v6O1T\nVfOvkKwHNlXVkXMs+wrwwqr6+hzLCubve7SmmJiYZHp6ahX3KUmjlYSqWvQIx0KXGF4OXAcckWRr\nkrNnrbKaKS1JmmXe4ZSqOn2B5c8ebTmSpKXwjk1JapghLkkNM8QlqWGGuCQ1zBCXpIYZ4pLUMENc\nkhpmiEtSwwxxSWqYIS5JDTPEJalhhrgkNcwQl6SGGeKS1DBDXJIaZohLUsMMcUlqmCEuSQ0zxCWp\nYQuGeJJLkmxLcvtA29uT3JXk1iQfSbLfypYpSZrLYo7ELwU2zGr7GPD8qnoBcA9wwagLkyQtbMEQ\nr6prgYdntV1TVdu72RuBQ1egNknSAkYxJv6rwNUj6EeStETrlrNxkv8CPF5VH5h7jY0D05PdS5I0\no9fr0ev1ht4+VbXwSsl6YFNVHTnQdhZwDnByVX17jm0KFu57dKaYmJhkenpqFfcpSaOVhKrKYtcf\n6kg8yQbgPOCEuQJckrQ6FnOJ4eXAdcBzk2xN8qvAe4B9gGuSbE5y0QrXKUmaw4JH4lV1+hzNl6xA\nLZKkJfKOTUlqmCEuSQ0zxCWpYYa4JDXMEJekhhniktQwQ1ySGmaIS1LDDHFJapghLkkNM8QlqWGG\nuCQ1zBCXpIYZ4pLUMENckhpmiEtSwwxxSWqYIS5JDTPEJalh84Z4kkuSbEty+0DbAUmuSXJPko8l\n2X/ly5QkzWWhI/FLgQ2z2s4HrqmqI4BPdPOSpDGYN8Sr6lrg4VnNrwAu66YvA161AnVJkhZhmDHx\nA6tqWze9DThwhPVIkpZg3XI2rqpKUjteY+PA9GT3kiTN6PV69Hq9obdP1TwZDCRZD2yqqiO7+buB\nyap6IMnBwKeq6l/MsV3B/H2P1hQTE5NMT0+t4j4labSSUFVZ7PrDDKdcBZzZTZ8JXDlEH5KkEVjo\nEsPLgeuA5ybZmuRs4A+Alya5Bzipm5ckjcG8Y+JVdfoOFp2yArVIkpbIOzYlqWGGuCQ1zBCXpIYZ\n4pLUMENckhpmiEtSwwxxSWqYIS5JDTPEJalhhrgkNcwQl6SGGeKS1DBDXJIaZohLUsMMcUlqmCEu\nSQ0zxCWpYYa4JDXMEJekhg0d4kkuSLIlye1JPpDk6aMsTJK0sKFCPMl64Bzg6Ko6EtgdOG10ZUmS\nFmPeb7ufxzeAJ4C9knwP2Av46siqkiQtylBH4lX1deC/AX8P3A88UlUfH2VhkqSFDXUknuQ5wH8C\n1gOPAlck+aWqev9T19w4MD3ZvSRJM3q9Hr1eb+jtU1VL3yj5ReClVfXvuvkzgGOr6vUD6xQsve/h\nTTExMcn09NQq7lOSRisJVZXFrj/s1Sl3A8cm+aEkAU4B7hyyL0nSkIYdE78V+FPgJuC2rvl/j6oo\nSdLiDDWcsqiOHU6RpCVbr
eEUSdIaYIhLUsMMcUlqmCEuSQ0zxCWpYYa4JDXMEJekhhniktQwQ1yS\nGmaIS1LDDHFJapghLkkNM8QlqWGGuCQ1zBCXpIYZ4pLUMENckhpmiEtSwwxxSWrY0CGeZP8kH0py\nV5I7kxw7ysIkSQtbt4xt3w1cXVW/kGQdsPeIapIkLdJQIZ5kP+BnqupMgKr6LvDoKAuTJC1s2OGU\nw4GHklya5AtJ/iTJXqMsTJK0sGGHU9YBRwO/XlWfT/Iu4Hzgt5+62saB6cnuJUma0ev16PV6Q2+f\nqlr6RslBwPVVdXg3fzxwflWdOrBOwdL7Ht4UExOTTE9PreI+JWm0klBVWez6Qw2nVNUDwNYkR3RN\npwBbhulLkjS85Vyd8gbg/Un2AL4MnD2akiRJizV0iFfVrcBPj7AWSdISecemJDXMEJekhhniktQw\nQ1ySGmaIS1LDDHFJapghLkkNM8QlqWGGuCQ1zBCXpIYZ4pLUMENckhpmiEtSwwxxSWqYIS5JDTPE\nJalhhrgkNcwQl6SGGeKS1LBlhXiS3ZNsTrJpVAVJkhZvuUfibwTuBGoEtUiSlmjoEE9yKPAy4P8A\nGVlFkqRFW86R+DuB84DtI6pFkrRE64bZKMmpwINVtTnJ5I7X3DgwPdm9JM2WjO/DbJWjoePU6/Xo\n9XpDb59h/gGTvA04A/gusCfwDODDVfUrA+vU6g6VTzExMcn09NQq7lMajX6IjyNMY4ivMUmoqkX/\nVR9qOKWq3lJVh1XV4cBpwCcHA1yStDpGdZ24f8olaQyGGhMfVFWfBj49glokSUvkHZuS1DBDXJIa\nZohLUsMMcUlqmCEuSQ0zxCWpYYa4JDXMEJekhhniktQwQ1ySGmaIS1LDDHFJapghLkkNM8QlqWGG\nuCQ1zBCXpIYZ4pLUMENckho2dIgnOSzJp5JsSXJHknNHWZgkaWHL+Y7NJ4A3VdUtSfYBbk5yTVXd\nNaLaJEkLGPpIvKoeqKpbuul/BO4C/umoCpMkLWwkY+JJ1gNHATeOoj9J0uIsO8S7oZQPAW/sjsgl\nSatkOWPiJHka8GHgz6rqyh9cY+PA9GT3kta2JOMuYVWN4+etqlXf51rV6/Xo9XpDb59h38z0/+Uv\nA75WVW+aY3nBav5DTTExMcn09NQq7lM7o/6v9mqHzDj2Oa79xhCfRxKqatF/WZcznPIS4JeBE5Ns\n7l4bltGfJGmJhh5OqarP4M1CkjRWhrAkNcwQl6SGGeKS1DBDXJIaZohLUsMMcUlqmCEuSQ0zxCWp\nYYa4JDXMEJekhi3rKYbq25WeerfaDy7ald7bXYlPThwdQ3xkdoWn3o0rUHeVn3NX4r/pqDicIkkN\nM8QlqWGGuCQ1zBCXpIYZ4pLUMENckhpmiEtSw4YO8SQbktyd5G+TvHmURUmSFmeoEE+yO/A/gA3A\njwGnJ3neKAsbt16vN+4Slqk37gKWxfd/nHrjLmCZeuMuYFUNeyR+DPClqpqqqieADwKvHF1Z42eI\njJfv/zj1xl3AMvXGXcCqGjbEDwG2Dszf17VJklbRsM9OWdSDD57xjJcP2f3SVX2L3Xdftd1J0pqQ\nYZ7sleRYYGNVbejmLwC2V9UfDqyzcz4yTJJWWFUt+oldw4b4OuCLwMnA/cDngNOr6q4ldyZJGtpQ\nwylV9d0kvw58FNgduNgAl6TVN9SRuCRpbViROzZbuxEoySVJtiW5faDtgCTXJLknyceS7D/OGnck\nyWFJPpVkS5I7kpzbtbdS/55JbkxyS5I7k/x+195E/TOS7J5kc5JN3Xwz9SeZSnJbV//nuraW6t8/\nyYeS3NX9Dr2ohfqTPLd7z2dejyY5d6m1jzzEG70R6FL69Q46H7imqo4APtHNr0VPAG+qqucDxwKv\n797vJuqvqm8DJ1bVTwI/AZyY5HgaqX/AG4E7+f6VWy3VX8BkVR1VVcd0bS3V/27g6qp6Hv3
fobtp\noP6q+mL3nh8FvBD4FvDnLLX2qhrpCzgO+OuB+fOB80e9nxWoez1w+8D83cCB3fRBwN3jrnGRP8eV\nwCkt1g/sBXweeH5L9QOHAh8HTgQ2tfb7A3wFmJjV1kT9wH7A383R3kT9A/X+HHDtMLWvxHDKznIj\n0IFVta2b3gYcOM5iFiPJeuAo4EYaqj/JbkluoV/np6pqCw3VD7wTOA/YPtDWUv0FfDzJTUnO6dpa\nqf9w4KEklyb5QpI/SbI37dQ/4zTg8m56SbWvRIjvdGdKq/8ncU3/XEn2AT4MvLGqvjm4bK3XX1Xb\nqz+ccijws0lOnLV8zdaf5FTgwarazA6+jXct1995SfU/0v88/eG4nxlcuMbrXwccDVxUVUcDjzFr\n+GGN10+SPYCXA1fMXraY2lcixL8KHDYwfxj9o/HWbEtyEECSg4EHx1zPDiV5Gv0Af19VXdk1N1P/\njKp6FPhL+uODrdT/YuAVSb5C/0jqpCTvo536qap/6P77EP0x2WNop/77gPuq6vPd/Ifoh/oDjdQP\n/T+eN3fvPyzxvV+JEL8J+NEk67u/ML8IXLUC+1lpVwFndtNn0h9rXnOSBLgYuLOq3jWwqJX6nzVz\n9j3JDwEvBTbTSP1V9ZaqOqyqDqf/kfiTVXUGjdSfZK8k+3bTe9Mfm72dRuqvqgeArUmO6JpOAbYA\nm2ig/s7pfH8oBZb63q/QIP3P07+j80vABeM+abCIei+nf+fp4/TH888GDqB/suoe4GPA/uOucwe1\nH09/LPYW+uG3mf6VNq3UfyTwha7+24DzuvYm6p/1s5wAXNVS/fTHlG/pXnfM/P/aSv1drS+gf0L8\nVuAj9E92NlE/sDcwDew70Lak2r3ZR5Ia5tezSVLDDHFJapghLkkNM8QlqWGGuCQ1zBCXpIYZ4tpp\nJTkwyQeSfLl7Lsh1SV7VLTu+ewTuXd3rnIX6k9aiYb8oWVrTujtZrwQurarXdm0/Qv8W+YOA9wOv\nrKpbkkwAH03y1aq6enxVS0vnzT7aKSU5GXhrVU3Osex3ge9V1caBtpOAC6vqhFUrUhoBh1O0s3o+\n/dv55/JjwM2z2m7utpGa4nCKdlZP+YiZ5L3AS/j+83Hmemzs01ahLmmkPBLXzmoL/UeSAlBVrwdO\nBv5Jt+yFs9Z/If0HcElNMcS1U6qqTwJ7Jvm1gea96R+hXwScleQFAN2JzT8A3rHqhUrL5IlN7bS6\nq1DeCbwIeIj+t778z6q6ovv2mj+m/9jS9cCbq+rd46pVGpYhrl1ektcBrwFeUbO+2k5a6wxxSWqY\nY+KS1DBDXJIaZohLUsMMcUlqmCEuSQ0zxCWpYYa4JDXs/wNkHSPtNYgH6gAAAABJRU5ErkJggg==\n", "text": [ "" ] } ], "prompt_number": 14 } ], "metadata": {} } ] }