{ "metadata": { "name": "using-screed" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import screed\n", "# Later cells call hist() bare; import it here so a fresh kernel does not\n", "# need to be started in pylab mode for that name to exist.\n", "from matplotlib.pyplot import hist" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "filename = '../data/25k.fq.gz'\n", "\n", "# Preview only the first 10 records so the output stays short.\n", "n = 0\n", "for record in screed.open(filename):\n", "    n += 1\n", "    if n > 10:\n", "        break\n", "    # Single-argument print(...) prints the same text on Python 2 and 3.\n", "    print(record.name)\n", "    print(record.sequence)\n", "    print(record.accuracy)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "895:1:1:1246:14654/1\n", "CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT\n", "][aaX__aa[`ZUZ[NONNFNNNNNO_____^RQ_\n", "895:1:1:1248:9583/1\n", "ACTGGGCGTAGACGGTGTCCTCATCGGCACCAGC\n", "\\UJUWSSV[JQQWNP]]SZ]ZWU^]ZX][^TXR`\n", "895:1:1:1252:19493/1\n", "CCGGCGTGGTTGGTGAGGTCACTGAGCTTCATGTC\n", "OOOKONNNNN__`R]O[TGTRSY[IUZ]]]__X__\n", "895:1:1:1255:18861/1\n", "ACGACGAGAAGCTGATCTACCGCGCCGAGCGCATC\n", "bb_^^bb_XTbbbbbb_bab]KTITZQTZ]ZYT^^\n", "895:1:1:1264:15854/1\n", "CGTGATGATGTGCTTGCGGCCGGAGGGCCTGTTGCCCAGG\n", "````W__ZZ`R__ZSOJNNNQWSQZ\\^X\\W_______J__\n", "895:1:1:1265:2265/1\n", "TATAGCGTGAGGCGATGACGTTGCTGTCCTTGGCGCGGC\n", "`bbbbbbbbbbbbbUbbbbb]X_QXUQ[QWOPTTZ__X_\n", "895:1:1:1273:17782/1\n", "TCGAAAATCACGTGGGAGATGCACTATCACGCGGTCGGTGAGGAAGTGACCGACCACACCGAGCTCGC\n", "OOOOONNNOO^^X^`]TVa]WWLSSKNNONI]S[Q]SWXZX_\\\\^ZF___Y\\V]]_____]KTRV^X^\n", "895:1:1:1274:18571/1\n", "AGCAGGCGAACAGCACGCCGAACAATACTGTCTTCATGCCAAACTGCTGAAAGCCGAGCACAGCAGAAATGCTCCAGAG\n", "VMJPVUIPHRUaZRZ\\___XUZ[X[]]`]X]]]]]_______________XHXX]]S]]Z``X]`]VX]]_____ZZMa\n", "895:1:1:1276:16426/1\n", "GCAGGTATTGGTTTGCCTAACGTTGAAATTGCAGGATTAACG\n", "[[Z[[Z`\\`\\^[^`^U]ZZROFONOOQYPY]ZU]URQYQV``\n", "895:1:1:1283:17864/1\n", "ATTCGTCAACCCGCGGCTCGAGCTGCGCATCC\n", "`Q\\`[Z______`_\\Q`\\[MTYWTW_T_TTX`\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "import 
calc_gc\n", "\n", "gc_list = []\n", "for record in screed.open(filename):\n", " seq = record.sequence\n", " gc = calc_gc.calc_gc(seq)\n", " gc_list.append(gc)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "hist(gc_list, bins=100, range=(0, 1))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 4, "text": [ "(array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n", " 0, 0, 1, 1, 2, 0, 1, 1, 1, 3, 0,\n", " 5, 1, 3, 2, 3, 4, 11, 7, 11, 9, 10,\n", " 17, 38, 32, 40, 57, 65, 38, 98, 111, 136, 168,\n", " 212, 252, 329, 361, 462, 336, 766, 651, 781, 787, 808,\n", " 909, 960, 972, 1049, 687, 1246, 969, 1124, 886, 1037, 1040,\n", " 918, 892, 885, 853, 533, 581, 583, 437, 324, 416, 251,\n", " 233, 167, 94, 113, 71, 50, 28, 27, 16, 6, 8,\n", " 5, 1, 3, 3, 0, 0, 1, 0, 0, 0, 0,\n", " 0]),\n", " array([ 0. , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,\n", " 0.09, 0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,\n", " 0.18, 0.19, 0.2 , 0.21, 0.22, 0.23, 0.24, 0.25, 0.26,\n", " 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33, 0.34, 0.35,\n", " 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,\n", " 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53,\n", " 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62,\n", " 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71,\n", " 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ,\n", " 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89,\n", " 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,\n", " 0.99, 1. 
]),\n", " )" ] }, { "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD9CAYAAABUS3cAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGqZJREFUeJzt3X9slPUBx/HPNbWooSiglC310I6udyc/bOV6Raw9GbIu\nBMv8EWDDGYqJ1iUdmzFug0TKHzLERSlbj86kMxsQsuCM6JSGNrliY7g71IWNXgUVVlwWKtbpVcpE\nefZH6dFCW+6u7XPXe96vhOTp8+Pu+3zpfe7b7/N9vo/NMAxDAABLyUh2AQAA5iP8AcCCCH8AsCDC\nHwAsiPAHAAsi/AHAgoYN/8rKSuXk5Gj27NmXbfvtb3+rjIwMdXV1RdfV1tYqPz9fLpdLra2t0fXh\ncFhFRUXKy8vTunXrRrH4AIBEDBv+q1ev1r59+y5bf/LkSe3fv18zZsyIruvs7FRdXZ2am5vl8/lU\nXV0d3fbEE0/oqaeeUigUUktLiw4dOjSKpwAAiNew4V9aWqrJkydftv4Xv/iFnn322QHrAoGAysvL\nZbfbVVZWJsMw1N3dLUl6//33tXz5ck2dOlX33XefAoHAKJ4CACBecff5v/rqq8rNzdWcOXMGrA8G\ng3I6ndGfCwoKFAgE9MEHH2jatGnR9S6XSwcPHhxBkQEAI5UZz85nzpzRM888o/3790fX9c0OMdgs\nETab7bJ1w80mMdj+AIAri3emnrha/h9++KFOnDihuXPn6pZbbtHHH3+s22+/XadOnZLH41FbW1t0\n3/b2drndbs2cOVOnTp2Krm9ra1NJScmwJ8A/Q08//XTSy5Aq/6gL6oK6GP5fIuIK/9mzZ+vUqVM6\nfvy4jh8/rtzcXL377rvKyclRcXGxGhsb1dHRIb/fr4yMDGVnZ0uSHA6Hdu/erdOnT+uVV16Rx+NJ\nqLAAgNExbPivXLlSd9xxh44ePaqbbrpJf/zjHwds799Nk5OTo6qqKi1cuFCPP/64tm7dGt323HPP\n6dlnn5Xb7VZpaanmzZs3yqcBAIiHzUj0b4YxYLPZEv4TJt34/X55vd5kFyMlUBcXURcXURcXJZKd\nhD8AjHOJZCfTOwCABRH+AGBBhD8AWBDhDwAWRPgDgAUR/gBgQYQ/AFgQ4Q8AFkT4A4AFEf4AYEGE\nPwBYEOEPABZE+AOABRH+AGBBhD8AWBDhDwAWRPgDgAUR/gBgQYQ/MI5MmjRFNptNNptNkyZNSXZx\nMI7xDF9gHLHZbJL6PiN8XtCLZ/gCAGJC+AOABQ0b/pWVlcrJydHs2bOj65588kk5nU4VFRVp7dq1\n6unpiW6rra1Vfn6+XC6XWltbo+vD4bCKioqUl5endevWjcFpAADiMWz4r169Wvv27RuwbvHixTpy\n5IgOHTqkL7/8Urt27ZIkdXZ2qq6uTs3NzfL5fKquro4e88QTT+ipp55SKBRSS0uLDh06NAanAgCI\n1bDhX1paqsmTJw9Yd8899ygjI0MZGRn6/ve/r5aWFklSIBBQeXm57Ha7ysrKZBiGuru7JUnvv/++\nli9frqlTp+q+++5TIBAYo9MBAMQicyQHv/jii3rkkUckScFgUE6nM7qtoKBAgUBAM2bM0LRp06Lr\nXS6Xdu7cqZ/+9KeDvuaGDRuiy16vV16vdyRFBIC04/f75ff7R/QaCYf/xo0blZ2drQcffFCSBh1m\n1DssbaArDUfqH/4AgMtd2jCuqamJ+zUSCv+XXnpJjY2Nam5ujq7zeDxqamqK/tze3i63263s7Gyd\nOnUqur6trU0lJSWJvC0AYJTEPdRz37592rJli/bu3aurr746ur64uFiNjY3q6OiQ3+9XRkaGsrOz\nJUkOh0O7d+/W6dOn9corr8jj8YzeGQAA4jbsHb4rV65US0uLTp8+rZycHNXU1GjTpk366
quvNGVK\n763l8+fPV11dnSRp69at2rZtm7KyslRfX6/S0lJJva39VatW6bPPPtOKFSu0adOmwQvDHb7AsLjD\nF4NJJDuZ3gEYRwh/DIbpHQAAMSH8AcCCCH8AsCDCHwAsiPAHAAsi/AHAggh/ALAgwh9IA/2f7cvz\nfRELbvICxpGhbvIauH7gNqQ/bvICAMSE8AcACyL8AcCCCH8AsCDCHwAsiPAHAAsi/AHAggh/ALAg\nwh8ALIjwBwALIvyBFNN/nh7m6MFYIfyBFBOJfKbeeXqMC8sjM3DStywmgIMkJnYDUs5Qk7cNt224\nid0uPYYJ4NLPqE/sVllZqZycHM2ePTu6LhKJqKKiQna7XcuWLVN3d3d0W21trfLz8+VyudTa2hpd\nHw6HVVRUpLy8PK1bty6uAgIARt+w4b969Wrt27dvwDqfzye73a5jx44pNzdX27dvlyR1dnaqrq5O\nzc3N8vl8qq6ujh7zxBNP6KmnnlIoFFJLS4sOHTo0BqcCAIjVsOFfWlqqyZMnD1gXDAa1Zs0aTZgw\nQZWVlQoEApKkQCCg8vJy2e12lZWVyTCM6F8F77//vpYvX66pU6fqvvvuix4DAEiOuC/4hkIhORwO\nSZLD4VAwGJTUG/5OpzO6X0FBgQKBgD744ANNmzYtut7lcungwYMjLTcAYAQy4z0gnosKvRea4jt+\nw4YN0WWv1yuv1xvz+wGAFfj9fvn9/hG9Rtzh73a7FQ6HVVhYqHA4LLfbLUnyeDxqamqK7tfe3i63\n263s7GydOnUqur6trU0lJSVDvn7/8AcAXO7ShnFNTU3crxF3t4/H41FDQ4N6enrU0NAQDfLi4mI1\nNjaqo6NDfr9fGRkZys7OltTbPbR7926dPn1ar7zyijweT9wFBQCMImMYK1asML71rW8ZWVlZRm5u\nrtHQ0GB88cUXxr333mvcdNNNRkVFhRGJRKL7v/DCC8Z3vvMdw+l0GgcOHIiuP3LkiFFYWGjcfPPN\nxi9/+csh3+8KxQEsQZIhGRf+KaZtA9cPt23o/TB+JfL/yE1eQIrhJi/Ea9Rv8gJgjv5TMMQuM4Fj\ngF60/IEUMHTrfPiWfywtelr+6Y+WPzBODJxsjZY7zBf3UE8AI3dx5s4+fAHAXLT8AcCCCH8AsCDC\nHwAsiPAHAAsi/AFLy+SRjhbFaB/A0r5W36ijSIQRR1ZCyx8wSWJ38QJjg/AHTHJxbD931CL5CH8A\nsCDCH8AFXPy1EsIfSEuJzPjZd/HXUCQSGTD3EF8G6YfRPkBaujiKJ7F5g/ofz0igdETLHwAsiPAH\nAAsi/AHAggh/ADFgJFC64YIvgBgwDUS6oeUPABZE+AOABSUc/i+++KLuuOMO3X777Vq7dq0kKRKJ\nqKKiQna7XcuWLVN3d3d0/9raWuXn58vlcqm1tXXkJQcAJCyh8O/q6tIzzzyj/fv3KxQK6ejRo2ps\nbJTP55PdbtexY8eUm5ur7du3S5I6OztVV1en5uZm+Xw+VVdXj+pJAADik1D4X3PNNTIMQ59//rl6\nenp05swZXX/99QoGg1qzZo0mTJigyspKBQIBSVIgEFB5ebnsdrvKyspkGL23jwMAkiPh8Pf5fLr5\n5ps1ffp0LViwQB6PR6FQSA6HQ5LkcDgUDAYl9Ya/0+mMHl9QUBDdBqQz5vBHqkpoqOcnn3yiqqoq\ntbW1afLkyXrwwQf1+uuvyzBin6d8qA/Dhg0boster1derzeRIgIp4eIc/lJic+wAl/P7/fL7/SN6\njYTCPxgMqqSkRDNnzpQkPfjgg3rrrbfkdrsVDodVWFiocDgst9stSfJ4PGpqaooe397eHt12qf7h\nDwC43KUN45qamrhfI6Fun9LSUh06dEhdXV363//+pzfffFOLFy+Wx+NRQ0ODenp61NDQoJKSEklS\ncXGxGhsb1dHRIb/fr4yMDGVnZyfy1kBK69/NQ1cPU
llCLf9JkyZp/fr1+uEPf6gzZ86ovLxcd999\nt4qLi7Vq1SoVFBSoqKhImzdvliTl5OSoqqpKCxcuVFZWlurr60f1JIBkmjRpyoXunT79uz/5AkBq\nshnxdNSPMZvNFtd1AyAV9Lbw+/frXxr+g22Lfb/+n4mh32v033e4/ficppZEspM7fAHAggh/IAaX\n9uUzsyXGO7p9gBgM7G6R+nd90O2DZKPbBwAQE8IfACyI8AcACyL8AcCCCH8AsCDCHwAsiPAHAAsi\n/AHAggh/ALAgwh8ALCihKZ0BmCWT5wJgTBD+QEr7WjwfAGOBbh8AsCDCHwAsiPAHAAsi/AHAgrjg\nCySEUTgY32j5AwnpG4XDE60wPhH+AGBBhD8AWFDC4f/ll1/q4Ycf1ne/+125XC4FAgFFIhFVVFTI\nbrdr2bJl6u7uju5fW1ur/Px8uVwutba2jkrhAQCJSTj8n376adntdh0+fFiHDx+Ww+GQz+eT3W7X\nsWPHlJubq+3bt0uSOjs7VVdXp+bmZvl8PlVXV4/aCQAA4pdw+Dc1NenXv/61rr76amVmZuq6665T\nMBjUmjVrNGHCBFVWVioQCEiSAoGAysvLZbfbVVZWJsMwFIlERu0kAADxSSj8P/74Y509e1ZVVVXy\neDzavHmzenp6FAqF5HA4JEkOh0PBYFBSb/g7nc7o8QUFBdFtAADzJTTO/+zZszp69Ki2bNmiRYsW\n6dFHH9Vf/vIXGUbsw96GGiO9YcOG6LLX65XX602kiACQtvx+v/x+/4hew2bEk9j9OJ1OhcNhSdKb\nb76pP/3pT/rqq6+0fv16FRYW6p133tGmTZu0Z88evfbaa2pqatLWrVslSbfddpveeustZWdnDyyM\nzRbXFwhglt7GyqWzaxpXWE7f/ficppZEsjPhPv/8/HwFAgGdP39ef/vb37Ro0SJ5PB41NDSop6dH\nDQ0NKikpkSQVFxersbFRHR0d8vv9ysjIuCz4AQDmSXh6h+eee04/+clPdPbsWS1atEgrVqzQ+fPn\ntWrVKhUUFKioqEibN2+WJOXk5KiqqkoLFy5UVlaW6uvrR+0EACTXpElTFIl8duGnqySdkyRlZ0/W\nF190Ja1cGF7C3T5jgW4fpCq6fYbu9hlYN3QPJYOp3T4AgPGL8AcACyL8AcCCCH8AsCDCHwAsiPAH\nAAviMY4A4sQjLNMB4Q8gTn2PsOzDF8F4RLcPAFgQ4Q8AFkT4A4AFEf4AYEGEPwBYEOEPABZE+AOA\nBRH+AGBBhD8AWBDhD/QzadIU2Ww22Ww2TZo0JdnFAcYM0zsA/fQ+i9a4sMy0BUhftPwBwIIIfwCw\nIMIfwBjJjF4/4RpK6qHPH8AYGTj1M9dQUkvCLf9vvvlGhYWFWrp0qSQpEomooqJCdrtdy5YtU3d3\nd3Tf2tpa5efny+VyqbW1deSlBgCMSMLhv3XrVrlcrugTfXw+n+x2u44dO6bc3Fxt375dktTZ2am6\nujo1NzfL5/Opurp6dEoOYNxiSG3yJRT+H3/8sd544w098sgjMozeP+uCwaDWrFmjCRMmqLKyUoFA\nQJIUCARUXl4uu92usrIyGYahSCQyemcAYNy5OKTWuLAMsyUU/j//+c+1ZcsWZWRcPDwUCsnhcEiS\nHA6HgsGgpN7wdzqd0f0KCgqi2wAAyRH3Bd/XX39d06ZNU2Fhofx+f3R9318AsRju4c8bNmyILnu9\nXnm93niLCABpze/3D8jfRMQd/m+//bb27t2rN954Q2fPntUXX3yhhx56SG63W+FwWIWFhQqHw3K7\n3ZIkj8ejpqam6PHt7e3RbYPpH/4AgMtd2jCuqamJ+zXi7vZ55plndPLkSR0/fly7d+/WwoUL9ec/\n/1kej0cNDQ3q6elRQ0ODSkpKJEnFxcVqbGxUR0eH/H6/MjIylJ2dHXdBAQCjZ8Tj/Pu6cKqqqrRq\n1SoVFBSoqKhIm
zdvliTl5OSoqqpKCxcuVFZWlurr60f6lgCAEbIZ8XTWjzGbzRbXtQNgtPU2Zvp+\nBy/+Pg5c37ut/36DL1thv/heY/D65HM/UolkJ9M7AIAFEf6wtP43Gw03Cg1IN8ztA0vrP39/L74A\nYA20/AHAggh/ALAgun2AIWVyHQBpi5Y/LKf/Rd7h9c1HzzBEpB9a/rCcgRd5admbh7+kUgktfwAm\n4S+pVEL4A4AFEf4AYEGEPwBYEOEPABZE+AOABRH+AGBBhD8AWBDhDwAWRPgDgAUR/gBgQYQ/0h5P\n60p1mQP+fyZNmpLsAlkCE7sh7fG0rlTXN+dPr0iE/x8z0PIHAAsi/AGkmEy6gEyQUPifPHlSd999\nt2699VZ5vV7t2rVLkhSJRFRRUSG73a5ly5apu7s7ekxtba3y8/PlcrnU2to6OqUHkIYuTv3c22WH\nsZBQ+F911VV6/vnndeTIEe3Zs0fr169XJBKRz+eT3W7XsWPHlJubq+3bt0uSOjs7VVdXp+bmZvl8\nPlVXV4/qSQAA4pNQ+E+fPl233XabJOmGG27QrbfeqlAopGAwqDVr1mjChAmqrKxUIBCQJAUCAZWX\nl8tut6usrEyGYSgSiYzeWQCXiP1RjYA1jbjP/4MPPtCRI0dUXFysUCgkh8MhSXI4HAoGg5J6w9/p\ndEaPKSgoiG4DxsLFET48NQoYzIiGekYiES1fvlzPP/+8Jk6cKMOI/YM2VItsw4YN0WWv1yuv1zuS\nIgJA2vH7/fL7/SN6jYTD/9y5c7r//vv10EMPqaKiQpLkdrsVDodVWFiocDgst9stSfJ4PGpqaooe\n297eHt12qf7hDwC43KUN45qamrhfI6FuH8MwtGbNGs2aNUtr166Nrvd4PGpoaFBPT48aGhpUUlIi\nSSouLlZjY6M6Ojrk9/uVkZGh7OzsRN4aADAKbEY8fTUXtLa26q677tKcOXOi3TebNm3SggULtGrV\nKr333nsqKirSjh07NHHiREnS1q1btW3bNmVlZam+vl6lpaWXF8Zmi6vrCBhK7+9l3+9S/+VLf451\nG/sl673IhCtLJDsTCv+xQvhjtBD+hL+VJJKd3OGLtMHwznTE3b5jhYndkDYGTuDGF0B6uDjpGxO+\njS5a/gBgQYQ/AFgQ4Q8AFkT4Y9ziCV1A4rjgi3GLJ3QBiaPlDwAWRPgDgAUR/hhXuJELffr/LnAD\nWPyY3gHjSmLTNqTOVAXpt5+5ZeqfD5f+Llg5O5jeAWmJ1j4w+hjtg5THtA3olUkDYBQR/gDGiYvz\n/PTii2Ak6PYBAAsi/AGkAaZ+jhfhj5TAsD2MTF+XkHHhGhGuhD5/pIT+F3WZtx0Ye7T8kTQM4QSS\nh/BH0lxs7Vv35hyMhcwBs73SjTg4wh+mYQpmmONi/z/XAIZG+GPUDXXxdmBLf7jWfiZfEsAYI/wx\n6vqHfCQSSSDEB7bcgJHp35jIojvoAtPC/8CBA3I6ncrPz9e2bdvMettxy+/3J7sIo6R/kAPJ0P93\n8JzoDuplWvj/7Gc/U319vZqamvT73/9ep0+fNuutx6XxFP705WN8svaFYVPC//PPP5ck3XXXXZox\nY4YWL16sQCBgxlsjBkP10V8a6v3/ZO6/HHtfPpBKrH1h2JTwD4VCcjgc0Z9dLpcOHjxoxlsnxWjc\nrbpp0+aYAjmRsB64bBuyj/7yUD83xDKQDga/NnDp52Xoz9zQ+6WilLvDN926DSKRz0Z8TsO9xvCv\nfy6G5T5Dvcal620xLI/n/VKxTKm+XyqWKdH9+gz9eRn6MxfrfqnBlPB3u9168sknoz8fOXJE5eXl\nl+1n5SfxAICZTOn2ue666yT1jvg5ceKE9u/fL4/HY8ZbAwAGYVq3zwsvvKBHH31U586dU3V1tW64\n4Qaz3hoAcAnThnqWlZUpHA6roaFBPp9v2PH+v/rVr5SXl6fbb79d7e3tZhXRdFe
692Hnzp2aO3eu\n5s6dqx/96Ec6evRoEkppjljvAwmFQsrMzNRf//pXE0tnrljqIhQKye12y+l0yuv1mltAE12pLnp6\nevTwww+rsLBQZWVlevXVV5NQyrFXWVmpnJwczZ49e8h94s5Nw2S33Xab0dLSYpw4ccIoKCgwPvnk\nkwHbA4GAsWDBAuPTTz81du3aZSxZssTsIprmSnXx9ttvG//9738NwzCMl156yVi1alUyimmKK9WF\nYRjG119/bdx9993GkiVLjD179iShlOa4Ul2cP3/emDVrlrF//37DMIxB6ypdXKkufD6fUVVVZRiG\nYZw4ccLIy8szzp8/n4yijqkDBw4Y7777rjFr1qxBtyeSm6ZO7xDLeP9AIKAHHnhAU6ZM0cqVKxUO\nh80somliqYv58+dHr5csWbJELS0tppfTDLHeB7Jt2zY98MADuvHGG80uomliqYtDhw5pzpw5WrRo\nkSSlbRdqLHVx3XXXKRKJ6Ny5c+rq6tK1116b0iNsElVaWqrJkycPuT2R3DQ1/GMZ7x8MBuVyuaI/\n33jjjfrwww9NK6NZ4r334Q9/+IOWLl1qRtFMF0td/Pvf/9arr76qqqoqSek3JLhPLHXR2Ngom82m\n0tJSLV26VI2NjWYX0xSx1MXKlSv1zTff6IYbbtCdd96pnTt3ml3MlJBIbqbcOH/DMC4b8pmuH/RY\nNTU1aceOHXr77beTXZSkWbt2rX7zm9/IZrMN+jtiJWfPntXf//53NTU16cyZM7rnnnv0z3/+U9dc\nc02yi2a63/3ud8rMzNR//vMf/eMf/9CSJUv0r3/9SxkZ1pqzMpHcNLWG3G73gAsRR44cUUlJyYB9\nPB6P2traoj9/8sknysvLM62MZomlLiTp8OHDeuyxx7R3715df/31ZhbRNLHUxTvvvKMVK1bolltu\n0csvv6zHH39ce/fuNbuoYy6Wupg/f75+8IMfaPr06crLy9O8efN04MABs4s65mKpiwMHDujHP/6x\nrr32Wnk8Hn37299O64ERQ0kkN00N/1jG+3s8Hr388sv69NNPtWvXLjmdTjOLaJpY6qKjo0P333+/\ndu7cqZkzZyajmKaIpS4++ugjHT9+XMePH9cDDzwgn8+ne++9NxnFHVOx1EVJSYlaWlp05swZdXV1\n6b333tOCBQuSUdwxFUtdfO9739Nrr72m8+fP66OPPlJXV9eAriKrSCQ3Te/2GWy8f319vSTp0Ucf\nVXFxse68807NmzdPU6ZM0Y4dO8wuommuVBcbN25UV1eXHnvsMUnSVVddpWAwmMwij5kr1YWVXKku\npk6dqtWrV2vevHm68cYbtXHjRk2cODHJpR4bV6qLFStWqK2tLVoXW7duTXKJx8bKlSvV0tKi06dP\n66abblJNTY3OneudTiLR3LQZVu48BQCLstZVEQCAJMIfACyJ8AcACyL8AcCCCH8AsCDCHwAs6P/A\nBvWmJaIcaAAAAABJRU5ErkJggg==\n", "text": [ "" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "import calc_gc\n", "\n", "gc_list = []\n", "for record in screed.open(filename):\n", " seq = record.sequence[:10]\n", " gc = calc_gc.calc_gc(seq)\n", " gc_list.append(gc)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "hist(gc_list, bins=10, range=(0, 1))" ], "language": "python", 
"metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 6, "text": [ "(array([ 12, 82, 1581, 0, 2846, 10712, 5313, 0, 3114, 1340]),\n", " array([ 0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),\n", " )" ] }, { "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD9CAYAAAC4EtBTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGVNJREFUeJzt3XFslPUdx/HPNUCVcCpUW7bA4Spd706ctHK9Mu16MtQu\nBMtQU9jqDO0i1D8aFmLQsURKjG7TRIGlB5p0ZqukWXSNzCmNJbliQ3p33Vzc2ivQqasaLdYyOKAo\nk2d/dD7jJwjtXXt34vuVXNL73f2e+z6/3j2fe57nnudxWJZlCQCA/8lKdwEAgMxCMAAADAQDAMBA\nMAAADAQDAMBAMAAADBcMhpqaGuXl5emGG26w2x588EF5PB4VFxdr/fr1GhkZsR/btm2bCgoK5PV6\n1dnZabfHYjEVFxcrPz9fmzZtsttPnz6t2tpazZs3T4FAQB9++OFEzhsAIAEXDIY1a9Zoz549Rtvt\nt9+unp4edXd368SJE9q1a5ck6fDhw2psbNTevXsVDAZVX19v99mwYYM2btyoaDSqjo4OdXd3S5Ja\nW1t19OhRxWIxVVRU6NFHH53o+QMAjNMFg6GsrEwzZ8402m677TZlZWUpKytLd9xxhzo6OiRJ4XBY\nFRUVcrlcKi8vl2VZOn78uCTpwIEDqqqqUk5OjlauXKlwOGz3qa6u1vTp03X//ffb7QCA9ElqH8Oz\nzz6r5cuXS5IikYg8Ho/9WGFhocLhsPr7+5Wbm2u3e71edXV12X28Xq8kadasWRocHNQnn3ySTEkA\ngCRNSbTjli1b5HQ6dc8990iSzndmDYfDcU6bZVl2u2VZRr8vOzvH+aYDALi4RM56lNAaw3PPPae2\ntjY1NzfbbX6/X729vfb9vr4++Xw+zZ8/X4ODg3Z7b2+v/H7/OX2Gh4eVl5en7Ozs877m5yHydb89\n8sgjaa8hU26MBWPBWFz4lqhxB8OePXv0xBNPaPfu3brsssvs9pKSErW1tWlgYEChUEhZWVlyOp2S\nJLfbrZaWFg0NDam1tdUIhubmZp04cULPPPOMSktLE54RAMDEuOCmpNWrV6ujo0NDQ0OaO3euGhoa\n9Pjjj+vTTz/V0qVLJUmLFy9WY2Oj8vLyVFdXpyVLlmjatGnauXOnPZ0nn3xS1dXVevjhh7Vq1Sot\nWrRIkvTDH/5Qe/bskcfjUX5+vlpaWiZxVgEAY+GwklnfSBGHw5HUatGlJBQKKRAIpLuMjMBY/B9j\n8X+Mxf8luuwkGADgEpXospNTYgAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAA\nDAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMAwJd0FAF91V1wxS/H4kbTW\n4HTO1LFjw2mtAZcOrvkMJMnhcEhK9/uTzwjOxTWfAQATgmAAABgIBgCAgWAAABgIBgCAgWAAABgI\nBgCA4YLBUFNTo7y8PN1www12WzweV2VlpVwul1asWKHjx4/bj23btk0FBQXyer3q7Oy022OxmIqL\ni5Wfn69NmzbZ7adPn1Ztba3mzZunQCCgDz/8cCLnDQCQgAsGw5o1a7Rnzx6jLRgMyuVy6dChQ5oz\nZ4527NghSTp8+LAaGxu1d+9eBYNB1dfX2302bNigjRs3KhqNqqOjQ93d3ZKk1tZWHT16VLFYTBUV\nFXr00Ucnev
4AAON0wWAoKyvTzJkzjbZIJKLa2lplZ2erpqZG4XBYkhQOh1VRUSGXy6Xy8nJZlmWv\nTRw4cEBVVVXKycnRypUrjT7V1dWaPn267r//frsdAJA+497HEI1G5Xa7JUlut1uRSETS6ELe4/HY\nzyssLFQ4HFZ/f79yc3Ptdq/Xq66uLkmjIeP1eiVJs2bN0uDgoD755JPE5wYAkLRxn0RvPOfdGD2H\nzLn9P2+3LMuY3oWmvXnzZvvvQCCgQCAw5joA4OsgFAopFAolPZ1xB4PP51MsFlNRUZFisZh8Pp8k\nye/3q7293X5eX1+ffD6fnE6nBgcH7fbe3l75/X67T29vrwoLCzU8PKy8vDxlZ2ef93XPDgYAwLm+\n+KW5oaEhoemMe1OS3+9XU1OTRkZG1NTUpNLSUklSSUmJ2traNDAwoFAopKysLDmdTkmjm5xaWlo0\nNDSk1tZWIxiam5t14sQJPfPMM/a0AABpZF3AqlWrrG984xvWtGnTrDlz5lhNTU3WsWPHrDvvvNOa\nO3euVVlZacXjcfv5Tz/9tHXddddZHo/H2rdvn93e09NjFRUVWddee6310EMP2e2ffvqptWbNGmvu\n3LlWeXm59cEHH5y3jouUCaSVJEuy0nzjM4JzJfq+4HoMQJK4HgMyFddjAABMCIIBAGAgGAAABoIB\nAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAg\nGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGAgGAAABoIBAGBIOBie\nffZZffe739VNN92k9evXS5Li8bgqKyvlcrm0YsUKHT9+3H7+tm3bVFBQIK/Xq87OTrs9FoupuLhY\n+fn52rRpUxKzAgCYCAkFw/DwsB577DG99tprikajOnjwoNra2hQMBuVyuXTo0CHNmTNHO3bskCQd\nPnxYjY2N2rt3r4LBoOrr6+1pbdiwQRs3blQ0GlVHR4e6u7snZs4AAAlJKBguv/xyWZalo0ePamRk\nRCdPntRVV12lSCSi2tpaZWdnq6amRuFwWJIUDodVUVEhl8ul8vJyWZZlr00cOHBAVVVVysnJ0cqV\nK+0+AID0SDgYgsGgrr32Ws2ePVs333yz/H6/otGo3G63JMntdisSiUgaDQaPx2P3LywsVDgcVn9/\nv3Jzc+12r9errq6uZOYHAJCkKYl0+uijj1RXV6fe3l7NnDlT99xzj15++WVZljXmaTgcjnPaLtR/\n8+bN9t+BQECBQGA8JQPAJS8UCikUCiU9nYSCIRKJqLS0VPPnz5ck3XPPPXr99dfl8/kUi8VUVFSk\nWCwmn88nSfL7/Wpvb7f79/X1yefzyel0anBw0G7v7e1VaWnpeV/z7GAAAJzri1+aGxoaEppOQpuS\nysrK1N3dreHhYX3yySd69dVXdfvtt8vv96upqUkjIyNqamqyF/IlJSVqa2vTwMCAQqGQsrKy5HQ6\nJY1ucmppadHQ0JBaW1vl9/sTmhEAwMRwWOPZ/nOW5557Tr/97W918uRJVVRUqKGhQSdOnFB1dbXe\neOMNFRcXq7m5WTNmzJAkbd26Vdu3b9e0adO0c+dOlZWVSRpdS6iurtaRI0e0atUqPf744+cW6XCM\nazMVkEqjm0XT/f7kM4JzJbrsTDgYUolgQCYjGJCpEl12cuQzAMBAMAAADAQDAMBAMAAADAQDAMBA\nMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAA\nDAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAQDAMBAMAAADAkHw4kTJ3Tffffp\n29/+trxer8LhsOLxuCorK+VyubRixQodP37cfv62bdtUUFAgr9erzs5Ouz0W
i6m4uFj5+fnatGlT\ncnMDAEhawsHwyCOPyOVy6c0339Sbb74pt9utYDAol8ulQ4cOac6cOdqxY4ck6fDhw2psbNTevXsV\nDAZVX19vT2fDhg3auHGjotGoOjo61N3dnfxcAQASlnAwtLe36+c//7kuu+wyTZkyRVdeeaUikYhq\na2uVnZ2tmpoahcNhSVI4HFZFRYVcLpfKy8tlWZa9NnHgwAFVVVUpJydHK1eutPsAANIjoWB47733\ndOrUKdXV1cnv9+tXv/qVRkZGFI1G5Xa7JUlut1uRSETSaDB4PB67f2FhocLhsPr7+5Wbm2u3e71e\ndXV1JTM/AIAkTUmk06lTp3Tw4EE98cQTWrp0qdauXas//OEPsixrzNNwOBzntF2o/+bNm+2/A4GA\nAoHAeEoGgEteKBRSKBRKejoJBcP8+fNVWFio5cuXS5JWr16t3/3ud/L5fIrFYioqKlIsFpPP55Mk\n+f1+tbe32/37+vrk8/nkdDo1ODhot/f29qq0tPS8r3l2MAAAzvXFL80NDQ0JTSfhfQwFBQUKh8M6\nc+aM/vznP2vp0qXy+/1qamrSyMiImpqa7IV8SUmJ2traNDAwoFAopKysLDmdTkmjm5xaWlo0NDSk\n1tZW+f3+REsCAEwAhzWe7T9nOXjwoH7yk5/o1KlTWrp0qRoaGnTmzBlVV1frjTfeUHFxsZqbmzVj\nxgxJ0tatW7V9+3ZNmzZNO3fuVFlZmaTRtYTq6modOXJEq1at0uOPP35ukQ7HuDZTAak0ulk03e9P\nPiM4V6LLzoSDIZUIBmQyggGZKtFlJ0c+AwAMBAMAwEAwAAAMBAMAwEAwAAAMBAMAwEAwAAAMBAMA\nwJDQuZIAZJop5z0xZao5nTN17NhwustAkjjyGUhSphz5nP4aJI7Aziwc+QwAmBAEAwDAQDAAAAwE\nAwDAQDAAAAwEAwDAQDAAAAwEAwDAQDAAAAwEAwDAQDAAAAwEAwDAQDAAAAwEAwDAQDAAAAwEAwDA\nQDAAAAwEAwDAkHAwfPbZZyoqKtLy5cslSfF4XJWVlXK5XFqxYoWOHz9uP3fbtm0qKCiQ1+tVZ2en\n3R6LxVRcXKz8/Hxt2rQpidkAAEyUhINh69at8nq99gXIg8GgXC6XDh06pDlz5mjHjh2SpMOHD6ux\nsVF79+5VMBhUfX29PY0NGzZo48aNikaj6ujoUHd3d5KzAwBIVkLB8N577+mVV17RT3/6U/tC05FI\nRLW1tcrOzlZNTY3C4bAkKRwOq6KiQi6XS+Xl5bIsy16bOHDggKqqqpSTk6OVK1fafQAA6ZNQMPzs\nZz/TE088oays/3ePRqNyu92SJLfbrUgkImk0GDwej/28wsJChcNh9ff3Kzc31273er3q6upKaCYA\nABNnyng7vPzyy8rNzVVRUZFCoZDd/vmaw1h8vvnpbBfrv3nzZvvvQCCgQCAw5tcDgK+DUChkLJcT\nNe5g2L9/v3bv3q1XXnlFp06d0rFjx3TvvffK5/MpFoupqKhIsVhMPp9PkuT3+9Xe3m737+vrk8/n\nk9Pp1ODgoN3e29ur0tLSL33ds4MBAHCuL35pbmhoSGg6496U9Nhjj+ndd9/V22+/rZaWFi1ZskS/\n//3v5ff71dTUpJGRETU1NdkL+ZKSErW1tWlgYEChUEhZWVlyOp2SRjc5tbS0aGhoSK2trfL7/QnN\nBABg4iR9HMPnm4Xq6uo0MDCgwsJCvf/++1q3bp0kKS8vT3V1dVqyZIkeeOABbd261e775JNP6te/\n/rV8Pp/Kysq0aNGiZMsBACTJYY1n50CaOByOce3DAFJp9MtRut+fmVCDJPFZzSSJLjs58hkAYCAY\nAAAGggEAYCAYAAAGggEAYCAYAAAGggEAYCAYAAAGggEAYCAYAAAGggEAYCAYAACGcV+PAQAy2RVX\nzFI8fiTdZcjpnKljx4bTXUZCOLsqkCTO
rnq29H9WM+P/IWXKWHB2VQBA0ggGAICBYAAAGNj5jK+s\nTNnJCFxq2PmMr6xM2smY/joyoQYpU3a4Mhb/q4CdzwCAiUAwAAAMBAMAwEAwAAAMBAMAwEAwAAAM\nBAMAwEAwAAAMBAMAwEAwAAAMCQXDu+++q1tvvVXXX3+9AoGAdu3aJUmKx+OqrKyUy+XSihUrdPz4\ncbvPtm3bVFBQIK/Xq87OTrs9FoupuLhY+fn52rRpU5KzAwBIVkLBMHXqVD311FPq6enRCy+8oF/8\n4heKx+MKBoNyuVw6dOiQ5syZox07dkiSDh8+rMbGRu3du1fBYFD19fX2tDZs2KCNGzcqGo2qo6ND\n3d3dEzNnAICEJBQMs2fP1sKFCyVJV199ta6//npFo1FFIhHV1tYqOztbNTU1CofDkqRwOKyKigq5\nXC6Vl5fLsix7beLAgQOqqqpSTk6OVq5cafcBAKRH0vsY+vv71dPTo5KSEkWjUbndbkmS2+1WJBKR\nNBoMHo/H7lNYWKhwOKz+/n7l5uba7V6vV11dXcmWBABIQlLXY4jH46qqqtJTTz2lGTNmjOv0rqOn\nxjVdqP/mzZvtvwOBgAKBwHhKBYBLXigUUigUSno6CQfD6dOnddddd+nee+9VZWWlJMnn8ykWi6mo\nqEixWEw+n0+S5Pf71d7ebvft6+uTz+eT0+nU4OCg3d7b26vS0tLzvt7ZwQAAONcXvzQ3NDQkNJ2E\nNiVZlqXa2lotWLBA69evt9v9fr+ampo0MjKipqYmeyFfUlKitrY2DQwMKBQKKSsrS06nU9LoJqeW\nlhYNDQ2ptbVVfr8/oRkBAEwQKwGvv/665XA4rBtvvNFauHChtXDhQuvVV1+1jh07Zt15553W3Llz\nrcrKSisej9t9nn76aeu6666zPB6PtW/fPru9p6fHKioqsq699lrroYceOu/rJVgmLnGSLMnKgFsm\n1JEJNWTGZ5WxMMciEVzaE19ZmXQJx/TXkQk1SJlyOUvG4n8VcGlPAMBEIBgAAIakfq6K1LviilmK\nx4+kuww5nTN17NhwussAMAnYx/AVw/bTsyrIoLFIfx2ZUIPE++JsmTEW7GMAACSNYAAAGAgGAICB\nnc8AMCmmnPeccF8FBAMATIr/KP07wRMLJjYlAQAMBAMAwEAwAAAMBAMAwEAwAAAMBAMAwEAwAAAM\nBAMAwEAwAAAMBAMAwEAwAAAMBAMAwEAwAAAMBAMAwEAwAAAMBAMAwEAwAAAMBAMAwEAwAAAMGREM\n+/btk8fjUUFBgbZv357ucgDga81hWVa6r1atoqIibd26VfPmzdMdd9yhzs5OXX311fbjDodDGVBm\nRnA4HEr/BcYlKf3/k0wai/TXkQk1SLwvzpYJdST2/0j7GsPRo0clSd/73vc0b9483X777QqHw2mu\n6vyuuGKWHA5HWm8AMNnSHgzRaFRut9u+7/V61dXVlcaKvlw8fkSj3wDSeQOAyTUl3QWMVeZ8W86E\nOjKhhkz5n2RCDVJm1JEJNfC+MGVKHeOT9mDw+Xx68MEH7fs9PT2qqKgwnpPubZYA8HWS9k1JV155\npaTRXya98847eu211+T3+9NcFQB8faV9jUGSnn76aa1du1anT59WfX298YskAEBqpX2NQZLKy8u1\nc+dOTZ06Vdu3b//SYxkefvhh5efn66abblJfX1+Kq0ydix3X8fzzz+vGG2/UjTfeqB/96Ec6ePBg\nGqpMjbEe4xKNRjVlyhT98Y9/TGF1qTWWsYhGo/L5fPJ4PAoEAqktMIUuNhYjIyO67777VFRUpPLy\ncr300ktpqHLy1dTUKC8vTzfccMOXPieh5aaVIRYuXGh1dHRY77zzjlVYWGh99NFHxuPhcNi6+eab\nrY8//tjatWuXtWzZsjRVOvkuNhb79++3/v3vf1uWZVnPPfecVV1dnY4yU+JiY2FZlvWf//zHuvXW\nW61l
y5ZZL7zwQhqqTI2LjcWZM2esBQsWWK+99pplWdZ5x+pScbGxCAaDVl1dnWVZlvXOO+9Y+fn5\n1pkzZ9JR6qTat2+f9de//tVasGDBeR9PdLmZEWsMYzmWIRwO6+6779asWbO0evVqxWKxdJQ66cYy\nFosXL7b3zSxbtkwdHR0przMVxnqMy/bt23X33XfrmmuuSXWJKTOWseju7tZ3vvMdLV26VJIu2U2y\nYxmLK6+8UvF4XKdPn9bw8LCmT5+eIb+WmlhlZWWaOXPmlz6e6HIzI4JhLMcyRCIReb1e+/4111yj\nf/7znymrMVXGe1zHM888o+XLl6eitJQby1i8//77eumll1RXVycpU34qOfHGMhZtbW1yOBwqKyvT\n8uXL1dbWluoyU2IsY7F69Wp99tlnuvrqq3XLLbfo+eefT3WZGSHR5WZG7HweC8uyzvnZ6qW6EBir\n9vZ2NTc3a//+/ekuJW3Wr1+vX/7yl/ZpU774Hvk6OXXqlP72t7+pvb1dJ0+e1G233aZ//OMfuvzy\ny9NdWsr95je/0ZQpU/TBBx/o73//u5YtW6Z//etfysrKiO/CKZPocjMjRsnn8xk7RXp6elRaWmo8\nx+/3q7e3177/0UcfKT8/P2U1pspYxkKS3nzzTa1bt067d+/WVVddlcoSU2YsY/GXv/xFq1at0re+\n9S29+OKLeuCBB7R79+5UlzrpxjIWixcv1g9+8APNnj1b+fn5WrRokfbt25fqUifdWMZi3759+vGP\nf6zp06fL7/frm9/85iX9I40vk+hyMyOCYSzHMvj9fr344ov6+OOPtWvXLnk8nnSUOunGMhYDAwO6\n66679Pzzz2v+/PnpKDMlxjIWb731lt5++229/fbbuvvuuxUMBnXnnXemo9xJNZaxKC0tVUdHh06e\nPKnh4WG98cYbuvnmm9NR7qQay1h8//vf15/+9CedOXNGb731loaHh43NT18XiS43M2ZT0vmOZdi5\nc6ckae3atSopKdEtt9yiRYsWadasWWpubk5zxZPnYmOxZcsWDQ8Pa926dZKkqVOnKhKJpLPkSXOx\nsfg6udhY5OTkaM2aNVq0aJGuueYabdmyRTNmzEhz1ZPjYmOxatUq9fb22mOxdevWNFc8OVavXq2O\njg4NDQ1p7ty5amho0OnTpyUlt9zMiNNuAwAyR0ZsSgIAZA6CAQBgIBgAAAaCAQBgIBgAAAaCAQBg\n+C++ha7A8gy7qQAAAABJRU5ErkJggg==\n", "text": [ "" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 } ], "metadata": {} } ] }