{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Getting the necessary data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You just need to download this ~28 MB file only once" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2015-08-06 12:55:05-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/sequence_read/SRR003265.filt.fastq.gz\n", " => ‘SRR003265.filt.fastq.gz’\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/phase3/data/NA18489/sequence_read ... done.\n", "==> SIZE SRR003265.filt.fastq.gz ... 28919712\n", "==> PASV ... done. ==> RETR SRR003265.filt.fastq.gz ... done.\n", "Length: 28919712 (28M) (unauthoritative)\n", "\n", "SRR003265.filt.fast 100%[=====================>] 27.58M 2.36MB/s in 11s \n", "\n", "2015-08-06 12:55:17 (2.42 MB/s) - ‘SRR003265.filt.fastq.gz’ saved [28919712]\n", "\n" ] } ], "source": [ "!rm -f SRR003265.filt.fastq.gz 2>/dev/null\n", "!wget -nd ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/sequence_read/SRR003265.filt.fastq.gz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The recipe" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from collections import defaultdict\n", "import gzip\n", "\n", "%matplotlib inline\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "from Bio import SeqIO" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('SRR003265.31', 'SRR003265.31 3042NAAXX:3:1:1252:1819 length=51', Seq('GGGAAAAGAAAAACAAACAAACAAAAACAAAACACAGAAACAAAAAAACCA', SingleLetterAlphabet()))\n", "{'phred_quality': [40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 30, 23, 40, 32, 35, 29, 40, 16, 40, 40, 32, 35, 31, 40, 40, 39, 22, 40, 24, 20, 28, 31, 12, 31, 10, 22, 28, 13, 26, 20, 23, 23]}\n" ] } ], "source": [ "recs = SeqIO.parse(gzip.open('SRR003265.filt.fastq.gz'), 'fastq')\n", "rec = next(recs)\n", "print(rec.id, rec.description, rec.seq)\n", "print(rec.letter_annotations)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "A: 28.60 7411965\n", "C: 21.00 5444053\n", "T: 29.58 7666885\n", "G: 20.68 5359334\n", "N: 0.14 37289\n" ] } ], "source": [ "recs = SeqIO.parse(gzip.open('SRR003265.filt.fastq.gz'), 'fastq')\n", "cnt = defaultdict(int)\n", "for rec in recs:\n", " for letter in rec.seq:\n", " cnt[letter] += 1\n", "tot = sum(cnt.values())\n", "for letter, cnt in cnt.items():\n", " print('%s: %.2f %d' % (letter, 100. * cnt / tot, cnt))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(1, 51)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAFVCAYAAADLxheZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl0pHd95/t3qUqqKi0l9VKSWupFvdg/u23aeCFgG7BJ\nTIwJCWQZZi6QAOcGCNdDkjnJSTIewiwxA3OZZE48JyEZDNgenLkDHhLIgBsnxtc2Dnhr29ju9tOt\nbvWmVkulraoklUq1PPNH1SNVq7WUSrXX53UOh9ZTj6SfHkv66Ld9fy7bthEREZHq01TpBoiIiMjK\nFNIiIiJVSiEtIiJSpRTSIiIiVUohLSIiUqUU0iIiIlXKk89NxpgjQDj75ingC8ADQBp4Dbjbsizb\nGPMJ4JNAErjXsqzvGWP8wDeAIBAFPmpZ1nhRvwoREZE65Fpvn7Qxxgf8k2VZN+Rc+y7wny3LesoY\n82XgB8BPgMeAGwE/8CPgJuBfAu2WZf0HY8w/B262LOt3S/LViIiI1JF8etLXAa3GmB9k7/83wA2W\nZT2Vff1R4OeBFPCMZVkJIGGMGQQOAbcC/yl772Hgj4vYfhERkbqVz5z0LPAly7LuBH4LeHjZ61Gg\nEwiwNCS+/Hpk2TURERFZRz496ePAIIBlWSeMMRPA9TmvB4BpMkHckXO9Y4XrzrVVJZMp2+Nx59V4\nERGROuBa7YV8QvrjZIat7zbG9JEJ2seMMbdZlvUkcBfwOPAc8HljjBfwAVeTWVT2DPBe4PnsvU9d\n/imWTE3N5dEkWU0w2EEoFK10M+qennPp6RmXnp5x6eXzjIPBjlVfyyekvwp83RjjhOvHgQngK8aY\nFuAo8Eh2dfd9wNNkhtHvsSwrnl1Y9qAx5mkgDnwoj88pIiLS8NZd3V1uoVC0uhpUY/SXcXnoOZee\nnnHp6RmXXp496VWHu1XMREREpEoppEVERKqUQlpERKRKKaRFRESqlEJaRESkSimkRUREqpRCWkRE\npEoppEVERKqUQlpERKRKKaRFRESqlEJaRESkSimkRUREqpRCWkREpEoppEVERKqUQlpERKRKKaRF\npKFNRuZJptKVbobIihTSItKwRiZm+cO/+jGPPnu20k0RWZFCWkQalnV2mlTa5vWhyUo3RWRFCmkR\naVinRiIAnL4YIZXWkLdUH4W0iDSsoQuZkF5IpBkOzVa4NSKXU0iLSEOKxZNcGF8K5lPZwBapJgpp\nEWlIpy9GsYE37dsGKKSlOimkRaQhDWXno299Uy/eFjcnL4Qr3CKRyymkRaQhOT3nA/2d7O3t4OLE\nHHPzyQq3SuRSCmkRaUhDIxE621rY0uFlX18nNjB0UUPeUl0U0iLScKaicaaicfb1BXC5XOzrCwCa\nl5bqo5AWkYbjhPHeHZlwXgzpYc1LS3VRSItIw3EWjTnh3NXuZVvAy6mRCLZtV7JpIpdQSItIwzl1\nIYwLGOgNLF7b29dJdC7BeHi+cg0TWUYhLSINJZ22OX0xSu+2Vlp9nsXr+3ZoXlqqj0JaRBrKyMQs\n8wupxVB27O/PvK390lJNFNIi0lCcQzX29l0a0nt6OnA3uRbreYtUA4W0iDQUJ4T3LQvplmY3O4Pt\nnBmdIZnSiVhSHRTSItJQTo1E8Lib2Blsv+y1fX0Bkqk058ZmKtAykcsppEWkYSwkUpwfm2VPbzse\n9+W//pze9Untl5YqoZAWkYZxZjRK2rYXi5gst1jUZETz0lIdFNIi0jBWm4929GxtpdXr0TYsqRoK\naRFpGE4Pefn2K0eTy8XevgBjUzFmYolyNk1kRQppEWkYpy5EaPc3E+zyr3rPUlETzUtL5SmkRaQh\nROYWGA/Ps3dH5uSr1ThFTTTkLdVAIS0iDWFo8eSrjjXv26vyoFJFFNIi0hCWTr7qXPO+jtYWurv8\nDI1ESOtELKkwhbSINIRTefakIbP6e3Y+yejkXKmbJbImhbSI1D3bthkaidDd5aejtWXd+xf3S2vI\nWypMIS0iNenkcJjwTDyve8emYszOJy87VGM1zpC4ippIpSmkRaTmzM0n+eLDR/ji37xEPJFa9/71\n9kcvt6u7HY/bpZ60VJxCWkRqztj0HKm0zejkHN9+8tS69y/OR+fZk272NLG7p4PzYzMs5PFHgEip\nKKRFpOaMTcUW//0PL5zjjTNTa94/NBLB3eRid/flJ1+tZl9fgFTa5sxotOB2imyWQlpEak5oOhPS\nv3jLAC4XfO37x4jFkyvem0ylOTsaZWd3Oy3N7rw/hxaPSTVQSItIzQlNzwPwMwd7+IWb9zAenueb\nTwyueO+5sRmSKTvv+WjH4uIxhbRUkEJaRGqO05MOdvr4pVv3sjPYzpMvX+DVUxOX3XtqnZOvVhPs\n9NHub1YNb6kohbSI1JzQdIyu9hZamt143E385vuuxt3k4uvfP8bs/KWnVy0VMdlYSLtcLvb3BZiI\nxPPe6iVSbAppEakpyVSaicj8JSdZ7e7p4P1v38v0zAJ/8w/HL7l/aCSC3+umd1vrhj+X5qWl0hTS\nIlJTJiLz2DaXHTd519t2s3dHgB+/PsqL1hgAs/MJLk7OMdAboGmNk69W48xLn1RIS4V48rnJGNMN\nvAj8HJAGHsj+/2vA3ZZl2caYTwCfBJLAvZZlfc8Y4we+AQSBKPBRy7LGi/5ViEjDcOaju5eFtLsp\nM+z9777+PA8etrhiZxfnxmaAjc9HO5w635qXlkpZtydtjGkG/hqYBVzAnwH3WJb1zuzb7zfG9AKf\nAW4B7gS+YIxpAT4NvJK99yHgsyX5KkSkYTgru5f3pAF2bGvjV2/bz0wswYOH31gM142u7Ha0+prZ\nsa2VoYtR0mmdiCXll89w95eALwMj2bdvsCzrqey/HwXuAN4CPGNZVsKyrAgwCBwCbgUOZ+89nL1X\nRKRgoWwhk5VCGuCOm3ZidnXx0olx/vHF80D+lcZWsq8vQHwhxYWJ2YI/hkih1hzuNsZ8DAhZlvWY\nMeZfk+k5507sRIFOIACEV7keWXZtTVu2tOLx5F9wQC4XDK5/FJ9snp5z6a30jMOxzOrtqw5sZ0uH\nb8X3+/1fv4nP/OcniM4l2N7l54q92wtuw3VXdvPMqxcZiyxw/cH6+2+u7+PS28wzXm9O+uOAbYy5\nA3gz8CCZ+WVHAJgmE8S5rehY4bpzbU1TUzq/dTOCwQ5CIZUxLDU959Jb7RkPj0ZpaW4iEVsgtGy7\nlcMNfPBnD/DQYYuB3s39t+rwZjoNp4enCYW2FvxxqpG+j0svn2e8VoivGdKWZd3m/NsY8wTwW8CX\njDG3WZb1JHAX8DjwHPB5Y4wX8AFXk1lU9gzwXuD57L1PISJSINu2GZuOEezy41pntfZt1/Xh9bjZ\nv3PdAbw1+b2ZX5Ox+ZXLjoqUUl6ru3PYwO8BX8kuDDsKPJJd3X0f8DSZee57LMuKG2O+DDxojHka\niAMfKmLbRaTBzMQSzC+kLlvZvRKXy8XN1/Zu+nO2+jK/JufiK/faRUop75C2LOtdOW/evsLr9wP3\nL7sWAz5YaONERHKttbK7VFq9zUDmDGuRclMxExGpGWPTmTUr5Qxpn9eNywVzq5yyJVJKCmkRqRmV\n6Ek3uVy0ej0KaakIhbSI1IzF06+6Vt56VSp+r0fD3VIRCmkRqRmhqRguYHtn+XrSkFk8pp60VIJC\nWkRqRigcY0vAS7OnvL+6Wr0e4gspkql0WT+viEJaRGpCIplmKhInWOZeNGRqeAPE1JuWMlNIi0hN\nGA/HsCnvojFHq9fZK62QlvJSSItITVhc2b2lEj3pbEhr8ZiUmUJaRGpCpVZ2g3rSUjkKaRGpCUsh\nXf6etN+n+t1SGQppEakJTkjnU7e72NSTlkpRSItITRibjuFrcdPuby7759actFSKQlpEqp5t24Ty\nPKKyFJZ60joJS8pLIS0iVS8yl2Ahka7IUDcs7ZNWT1rKTSEtIlUvNFW5RWOgOWmpHIW0iFS9xZXd\nFdgjDZqTlspRSItI1avkHmkAb4vOlJbKUEiLSNUbq+AeaVg6U1r7pKXcFNIiUvVC0zFcLtgWqExP\nGrJnSqsnLWWmkBaRqheajrEt4MPjrtyvrDZfs+akpewU0iJS1RYSKaZnFio21O1o9XmIJ3SmtJSX\nQlpEqloonD39qtIhnd2GpTOlpZwU0iJS1Sq9stvhHLKheWkpJ4W0iFS1ShcycSwWNNG8tJSRQlpE\nqtri6VcVKmTiaFVPWipAIS0iVa2S50jnUk9aKkEhLSJVbWw6RqvXQ5uv/EdU5loqDaqTsKR8FNIi\nUrXSts14eL5iNbtztXqzJ2FpuFvKSCEtIlUrPLNAIpmu+FA36JANqQyFtIhUrWrZfgU6rlIqQyEt\nIlVrcWV3FfWkdciGlJNCWkSq1liV7JGGzAEboJ60lJdCWkSqVihcPSHtc86UVk9aykghLSJVKzQd\nw93kYmvAW+mm4MqeKa2etJSTQlpEqlZoKnNEpbupOn5Vtfo82ictZVUd3/kiIsvE4kkic4mqWNnt\naPU2qyctZaWQFpGqNDo5B0BwS2uFW7Kk1edhIZHWmdJSNgppEalKI+OzQHXskXbokA0pN4W0iFSl\n0clsSHdWfmW3wyloor3SUi4KaRGpShcnMsPdlT6iMpd60lJuCmkRqUojE5me9PYq7Elrr7SUi0Ja\nRKrS6MQs7f7mxd5rNWj16SQsKS+FtIhUnXTaZnQyVhWVxnIt9aS1V1rKQyEtIlVnKhonmUpX1cpu\nAL/mpKXMFNIiUnWWjqis1p60QlrKQyEtIlWnmo6ozKXV3VJuCmkRqTpj6kmLAAppEalCVTvc7VNI\nS3kppEWk6oSm5/G4m9jSUfkjKnN5m900uVzMxbW6W8pDIS0iVSc0HaNnq5+mJlelm3IJl8uVPa5S\nPWkpD4W0iFSVycg8M7EEu3o6Kt2UFbV6PVo4JmWjkBaRqnLifBiAqwe2VbglK/P7PDpgQ8pGIS0i\nVWVw2AnprRVuycpavR4WkmkSSZ0pLaW3blFcY4wb+ApwJWADvwXEgQeANPAacLdlWbYx5hPAJ4Ek\ncK9lWd8zxviBbwBBIAp81LKs8RJ8LSJSBwbPh/G4XRzY1cn01Fylm3MZZ4V3LJ6k2dNS4dZIvcun\nJ/0+IG1Z1tuBzwL/EfhT4B7Lst4JuID3G2N6gc8AtwB3Al8wxrQAnwZeyd77UPZjiIhcZn4hybmx\nGQZ6AzR73JVuzooW90prXlrKYN2QtizrO8Cnsm8OAFPAjZZlPZW99ihwB/AW4BnLshKWZUWAQeAQ\ncCtwOHvv4ey9IiKXGboQIW3bHNjZWemmrEp7paWc8pqTtiwrZYx5APhz4GEyvWdHFOgEAkB4leuR\nZddERC5zIjsffaC/en9NLB1Xqb3SUnp5H9RqWdbHjDE9wHNA7tE0AWCaTBDn7pnoWOG6c21VW7a0\n4qnSYa5aEQxW59aVeqPnXHxnQ7MAvPVQP1Cdz7hnWxsAnpbmqmzfRtXD11DtNvOM81k49uvATsuy\nvgDEgBTwgjHmNsuyngTuAh4nE96fN8Z4yYT41WQWlT0DvBd4PnvvU5d/liVTVbhQpJYEgx2EQtFK\nN6Pu6TkXX9q2OTY0Sc8WP4n5BejwVuUzTiVTAFwMRauyfRuh7+PSy+cZrxXi+fSkHwEeMMY8CTQD\nvwO8AXwluzDsKPBIdnX3fcDTZIbR77EsK26M+TLwoDHmaTKrwj+Ux+cUkQZzYXyWWDzJDVdsr3RT\n1uQsHNNeaSmHdUPasqwY8M9XeOn2Fe69H7h/hff/YIHtE5EGMZgtYlLNi8ZAx1VKeamYiYhUBafS\nWDUvGgMdVynlpZAWkapwcjhMq9fDju1tlW7KmpZWdyukpfQU0iJSceHZBcamY+zv76TJVV0nXy2n\nnrSUk0JaRCquVuajAVqam3A36UxpKQ+FtIhU3OBwpnxCtc9HQ+ZMab9XZ0pLeSikRaTiBofDNLlc\n7NsRqHRT8tLq05nSUh4KaRGpqEQyxZmLUXb1tONtqY1qg61enSkt5aGQFpGKOn0xSjJlc0UNDHU7\nWn06U1rKQyEtIhVVS4vGHDquUspFIS0iFVUrRUxyLR1XqRXeUloKaRGpGNu2GRwOsy3gZWvAt/47\nVIlWrwqaSHkopEWkYkanYszEEuyvoV40gN+nQzakPBTSIlIxznz0FTu7KtySjdGctJSLQlpEKqaW\nipjkWpqTVkhLaSmkRaRiBocjeJvd7Oyu7kM1lmvTcZVSJgppEamImViCC+Oz7OsL4G6qrV9FiwvH\n1JOWEqutnwwRqRunLtTe1iuHXz1pKROFtIhUxIkaLGLiWDquUvukpbQU0iJSEYPnw7iA/X01GNLq\nSUuZKKRFpOySqTRDIxH6g22LgVdLWjyZM6W1T1pKTSEtImV3bmyGhWS6JuejIXOmtI6rlHJQSItI\n2dXioRrLtXo9Wt0tJaeQFpGyOzFcuyu7HepJSzkopEWkrGzbZvD8NIG2FoJd/ko3p2CtXg+JZJpE\nMlXppkgdU0iLSFlNROaZnlngQH8nLper0s0pmN/nnISlkJbSUUiLSFkN1sFQN2ivtJSHQlpEyqoe\nFo2B9kpLeSikRaSsBs+H8bib2NPTUemmbIrTk9ZeaSklhbSIlE18IcW50AwDvR00e2r714/Tk55V\nSEsJ1fZPiYjUlNMXI9g27OsLVLopm7Y4J63hbikhhbSIlM3QSBSok5D2aeGYlJ5CWkTKxjmect+O\nOghp50xp9aSlhBTSIlI2p0YidLQ2s63TV+mmbJrTk9bCMSklhbSIlMX0TJzJSJx9OwI1XcTEoS1Y\nUg4KaREpi6ELEaA+5qMht5iJQlpKRyEtImVxasQJ6douYuJo9jThcbvUk5aSUkiLSFmcyvak9+6o\n7SImDpfLpeMqpeQU0iJScum0zdBIhN6trbRmD6aoB35fs3rSUlIKaREpuZHJOeYXUnUzH+1QT1pK\nTSEtIiW3uD+63kLa5yGZ0pnSUjoKaREpuaHF+eg6C2mt8JYSU0iLSMmduhDB425iV3d7pZtSVNor\nLaWmkBaRkoonUpwPzbKntx2Pu75+5agnLaVWXz8xIlJ1zlyMkrZt9u2oj/3RudSTllJTSItISS3u\nj+6rj/3RudSTllJTSItISdVbpbFcfvWkpcQU0iJSUkMXwrT7mwnWwclXyy0eV6kzpaVEFNIiUjLh\nmTgTkTj7+urj5KvlNCctpaaQFpGSWRrqrq/90Q5nTlpnSkupKKRFpGScRWP76qyIiUM9aSk1hbSI\nlMzSyu46DelsT3pWPWkpEYW0iJRE2rY5fTFCz9ZW2uro5KtcLc1uPO4mbcGSklFIi0hJXJyYIxZP\n1e1Qt6PV59Fwt5SMZ60XjTHNwNeAPYAXuBc4BjwApIHXgLsty7KNMZ8APgkkgXsty/qeMcYPfAMI\nAlHgo5ZljZfoaxGRKrI4H12nQ92OzHGV2oIlpbFeT/rDQMiyrHcC7wH+AvhT4J7sNRfwfmNML/AZ\n4BbgTuALxpgW4NPAK9l7HwI+W5ovQ0SqTb2v7HY4PWnbtivdFKlD64X0t4DP5dybAG6wLOup7LVH\ngTuAtwDPWJaVsCwrAgwCh4BbgcPZew9n7xWRBjBUpydfLdfq9ZBM2SSS6Uo3RerQmiFtWdasZVkz\nxpgOMoH92WXvEwU6gQAQXuV6ZNk1EalzC4kU50Mz7Ompv5OvltM2LCmlNeekAYwxu4BvA39hWdb/\nMMb8vzkvB4BpMkGcWz2/Y4XrzrU1bdnSisfjzq/1sqJgsP4OMqhGes6rOzo0QSptc3D/9k09p1p4\nxlu7WgHwtXpror3L1WKba81mnvF6C8d6gMeA/8eyrCeyl18yxtxmWdaTwF3A48BzwOeNMV7AB1xN\nZlHZM8B7geez9z7FOqam5gr8UgQy3wyhULTSzah7es5rO3L0IgA7unwFP6daecYuOzPMfX4kjK/G\nBg1q5RnXsnye8Vohvl5P+h4yQ9SfM8Y4c9O/A9yXXRh2FHgku7r7PuBpMsPh91iWFTfGfBl40Bjz\nNBAHPpTH1yQiNW6oQRaNgY6rlNJaM6Qty/odMqG83O0r3Hs/cP+yazHgg5ton4jUoFMXIpmTr7r8\nlW5KybVmC7XMxbUNS4qvxgZnRKTaRWYXGA/Ps3dHfZ58tZwO2ZBSUkiLSFE1ShETh1Z3SykppEWk\nqBqliIlDc9JSSgppESmqoQuZkgl767xmt0M9aSklhbSIFE3atjk1EqV7i592f32efLWcetJSSgpp\nESma0ck5YvFkwwx1g3rSUloKaREpmsVFYw0y1A3Q7NGZ0lI6CmkRKZqlRWONVaZfZ0pLqSikRaRo\nTo9EcDe56v7kq+VavR5iOlNaSkAhLSJFMxmJsy3go9nTWL9a2nSmtJRIY/0kiUjJpG2b6FyCjrbG\nWNWdy+/LnCm9oDOlpcgU0iJSFHPzSdK2TYe/pdJNKTttw5JSUUiLSFFE5xYA6GhtvJ700iEbCmkp\nLoW0iBRFdC6zcCrQ1rg9aR2yIcWmkBaRoljsSTdIpbFcSwVNtMJbikshLSJF4fSkO1obtyetOWkp\nNoW0iBRFxOlJN+DqbqdO+fTMwqY+zkvHQ/zuf/0R49OxYjRL6oBCWkSKYrEn3YCruwd6O4ClimuF\nevbYKJHZBU4Mh4vRLKkDCmkRKYpGXt29rdNHoK2Fk5sMV+f9Q+pJS5ZCWkSKopHnpF0uF/v7AkxF\n40xG5gv6GFPROBOROKCQliUKaREpiujcAn6vu+FKgjqc4zmdk8A26tSFpV54aLqwoJf605g/TSJS\ndNG5REP2oh0H+jMnfxUa0ieHl95PPWlxKKRFZNMW63Y34Hy0Y6A3gMsFJy8UNi89eCFMk8vFQG8H\n09E4CdUBFxTSIlIEjVy32+FtcbMr2M7pi1GSqY0FbDKV5vRIlJ3dbfQH27CBiQLntqW+KKRFZNOc\nld2BBtwjnWtffyeJZJpzYzMber+zozMkU2n293US7PIDGvKWDIW0iGxaI6/szrW/wMVjztar/f0B\nhbRcQiEtIpvWyHW7c+3PLh7b6Ly0c//+fvWk5VKeSjdARGpfxOlJN+AJWLl6tvhp83k2XNTk5HCE\ndn8z3V1+fC2ZX8vj2oYlqCctIkXQyNXGcrlcLvb1dRKanicym18d7+mZOBOReQ70d+JyuQi0NtPS\n3KSetAAKaREpgkau273c/v6NzUvnzkdDJuiDnX5C4Ri2bZemkVIzFNIismlLq7sV0vv7NjYv7RQx\n2Zd9P4Bgl59YPMWsjr5seAppEdk0pyfd3uALxwD27gjggrznpQcvhHG5YO+OjsVr27t8gBaPiUJa\nRIqg0et252r1edixvY2hkSjp9NrD1U4Rk13B9sUFY4BWeMsi/USJyKZFGrxu93L7+gLEEymGx2fX\nvG+xiEl/5yXXg50KaclQSIvIpqRtm5kGr9u93IE890sv7Y8OXHI9uDjcrW1YjU4hLSKb4tTtDqgn\nvcg5tnK9eemlld2X9qS3a7hbshTSIrIp2iN9ub5tbfha3Otuw8otYpLL2+yms61FIS0KaRHZHNXt\nvlxTk4u9OwKMTMwxO59Y8R6niMn+vgAul+uy14NdfiYjcVJpHVnZyBTSIrIpSz1phXQuZwh7aJXe\n9GpD3Y7tXT7Sts1kJF6aBkpNUEiLyKYs1u3WcPclnBOxBleZl3aKmKwW0lrhLaCQFpFN0pz0yvat\nc2zlyRWKmOTSXmkBhbSIbJIzJ63V3ZfqaG2he4ufUxcipJfV4E6m0py+eHkRk1zONqzxsLZhNTKF\ntIhsiuakV7e/r5O5eJLRyblLrp8bmyGRTLNvlaFuUE+6USSSqTVfV0iLyKaobvfqnCIly+elnbed\neeuVdHV48bhdCuk69+izZ9d8XSEtIpsSmVvA7/WobvcKnBOxls9LOyu7D6zRk25yudjW6VfVsTr3\n2tDkmq/rp0pENiWqkqCr6g+20eJpWlzJ7VgsYrLFv8p7ZgS7fMzEEsTiOrKyHsUXUqtu0XMopEWk\nYKrbvTaPu4mBHQGGx2cWg3a9Iia5NC9d304MT5Na56Q0hbSIFEx1u9e3vy+AbcPpkUyPyelVr7Vo\nzLG0V1pD3pBZcBfJLlSsB2+cmV73HoW0iBRMe6TXt6/PORErG9LZk68OrLFozLF0GpZ60pG5Bf7k\nwef5xg+sSjelaKyzUzStM5qikBaRgqlu9/qWFzU5OZwtYpJXSGd70mGF9MnhMMmUzYnza58sViti\n8SRDI9FVi9k4FNIiUrDIrPZIr2dLh5dtAS8nL4QXi5jsXKOISa7tKg26yNm2Fp5dYCpa+/XMT5wP\nk7ZtrtqzZc37FNIiUrBoTHW787G/v5PoXIIjx0MkkulV63Uv1+rz0O5vZlxz0pzM6UGfuRitYEuK\n442zUwCY3V1r3qeQFpGCOXPSWji2Nmde+rHnzwFrFzFZLtjlYzwcu6y0aCNJptIMXYziTN+eGa39\nkLbOTuFucnFFv0JaREokOquedD72L5uXzrcnDZkh72TKZroOhngLdXY0U0b1hiuCQO33pOfmk5y+\nGGVvXwBvi3vNe9efFAGMMW8FvmhZ1ruMMQeAB4A08Bpwt2VZtjHmE8AngSRwr2VZ3zPG+IFvAEEg\nCnzUsqzxQr8wEaku0ZjmpPOxu6cDj9tFMmXT7m+mZ50iJrly90pvDfhK1cSq5sxH32CCnLwQ5vTF\ntQuAVLvj56exbbhq99rz0ZBHT9oY8wfAVwBv9tKfAfdYlvVOwAW83xjTC3wGuAW4E/iCMaYF+DTw\nSvbeh4DPFvD1iEiViuos6bw0e5rY05NZxbsvjyImuZa2YTXuvHRuGdWB3gDTMwuEZ2p3ZOGNM5n5\n6KvWmY+G/Ia7B4FfIRPIADdYlvVU9t+PAncAbwGesSwrYVlWJPs+h4BbgcPZew9n7xWROuHU7fa4\nNXO2HmdeeiND3bDUkx5v4G1Yg8NhAm0tbO/0sac388fO6Roe8rbOTuNxu9as3e5Yd7jbsqxvG2MG\nci7l/gkYBTqBABBe5Xpk2bU1bdnSisez9hi9rC0YXHvfnRSHnjPMzifp6vCW7FnU0zN+79v3cWI4\nzJ237CWh6UHwAAAULUlEQVQYbM/7/UxT5g+gyHyyJM+j2p9xaCrGVDTOzW/aQXd3gEOmm+/8aIhQ\ndKHq2+7IbefM3AJnx6Ic3LuN/r71e9J5zUkvk875dwCYJhPEuU+rY4XrzrU1TU3NrXeLrCEY7CAU\nqt2/MGuFnnOmbndkZoHtAV9JnkW9PeNOn5vPffQmwN7Q12Wn0jS5XJy/GC3686iFZ/zcsVEAdm1v\nIxSKssWfia2jJ8cJhfoq2bS8LH/GLx0PYduwf8fS9bX+2ChkjOolY8xt2X/fBTwFPAe8wxjjNcZ0\nAleTWVT2DPDeZfeKSB1w6nZrPrq0PO4mtga8DVvQZPD8pcd6drV76WxvqdltWMey+6OvXqeIiWMj\nIe1s0vs94N8bY/6JTE/8EcuyRoH7gKeBx8ksLIsDXwauMcY8Dfwm8O838PlEpIqpbnf5BLv8hGcX\niCdSlW5K2Q0Oh/G4XezpXZoiGOjpYCoaJzxbe4dtZOajmxbLxa4nr+Fuy7JOk1m5jWVZJ4DbV7jn\nfuD+ZddiwAfzaomI1BSVBC2fYJefY2emGA/P07+9rdLNKZt4IsW5sRkGejtozlmrtKe3g1dOTnDm\nYoRD+7dXsIUbMxNLcG5shqt2d13y9axFSzJFpCA6XKN8GvU0rNMjEVJp+7IV8QO9mV5ora3wtrJD\n3evV686lkBaRgjh1uwMa7i653IImjWRw+NL5aIezDavWKo8550fnU8TEoZAWkYJENdxdNo0a0ieH\nVy6juqXDS2dbS8E96fFwjFQ6vf6NRfbGuSlaPE3s3ZF/7XaFtIgURNXGymexoEkDVR2zbZvB4TDb\nO31s6fBe9vqe3sziscgGF4+dHY3yR3/1E77zo9NFaml+InMLDIdmObCzk2ZP/tGrkBaRgqhud/m0\n+Tz4vW5CDVR1bHQqxkwssWqFtoECK489d2yMtG3z9E8vkE6X72Qx6+zGh7pBIS0iBVpa3a2edKm5\nXC62d/oJTcewG+TIypOrzEc7nFroG9kvbds2L1pjAIRnFjiWraFdDm8UsGgMFNIiUqBoLKG63WUU\n7PKzkEgTyU4z1LvVFo05Clk8Njw+y+hUjO7s9ME/vXZxk63M3xtnpvA2uxdHAPKlny4RKUh0LqGV\n3WXUaNuwBofDtDQ3sbN75X3hWzq8BFqbObOBYyuPWCEAPvCOvWzv9HHkeIj4QukLxIRn4oxMzHHF\nzs4N/1GrkBaRDUvbNjNzCc1Hl9HS4rHyhfT8QpLIXPmres3NJ7gQmmXfjgDuppVjyuVysac3wEQk\nvlj9bj0vWCE8bhfXHdjOzdf0Ek+kOHIiVMymr+gNZz56g0PdoJAWkQKobnf5lXsbViye5N6HXuTf\nfvU5kqnyblc6dSGCDRzYufbBiRsZ8h6dmuN8aIaDA1vxez3cfG0vAD8uw5D3YhGTDS4aA4W0iBRg\nqW63etLlsr3TGe4u/TYs27Z58PAbXBifJTy7wOmR8hYNceaj9/etHdIbWeHtDHXfeGUQgN6trezd\nEeD105OEZ+Kbae66jp2dxtfivqT+eL4U0iKyYVrZXX7bO324KE9P+vEXz/PcsTHa/Zn/vkdPT5b8\nc+ZyVnavtv3KMbCBnvSLx0M0uVy8+YqlWt+3XNuLbcOzR0c30dq1TYRjjE7OceWurlWH7teikBaR\nDVPd7vJr9rjp6vCWfK/0yeEw//OHg3S0NvOHH7oel6u8IZ1O25y8EGHHttbFPxJWs6XDS0dr87o9\n6cnIPKcuRDC7uy75nn3L1d24m1z8+PXShfSrg+NAYUPdoJAWkQKobndlBLv8TEXiJZsjjswt8Jd/\n9xpp2+ZTv3QN/cF2Bno7OHkhwvxCsiSfc7nh8VnmF1Lr9qLBWTzWwURknpnY6lvTjhzPDHXfkB3q\ndgRaW7h271bOjEYZHp/dXMNX8erJCQCu2tNV0PsrpEVkw1S3uzKCXT5sYCJc/HnpdNrmK999nalo\nnA+8Yx8HB7YCcHBgK6m0zfFz00X/nCtZb3/0ckvz0qtvxVotpIGSLyB7dXAcv9fD7u6N7Y92KKRF\nZMNUt7sygp2lW+H93WeGeP30FIf2b+MXbt6zeP1gdtvQ0dPlqc41eD6/+WjHnp7MYRWrzUtH5haw\nzk2zvz+wYg3wNx/Yjt/r5idHL5IucjW3ycg8IxOzmF1dNDW5CvoYCmkR2bCIVndXRKm2Yb16aoK/\nf+Y02zt9/Ob7DtLkWgoU50CIcoX0yeEwrV4PO7a15nX/eiu8Xz4xjm3DjVd2r/h6S7ObG003k5E4\nx88Wd7Tg5cX56MKGukEhLSIFWNqCpZ50OS2FdPGGuyfC8/y3776O2+3i0x+49rLFWs0eN1fu7OR8\naIbwBk+c2qjI7AJj0zH293de8ofCWrYGvLT7m1ftSb+Y3Xp1g7l8qNtxyzWZIe9/er14Q96R2QX+\n9qlT+Frc3HTVyn8g5EMhLSIbFo0laFXd7rJbLA1apBXeiWSKv/y715idT/KhO65c9ZxjZ3762JnS\nrvJeOlQj//OWXS4XA70djIcvXzw2N5/g6OlJdne3L9brXsmVu7vYGvDyojXGQqI4ZUL/v8dPMDuf\n5NfvupqtAV/BH0c/YSKyYdHZBfWiKyDQ1kKLp6low91f/e7rDI1EuPmaXm57c9+q9zkhXeoh740u\nGnOsVnnslZMTpNI2N67RiwZocrl428FeYvHU4hD1Zvz05AQ/OTrK3h0BfuHt+zb1sRTSIrIhadtm\nJpbUfHQFuFwutncV58jKZ14d4XvPDNEfbOM33mNwrTG8vKunnTafh6OnJ0t6VObgcBiXC/b25d+T\nhtVXeB9ZHOpef7j55mt6APjJJvdMzy8k+e8/eAN3k4uP3XUV7gIXjDkU0iKyIarbXVnBTh+xeIrZ\n+cL2Ldu2zaPPnuGr3ztGq8/D3b/8JrzN7jXfp8nl4uqBrUxG4oxNlaaYSjKVZmgkyq5gO74Wz4be\nd7EnPTqzeC2+kOLVUxP0bm2lL49FaP3Bdnb3tPPqqYlNHSryt08NMRGJ85637mZX98bLgC6nkBaR\nDYloj3RF9QUzRzd++e9eYyq6sZrTyVSah35g8a0nTrKlw8sX7347vVvzW0V9cMDZilWaeekzo1GS\nqTT71zlUYyXbAr7s4rGlnvRrQxMsJNPcaIJrjhLkuuWaXlJpm+ePjW24DZA5GOQfXzhHzxY/v3Tr\nQEEfYzmFtIhsiFZ2V9Z737aH6/Zv49iZKf7t157j5RP5zaHOzSf582+9wpMvX2B3dzuf/Y2b2LvO\nARa5Sj0vffJ8YfPRkK081tNOaHqe2fnM4rEXswVM1puPzvUzB3twueDHBazyTqbSPPDoMWzgY3dd\nRbNn7dGJfCmkRWRDnEImAfWkK6LN18xv/9ohPvzuK5lfSHHf//opDz92nERy9VXJE+F5vvDwi4vF\nSv7oIzesWNhjLd1dfrZ3+jh2Zop0uvjz0oMXMr3gQkIaYE/vUlGTRDLNK4PjbAv42NOTf6WvrnYv\n1wxs5dSFCBcn5zb0+X/w3FnOh2Z553U7MAXW6V6JQlpENkQ96cpzuVz83I07+eOP3kTf9jYeP3Ke\nP3nwBYZDM5fdOzQS4d6HXmA4NMvP3biT3/7VQxue83UcHNjKXDzJmdHiHl0ZmVvg+NkpAm0ti0dy\nblTuiVjHzkwRi6c2NNTtKKRM6MXJOb7zo9ME2lr4Z+86sKHPtx6FtIhsiE7Aqh67utv544/exO3X\n93M+NMt/ePAFnjhyfnEF9pHjIf7Tw0eIzC3wf91xBR9+95UFl6eE0sxLn7kY5U8eeJ7IXIKbr+nZ\ncKg69uRUHnvRyswpr1Srez03XBHE2+zmx69fzGslu23bPHT4DZKpNB9595W0+Yr7x2thf06JSMNS\n3e7q4m128xt3Gq7du5Wvf/8Y//2x47w2NMm+vgDffvIUzc1NfOZXDl1yjnKhrs6p4/0LNw9s+uM9\nd2yUr33vGAvJNL/8jr2875bCP+b2Th9tPg9DIxHmF1J0trVwoIBFaN4WNzdcGeTHr1/kh0eGeevB\nnjWPzHz6pyO8cXaaNx/YvqH573wppEVkQ1S3uzrdcGWQgd4O7v/fR3npxDgvnRins72F3/216xZ7\nmZvV0drC7p52TpyfJp5Irbt1azXptM23nzrF939yBl+Lm8/86pu4/orNBZxzbKWzsO326/vzLi26\n3G1v7uPZo6M8/A/H+Zt/PM6+vgBv2ruNa/dtY6C3Y3E0IjwT55s/HMTX4uYjP39lwaMAa1FIi8iG\naE66em0N+Pj9f3E9P3juLIPDYT787is3VZJyJQcHtnJ2dIbB82Gu2bt1w+8/N5/kv/396/z05ATd\nW/x85lcP0b+9rShtyw3pzfRqr9zVxb/7+Ft4aXCc105NcHI4wsnhCH/3oyHa/c1cs3crb9q3lSPH\nx5mLJ/nIzxf/OTsU0iKyIarbXd2amlzc9bY9699YoIMDWzj87FmOnp7ccEiPTMzyX//Xq1ycnOPa\nvVv51PuvKeoc7kB2hXebz4PZVfjJUwA7u9vZ2d3OL94ykK0BPsWrpyZ4bWiSZ4+O8uzRTGWyA/2d\n3H59/6bbvhqFtIhsiOp2N7Yrdnbhcbs2vF/6pyfH+evvvk4snuI9b93Nr922f1OL2Fayvy+Ax+3i\nLVf3FPWPyFZfMzdd1c1NV3Vj2zbD47O8emqCs6MzfOAdewseVs+HQlpE8pa2baKxBN15VqmS+uNt\ndnOgvxPr7DTRuYW81iYcfvYs33piEI+niU/84kFuzh4NWWxbAz4+/4m30dlWuvUSLpeLncF2dgY3\nX/IzHxqvEpG8zcYS2DZ0rLHaVerfwYGt2MAbZ6fXvfeJl4b55hODdHV4+aMP31CygHYEu/y0FLig\nrRoppEUkb9ojLZBbInTt/dJHjof4xmMWHa3N/OGHrl/1vGpZnUJaRPLmrOwOtKkn3cgGejvwez1r\nhvSJ89P89Xdfp8Xj5nf/2XV0b9EUSSEU0iKSt8WetF896UbW1OTiqt1dhKbnGZu+/OjKC+Oz3PfI\nT0mlbD79gWvVg94EhbSI5E17pMXhDHkfW9abnorG+S/ffJnZ+SQfu+sqDu3fVonm1Q2FtIjkbbEn\nXcLVs1Iblup4L23FmptP8l+++QoTkTi//M59vP3Qjko1r25oC5aI5G2xJKhWdze83q2tbOnwZo6u\ntG1SKZu/+NtXOR+a4fbr+3nfzaUrqNJI1JMWkbxpdbc4XC4XBwe2MBNLcHY0yte+f4xjZ6a4/ort\nfOTdpalj3YgU0iKSN81JSy5nXvqvvvM6zx4d5UB/J5/6pWuKXkmskSmkRSRv0TnV7ZYlB7NHV45N\nxdixrZXf/rVDdVVIpBroJ01E8hadW9CiMVnU2e5lf3+ArvYW/tUHr1vz3GUpjBaOiUheVLdbVvL7\n/+J6bNvG16I4KQU9VRHJi+p2y0q8Gt4uKQ13i0henJXdAQ13i5SNQlpE8qKV3SLlp5AWkbyobrdI\n+SmkRSQviz1pnYAlUjYKaRHJS0TVxkTKTiEtInmJqm63SNlpC5ZIgztzMcoTL53n3NgM7f4WAm3N\nBNpa6GxtIdC29L/JSBzQ6m6Rcip5SBtjmoC/BA4BceA3Lcs6WerPKyKrS6bSvPDGGD88MszgcBgA\nd5OLVNpe931VVUqkfMrRk/4A0GJZ1i3GmLcCf5q9JiJlNhmZ5/9/+QJPvTxMZC6BCzi0fxs/e0M/\n1+7bRnwhRWRugcjs0v/CswtE5hJEZhfY1d2uut0iZVSOkL4VOAxgWdazxpib1rp5JpYoQ5Pql3d2\nQc+wDGrtOZ8bm+GHR87z0vFx0rZNm8/DnT+zi3dd30/3lqUyn36vB7/XQ88Wlf4UqQblCOkAEMl5\nO2WMabIsK73Szb/950+XoUkijWl3dzs/e+NO3nqwR+UcRWpAOUI6AnTkvL1qQAP8/Z++XweRiggA\nwWDH+jfJpugZl95mnnE5JpeeAd4LYIx5G/DTMnxOERGRmleOnvTfAu82xjyTffvjZficIiIiNc9l\n2+tvuRAREZHy014KERGRKqWQFhERqVIKaRERkSqlkBYREalSOmCjDmTLrX7Rsqx3GWMOAA8AaeA1\n4G7LsrQ6sEDGmGbga8AewAvcCxxDz7iojDFu4CvAlYAN/BaZWv8PoOdcVMaYbuBF4OfIPNsH0DMu\nGmPMESCcffMU8AU28YzVk65xxpg/IPPLzZu99GfAPZZlvRNwAe+vVNvqxIeBUPZ5vgf4CzL15/WM\ni+t9QNqyrLcDnwX+I3rORZf9o/OvgVkyz1S/L4rIGOMDsCzrXdn//d9s8hkrpGvfIPArZP7jA9xg\nWdZT2X8/CtxRkVbVj28Bn8v+uwlIoGdcdJZlfQf4VPbNAWAKuFHPuei+BHwZGMm+re/l4roOaDXG\n/MAY83i2gNemnrFCusZZlvVtIJlzKbes6gzQWd4W1RfLsmYty5oxxnSQCezPcunPjZ5xkViWlTLG\nPAD8OfAw+l4uKmPMx8iMCj2WveRCz7jYZoEvWZZ1J5kpm4eXvb7hZ6yQrj+5ddE7gOlKNaReGGN2\nAT8EHrIs63+gZ1wylmV9DDDA/YAv5yU95837OJnqj08AbwYeBII5r+sZb95xssFsWdYJYALoyXl9\nw89YIV1/XjLG3Jb9913AU2vdLGszxvQAjwF/YFnWA9nLesZFZoz5dWPMv86+GQNSwAt6zsVjWdZt\nlmXdblnWu4CXgd8ADusZF9XHyaylwBjTRyaUH9vMM9bq7vrhrBb8PeArxpgW4CjwSOWaVBfuITM8\n9TljjDM3/TvAfXrGRfUI8IAx5kmgmcwzfgN9L5eSjX5fFNtXga8bY5wg/jiZ3nTBz1i1u0VERKqU\nhrtFRESqlEJaRESkSimkRUREqpRCWkREpEoppEVERKqUQlpERKRKKaRFRESq1P8BeU8HEKuanNoA\nAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "recs = SeqIO.parse(gzip.open('SRR003265.filt.fastq.gz'), 'fastq')\n", "n_cnt = defaultdict(int)\n", "for rec in recs:\n", " for i, letter in enumerate(rec.seq):\n", " pos = i + 1\n", " if letter == 'N':\n", " n_cnt[pos] += 1\n", "seq_len = max(n_cnt.keys())\n", "positions = range(1, seq_len + 1)\n", "fig, ax = plt.subplots()\n", "ax.plot(positions, [n_cnt[x] for x in positions])\n", "ax.set_xlim(1, seq_len)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0: 0.40 52229\n", "1: 1.52 200558\n", "2: 3.77 498679\n", "3: 4.04 533458\n", "4: 4.77 630923\n", "5: 4.88 645266\n", "6: 2.50 330834\n", "7: 2.51 331743\n", "8: 2.53 334410\n", "9: 2.51 332259\n", "10: 4.95 654154\n", "11: 2.41 318303\n", "12: 2.35 309918\n", "13: 2.28 301033\n", "14: 2.20 291341\n", "15: 2.12 280719\n", "16: 2.05 270431\n", "17: 1.97 259779\n", "18: 1.88 248982\n", "19: 1.81 239621\n", "20: 1.73 228923\n", "21: 1.66 219602\n", "22: 1.59 209905\n", "23: 1.52 201164\n", "24: 1.46 193259\n", "25: 1.40 184846\n", "26: 1.33 176263\n", "27: 1.28 168902\n", "28: 1.23 162226\n", "29: 1.17 154892\n", "30: 1.13 149449\n", "31: 1.08 142464\n", "32: 1.03 136763\n", "33: 0.99 131291\n", "34: 0.95 125624\n", "35: 0.91 120704\n", "36: 0.88 115701\n", "37: 0.84 111179\n", "38: 0.80 106290\n", "39: 0.78 102568\n", "40: 22.76 3007221\n" ] } ], "source": [ "recs = SeqIO.parse(gzip.open('SRR003265.filt.fastq.gz'), 'fastq')\n", "cnt_qual = defaultdict(int)\n", "for rec in recs:\n", " for i, qual in enumerate(rec.letter_annotations['phred_quality']):\n", " if i < 25:\n", " continue\n", " cnt_qual[qual] += 1\n", "tot = sum(cnt_qual.values())\n", "for qual, cnt in cnt_qual.items():\n", " print('%d: %.2f %d' % (qual, 100. * cnt / tot, cnt))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd4AAAFVCAYAAABB6Y7YAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt0nPV95/G3LMk2xrLGF9mBBNu58Uu7l6ZJmzRZiCEk\n2ZLAIdtbuk1JQ9qmabKnSZttCIRDuj0khmahKduQbZ2LUzZNDrApXps49ZaAgZZC02bbE4h/2IAl\nGhuQZY1sI1vWZfaPZyRLmhnNPMPo0Xj8fp3DYea5fPSd0c/z1TPz/OZpKxQKSJKkbCxa6AIkSTqT\n2HglScqQjVeSpAzZeCVJypCNV5KkDNl4JUnKUEctG4UQ1gL/CFwCTABbi///AfCRGKNzkiRJqkHV\nI94QQifwZ8ALQBtwC3BtjPEtxftXzGuFkiS1kFreav4c8EXgYPH+62KMDxRv7wTeNh+FSZLUiuZs\nvCGE9wP9McZdxUVtxf8mHQO656c0SZJaT7XPeK8CCiGEtwGvBb4G9Exb3wXkq/2QsbHxQkdHe91F\nSpJ0mmmrtGLOxhtj3DR5O4RwH/Ah4HMhhE0xxt3ApcC91X764OBw7aVKknSa6+npqriuprOapykA\nHwe2hBAWA48Dd9VfmiRJZ5a2LK5O1N9/1OlGkqQzRk9PV8W3mv0CDUmSMmTjlSQpQzZeSZIylPbk\nqoZ48MH72b37uwDk88lspFwuN7V+06a3cuGFF2WaUS4nq4xqj6cRGfU8ntP9efV3M3dGvY+nWTJm\n5/i7adzzqvm14Ee8Q0ODDA0NLnhGM9XSShnNVIsZzVuLGc1dixprwc9qvuGG6wG47ro/rDu/ERnN\nVEsrZTRTLWY0by1mNHctSs+zmiVJahI2XkmSMmTjlSQpQzZeSZIyZOOVJClDNl5JkjJk45UkKUM2\nXkmSMmTjlSQpQzZeSZIyZOOVJClDNl5JkjK0IJcFlCQ1Jy8tOP884pUkVeSlBRvPI15J0pQLL7xo\nxhGtlxZsPI94JUnKkI1XkqQM2XglScqQjVeSpAzZeCVJypBnNUuSmtL0OcWz5xPD6Tun2CNeSVLT\na6X5xB7xSpKa0vQ5xa00n9gjXkmSMmTjlSQpQ3O+1RxCaAe2AOcDBeBDwGJgB/BEcbMvxhjvmM8i\nJUlqFdU+470MmIgxXhBC2AR8BtgO3BxjvGXeq5MkqcXM2XhjjNtCCDuKdzcCeeD1QAghXAHsBT4W\nYzw2r1VKkk4rc00FOl2nATVK1bOaY4zjIYStwLuBXwReCmyJMX4/hHAt8Gng9+fKWLlyGR0d7WXX\ndXYmy3t6ulIV3uiMZqqllTKaqRYzmrcWM5q3lnozurqWTu175Ei+mLF6al3avEY9r82gpulEMcb3\nhxDWAY8Ab44xHiiuuhu4tdr+g4PDFdeNjo4D0N9/tJZS5i2jmWpppYxmqsWM5q3FjOatpd6M1772\nZ3jta38GODUV6OqrPz21Pm1eo57XrMz1B8KcZzWHEK4MIVxTvHscmAC+FUL46eKyS4DvNaJISZLO\nBNWOeO8CtoYQdgOdwEeBPuALIYRR4CDwwfktUZKk1lHt5KrjwHvKrLpgfsqRJKm1+QUakiRlyMYr\nSVKGvEiCJKllNeLSgtMzyuWknZfsEa8k6YzQqEsLvtgcj3glSS2rEZcWnJ7xYnImecQrSVKGbLyS\nJGXIxitJUoZsvJIkZcjGK0lShmy8kiRlyMYrSVKGbLySJGXIxitJUoZsvJIkZcjGK0lShmy8kiRl\nyMYrSVKGbLySJGXIxitJUoZsvJIkZcjGK0lShmy8kiRlyMYrSVKGbLySJGXIxitJUoZsvJIkZcjG\nK0lShmy8kiRlyMYrSVKGOqptEEJoB7YA5wMF4EPACLAVmAB+AHwkxliYvzIlSWoNtRzxXgZMxBgv\nAK4DPgvcDFwbY3wL0AZcMX8lSpLUOqo23hjjNuC3inc3AoPA62OMDxSX7QTeNi/VSZLUYmr6jDfG\nOB5C2Ar8CfB1kqPcSceA7saXJklS66n6Ge+kGOP7QwjrgEeBpdNWdQH5ufZduXIZHR3tZdd1dibL\ne3q6ai1lXjKaqZZWymimWsxo3lrMaN5azGh8Ti0nV10JvCzGuBk4DowD3wshbIox7gYuBe6dK2Nw\ncLjiutHRcQD6+4/WXvU8ZDRTLa2U0Uy1mNG8tZjRvLWYUV/OXE25liPeu4CtIYTdQCfwUWAPsCWE\nsBh4vLiNJEmqomrjjTEeB95TZtVFDa9GkqQW5xdoSJKUIRuvJEkZsvFKkpQhG68kSRmy8UqSlCEb\nryRJGbLxSpKUIRuvJEkZsvFKkpQhG68kSRmy8UqSlCEbryRJGbLxSpKUIRuvJEkZsvFKkpQhG68k\nSRmy8UqSlCEbryRJGerI4ofcfvtX6O3dX3Zdb+/TANxww/Vl12/YsJErr/zAnDmNyEiT06jHI0k6\n82TSeHt799O77wnWd68qWde9KCmh0H+oZF3f0OEyOZH13StmZbQVMw6WyThSkrF/7+Oc1724ZNsV\ni8YBGH9+X8m6Z4ZOzsh4et9jnNNd+obBskUFAE70/7Bk3cGhiZJlkqQzSyaNF2B99yquvejSVPt8\n9v6dZXJWcM1Fb6o5Y/P9D5csO697MR+/4JxUtdz80Mymfk73In7zwmWpMrY8OJxqe0lS6/EzXkmS\nMmTjlSQpQzZeSZIyZOOVJClDmZ1cpZmaZVqTU6MkKVs23gXS27ufp/Y9xtoyU5KWFqckHSszJen5\naVOSenv38+S+x1idayvZrrM9ycgferxk3UC+MCNj377HyOVKa2xvT/5/6NBjJevy+dLtJUnV2XgX\n0NruRfzyRUtS7fPN+0dm3F+da+PyiztTZWy/b3TG/VwO3npJe6qM7947nmp7SVLCz3glScqQjVeS\npAzN+VZzCKET+AqwAVgC3AD8K7ADeKK42RdjjHfMZ5GSJLWKap/xvhfojzFeGUJYCfwz8N+Am2OM\nt8x7dZIktZhqjfdO4K7i7UXAKPB6IIQQrgD2Ah+LMR6bvxIlSWodczbeGOMLACGELpIm/ClgKbAl\nxvj9EMK1wKeB35/vQtXc6p0P7CUbJZ1pqk4nCiGcB3wL+EKM8ZshhO4Y41Bx9d3ArVV/SMciRqtt\nVEFnZzs9PV1Tt09W2b6WjHonwkzmdHa2c6IBGfVqtgyAAweeYe++x1ixsnS7tuKPeG5g5nzgI4Ol\nGU88+RjLVpXOS57oTOYe/+tg6bzk4cOFqZwDB57hh08+Rtvq0oxCMWNPvjSjMFCYUUslk89Xte1O\nh4xmqsWM5q3FjMbnVDu5ah2wC/hwjPG+4uLvhBB+J8b4D8AlwPeq/ZCxsfqvQzs6Ok5//9Gp2wuV\nMT3HjJkZk7dXrIQ3vqP2qeGP7BoryVi2qo3XvDPd9PI93x6b8XjaVrfReUXp9ZbnMrrt5IxaKm5X\nfL6qbXc6ZDRTLWY0by1m1JczV1Ou9gp3LdANXB9CmHx/7mPAH4cQRoGDwAdrL1eSpDNbtc94Pwp8\ntMyqC+anHEmSWptfoCFJUoZsvJIkZcjGK0lShrw6kTSL84klzScbrzRLb+9+fvjkHtpWLytZV+hM\npsbtyfeVrhsYnpURaVtdOqWgULyK4578gTIZL26ag6TmZ+OVymhbvYzFl78m1T4nt++ZldHFksve\nkCpjZMejqbaXdPrxM15JkjJk45UkKUM2XkmSMmTjlSQpQ55cJTWpRk1JasQlGyU1jo1XalLJlKQn\nWLSqu2RdoTN5syoOPleybuLw0Iz7p3JKr9lYKF7eLA72z8oYrLtuSXOz8UpNbNGqbpZctinVPiM7\ndpfJWcmSd72j9ox7dqX6mZJq52e8kiRlyMYrSVKGbLySJGXIxitJUoZsvJIkZcizmiVV5aUSpcax\n8UqqKpkLvJdFq1aXrCt0Jtc5jIOHS9ZNHB6YlbGP9lU9pdt1LgHgicGhknXjh/tLlkmnMxuvpJos\nWrWape+6PNU+J+7ZPuN++6oell32S6kyhnfckWp7qdn5Ga8kSRmy8UqSlCEbryRJGbLxSpKUIRuv\nJEkZ8qxmSacN5xOrFdh4JZ02env3s+fJfbSvfknJuonOswDYmz9Wsm584NlZGU/RsfqlZTKWA7Av\nP1KybmzgR3XXLU1n45V0Wmlf/RK6Lrsq1T5Hd3x1xv2O1S9l5eUfS5UxuP3zqbaXKvEzXkmSMmTj\nlSQpQ3O+1RxC6AS+AmwAlgA3AD8EtgITwA+Aj8QYC/NbpiRJraHaEe97gf4Y41uAnwW+ANwMXFtc\n1gZcMb8lSpLUOqqdXHUncFfx9iJgFHhdjPGB4rKdwDuAu+enPElqPk5J0osxZ+ONMb4AEELoImnC\n1wH/fdomx4Duqj+kYxGjdRbY2dlOT0/X1O2TDcgYf5G1dHa2c6IBGfVqtozJ2wuVMT3HjJkZk7cX\nKmN6TitlHDjwDHuffIoVa9aXbNO2eAUAzw2Nlaw7cqhvxvNa7WcBNW1rRnYZjcipOp0ohHAe8C3g\nCzHGb4QQ/mja6i4gXy1jbGyiruIARkfH6e8/OnV7oTKm55gxM2Py9kJlTM8xY2bG5O2Fypie02oZ\nK9as503v/mSq/R+++8YZz2u1nwXUtK0Z2WXUmjNXU57zM94QwjpgF/CJGOPW4uLvhxA2FW9fCjxQ\nbl9JklSq2hHvtSRvJV8fQpj8wOKjwK0hhMXA45z6DFiSJFVR7TPej5I02tkumpdqJElqcX6BhiRJ\nGbLxSpKUIS+SIEkLpN75wNPnAjun+PRj45WkBdLbu58nn3yaNWXmAy9enHxFwtDQzOlPhw71lWQ8\n9eTTrFtVmrG0M8l4YbB0CtVzh/tKlikbNl5JWkBr1qzniiuuq3n7bdtuKFm2btV6rrz0U6l+7u07\nP5NqezWOn/FKkpQhG68kSRmy8UqSlCEbryRJGbLxSpKUIc9qlqQznHOBs2XjlaQzXG/vfp7e9zTn\n5krnAp/dnswFHjlUOhf4QN65wPWw8UqSODe3ng9ffE2qfW67b/M8VdPa/IxXkqQM2XglScqQjVeS\npAzZeCVJylAmJ1fl83ny+cN89v6dqfbrzR8m13mqxCTnCJvvfzhFxhFynWfNzBg6yc0PHUxVyzND\nJ8ktzqfaR5LOFI2YknSmTGvyrGZJ0ovW27uf/XufYn1X6ZSkFawAYOLZsZJ1fUdPTUlKMp5k/Ypz\nSzPazk4ynjtemnHkQN11L4RMGm8ul6N7dIxrL7o01X6fvX8nbbncrJzjXHPRm2rO2Hz/wyUZXScP\n8fELzklVy80PHaR9Wo4kaab1Xeu5+g1Xp9rnpkdvmpmx4lw++aYPp8q48eHbUm2/0PyMV5KkDNl4\nJUnKkI1XkqQM2XglScqQjVeSpAw5nagO+XyewfwEWx4cTrXfwfwEKzvzUxmH8xN88/6RVBnP5ycY\nm5YxkC+w/b7RVBkD+QJ0OCdZUmuqNB84zVzgeucU1zKf2MYrSWopyXzgfazvfsmM5SsWJV+mNPH8\nsZJ9+oaeLcno3beX9d09Jdt2L1oCQKF/5gFM31B/TfXZeOuQy+VYOnqQ37xwWar9tjw4zNLiXOBc\nLkfH6EF++aIlqTK+ef8Iy6dlMHaAyy/uTJWx/b7RZF9JalHru1/CNRf8Ws3bb37oa2Uyerjmwl+s\nPePBO2vazs94JUnKkI1XkqQM1fRWcwjhjcCNMcaLQwg/CWwH9hZXfzHGeMd8FShJUiup2nhDCJ8A\nfhWY/DT69cAtMcZb5rMwSZJaUS1vNe8Dfg5oK95/PfCuEMLuEMKXQgjL5606SZJaTNUj3hjjt0II\nG6ctegT48xjj90MI1wKfBn5/nurTPEuucQzfvXc85X7QMW0ucD6f58ggPLKr9LJflRwZhCXtMzOG\nBwrs+XbtGQDDAwXybafmNhcGCoxuO5kqozBQII9zmyXNv3qmE/1VjHGoePtu4NaqP6RjEem+4uGU\nzs52enq6pm6nezktn5GuxZTmdHa2c6IBGfVqZEZHR/3n13V0LJp6XuvNaUTG9JxGZDTL76YRGZO3\nFypjek7rZaT7A3F2xuRt6nhFakTG9JzOznZGGpLx4p6TJKM+s5+TenIa3W8qqafxfieE8Dsxxn8A\nLgG+V22HsbGJOn5MYnR0nP7+o1O3Fypjek4rZSxfvoJcDt56SboXo+/eO87y5Sumntfly1ewYiW8\n8R21D6lHdo2VZCxb3cZr3pluWO759qmc5ctX0DbWRucVi1NljG47OZXRLL+bRmRM3l6ojOk5ZszM\nmLy9UBnTc1opY/L2QmfM1XzTvMIViv//EPCFEMIocBD4YF3VSZJ0Bqqp8cYY9wNvLt7+Z+CCeaxJ\nkqSW5RdoSJKUIRuvJEkZ8iIJ0izJlKRhTm7fk2q/wsDw1JSkJOMoIzseTZlx1GlNUovziFeSpAx5\nxCvNksvleJYjLL78Nan2O7l9z9TlFpOMYZZc9oZUGSM7HvWSjVKL84hXkqQM2XglScqQjVeSpAzZ\neCVJypCNV5KkDHlWs9Sk8vk8EwN5RnbsTrXfxECefNuSWTmDjNyzK0XGIPm2zlkZA5y4Z3vKWgbI\nt/n3vTSd/yIkScqQR7xSk8rlcjxXGGHJZZtS7TeyY/eMucBJzihL3vWO2jPu2VUmY4Kl77o8VS0n\n7tnuvGRpFo94JUnKkI1XkqQM2XglScqQjVeSpAzZeCVJypBnNUvKRD6fZ3ygn+Edd6Tab3zgefJt\nhamMsYFDHN3x1VQZYwPPkmdNqn2k+eIRryRJGfKIV1ImcrkczxfaWHbZL6Xab3jHHeRy3VMZ/XTQ\nddlVqTKO7vgqudzyVPtI88UjXkmSMmTjlSQpQzZeSZIyZOOVJClDnlwl6YySTEkaYHD751PtNzbw\nr+RZPZVxZOAwD999Y6qMI4f6WFJYlWoftR6PeCVJypBHvJLOKLlcjkOcxcrLP5Zqv8HtnyeXWzKV\nMdK2nDe9+5OpMh6++0Zy3b7snuk84pUkKUM2XkmSMlTTex4hhDcCN8YYLw4hvArYCkwAPwA+EmMs\nzF+JkiS1jqpHvCGETwBbgCXFRbcA18YY3wK0AVfMX3mSJLWWWt5q3gf8HEmTBXhdjPGB4u2dwNvm\nozBJklpR1beaY4zfCiFsnLaobdrtY0B3o4uSpDNBPp9nYOAw27bdUPM+hw71Upg2Fzifz3N44DC3\n7/xMqp/93EAvq9pWTWUM5g9z232bU2UcyPeysuNURv7oYW569KZUGX1H+8gtnZZxZIAbH74tXcaR\nA+SWrE61z0Kq57z2iWm3u4B81R/SsYjROn4QQGdnOz09XVO3TzYgY/xF1tLZ2c6JBmTUq9kyJm8v\nVMb0HDNmZkzeXqiM6TmtlzH2ojIgeW2sR0fHohedMT2nlTIgeY5H6shodL+ppJ7G+/0QwqYY427g\nUuDeajuMjU1U26Si0dFx+vuPTt1eqIzpOWbMzJi8vVAZ03PMmJkxeXuhMqbnmDEzA2D58hWMj5/N\nFVdcV/P+27bdwPLl7TMy2lafzZWXfipVHbfv/AxnF3OWL19BZ+5sPnzxNakybrtvM0umZSzrWsbV\nb7g6VcZNj97EouUdpzJWdPLJN304VcaND9/GouVnNcWYn8yYq/mmabyTZy5/HNgSQlgMPA7cVVd1\nkiSdgWpqvDHG/cCbi7f3AhfNX0mSJLUuv0BDkqQM2XglScqQjVeSpAxldpmMvqHDfPb+nSXLh04c\nB6B76Vll99nQs2bea5MktY58Pk9+6BCbH/pazfv0DT1LbvGpfpPP58nn+9n84J01Z/Tm+8l1Vt8u\nk8a7YcPGiuuGep8GIFemwW7oWTPnvpIknW4yabxXXvmBiutuuOF6AK677g+zKEWS1OJyuRwrTnZw\nzQW/VvM+mx/6Gotyy2dkdI/CNRf+Yu0ZD95JWy5XdTs/45UkKUM2XkmSMmTjlSQpQzZeSZIylNl0\nokbpGzrC5vsfnrFs6ERyHYrupUvKbr+h55wZy54ZOsnNDx0s2fbISPKl2CuWlF695Jmhk2xce+r+\nwaEJtjw4XLLd0RPJV1p3LW0rWXdwaIKX95y6//zQBN+8v/QaGi8UM84uk/H80ATLp2UM5Atsv6/0\n2k/DxYxlZTIG8gVyztKSpAVxWjXeSlOLTk1JOqdk3Yaec2bsN9f0pCPFnJVrX16ybuPaU/vOlfFc\nMaOnpzTj5T21ZQwUM9aVyVheY8bUc7KmNCO3Zu59JUnz57RqvJWmJaWZktSIqU2tlCFJypaf8UqS\nlCEbryRJGbLxSpKUIRuvJEkZsvFKkpSh0+qsZs2PfB6+e+94yfITJ5L/L11afp81s+YCHxmER3aN\nlWw7klz5kSVnlW6/bvXMZcOHC+z5dmnG6PFkXnLnWaXzkocPF2DlqfuFgQKj206WbFcYTjLalpVm\nFAYKkJt+f5iT2/eUyRgtZpRe+6swMDwr4ygjOx4tkzFSzCidd14YODojQ1LrsfGe4eaaz9tbnAu8\npsxc4DWz5gLXkrNu9cycdatTZBxJMl52bmktrKxtbnPvUJKxIVcmI5c2Y30DMs6dM0NSa7LxnuEa\nNRe4WeYlt1KGpNbkZ7ySJGXIxitJUoZsvJIkZcjGK0lShmy8kiRlyLOapSY2cXiIkR27S5YXjieT\nrNvOKp1kPXF4CFaum7VskJF7dpXJOV7MOatke1b2zFo2wIl7tpfJGC5mLCtTywCsXDV1f/xwP8M7\n7ijd7vgLACw66+ySdeOH+2Fld8ly6XRl45WaVC3zmjecu6505cp1qedHbzh3ZpNlZU+KjKFixqrS\nlStX1Ta3+cjhYkaZuc0ru53brJZi45WalHOspdbkZ7ySJGXIxitJUobqfqs5hPBPwFDx7lMxxl9v\nTEmSJLWuuhpvCGEpQIzx4saWI0lSa6v3iPcngGUhhL8uZlwbY3ykcWVJUnnjA89ydMdXS5ZPDB8D\nYNGy5WX3IfeqqftjAz9icPvny2QcKWasKFk3NvAjyL1i6v6RQ308fPeNJduNDCdvBC5ZVjoF6sih\nPtZ1v2LGskOH+ti27YaSbYeLOctm5Rw61Ed398wrbD13uI/bd36mJOPY8SRj+VmltTx3uI9XrDyV\ncyDfx233bS7Z7uiJJKNraWnGgXwfL5929bK+o33c9OhNJdsNjSQZ3UtKM/qO9rHxJa8oWd7K6m28\nLwCfizF+OYTwamBnCOH8GONEuY1XrlxGR0d72aDOzmR5T09XnaU0JqOZammljGaqxYzmraXWjNe8\n5vypbWd78slDALyy56WlK3u6eeUrX0lPT1eVjIPFjJeUyXh1jRlHAXjZmrWlK9ecyqj+eJI/Atas\nmTnVa82aV9WcMVDMeNnanpJ1ubWvqunxPFfMOOec0ozzz6kt42jxOVl7Xulz8mpOPSedne2MlE2o\nrrOzfeo5qTdndkbpVb3TZVRSb+N9AtgHEGPcG0IYAM4BflRu48HB4YpBo6PJBdj7+4/WWUpjMpqp\nllbKaKZazGjeWmrN+IVf+NWK6yanJF199acrbtPff7RpMuDFPZ5GZEzmNEvG5Diox+jo+NRzUm9O\nIzPmar71ntV8FXAzQAjhXGAFcLDOLEmSzhj1HvF+GfhqCOGB4v2rKr3NLEmSTqmr8cYYx4ArG1yL\nJEktzy/QkCQpQzZeSZIy5EUSJElNo+/IAW58+LaS5UMjydnG3UtKzxbuO3KAjeteOXPZ0LNsfuhr\nszKOFTNK53r3DT3LxrWvmrWsn80P3llay4lkpk730mUl22/oyZVsP5uNV5LUFOa6/OOR3mcBWLmu\ndC7wxnWvrOkylkd6+5OMtaXztDeufVXNl8Ic6h0EINcz8zKWG3pyNV3C0sYrSWoK830pzKwvp1mJ\nn/FKkpQhG68kSRmy8UqSlCEbryRJGbLxSpKUIRuvJEkZsvFKkpQhG68kSRmy8UqSlCEbryRJGbLx\nSpKUIRuvJEkZsvFKkpQhG68kSRmy8UqSlCEbryRJGbLxSpKUIRuvJEkZsvFKkpQhG68kSRmy8UqS\nlCEbryRJGbLxSpKUIRuvJEkZsvFKkpShjnp2CiEsAm4D/j0wAvxGjPHJRhYmSVIrqveI993A4hjj\nm4FPAjc3riRJklpXvY33PwDfAYgxPgL8VMMqkiSphbUVCoXUO4UQtgD/O8b4neL9XuDlMcaJctv3\n9x+d8UMefPB+du/+LgC9vU8DsGHDy6fWb9r0Vi688KI5a2h0RrmcrDKqPZ5GZNTzeE7359XfzdwZ\n9T6eZsmYnePvpnkyZuecib+bnp6utkp59Tbem4G/jzHeWbz/TIzxvNRBkiSdYep9q/lvgXcChBB+\nBviXhlUkSVILq+usZuCvgLeHEP62eP+qBtUjSVJLq+utZkmSVB+/QEOSpAzZeCVJypCNV5KkDNl4\nJUnKUL1nNdcthNAJfAXYACwBbgAeAbYAOaANeF+McX/KjF8BXlLc5OXA38UYfyVlxl7gS0ABeILk\nO6jnPPusQk4f8D+BsWLmh2KMJ+fIaC8+/vOLP/tDJN+BvRWYAH4AfKSGWkpyYoyPFdf9MbAnxvhn\nKevoBG4Fxos1vS/G+HzKjALw58VN9pI8r+Mv4rH8CvBfil9ZmioDWAzsIPn9AnwxxnhHyox+0o3X\nchnXkW68lssYJ8V4rZDRQYqxOi1rLfCPwCUkY3QrKcZquZwY4xPFZVXH6hy1LCPFeK2Q0UHK8TrH\nY6lprFao42xSjNUKGXlSjNUyOd8D3g78ASnGa4VaFpHy9bVMxlmkHK8hhH8Chop3nwI2U8d4nZXz\ndIzxA8XlqcYrLMwR73uB/hjjW4CfBb4A3ATcHmPcBFwP/NuUGX8aY/zPMcaLgf8EDAK/W0cdnwZu\niDFeSNJE31Xn49kC/G4x50fAh6tkXAZMxBgvIHlB/izJ919fW8xtA66ooZbZOZ8JIawJIewELicZ\n8Gnr+DzJC8fFwLeAq+vI+AzwyeIyirWkfiwAIYSfBD5Qw/6VMl4H3BxjvLj4X7UXsnKPJ+14Lamj\njvFaro6047VcRtqxOvnH5p8BL5CMzVtIP1Zn5xBC6EkxVivVkna8lstIPV5nP5bisjRjtVwdryfd\nWC2X8UdURmc/AAAFx0lEQVSkG6uzc4aBQh3jtVwtf0DK19cyGV8ixXgNISwFmPYc/jp1jNcyOR+o\nZ7xOWojGeyfJAJj8+aMk3/18Xgjh/5I0su9W2LdSxti0dX8I3BpjfK6OOo4Dq0MIbUAXUPUv/wo5\nL4sx/n1x2d8Bm+YKiDFuA36reHcjycB+fYzxgeKyncDbqhVSIWc5yQv07SSDLM3+h4H3xBgnvyCl\nk+Q5SpvxczHGh0IIi0n+as7X81hCCKtJXhQ/Vu2xVMjIk7yYvSuEsDuE8KUQwvK0dZByvFbImFTT\neK2QkWq8Vsg4L81YLfoc8EXgYPH+69KO1Qo5Z1PjWK2QUSDleK2Q8fNpx+usDNKO1XIZJH8k1jxW\nK2S8mXSvrZVyJtX6+louo57X19kZqV5bgZ8AloUQ/jqEcG/xC5/qGa+zc95IfeMVWIDGG2N8IcZ4\nLITQRdK0rqP4Ah1jfDvJ27Rz/pVaJuNTMPWWxFtJ3kZIW8engD8F/gR4HFgL7K7z8TwVQnhLcZPL\nSX5B1XLGQwhbiz//68z8RR4DuqtlzMq5FfjLGOP+GOOjtexbYf/nAEIIbwY+AvxxHRmFEMJ6krd1\nVlPjN53Nek6+AXwZ+D2S5yPt45l8Xh8F/mvxCOApkn84aTM2kmK8zsq4FfhLSDdey2R8nfrG6+zH\nkmqshhDeT/IOz67iojbqGKvlctKO1QoZqcZrhYyJNOO1TEYnKcdqmQxIOVYr/G42knKsVshJNV4r\nPJ7/QYrxWqGOtK+tLwCfizH+R5KPVr4+a32tr63lcvrSjNfpFuTkqhDCeSR/ef1FjPEbwADwf4qr\nt1PD1Y5mZXyzuPgXgK/X8n59hYz/BVwYY/wxkr9iarrcYZnH8wHgmhDC3wDPAYdqyYkxvh8IJG+n\nLJ22qova/uqennM+sCWEcFat+1XYf1kI4T0kf3W+M8Y4UE9GjLEvxng+ydtGt6SsJQDbgH9XrOMb\nwI+HEGrKmZaxBdgVY/x+cdXdwE+mzPgSyZFiqvE6LWPqOSHleJ2V8SXgLuoYr7Oej98m3Vi9iuRb\n6+4DXgt8DeiZtr7WsVqSU3xhT6NcxrqU47VsRsrxOjvjX4B/Q7qxWu553ZlyrJbLGCP9WC37nJBu\nvM7O+AuSA5M043V2xlbgGtKN1ycoNtsY416SXrNu2vpax2u5nHNq2K+szBtv8Re4C/hEjHFrcfFD\nnHq/fxPJX5ppMyD58H3ni6hjGXC0ePsgyQkJ9eRcBrw3xvg2kr+Y/7pKxpUhhGuKd4+TnBjyvRDC\n5NsolwIPlN157pyJ4n81qbD/z5McOVwUazgpo0LG3SGEVxWXHSN5fGlzDgI/Xvyc6ZeBx2OMv1dH\nLd8KIfx0cdklJCePpMkYJ/ldpBmv5TImSN7iqnW8lss4ixTjtcLzkWqsxhg3xRgvKv4e/h/wPuA7\nacdquZxYw0lQNdTydlKM1zIZv0byh1HN47VMxo/FGF+dZqxWqOPuNGO1TMaVwD2kGKsVct5XfCeh\n5vFa4XfTTorxWiHjp0gxXkma980AIYRzSRrtrrTjtUzOCkrfhq9Z5mc1A9eSHNpfH0K4nuQzlfcD\nXwoh/DbJXx/VzpYrl/FOkr/in6qzDkj+wd4VQjhBckbkb9aZczPwNyGEEZK3i/6iSsZdwNYQwm6S\nt6k+CuwheQFYTPLWzF011FKSE2Mcmba+2l+qs/f/GPBVoJekYQHsjjH+QcrHcqi47CTJWza/8SIf\nS1sNj6VSLX3AF0IIoyT/cD5YR8Y/k268ljyvMcYTIYTzqX28lqvjOOnGa7mMAunG6mwF4OOkH6u1\n5KbdvoPkrcw047VczmbSj9dKah2r5er4EOnGajkfJ91YnUua8VrOb5D+9XW6NpIjzzTj9cvAV0MI\nk831KpKj1bTjtSQnzrwMbqrfsd/VLElShvwCDUmSMmTjlSQpQzZeSZIyZOOVJClDNl5JkjJk45Uk\nKUM2XkmSMvT/Ae1hl/rnHIrJAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "recs = SeqIO.parse(gzip.open('SRR003265.filt.fastq.gz'), 'fastq')\n", "qual_pos = defaultdict(list)\n", "for rec in recs:\n", " for i, qual in enumerate(rec.letter_annotations['phred_quality']):\n", " if i < 25 or qual == 40:\n", " continue\n", " pos = i + 1\n", " qual_pos[pos].append(qual)\n", "vps = []\n", "poses = qual_pos.keys()\n", "poses.sort()\n", "for pos in poses:\n", " vps.append(qual_pos[pos])\n", "fig, ax = plt.subplots()\n", "sns.boxplot(vps, ax=ax)\n", "ax.set_xticklabels([str(x) for x in range(26, max(qual_pos.keys()) + 1)])\n", "pass" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# There is more..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Do this to download the paired end data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Be careful as this will be 1GB of data (and fully optional)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2015-02-18 13:41:27-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA18489/sequence_read/SRR003265_1.filt.fastq.gz\n", " => ‘SRR003265_1.filt.fastq.gz’\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/data/NA18489/sequence_read ... done.\n", "==> SIZE SRR003265_1.filt.fastq.gz ... 502660639\n", "==> PASV ... done. ==> RETR SRR003265_1.filt.fastq.gz ... done.\n", "Length: 502660639 (479M) (unauthoritative)\n", "\n", "100%[======================================>] 502,660,639 6.29MB/s in 68s \n", "\n", "2015-02-18 13:42:36 (7.01 MB/s) - ‘SRR003265_1.filt.fastq.gz’ saved [502660639]\n", "\n", "--2015-02-18 13:42:36-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA18489/sequence_read/SRR003265_2.filt.fastq.gz\n", " => ‘SRR003265_2.filt.fastq.gz’\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/data/NA18489/sequence_read ... done.\n", "==> SIZE SRR003265_2.filt.fastq.gz ... 484084218\n", "==> PASV ... done. ==> RETR SRR003265_2.filt.fastq.gz ... done.\n", "Length: 484084218 (462M) (unauthoritative)\n", "\n", "100%[======================================>] 484,084,218 6.41MB/s in 68s \n", "\n", "2015-02-18 13:43:44 (6.78 MB/s) - ‘SRR003265_2.filt.fastq.gz’ saved [484084218]\n", "\n" ] } ], "source": [ "!rm -f SRR003265_1.filt.fastq.gz 2>/dev/null\n", "!rm -f SRR003265_2.filt.fastq.gz 2>/dev/null\n", "!wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA18489/sequence_read/SRR003265_1.filt.fastq.gz\n", "!wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data/NA18489/sequence_read/SRR003265_2.filt.fastq.gz" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f1 = gzip.open('SRR003265_1.filt.fastq.gz')\n", "f2 = gzip.open('SRR003265_2.filt.fastq.gz')\n", "recs1 = SeqIO.parse(f1, 'fastq')\n", "recs2 = SeqIO.parse(f2, 'fastq')\n", "cnt = 0\n", "for rec1 in recs1:\n", " next(recs2)\n", " cnt +=1\n", "print('Number of pairs: %d' % cnt)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Only do the next cell on Python 3" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#f1 = gzip.open('SRR003265_1.filt.fastq.gz', 'rt', encoding='utf8')\n", "#f2 = gzip.open('SRR003265_2.filt.fastq.gz', 'rt', encoding='utf8')\n", "#recs1 = SeqIO.parse(f1, 'fastq')\n", "#recs2 = SeqIO.parse(f2, 'fastq')\n", "#cnt = 0\n", "#for rec1, rec2 in zip(recs1, recs2):\n", "# cnt +=1\n", "#\n", "#print('Number of pairs: %d' % cnt)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }