{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Getting the necessary data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You only need to do this once." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2015-06-26 14:36:30-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam\n", " => ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam’\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/phase3/data/NA18489/exome_alignment ... done.\n", "==> SIZE NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam ... 327067172\n", "==> PASV ... done. ==> RETR NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam ... done.\n", "Length: 327067172 (312M) (unauthoritative)\n", "\n", "NA18489.chrom20.ILL 100%[=====================>] 311.92M 5.02MB/s in 31s \n", "\n", "2015-06-26 14:37:02 (9.98 MB/s) - ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam’ saved [327067172]\n", "\n", "--2015-06-26 14:37:02-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai\n", " => ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai’\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/phase3/data/NA18489/exome_alignment ... 
done.\n", "==> SIZE NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai ... 170688\n", "==> PASV ... done. ==> RETR NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai ... done.\n", "Length: 170688 (167K) (unauthoritative)\n", "\n", "NA18489.chrom20.ILL 100%[=====================>] 166.69K --.-KB/s in 0.08s \n", "\n", "2015-06-26 14:37:02 (2.02 MB/s) - ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai’ saved [170688]\n", "\n" ] } ], "source": [ "!rm -f NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam 2>/dev/null\n", "!rm -f NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai 2>/dev/null\n", "!wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam\n", "!wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The recipe" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from collections import defaultdict\n", "\n", "import numpy as np\n", "\n", "%matplotlib inline\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "import pysam" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "bam = pysam.AlignmentFile('NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam', 'rb')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SQ\n", "\t1\n", "\t\tLN\t249250621\n", "\t\tM5\t1b22b98cdeb4a9304cb5d48026a85128\n", "\t\tSN\t1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t2\n", "\t\tLN\t243199373\n", "\t\tM5\ta0d9851da00400dec1098a9255ac712e\n", "\t\tSN\t2\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t3\n", "\t\tLN\t198022430\n", "\t\tM5\tfdfd811849cc2fadebc929bb925902e5\n", "\t\tSN\t3\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t4\n", "\t\tLN\t191154276\n", "\t\tM5\t23dccd106897542ad87d2765d28a19a1\n", "\t\tSN\t4\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t5\n", "\t\tLN\t180915260\n", "\t\tM5\t0740173db9ffd264d728f32784845cd7\n", "\t\tSN\t5\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t6\n", "\t\tLN\t171115067\n", "\t\tM5\t1d3a93a248d92a729ee764823acbbc6b\n", "\t\tSN\t6\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t7\n", "\t\tLN\t159138663\n", "\t\tM5\t618366e953d6aaad97dbe4777c29375e\n", "\t\tSN\t7\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t8\n", "\t\tLN\t146364022\n", "\t\tM5\t96f514a9929e410c6651697bded59aec\n", "\t\tSN\t8\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t9\n", "\t\tLN\t141213431\n", "\t\tM5\t3e273117f15e0a400f01055d9f393768\n", "\t\tSN\t9\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t10\n", "\t\tLN\t135534747\n", "\t\tM5\t988c28e000e84c26d552359af1ea2e1d\n", "\t\tSN\t10\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz 
AS:NCBI37 SP:Human\n", "\t11\n", "\t\tLN\t135006516\n", "\t\tM5\t98c59049a2df285c76ffb1c6db8f8b96\n", "\t\tSN\t11\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t12\n", "\t\tLN\t133851895\n", "\t\tM5\t51851ac0e1a115847ad36449b0015864\n", "\t\tSN\t12\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t13\n", "\t\tLN\t115169878\n", "\t\tM5\t283f8d7892baa81b510a015719ca7b0b\n", "\t\tSN\t13\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t14\n", "\t\tLN\t107349540\n", "\t\tM5\t98f3cae32b2a2e9524bc19813927542e\n", "\t\tSN\t14\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t15\n", "\t\tLN\t102531392\n", "\t\tM5\te5645a794a8238215b2cd77acb95a078\n", "\t\tSN\t15\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t16\n", "\t\tLN\t90354753\n", "\t\tM5\tfc9b1a7b42b97a864f56b348b06095e6\n", "\t\tSN\t16\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t17\n", "\t\tLN\t81195210\n", "\t\tM5\t351f64d4f4f9ddd45b35336ad97aa6de\n", "\t\tSN\t17\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t18\n", "\t\tLN\t78077248\n", "\t\tM5\tb15d4b2d29dde9d3e4f93d1d0f2cbc9c\n", "\t\tSN\t18\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t19\n", "\t\tLN\t59128983\n", "\t\tM5\t1aacd71f30db8e561810913e0b72636d\n", 
"\t\tSN\t19\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t20\n", "\t\tLN\t63025520\n", "\t\tM5\t0dec9660ec1efaaf33281c0d5ea2560f\n", "\t\tSN\t20\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t21\n", "\t\tLN\t48129895\n", "\t\tM5\t2979a6085bfe28e3ad6f552f361ed74d\n", "\t\tSN\t21\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t22\n", "\t\tLN\t51304566\n", "\t\tM5\ta718acaa6135fdca8357d5bfe94211dd\n", "\t\tSN\t22\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t23\n", "\t\tLN\t155270560\n", "\t\tM5\t7e0e2e580297b7764e31dbc80c2540dd\n", "\t\tSN\tX\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t24\n", "\t\tLN\t59373566\n", "\t\tM5\t1fa3474750af0948bdf97d5a0ee52e51\n", "\t\tSN\tY\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t25\n", "\t\tLN\t16569\n", "\t\tM5\tc68f52674c9fb33aef52dcf399755519\n", "\t\tSN\tMT\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t26\n", "\t\tLN\t4262\n", "\t\tM5\tf3814841f1939d3ca19072d9e89f3fd7\n", "\t\tSN\tGL000207.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t27\n", "\t\tLN\t15008\n", "\t\tM5\t1c1b2cd1fccbc0a99b6a447fa24d1504\n", "\t\tSN\tGL000226.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t28\n", "\t\tLN\t19913\n", "\t\tM5\td0f40ec87de311d8e715b52e4c7062e1\n", "\t\tSN\tGL000229.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t29\n", "\t\tLN\t27386\n", "\t\tM5\tba8882ce3a1efa2080e5d29b956568a4\n", "\t\tSN\tGL000231.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t30\n", "\t\tLN\t27682\n", "\t\tM5\t851106a74238044126131ce2a8e5847c\n", "\t\tSN\tGL000210.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t31\n", "\t\tLN\t33824\n", "\t\tM5\t99795f15702caec4fa1c4e15f8a29c07\n", "\t\tSN\tGL000239.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t32\n", "\t\tLN\t34474\n", "\t\tM5\t118a25ca210cfbcdfb6c2ebb249f9680\n", "\t\tSN\tGL000235.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t33\n", "\t\tLN\t36148\n", "\t\tM5\tdfb7e7ec60ffdcb85cb359ea28454ee9\n", "\t\tSN\tGL000201.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t34\n", "\t\tLN\t36422\n", "\t\tM5\t7de00226bb7df1c57276ca6baabafd15\n", "\t\tSN\tGL000247.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t35\n", "\t\tLN\t36651\n", "\t\tM5\t89bc61960f37d94abf0df2d481ada0ec\n", "\t\tSN\tGL000245.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t36\n", "\t\tLN\t37175\n", "\t\tM5\t6f5efdd36643a9b8c8ccad6f2f1edc7b\n", "\t\tSN\tGL000197.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t37\n", "\t\tLN\t37498\n", "\t\tM5\t96358c325fe0e70bee73436e8bb14dbd\n", "\t\tSN\tGL000203.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t38\n", "\t\tLN\t38154\n", "\t\tM5\te4afcd31912af9d9c2546acf1cb23af2\n", "\t\tSN\tGL000246.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t39\n", "\t\tLN\t38502\n", "\t\tM5\t1d78abec37c15fe29a275eb08d5af236\n", "\t\tSN\tGL000249.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t40\n", "\t\tLN\t38914\n", "\t\tM5\td92206d1bb4c3b4019c43c0875c06dc0\n", "\t\tSN\tGL000196.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t41\n", "\t\tLN\t39786\n", "\t\tM5\t5a8e43bec9be36c7b49c84d585107776\n", "\t\tSN\tGL000248.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t42\n", "\t\tLN\t39929\n", "\t\tM5\t0996b4475f353ca98bacb756ac479140\n", "\t\tSN\tGL000244.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t43\n", "\t\tLN\t39939\n", "\t\tM5\t131b1efc3270cc838686b54e7c34b17b\n", "\t\tSN\tGL000238.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t44\n", "\t\tLN\t40103\n", "\t\tM5\t06cbf126247d89664a4faebad130fe9c\n", "\t\tSN\tGL000202.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t45\n", "\t\tLN\t40531\n", "\t\tM5\t93f998536b61a56fd0ff47322a911d4b\n", "\t\tSN\tGL000234.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t46\n", "\t\tLN\t40652\n", "\t\tM5\t3e06b6741061ad93a8587531307057d8\n", "\t\tSN\tGL000232.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t47\n", "\t\tLN\t41001\n", "\t\tM5\t43f69e423533e948bfae5ce1d45bd3f1\n", "\t\tSN\tGL000206.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t48\n", "\t\tLN\t41933\n", "\t\tM5\t445a86173da9f237d7bcf41c6cb8cc62\n", "\t\tSN\tGL000240.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t49\n", "\t\tLN\t41934\n", "\t\tM5\tfdcd739913efa1fdc64b6c0cd7016779\n", "\t\tSN\tGL000236.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t50\n", "\t\tLN\t42152\n", "\t\tM5\tef4258cdc5a45c206cea8fc3e1d858cf\n", "\t\tSN\tGL000241.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t51\n", "\t\tLN\t43341\n", "\t\tM5\tcc34279a7e353136741c9fce79bc4396\n", "\t\tSN\tGL000243.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t52\n", "\t\tLN\t43523\n", "\t\tM5\t2f8694fc47576bc81b5fe9e7de0ba49e\n", "\t\tSN\tGL000242.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t53\n", "\t\tLN\t43691\n", "\t\tM5\tb4eb71ee878d3706246b7c1dbef69299\n", "\t\tSN\tGL000230.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t54\n", "\t\tLN\t45867\n", "\t\tM5\te0c82e7751df73f4f6d0ed30cdc853c0\n", "\t\tSN\tGL000237.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t55\n", "\t\tLN\t45941\n", "\t\tM5\t7fed60298a8d62ff808b74b6ce820001\n", "\t\tSN\tGL000233.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t56\n", "\t\tLN\t81310\n", "\t\tM5\tefc49c871536fa8d79cb0a06fa739722\n", "\t\tSN\tGL000204.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t57\n", "\t\tLN\t90085\n", "\t\tM5\t868e7784040da90d900d2d1b667a1383\n", "\t\tSN\tGL000198.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t58\n", "\t\tLN\t92689\n", "\t\tM5\taa81be49bf3fe63a79bdc6a6f279abf6\n", "\t\tSN\tGL000208.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t59\n", "\t\tLN\t106433\n", "\t\tM5\td75b436f50a8214ee9c2a51d30b2c2cc\n", "\t\tSN\tGL000191.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t60\n", "\t\tLN\t128374\n", "\t\tM5\ta4aead23f8053f2655e468bcc6ecdceb\n", "\t\tSN\tGL000227.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t61\n", "\t\tLN\t129120\n", "\t\tM5\tc5a17c97e2c1a0b6a9cc5a6b064b714f\n", "\t\tSN\tGL000228.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t62\n", "\t\tLN\t137718\n", "\t\tM5\t46c2032c37f2ed899eb41c0473319a69\n", "\t\tSN\tGL000214.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t63\n", "\t\tLN\t155397\n", "\t\tM5\t3238fb74ea87ae857f9c7508d315babb\n", "\t\tSN\tGL000221.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t64\n", "\t\tLN\t159169\n", "\t\tM5\tf40598e2a5a6b26e84a3775e0d1e2c81\n", "\t\tSN\tGL000209.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t65\n", "\t\tLN\t161147\n", "\t\tM5\t1d708b54644c26c7e01c2dad5426d38c\n", "\t\tSN\tGL000218.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t66\n", "\t\tLN\t161802\n", "\t\tM5\tfc35de963c57bf7648429e6454f1c9db\n", "\t\tSN\tGL000220.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t67\n", "\t\tLN\t164239\n", "\t\tM5\t9d424fdcc98866650b58f004080a992a\n", "\t\tSN\tGL000213.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t68\n", "\t\tLN\t166566\n", "\t\tM5\t7daaa45c66b288847b9b32b964e623d3\n", "\t\tSN\tGL000211.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t69\n", "\t\tLN\t169874\n", "\t\tM5\t569af3b73522fab4b40995ae4944e78e\n", "\t\tSN\tGL000199.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t70\n", "\t\tLN\t172149\n", "\t\tM5\t6d243e18dea1945fb7f2517615b8f52e\n", "\t\tSN\tGL000217.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t71\n", "\t\tLN\t172294\n", "\t\tM5\t642a232d91c486ac339263820aef7fe0\n", "\t\tSN\tGL000216.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t72\n", "\t\tLN\t172545\n", "\t\tM5\t5eb3b418480ae67a997957c909375a73\n", "\t\tSN\tGL000215.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t73\n", "\t\tLN\t174588\n", "\t\tM5\td22441398d99caf673e9afb9a1908ec5\n", "\t\tSN\tGL000205.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t74\n", "\t\tLN\t179198\n", "\t\tM5\tf977edd13bac459cb2ed4a5457dba1b3\n", "\t\tSN\tGL000219.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t75\n", "\t\tLN\t179693\n", "\t\tM5\td5b2fc04f6b41b212a4198a07f450e20\n", "\t\tSN\tGL000224.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t76\n", "\t\tLN\t180455\n", "\t\tM5\t399dfa03bf32022ab52a846f7ca35b30\n", "\t\tSN\tGL000223.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t77\n", "\t\tLN\t182896\n", "\t\tM5\t5d9ec007868d517e73543b005ba48535\n", "\t\tSN\tGL000195.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t78\n", "\t\tLN\t186858\n", "\t\tM5\t563531689f3dbd691331fd6c5730a88b\n", "\t\tSN\tGL000212.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t79\n", "\t\tLN\t186861\n", "\t\tM5\t6fe9abac455169f50470f5a6b01d0f59\n", "\t\tSN\tGL000222.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t80\n", "\t\tLN\t187035\n", "\t\tM5\t75e4c8d17cd4addf3917d1703cacaf25\n", "\t\tSN\tGL000200.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t81\n", "\t\tLN\t189789\n", "\t\tM5\tdbb6e8ece0b5de29da56601613007c2a\n", "\t\tSN\tGL000193.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t82\n", "\t\tLN\t191469\n", "\t\tM5\t6ac8f815bf8e845bb3031b73f812c012\n", "\t\tSN\tGL000194.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t83\n", "\t\tLN\t211173\n", "\t\tM5\t63945c3e6962f28ffd469719a747e73c\n", "\t\tSN\tGL000225.1\n", 
"\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t84\n", "\t\tLN\t547496\n", "\t\tM5\t325ba9e808f669dfeee210fdd7b470ac\n", "\t\tSN\tGL000192.1\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t85\n", "\t\tLN\t171823\n", "\t\tM5\t6743bd63b3ff2b5b8985d8933c53290a\n", "\t\tSN\tNC_007605\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "\t86\n", "\t\tLN\t35477943\n", "\t\tM5\t5b6a4b3a81a2d3c134b7d14bf6ad39f1\n", "\t\tSN\ths37d5\n", "\t\tUR\tftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human\n", "RG\n", "\t1\n", "\t\tLB\tSolexa-51039\n", "\t\tCN\tBI\n", "\t\tDS\tSRP004074\n", "\t\tSM\tNA18489\n", "\t\tPI\t220\n", "\t\tID\tSRR100025\n", "\t\tPL\tILLUMINA\n", "CO\n", "\t1\n" ] }, { "ename": "AttributeError", "evalue": "'str' object has no attribute 'items'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrecord\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\t%d'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 
6\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mfield\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrecord\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 7\u001b[0m \u001b[1;32mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\t\\t%s\\t%s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mfield\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'str' object has no attribute 'items'" ] } ], "source": [ "headers = bam.header\n", "for record_type, records in headers.items():\n", " print (record_type)\n", " for i, record in enumerate(records):\n", " print('\\t%d' % (i + 1))\n", " for field, value in record.items():\n", " print('\\t\\t%s\\t%s' % (field, value))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('SRR100025.62130839', 19, '20', 59996, 60048)\n", "52M24S\n", "(0, 52, 76)\n", "(19, 60228, 295)\n", "(True, True, False, 60)\n", "array('B', [33, 34, 36, 33, 39, 34, 33, 38, 39, 34, 40, 35, 40, 40, 32, 40, 38, 33, 35, 38, 33, 39, 40, 34, 37, 39, 36, 30, 36, 37, 34, 35, 34, 40, 37, 34, 38, 28, 40, 40, 38, 32, 33, 32, 36, 34, 37, 24, 34, 35, 31, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])\n", "array('B', [33, 34, 36, 33, 39, 34, 33, 38, 39, 34, 40, 35, 40, 40, 32, 40, 38, 33, 35, 38, 33, 39, 40, 34, 37, 39, 36, 30, 36, 37, 34, 35, 34, 40, 37, 34, 38, 28, 40, 40, 38, 32, 33, 32, 36, 34, 37, 24, 34, 35, 31, 2])\n", "CTCAGATCCAGAGGTGGAAGAGGAAGGAAGCTTGGAACCCTATAGAGTTGCTGAGTGCCAGGACCAGATACTGGGC\n" ] } ], "source": [ "#0-based\n", "for rec in bam:\n", " if rec.cigarstring.find('M') > -1 and rec.cigarstring.find('S') > -1 and not 
rec.is_unmapped and not rec.mate_is_unmapped:\n", " break\n", "print(rec.query_name, rec.reference_id, bam.getrname(rec.reference_id), rec.reference_start, rec.reference_end)\n", "print(rec.cigarstring)\n", "print(rec.query_alignment_start, rec.query_alignment_end, rec.query_alignment_length)\n", "print(rec.next_reference_id, rec.next_reference_start, rec.template_length)\n", "print(rec.is_paired, rec.is_proper_pair, rec.is_unmapped, rec.mapping_quality)\n", "print(rec.query_qualities)\n", "print(rec.query_alignment_qualities)\n", "print(rec.query_sequence)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAesAAAFVCAYAAADPM8ekAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd8XNWd9/HPzKh3yZZkWbLcfSzLvWPTTC8Gw0ISWggE\n0jabzbO7Sdhk82w2mzzZ3bCEbLJhkyWUkARIKAYMmG5wA/cmlyvbktwlq9nqbeY+f4zkCGMky5Z0\n78x8368XLzxz52p+RzOa75x7zznXY9s2IiIi4l5epwsQERGRnimsRUREXE5hLSIi4nIKaxEREZdT\nWIuIiLicwlpERMTlonraaIzxAo8AU4FW4H7LsvZ323478G2gBXjOsqyHO+/fDJzsfFiJZVn3DUDt\nIiIiEaHHsAZuAmIsy1pgjJkHPNR5H8aYIcBPgBkEg3mFMeZ9YDeAZVmLBqpoERGRSNLbYfCFwBsA\nlmWtA2Z32zYW2GZZ1gnLsmzgI+Bigr3wBGPMm8aYdztDXkRERM5Rb2GdAtR1u+3vPDQOsBcoNMZk\nGWMSgMuBBKAJeNCyrKuBrwJ/7LaPiIiI9FFvh8HrgORut72WZQUALMuqNcb8HfACUA1sBqqAYmBf\n52P2GmOqgRzgyKc9iW3btsfjOedGiIiIhJg+hV5vYb0GuAF4zhgzH9jetcEYEwXMtizrImNMLPAB\n8B/AvQQPhX/dGDOcYO/8WI8VezxUVtb3pe6QkpmZrPaFqHBuG6h9oU7tC12Zmcm9P6ib3sJ6KXCl\nMWZN5+17O0eAJ1mW9agxxm+M2QT4gV9bllVijHkMeMIYs7Jrn67euIiIiPRdj2HdOXDsa6fdXdxt\n+4+AH522Twfw+f4qUEREJNJp4JeIiIjLKaxFRERcTmEtIiLicgprERERl1NYi4iIuJzCWkRExOUU\n1iIiIi6nsBYREXE5hbWIiIjLKaxFRERcTmEtIiLicgprERERl1NYi4iIuJzCWkRExOUU1iIiIi6n\nsBYREXE5hbWIiIjLKaxFRERcTmEtIiLicgprERERl1NYi4iIuJzCWkRExOUU1iIiIi6nsBYREXE5\nhbWIiIjLKaxFRERcTmEtIiLiclFOFyASSfyBAA1N7dQ3tROwbbweDx4PeL0ePB4PyQnRJMZFO12m\niLiMwlpkgFSeaOaDrUfZf+QkdU1t1
De109Dc3uM+MVFefvrXC0hJiBmkKkUkFCisRfpRIGCzo6Sa\nFVuOsGN/NTbgARLjo0lJjCEvM5GkhBiSE6LxeTzYNgRsG9u2KTlWx8GKBo7XNiusReRjFNYi/aC1\n3c97mw6zYssRqk62ADB2eAqLZuYy22QRE+3r9We8se4gByv2Ud/UNtDlikiIUViLnKcD5fX877Kd\nHKtuIibay8XThrNoRi4jhyX36eckJwTPVdc39XyoXEQij8Ja5BwFAjavf3SAl1eX4g/YXDErj5su\nGk3COQ4QS+489K2etYicTmEtcg6On2jmt6/uYt/hk6QlxXDf9ZMoHJ1xXj9TPWsR+TQKa5E++nBn\nOU+9adHa5mf2xCzuvtqQFH/+061S1LMWkU+hsBbpg1XbjvLE8j3Ex/q4f3EBFxQOw+Px9MvP7upZ\n16lnLSKnUViLnKX1uyt48o09JMZF8cCdM8nLTOrXnx8T7SM22qeetYh8gpYbFTkLW/dW8eiyXcTF\n+PiH26b3e1B3SU6I1jlrEfkEhbVIL3aW1fDIS0X4fB6+ees0Rg1LGbDnSk6Iob6pDdu2B+w5RCT0\nKKxFerCrtJpfvrAdsPnGLVOZMCJtQJ8vOSGaDr9NS5t/QJ9HREKLzlmLnMYfCHCwooE9B2p57aMD\n+P02X795CoWjzm9q1tn4y/StNuJj9ecpIkH6NJCIZ9s2h443sLOsBuvgCYoPnTjVs/V5Pdy/eBLT\nxw8dlFq6pm/VNbWTlT4oTykiIUBhLRGrrqmNj4rKWb3jGIcrG0/dn50ez9yCdCbmp7Fw5gj8rYM3\n4EurmInImSisJaL4AwGKSmpYvf0YW/dV4Q/Y+LweZk3IZKbJZGJ+OunJsacen5ESR2XlYIa1O1cx\nO1zZwGsfHqCt3Y9tgz9gEwgECNgQHeUlKT76L/8lRJOaGMOkkRnExvR+ARMR6Z3CWiLGjpJq/vTe\nPo5WBXvReZmJXDh1OPMLs11zSUq39qw/2HqUdbsq+rRPfGwUF07JYdHMXIZlJAxQZSKRQWEtYe9I\nVSN/em8vRSU1eDxw4ZQcLpuVy8js5H5bfay/uLVnXVvfCsC/fXk+KYkxeD0evF7weDy0tQdoaGmn\nsbmdhuZ2GpraOVrdyOodx3h74yHe3niIwlHpLJqZx7RxQ/B5NQlFpK8U1hK26pvaeHl1Ke9vOUrA\ntikYmc5tl49nRNbALGjSH7qPBneT2voWonwestLjP/EFJ8rnJSEuCtLiP3b/kgtHs7m4khWbj7Cz\nrJadZbVkp8dz/+JJjM1NHczyRUKewlrCUlFpNb9+aSdNrR1kZyTwuUXjmDZuiOt60qdL7jYa3E1q\n6ltJT47t0+8vyudlbkE2cwuyOVLZwDubDrNy61H+7Q+bWbxgJIsXjCLKp162yNlQWEvYWbXtKL97\nw8Lr9XDbZeO4bFZeyIRCrAvXB+/wB6hraGP8eSwIk5uZxBeumcj8Sdn89tVdvLKmjB0l1dy/eBI5\nQxL7sVqR8BQan2AiZ8G2bV5cWXLqqljfum06V83ND5mg7uK29cFPNrRhAxndRsmfK5Ofzg+/OI+F\nk4dReqyeHz6xgfc2H9byqiK9CK1PMZFP0eEP8NtXd/Hq2jIy0+L4p7tnD/jSoAPFbeuD1zYEB5el\n9UNYAyTERXHf4kn89U2TiY7y8oe3inn89d0EXNJeETfSYXAJeY0t7fzqxR3sOXiCscNT+MatU10z\nFetcdF8f3A1LjnaNBE/vp7DuMntiFuPyUvnlC9tZs6Mcn9fL3dcYvC4fVyDiBPWsJaS1tvn5z2e2\nsufgCWZNyOTbt88I6aAG940Ir61rAfrnMPjp0pJi+fvPTSc/O4mV247y9NvFrjmiIOImCmsJWQHb\n5rev7eJART0XTsnhazdNJiY69FfMSnHZiPCaUz3ruAH5+Ylx0XzrthnkZSby3uYjPPvuPgW2yGl6\nP
MZmjPECjwBTgVbgfsuy9nfbfjvwbaAFeM6yrId720ekv7yyupRNViUTRqQFD596w+PwqdtWMRuo\nw+DdJcUHA/unz2zh7Y2HiPJ5uPXSsa6faicyWHrrWd8ExFiWtQD4R+Chrg3GmCHAT4DLgIXAEmPM\njM59Ys+0j0h/2bDnOK+sKWNoahxfv3lyyI347onbVjGrrW/F6/GQmjiwpxdSEmP49m3Tyc5IYPm6\ng/x5xT6qT7YM6HOKhIreRq8sBN4AsCxrnTFmdrdtY4FtlmWdADDGfARcDOQDyz9lH5HzdqC8nsde\n3UVsjI+/vWXqqZ5ouHBjzzo1KWZQjlykJsXyndtn8B9/3Myb6w/x5vpDDE2NY8KINMyINEx+Gplp\nn1xFTSTc9RbWKUBdt9t+Y4zXsqwAsBcoNMZkAQ3A5cDSXvb5VJmZyX0uPpSoff2jtq6FXy3dQbs/\nwD99fi4zCnMG/DkH+7XLb+kAoMP2DMpz9/QcgYDNiYZWxo1IG7TfQ2ZmMj/7u0tYsekwRfur2FlS\nzdqictYWlQMwIjuJRbNGsGjWCIaetsTpp/28cKb2RYbewroO6P6bOhW6lmXVGmP+DngBqAY2A1XA\nkE/bpyeVlfV9qTukZGYmq339oK6pjV8+v52qky3ccskYxmQnDfjzOvHadXReP/t4dYPj7TvZ0Io/\nYJMcFzXov4eFk7JYOCmLgG1ztLIR69AJdh+oZfv+ap56fTe/f303BaPSWTg5h5kTMs94OU797YW2\ncG5fX7+E9BbWa4AbgOeMMfOB7V0bjDFRwGzLsi4yxsQCHwD/QTCwz7iPSF81NLezubiSDbsr2H3g\nBAHbZn5hNtfNH+l0aQPGTeuDD/RI8LPh9XjIy0oiLyuJy2fl0dTSzvo9x1lbVM6uslp2ldUSF+Pj\n2nn5XDU3n9gwmBEgcrrewnopcKUxZk3n7Xs7R4AnWZb1qDHGb4zZBPiBX1uWVWKMKT19n4EpXcKV\nPxBgk1XJ2qJydpbW4A8Ep/GMzklmXkE2i2bmhfU5SzetDz4YI8H7KiEumkun53Lp9Fwqapv4sKic\nFVuOsHRVKe9vPcpfXTyGCyYP0+IqElZ6DGvLsmzga6fdXdxt+4+AH53FPiK9au8IsLboGK9/dIDK\nE8FRwPlZScwpyGJOQTZZZ3F+Mly4ZX3wrrDOSHFPWHeXnZ7ATReN4eq5+bz+0QHe2nCIx17bzTsb\nD/O5y8bpfKeEDefXMpSI19rm54NtR3lz/UFq61uJ8nm4dEYuV87Oi9grMiUnRHPoeAO2bTt6FKGm\nPvilKS3JnWHdJT42ilsuGcul03N5ceV+PtxZwU+f2cJHu49z+2Xjzng+WySUKKzFMbZts2r7MZ5/\nfz8Nze3ERHu5as4Irp6b76rDrk5ITohxxfrgJ7p61iHyegxJjeNLNxRyxewRPP1OMSu3HuFAeR1/\ne8vUiH9PSWhTWIsj6hrbeHL5HrbuqyI+1sfiBaO4cnZe2M2ZPlfd1wd3Mqy7DoP31xW3BsvonBQe\nuGMmz31QwtvrD/Ljpzbyt7dMZeQwHRaX0KSwlkG3dW8VTyzfTX1TOwUj07nv+gIyUpwbbexG3UeE\nZ6U7V0dNfSspiTEhuUJclM/LNz47nbTEaJ5fsZ9//+NmvnzjJGaMz3S6NJE+U1jLoGlp6+DZd/ex\ncttRonxebrtsHFfMGaFRu2eQ4oJVzGzbpra+leFDQ3fcgMfj4dp5I8lOT+B/l+3kv1/YwWcWjePq\nuSPCekaBhB+FtQyK6pMt/OezW6iobWZEVhJfumESeZlJTpflWm5YH7yxpYP2jkDInK/uycwJmXz3\nzln81/Pb+POKfbS2+1ly4WinyxI5a6F3bEtCzomGVh7sDOqr5ozg+3fPVlD3wg3XtK7pvI51qJ2v\n/jQjhyXz/btnMzQ1jpdXl/Jh5/KlIqFAYS0Dqr6pjf98divHa5t
ZvGAUt10+nugove1685eLeTjX\nsz7REFojwc9GRkoc/+cz04iPjeKJ5buxDtY6XZLIWdGnpgyYppZ2HvrTVo5WNXLl7BHcfJEOO54t\nV/SsXbh6WX8YPjSRr988GduG/35xBxU1TU6XJNIrhbUMiJa2Dh5+bhsHKxq4ZPpwbrt8nAb09IEb\n1gevrXN+XfCBMmlUBndfbWhsCb5PG5qdXy1OpCcKa+l3be1+fvH8dvYfqWN+YTafv8ooqPvIDeuD\n14bYgih9ddG04Vx/wUiO1zbzyxe2097R68UBRRyjsJZ+Zds2j722mz0HTzBzQib3XV+A16ugPhdO\nrw9eWx9eA8zO5OaLxzBnYhZ7D5/kf14qovRYHbZtO12WyCdo6pb0q7VF5WzYc5zxeal85cZCfF59\nHzxXTq8PXlPfSmJcVFhfctLr8XDf9QXUNrSydV8VW/dVkZUWz5yCLOYVZJObmaijQuIKCmvpN5Un\nmvnj28XExfj40uJJGvV9npxeH/xEQytDImBluZhoH9+5fQY79lezfs9xtu6t4rUPD/DahwfIGZLA\nJdNzuXhaDnEx+rgU5+jdJ/0iELB59NVdtLT5ue/6AoZG0OUsB4qT64M3t3bQ3OoPy8FlZxLl8zJj\nQiYzJmTS2u5n+/5q1u+qYNv+ap59dy/L1pRy+aw8Lp+l9evFGQpr6RfL1x1g3+GTzJ6YxYLJw5wu\nJyw4uT54bZhO2zobsdE+5kzMYs7ELBqa23l302He2XiIV9aU8cb6g1w8dThXz81nSGpkfJERd1BY\ny3nbd/gEL60qJS0phruv1sjv/uLk+uDhPhL8bCXFR7PkwtFcMzeflduO8uaGg7yz6TDvbz3CTReN\n4Zq5+RpAKYNCYS3npa3dz8+e3oQ/YHPf9ZNIio92uqSw4eT64DWdI8EjsWd9JrExPq6cM4JFM3NZ\nt6uC59/fz/Pv72fr3iruW1xAdnqC0yVKmNMIIDkvz72/n0MVDVwxK4/C0RlOlxNWnFzFLJIPg/ck\nyudl4ZQcfnT/POYWZLHvyEl+8Ph6Vmw+rClfMqAU1nJOKmqa+O2ru3h302FGZCdz66VjnS4p7Di5\nPrjCumdJ8dF8dclkvnJjIdE+L79/q5if/Xnbqd+bSH/TYXDpk4qaJpatLePDneXYNuRmJvLA3bOJ\n8em8XX/r6lnXOdqz1iCqnsyblM2EEWk8sXw3RSU1/PipjTxwxwyydFhc+pnCWs5KRW0Ty9Z8PKSX\nLBzNTJNJdlYKlZX1TpcYdpzuWcfG+IiPDd8FUfpLenIsf/eZabz+0QFe+KCEnz6zhe/cMZMsTV+U\nfqSwlh51+AMsX3eQZWtK6fDbHwtpr0Z9D6jYaB8x0V7HzllnJMdqZP9Z8ng8XH/BKABe+KCEB5/e\nzAN3zNR6A9JvFNbyqUqP1fHE67s5XNlIalIMt102njkFWQrpQZSSEDPoPeu2dj8Nze3kZycN6vOG\ng+svGIVtw4srS/iPp7fwwJ0zGJqqwJbzp7CWT2ht87N0VQlvbzyEbcPF04bz2UVjSYjTtKzB5sT6\n4LUNneerkzS47FwsXjAK27ZZuqqUnz69hQfumKkFVOS8KazlY0qP1fE/LxVRdbKFrPR4vnDNRApG\nDvLyWXKKE+uDn7qOdYrC+lzdsHA0ARteXl3KT5/ZzPfumkWqvvzIeVBYyym7y2r4xYs7aGv3c938\nkdy4cBQxYXzFpVDQfUT4oIV1g0aC94clF47GH7B5dW0ZTyzfwzdvnaoxAHLONM9aANhSXMnDz23H\n7w/w1zdN5tZLxyqoXcCJEeGaY91/brpoNAUj09m+v5rVO445XY6EMIW1sGbHMX61tAif18M3PzON\nWSbL6ZKkkxOrmHUdBo/0dcH7g9fj4YvXFRAX4+OZd/ZSdbLZ6ZIkRCmsI9w7Gw/x2Gu7iY/18a3b\nplM4SkuGukmKAz1rrQvev4a
kxnH7FeNpafPzxOt7CGhZUjkHCusItmxNKU+/s5fUxBgeuGMmY3NT\nnS5JTuNIz7q+lSifVxdl6UcXTslh2tgh7D5Qy/tbjjhdjoQghXWE2lxcydJVpQxNjeO7d80kL0tz\nat3IqXPW6ckxGgzVjzweD1+4diKJcVH8ecU+KmqbnC5JQoxGg0egkw2tPLl8D9FRXr75mWlax9jF\nunrWJxpaP3WudXtHgAPl9ew9coJ9h09ysrENjwc8eIL/93jweT0My0hg1LBkRuWkMHxoAj7vx7+r\nN7V0UHmimbrGNsaPSBuU9kWStKRY7rrK8JtXdvLYa7v5xztm6lrYctYU1hHGtm0ef30PDc3t3HHF\neHKHJjpdkvSgq2e9fvdxNhdXMSQlloyUOIakxBEX66OsvJ6yY/V0+AOn9onyebBtgv9h03WKdPeB\n2lOPiYnyMiI7iewhiRw53kDViWYaWzpObc9M07StgTC3IItN1nE2WpW8+mEZNywYpSMYclYU1hFm\nxZYj7CippnB0BpfNynO6HOlFbLSPz181gV0Haqmpa6H6ZMvHQtfjgfysZMblpTI+L5VxualkpHwy\naNs7AhyubOgM9zrKyuspPVrP/iN1RPm8DE2NY8zwVIamxZGZGs+8SdmD2cyI4fF4uOtqQ/GhE7y0\nqpSNeypZvGAks02WetnSI49LLphuh/NVmzIzk11xVaqjVY388MkNxER5+df75vXbaF+3tG8guLFt\nbe1+autbaWhuJzczkbiYc/vO3dbuJy4xlo7W9rBd792Nrx8ELzX78upS1u2uwLYhOyOB6+bnc0Hh\nMKJ8Zz+UyK3t6y/h3L7MzOQ+/dFpgFmE6PAHeHTZLto7Atxz7URNywlhMdE+sjMSGJubes5B3fVz\nhqTGh21Qu1l2RgJfvrGQn3x5PhdPy6HqRDNPvL6H7/7mQ9bvrnC6PHEhhXWEeHl1KQcq6rlwSo4W\nPRFxiez0BO65toD/+OoFXDE7j7qmdn798k4eWbqDusbBvzSquJfCOgIUHzrB6x8dYGjn4gwi4i4Z\nKXHcccUE/vWLcxmfl8pGq5Lv/3Yd63dX4JJTleIwhXWYs22bZ97ZCzZ86YZJg3YxCBHpu+yMBB64\ncya3Xz6etnZ/Zy+7iJPqZUc8hXWY27avmgMV9cwpyGJ8nubOirid1+Phyjkj+OF9c5mQl8qm4kr+\n+bF1lJXXOV2aOEhhHcZs2+bl1aV4gBsWjHK6HBHpg+z0BL5z50w+d9k4GpraefCZLRQfOuF0WeIQ\nhXUY6+pVz56YRW6mlhMVCTVej4er5+bzlSWFtLUH+NmftrKjpNrpssQBCuswZds2L6/p7FUvHOV0\nOSJyHuYWZPONW6ZgA794fjsb9hx3uiQZZArrMLVtfzUHyoO96jz1qkVC3tSxQ/n7z04jOsrLr18u\nYtW2o06XJINIYR2Gus5Vg3rVIuHE5Kfz7dtnkBgXzRPL9/DGh2VOlySDRGEdhrarVy0StkbnpPDA\nnTNJTojmN0t3cOh4g9MlySBQWIeZ7r3qG9WrFglLuUMTufe6Ajr8Af73lZ20tfudLkkGmMI6zGzf\nX02ZetUiYW/6uKFct2AUR6oaef79/U6XIwNMYR1GArbNK2s6e9WaVy0S9u69oZCcIQm8s+mwpnSF\nOYV1mCgrr+P/PbWJ0mP1zDaZ5GWpVy0S7uJiovjKjYX4vB4ee203dU1aljRcKaxDXGNLO79/y+JH\nT26k9Fgd8yZlc/c1E50uS0QGSX52MrdcMpa6xjaefH2PLvwRpnRVhxAVsG3W7ijnuff3Ud/UTs6Q\nBO66cgIFozKcLk1EBtlVc0ewo6Sarfuq+GDrUS6dket0SdLPegxrY4wXeASYCrQC91uWtb/b9puB\n7wE28LhlWb/uvH8zcLLzYSWWZd03ALVHrMaW4DVvd5bWEBPt5dZLx3LVnBFE+XSgRCQSeT0e7
ru+\ngB88vp5n391L4egMMtPinS5L+lFvPeubgBjLshYYY+YBD3Xe1+VnwAygEdhljHmGYKhjWdaiAag3\n4h2rbuQXz2+noraZKWOGcPfVhiGpcU6XJSIO67om9qOv7mLZmjK+eH2B0yVJP+qtK7YQeAPAsqx1\nwOzTtrcDaUAC4CHYw54GJBhj3jTGvNsZ8tIPikqq+fFTm6iobeb6C0byzc9MVVCLyCnzJmUzfGgi\na4vKqahtcroc6Ue9hXUK0P0iqv7OQ+NdHgI2ATuAZZZl1RHsZT9oWdbVwFeBP562j/SRbdu8veEQ\nDz+3jfaOAF+6YRK3XDIWr8fjdGki4iJer4cbF44iYNssW1PmdDnSj3o7DF4HJHe77bUsKwBgjMkH\n/gYYCTQBfzDG3Aq8AuwDsCxrrzGmGsgBjvT0RJmZyT1tDnnn2r4Of4D/eWE7b607QHpyLP9071zM\nSPcNIgvn1y+c2wZqX6g7vX3XDkli+bqDfLSznLsXF4b85XHD/fU7W72F9RrgBuA5Y8x8YHu3bXGA\nH2i1LCtgjDkOpAP3EhyQ9nVjzHCCvfNjvRVSWVl/DuWHhszM5HNqn23bPPbabtYWlTNyWDLf+Ksp\nZCREu+53da7tCwXh3DZQ+0Ldp7Xv+vkjeeSlIn63rIgv3VDoQGX9I5xfv75+CektrJcCVxpj1nTe\nvtcYczuQZFnWo8aY3wFrjTEtBHvTT3Q+7gljzMqufbp649I3L68uZW1ROWOGp/Dt22YQG+NzuiQR\nCQEzTSZ5mUl8tKuCxQtGkTMk0emS5Dz1GNaWZdnA1067u7jb9oeBh8+w6+fPv7TItmr7UV5ZU0Zm\nWhx/e8tUBbWInDWvx8OSC0fzq6U7eGVNGV+5MXR71xKkgV8utLO0hqfesEiMi+L/fGYaKYkxTpck\nIiFm5oSh5GclsX5XBUeqGp0uR86TwtplDh1v4FdLd+DxePjGLVN1+EpEzomns3dtA8s6L/AjoUth\n7SK19a38/LlttLT5uX9xARNGpDldkoiEsOnjhzIyO5kNu49zuLLB6XLkPCisXSIQsPnFC9uprW/l\nM5eOZW5BttMliUiI83g8LLko2Lt+6k2LqpPNTpck50hh7RLrdlVwoLyeeZOyuWZevtPliEiYmDZ2\nCNPHDWXf4ZN8/9F1vLq2jPYOTdAJNQprF+jwB3hpdQk+r4dbLh6DRyuTiUg/CY5/mcJ91xcQF+Pj\nxZUl/PPj6ykqrXa6NOkDhbULrNp+jMoTLVw6PZehulKOiPQzj8fDwik5/OTL87l8Vh7Ha5v42Z+2\n8cjSHdQ1tjldnpwFhbXD2tr9vLKmlJgoL4sXjHS6HBEJYwlx0dx55QR+cM8cxuamsNGq5Ee/28DB\nivBcJSycKKwd9t7mI5xsaOOK2SNITYp1uhwRiQD52cl8965Z3HzRaKrrWvnJHzaxcc9xp8uSHiis\nHdTc2sHrHx0gPjZKg8pEZFB5PR5uWDiav/mrKXg8Hh55qYiXVpUQsG2nS5MzUFg76K0Nh2hobuea\nefkkxUc7XY6IRKCZEzL5p7tmMTQ1jlfWlPHI0iJa2jqcLktOo7B2SH1TG2+uP0hyQjRXzs5zuhwR\niWB5WUn83y/MZmJ+GpuLK/n3P26muVWB7SYKa4cs/+ggLW1+Fl8wiriY3i5+JiIysJITYvj7z03n\nwik5HKxo4H9eLsIf0Hxst1BYO6C2vpV3Nx8mIyWWS2cMd7ocEREAonxevnCtYcqYIRSV1PD0O3ux\ndQ7bFRTWDnhrw0HaOwLcuHA00VG69KWIuIfP6+WrSwrJy0xkxeYjvLPxsNMlCQrrQecPBPhwZwWJ\ncVFcUDjM6XJERD4hPjaKb946jdTEGJ59dy9b91Y5XVLEU1gPsp2lNdQ1tjF3UjbRUfr1i4g7DUmN\n429vnUp0lJffvLKTA+VaOMVJSotBtraoHIAFk9WrFhF3G
52TwpduKKSt3c9/Pb+N2vpWp0uKWArr\nQdTU0s7m4iqGZSQwJifF6XJERHo1y2Ry66KxnGho449vFztdTsRSWA+iDXuO0+EPsHDKMF1ZS0RC\nxjVz8xl+5sbTAAAXs0lEQVSbm8Lm4kpKj9U5XU5EUlgPojVF5XhAA8tEJKR4PB5uuXgsAC9+sN/h\naiKTwnqQVNQ2se/wSSaOTCcjJc7pckRE+mTiyHQKR6Wzs6yW3QdqnS4n4iisB8mHGlgmIiHury7p\n7F2v3K/FUgaZwnoQBAI2a4vKiY32MctkOl2OiMg5GZ2TwswJmew/Use2/dVOlxNRFNaDYFdpNVUn\nW5htMrUOuIiEtJsvGo0HePEDXU5zMCmsB8F7Gw8BOgQuIqEvNzOJ+YXDOFzZwIbdx50uJ2IorAdY\na7uf1duOMiQlFjMy3elyRETO25KLRuPzeli6qoQOv67MNRgU1gNsS3Elza0dXDB5GF7NrRaRMJCV\nFs/F04ZzvLb51KqMMrAU1gNsTecbWXOrRSScLF4wiugoLy+vLqW9Q73rgaawHiC2bfPm+oPsKq3B\n5KeTMyTR6ZJERPpNenIsl07Ppba+la37dFWugaawHgBNLR08srSIP723j5TEGL588xSnSxIR6XcX\nT8sBYM2OYw5XEv40j6ifHayo55GXijhe24wZkcZXlxQyLj+dykpdXk5EwktuZhKjc5LZUVLNiYZW\n0pJinS4pbKln3Y9Wbz/G//v9Jo7XNnPd/JF86/bppOrNKyJhbOGUHGwbPtypgWYDSWHdD2zb5tl3\n9/L467uJ8nn5xi1TuPXSsfi8+vWKSHibW5BNlM/Dmh3lWoJ0AClN+sFLq0p5a8Mhhg9N5Af3zmHG\neC0pKiKRISk+munjMzla1UhZuU73DRSF9Xl6Y91Blq0tIystnm/dNp2stHinSxIRGVQXTglOTV2t\ngWYDRmF9Hj7YeoQ/r9hHenIs37ptugZXiEhEKhydQWpiDOt3VdDe4Xe6nLCksD5H63dX8NQbFknx\n0fzD56YzVD1qEYlQPq+XCyYPo7Glg637dDWugaCwPgfb91fx6LJdxMX6+IfPTWf4UC14IiKRbWHn\nhYo053pgKKz76GBFPb9aWoTP6+Gbt05j5LBkp0sSEXHc6XOupX8prPvAHwjwxPI9tHcE+MqNhUwY\nkeZ0SSIirqE51wNHYd0Hb284zIHyei4oHMaMCZqeJSLSneZcDxyF9Vk6XtvES6tKSE6I5vYrxjtd\njoiI62jO9cBRWJ8F27b53RsWbR0Bbr9iPEnx0U6XJCLiSl1zrpetKdM0rn6ksD4Lq3ccY/eBWqaO\nHcK8gmynyxERca3C0RmMzklm674qfvzUJsprmpwuKSworHtxsqGVP727j9gYH5+/yuDxeJwuSUTE\ntXxeLw/cMZNLpg/n0PEGfvjkBj7SgLPzprDuxR/f2UtTawe3XjKWIalxTpcjIuJ6MdE+vnDNRL5y\nYyEA/7tsF0+8vpvWdh0WP1cK6x5sKa5k457jjMtNZdHMXKfLEREJKfMmZfMv98whPyuJVduP8ePf\nbaSmrsXpskKSwvpTBAI2z763lyifhy9cOxGvDn+LiPRZdkYC/3T3LBbNzOVIVSMPPruVk41tTpcV\nchTWn2JHSTWVJ1pYMHkYuVpOVETknEVH+bjryglcOy+fipomfvanrTS1tDtdVkhRWH+KFVuOALBo\nRp7DlYiIhD6Px8Otl47l0s6BZz9/bjutbTqHfbYU1mdw/EQzO/ZXMzY3RWt/i4j0E4/Hw11XGeZP\nymbfkZP894vbae8IOF1WSFBYn8H7m49gA5fNVK9aRKQ/eb0evnh9AdPHDWVnWS2/eWUn/oACuzdR\nPW00xniBR4CpQCtwv2VZ+7ttvxn4HmADj1uW9eve9nG7tnY/q7YfJTkhmtkmy+lyRETCTpTPy9du\nKuThP29jc3Elv3/T4
p5rC5wuy9V661nfBMRYlrUA+EfgodO2/wy4ElgI/IMxJq1zn9ge9nG19buP\n09jSwcXThhMdpQMPIiIDITrKxzdumUp+dhIrtx1j+/4qp0tytd7SaCHwBoBlWeuA2adtbwfSgATA\nQ7CHvRBY3sM+rrZiy2E8Hrhk+nCnSxERCWvxsVHcd/0kfF4Pv3+zmJa2DqdLcq3ewjoFqOt22995\nmLvLQ8AmYAewzLKsk2exj2uVHquj9Fg908YOZWhqvNPliIiEvRFZSVwzL5/quhZeWlXqdDmu1eM5\na4Kh2304tNeyrACAMSYf+BtgJNAE/MEYc2tP+/QkM9P5Udd/eGcvADcvGt/v9bihfQMpnNsXzm0D\ntS/UhUP77l0yhc3FVbyz8RDXLhzDuBFpp7aFQ/v6Q29hvQa4AXjOGDMf2N5tWxzgB1otywoYY44T\nPCTe0z6fqrLS2WufNjS3s3LLEbLS48nNiOvXejIzkx1v30AK5/aFc9tA7Qt14dS+O68cz38+u5WH\nn9nE//3CbHxeb1i173R9/RLSW1gvBa40xqzpvH2vMeZ2IMmyrEeNMb8D1hpjWoB9wJMEA/xj+/Sp\nIoes2n6UDn+Ay2bkamlREZFBNmlUBgsnD2NNUTlvbzjMNfPynS7JVXoMa8uybOBrp91d3G37w8DD\nZ9j19H1cLWDbrNh8hJgoLwun5jhdjohIRPrsZePYtr+al1aXMMtk6hB4NyEx8Gug7SytoepkC/Mm\nZZMYF+10OSIiESk5IYbbLx9PW3uA379lYdu20yW5hsKa4KUwARZOUa9aRMRJ8wuzKRyVTlFJDWt3\nHHO6HNeI+LC2bZsdJTUkxEYxNjfF6XJERCKax+PhzqsMAC9/EDKLXw64iA/r8pomqutamDQqHZ83\n4n8dIiKOG5aRwOQxGewuq+FgRXiOBu+riE+notIaACaPGeJwJSIi0mXRjFwA3u+8XHGkU1iXdIb1\n6AyHKxERkS7Txg4lMz2eD3dW0NyqZUgjOqzbO/xYB2sZPjSRjJQ4p8sREZFOXq+Ha+aPorXdz9qi\ncqfLcVxEh3Xx4ZO0dQTUqxYRcaEr5+Xj83pYseVIxE/jiuiwLiqpBmDyGIW1iIjbpCfHMXtiFker\nGik+dMLpchwV2WFdWkN0lJcJeWm9P1hERAZd10Cz9zZH9kCziA3r2vpWjlQ2YvLTiIn2OV2OiIic\nwfi8VPIyE9lcXMmJhlany3FMxIb1qUPgozVlS0TErTweD4tm5uEP2KzcdtTpchwTuWFdqilbIiKh\nYP6kbOJifHyw9Sj+QMDpchwRkWEdCNjsKqthSEosOUMSnC5HRER6EB8bxYLJw6itb2Xbvmqny3FE\nRIZ1aXkdjS0dFI4egkfXrhYRcb2ugWYrNh92uBJnRGRYd61aNkVTtkREQkJuZhLj81LZWVYbkQPN\nIjOsS6vxejwUjFRYi4iEihnjMwHY2TnmKJJEXFg3trRTcrSOMbkpJMRFOV2OiIicpa4FrIoU1uFv\nV1kttg1TNApcRCSk5A5NJD05lp2lNQQCkbX8aMSF9V+WGNX8ahGRUOLxeCgcnUFDczsHIuw61xEV\n1jV1LWzbX01SfDQjs5OdLkdERPqoa22Mro5XpIiYsN5VVsO/PLGBusY2Lp2Ri9erKVsiIqFm0qgM\nPJ7IO28d9iOsbNvm9Y8O8OLKErweD3ddNeHUfD0REQktSfHRjMlJYf+ROppaOiJmoHBYt7KppYPH\nX9/N5uJK0pNj+eubJjM2N9XpskRE5DwUjs5g/9E6dh+oYZbJcrqcQRG2h8GPVTfyo6c2srm4kon5\nafzgnjkKahGRMNA1QDiSDoWHZc/6aFUjP316M3VN7VwzL59bLhmDzxu230tERCLK6JxkEuOiKCqp\nwbbtiFg2OuwS7Fh1Iw8+s4W6pnbuvHICn100TkEtIhJGfF4vBaMyqK5robymyelyBkV
YpVhFbRMP\nPrOFk41t3H7FeC6fled0SSIiMgD+MoUrMg6Fh01YHz/RzE+f3sKJhjZuu2wcV84e4XRJIiIyQE6F\ndYSctw6LsK460cyDT2+mtr6Vzyway1Vz850uSUREBlBGShy5QxOxDtbS3uF3upwBF/Jh3dDczk+f\n2UJ1XSu3XDKGa+eNdLokEREZBIWjM2jrCFB86KTTpQy4kA/rZ97ZS9XJFhYvGMn1F4xyuhwRERkk\nf7kKV/gvPRrSYb19fxUf7ixn5LBkllw42ulyRERkEE3ISyM6yhsR561DNqybWzv43RsWPq+HL15X\noOlZIiIRJibah8lP40hlI7X1rU6XM6BCNuGee38/tfWtXH/BSEZkJTldjoiIOGDy6M7VzML8Klwh\nGdZ7DtTy/pYj5GYmsnjBKKfLERERh0wdGwzrP6/YF9aBHXJh3dru58nle/B44IvXFRDlC7kmiIhI\nPxmWkcC9102ktd3Pw3/exrK1ZQRs2+my+l3IJd1Lq0o4fqKZq+fkMzonxelyRETEYRdNHc5375pF\nekosS1eW8KsXd9DU0uF0Wf0qpMK65Ggdb204RFZ6PEsu0uhvEREJGp2Twj/fM4eCkels2VvFj57a\nyJGqRqfL6jchE9a2bfOHtyxsG+69diKx0T6nSxIRERdJSYjh7z83jWvm5VNR08SPn9qIdbDW6bL6\nRciEdVFpDWXl9cw2mZj8dKfLERERF/J5vXx20Ti+uqSQjo4AP39uO3sPn3C6rPMWEmFt2zbL1pQB\naPS3iIj0am5BNl9dMpn2jgAP/3kb+4+E9pKkIRHWew7Usu/ISaaPG0p+drLT5YiISAiYZTL5ypJC\n2toD/OzPWyk9Vud0SecsJMJ62doyAG5YOMrROkREJLTMmZjFl26YREubn4ee3cqB8nqnSzonrg/r\n4kMn2HPwBJPHZGiqloiI9Nm8Sdncf/0kmls7+M9nt3CwIvQC2/Vh3dWrvnGBpmqJiMi5uWDyML54\nfQFNLR388oUdtLSF1jxsV4d1ydE6dpbWUDAynXF5qU6XIyIiIWzhlByuu2Ak1XUtvPB+idPl9Imr\nw3rZmlIAbtAIcBER6Qc3LhxFzpAE3t18mOJDoTOly7VhfaC8nm37qxmfl4rJT3O6HBERCQPRUT7u\nva4AD/DE8j20tfudLumsuDasX+0aAb5gFB6Px9liREQkbIzLTeXy2XlU1DTxcucRXLdzZVhX1Dax\nqbiS0TnJFI7OcLocEREJM7dcPJahqXG8ue4QZeXun3/tyrD2eT2MzE7mc5eNV69aRET6XWyMj3uu\nnUjAtnn8tT10+ANOl9QjV4b10NR4fnDvHCaM0LlqEREZGJNGZXDR1BwOVzaw/KMDTpfTI1eGtYiI\nyGD43GXjSEuKYdnaMo5Vu/eSmlE9bTTGeIFHgKlAK3C/ZVn7O7dlA892e/h04AHLsv7XGLMZ6Fo1\nvcSyrPv6vXIREZHzlBAXzR1XTOCRl4pYtqaML99Y6HRJZ9RjWAM3ATGWZS0wxswDHuq8D8uyKoBF\nAMaYC4AfAY8aY+I6ty8asKpFRET6yUyTSV5mIut2V3DTxWPISot3uqRP6O0w+ELgDQDLstYBs09/\ngDHGA/wC+JplWTYwDUgwxrxpjHm3M+RFRERcyevxcN0FI7FteMOl565761mnAN3HtPuNMV7LsroP\nm7sBKLIsa2/n7UbgQcuyHjPGjAeWG2MmnLbPJ2RmhvelL9W+0BXObQO1L9Spff3jugsTeWVNGat3\nlHPvkilkpMQNyvOerd7Cug7o/ps6PagB7gR+3u12MbAPwLKsvcaYaiAHONLTE1VWht5VUM5WZmay\n2heiwrltoPaFOrWvf101ZwRPvWHxzPLdfPaycQP6XH39EtLbYfA1wHUAxpj5wPYzPGa2ZVkfdrt9\nL8Fz2xhjhhPsnR/rU1UiIiKDbOHkHFKTYlix9Qg
Nze1Ol/MxvYX1UqDFGLOGYAD/nTHmdmPMlwCM\nMZn8ZdR3l8eAFGPMSoKjxe/t7RC4iIiI06KjvFw9J5/WNj/vbTrsdDkf0+Nh8M4BY1877e7ibtsr\ngZmn7dMBfL6/ChQRERksl0wfzmsflvH2xkNcNXcEcTG9nS0eHFoURUREpFN8bBSXz8qjsaWDlVuP\nOl3OKQprERGRbq6YPYLYaB9vbjhEe4c7zuIqrEVERLpJio/mkunDqa1v5cOd5U6XAyisRUREPuHq\nufn4vB7XDDRzx5lzERERF0lPjuXzVxsaW9wxhUthLSIicgYXTxvudAmn6DC4iIiIyymsRUREXE5h\nLSIi4nIKaxEREZdTWIuIiLicwlpERMTlFNYiIiIup7AWERFxOYW1iIiIyymsRUREXE5hLSIi4nIK\naxEREZdTWIuIiLicwlpERMTlFNYiIiIup7AWERFxOYW1iIiIyymsRUREXE5hLSIi4nIKaxEREZdT\nWIuIiLicwlpERMTlFNYiIiIup7AWERFxOYW1iIiIyymsRUREXE5hLSIi4nIKaxEREZdTWIuIiLic\nwlpERMTlFNYiIiIup7AWERFxOYW1iIiIyymsRUREXE5hLSIi4nIKaxEREZdTWIuIiLicwlpERMTl\nFNYiIiIup7AWERFxOYW1iIiIyymsRUREXE5hLSIi4nIKaxEREZdTWIuIiLicwlpERMTlFNYiIiIu\nF9XTRmOMF3gEmAq0AvdblrW/c1s28Gy3h08HHgAeBf7nTPuIiIhI3/XWs74JiLEsawHwj8BDXRss\ny6qwLGuRZVmLgO8BmwgG9c1A7Jn2ERERkb7rLawXAm8AWJa1Dph9+gOMMR7gF8DXLMuyO/dZ3tM+\nIiIicvZ6C+sUoK7bbX/nofHubgCKLMva24d9RERE5Cz1eM6aYOgmd7vttSwrcNpj7gR+3sd9TufJ\nzEzu5SGhTe0LXeHcNlD7Qp3aFxl66/GuAa4DMMbMB7af4TGzLcv6sI/7iIiIyFnqrWe9FLjSGLOm\n8/a9xpjbgSTLsh41xmQCJ3vbp//KFRERiTwe27adrkFERER6oIFfIiIiLqewFhERcTmFtYiIiMsp\nrEVERFyut9HgA6qntcdDmTFmHvDvlmUtMsaMA54EAkAR8PXOld5CjjEmGngcGAnEAj8GdhM+7fMR\nXDJ3AmADXyX4vnySMGhfF2NMFsHlgS8n2K4nCZP2GWM285cZKiXAvxFe7fsuwYWoooH/JjhV9knC\noH3GmC8A93TejAemARcC/0WIt68z635L8LMlAHwJ8NOH187pnvWnrj0eqowx3yH4gR/bedfPgO9Z\nlnUx4AGWOFVbP7gTqOxsyzXArwi+ZuHSvsVAwLKsC4HvAz8hvNrX9YXrN0AjwfaEzfvTGBMH0HXN\nAsuy7iO82ncpcEHn5+WlwBjC6P1pWdbvul1vYiPwDeCfCY/2XQUkdn62/Cvn8NnidFj3uvZ4CNoH\n/BXBXz7ATMuyVnb+ezlwhSNV9Y/nCP7xQPC9004Ytc+yrJeBr3TeHAXUArPCpX2dHiR4VbxjnbfD\n5vUj2BNLMMa8aYx5t3NRpnBq31XADmPMS8Ay4BXC7/2JMWY2MMmyrN8SPu1rBlI7r6WRCrTRx7Y5\nHdZht464ZVkvAh3d7vJ0+3cDwRcqJFmW1WhZVoMxJplgcH+fj7+HQrp9AJZl+Y0xTxI89PZHwuj1\nM8bcQ/DIyFudd3kIo/YRPFrwoGVZVxM8hfHH07aHevsygVnArQTb9zTh9fp1+R7ww85/h0v71gBx\nwB6CR7Z+QR/b5nQwnss64qGme3uSgRNOFdIfjDEjgPeApyzLeoYwax+AZVn3AIbgOaa4bptCvX33\nElxdcAXB68//jmAAdAn19hXTGdCdFxaqBrK7bQ/19lUBb1mW1WFZVjHQwsc/4EO9fRhj0oAJlmV9\n0HlXuHy+fAd
YY1mWIfi39xTBcQddem2b02EdCeuIbzHGXNL572uBlT092M2MMdnAW8B3LMt6svPu\ncGrf5zsH8EDwsJUf2Bgu7bMs6xLLsi7tPCe4FbgbeCNc2kfwy8hDAMaY4QQ/AN8Ko/atJjhWpKt9\nCcC7YdQ+gIuBd7vdDpfPl0T+chS5luDg7j61zdHR4IT3OuJdo/r+AXjUGBMD7AKed66k8/Y9gt/k\n/9kY03Xu+pvAL8Kkfc8DTxpjPiD4rfebBA9bhcvrdzqb8Hp/PgY8YYzp+tC7l2DvOizaZ1nWa8aY\ni40x6wl2tP4aKCNM2tdpAtB9RlC4vD8fJPjeXEXws+W7BGdknHXbtDa4iIiIyzl9GFxERER6obAW\nERFxOYW1iIiIyymsRUREXE5hLSIi4nIKaxEREZdTWIuIiLjc/wflvwiNZYvHhQAAAABJRU5ErkJg\ngg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "counts = [0] * 76\n", "for n, rec in enumerate(bam.fetch('20', 0, 10000000)):\n", " for i in range(rec.query_alignment_start, rec.query_alignment_end):\n", " counts[i] += 1\n", "freqs = [x / (n + 1.) for x in counts]\n", "plt.plot(range(1, 77), freqs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "phreds = defaultdict(list)\n", "for rec in bam.fetch('20', 0, None):\n", " for i in range(rec.query_alignment_start, rec.query_alignment_end):\n", " phreds[i].append(rec.query_qualities[i])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "maxs = [max(phreds[i]) for i in range(76)]\n", "tops = [np.percentile(phreds[i], 95) for i in range(76)]\n", "medians = [np.percentile(phreds[i], 50) for i in range(76)]\n", "bottoms = [np.percentile(phreds[i], 5) for i in range(76)]\n", "medians_fig = [x - y for x, y in zip(medians, bottoms)]\n", "tops_fig = [x - y for x, y in zip(tops, medians)]\n", "maxs_fig = [x - y for x, y in zip(maxs, tops)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig, ax = plt.subplots(figsize=(16,9))\n", "ax.stackplot(range(1, 77), (bottoms, medians_fig, tops_fig, maxs_fig))\n", "ax.plot(range(1, 77), maxs, 'k-')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, 
"outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }