{ "metadata": { "name": "", "signature": "sha256:34d4bc6dd9cae988cf85d9c178bd02b80b27d963148d668d0bcda34f4e2403ec" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Black Abalone Digestive Gland Transcriptome Annotation\n", "Blast output from de novo assembly of Carmel Exposed and Control Libraries \n", "\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#tab-delimited fasta\n", "!date" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Wed Mar 5 11:16:17 PST 2014\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": [ "fasta:\n", "\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">Roberts_20100712_CC_F3_trimmed_contig_1 Average coverage: 38.44\r\n", "ATTTAAAGTGTTTGATAAAGACAATAACGGCTTCATCAGTAAATCCGAGCTCCGGCAGGT\r\n", "CATGGTGTCTTTGGAGGGTCACAAGGTCACCGAGCAGGAAATCAGCGAC\r\n", ">Roberts_20100712_CC_F3_trimmed_contig_2 Average coverage: 153.77\r\n", "CTTCAGCACAACTCAGGTGTCTGTCCGGCCGTTACAGCACACCCAGTTTGAGCGGTTCAT\r\n", "CCCTGCAGCCTACCCATATTACACCAGTGCCTTCTCCATGATGTTTGGAGTCCTTATACT\r\n", "GAGTATAGTGTTCTCATGCCCTGTCCTTCTTGGATTCC\r\n", ">Roberts_20100712_CC_F3_trimmed_contig_3 Average coverage: 175.58\r\n", "TGGAGGTGGGGTGCCTCATAGATGGTTTGGACTTGCCGGTCCTATAGGAGCAGGGGAATG\r\n", "GGGAAACCCAAGGTCGAGCTACCTGACACACGCCTTGGCCG\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "#shorten the contig names in the fasta headers and write the renamed copy\n", "!sed 's/Roberts_20100712_CC_F3_trimmed_/BlackAbalone_v3_/g' < /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa > /Volumes/web/cnidarian/lft_BlackAbalone_v3_fasta.fa\n", "#sed 's/abc/XYZ/g' < infile > outfile\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!head 
/Volumes/web/cnidarian/lft_BlackAbalone_v3_fasta.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">BlackAbalone_v3_contig_1 Average coverage: 38.44\r\n", "ATTTAAAGTGTTTGATAAAGACAATAACGGCTTCATCAGTAAATCCGAGCTCCGGCAGGT\r\n", "CATGGTGTCTTTGGAGGGTCACAAGGTCACCGAGCAGGAAATCAGCGAC\r\n", ">BlackAbalone_v3_contig_2 Average coverage: 153.77\r\n", "CTTCAGCACAACTCAGGTGTCTGTCCGGCCGTTACAGCACACCCAGTTTGAGCGGTTCAT\r\n", "CCCTGCAGCCTACCCATATTACACCAGTGCCTTCTCCATGATGTTTGGAGTCCTTATACT\r\n", "GAGTATAGTGTTCTCATGCCCTGTCCTTCTTGGATTCC\r\n", ">BlackAbalone_v3_contig_3 Average coverage: 175.58\r\n", "TGGAGGTGGGGTGCCTCATAGATGGTTTGGACTTGCCGGTCCTATAGGAGCAGGGGAATG\r\n", "GGGAAACCCAAGGTCGAGCTACCTGACACACGCCTTGGCCG\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!grep -c \">\" /Volumes/web/cnidarian/lft_BlackAbalone_v3_fasta.fa\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "13884\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Details of assembly in CLC \n", "\"Screenshot%207/10/13%206:34%20AM\"" ] }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Generic Annotation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "code \n", "`./blastx -query /Volumes/web-1/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web-1/whale/fish546/blast/db/swissprot -out /Volumes/web-1/cnidarian/lft_BlackAbalone_v3_swissprot_blastout -outfmt 6 -evalue 1E-5 -max_target_seqs 1 -num_threads 4`" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_1\tgi|41018621|sp|P60204.2|CALM_EMENI\t64.71\t34\t11\t1\t2\t103\t90\t122\t6e-08\t48.5\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_2\tgi|218546747|sp|B1H3C9.1|OST48_XENTR\t81.82\t44\t8\t0\t2\t133\t384\t427\t5e-18\t79.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi|133940|sp|P02350.2|RS31_XENLA\t100.00\t57\t0\t0\t1\t171\t10\t66\t4e-32\t 115\r\n", "Roberts_20100712_CC_F3_trimmed_contig_7\tgi|21362398|sp|P70097.1|C560_CRIGR\t46.76\t139\t70\t2\t7\t414\t3\t140\t9e-29\t 108\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|302393789|sp|P62972.2|UBIQP_XENLA\t100.00\t69\t0\t0\t2\t208\t8\t76\t9e-42\t 139\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tgi|74851961|sp|Q54GK6.1|RL222_DICDI\t59.46\t37\t15\t0\t2\t112\t45\t81\t7e-08\t48.1\r\n", "Roberts_20100712_CC_F3_trimmed_contig_19\tgi|231498|sp|P30163.1|ACT2_ONCVO\t98.08\t260\t5\t0\t1\t780\t32\t291\t0.0\t 537\r\n", "Roberts_20100712_CC_F3_trimmed_contig_22\tgi|158706130|sp|Q08CS6.2|MOXD2_DANRE\t33.73\t83\t51\t2\t1\t237\t448\t530\t1e-08\t54.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_26\tgi|6226551|sp|P29957.3|AMY_PSEHA\t50.98\t51\t21\t1\t1\t153\t527\t573\t9e-08\t51.2\r\n", "Roberts_20100712_CC_F3_trimmed_contig_28\tgi|133802|sp|P20342.3|RS15_XENLA\t100.00\t44\t0\t0\t3\t134\t102\t145\t5e-25\t95.1\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_13606\tgi|48474943|sp|Q99NF1.1|BCDO2_MOUSE\t60.00\t30\t12\t0\t2\t91\t226\t255\t3e-06\t45.8\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13614\tgi|82179554|sp|Q5M9A7.1|PGAP2_XENLA\t68.29\t41\t13\t0\t2\t124\t60\t100\t2e-14\t67.0\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13615\tgi|158957572|sp|Q0P457.2|KTI12_DANRE\t68.57\t35\t11\t0\t3\t107\t239\t273\t2e-10\t56.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13618\tgi|46577124|sp|Q9H3K6.1|BOLA2_HUMAN\t60.00\t35\t14\t0\t3\t107\t52\t86\t2e-09\t52.8\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_13654\tgi|2493067|sp|Q36967.1|ATP6_SALTR\t95.12\t41\t2\t0\t3\t125\t2\t42\t1e-18\t76.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13686\tgi|66774221|sp|Q9CZR2.2|NALD2_MOUSE\t60.61\t33\t13\t0\t6\t104\t670\t702\t3e-07\t49.3\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13693\tgi|71153494|sp|Q9H583.3|HEAT1_HUMAN\t47.73\t44\t23\t0\t5\t136\t1991\t2034\t2e-06\t47.4\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13748\tgi|122136056|sp|Q2KIM0.1|FUCO_BOVIN\t55.56\t36\t16\t0\t6\t113\t202\t237\t7e-08\t50.4\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13845\tgi|229462816|sp|Q9UKP5.2|ATS6_HUMAN\t39.29\t84\t44\t3\t46\t297\t556\t632\t3e-09\t57.4\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13882\tgi|166217735|sp|A1USB4.1|GATB_BARBK\t61.29\t31\t12\t0\t10\t102\t12\t42\t1e-06\t47.0\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!grep -c \"Roberts\" /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "1842\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "#now with version28\n", "!blastx -query /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web/whale/fish546/blast/db/swissprot -out /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_tax1 -outfmt \"6 std stitle staxids sscinames scomnames sblastnames\" -evalue 1E-10 -max_target_seqs 1 " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 613 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 613 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 24 replaced by X\r\n", "Selenocysteine (U) at position 63 replaced by X\r\n", "Selenocysteine (U) at position 25 
replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 63 replaced by X\r\n", "Selenocysteine (U) at position 60 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n", "Selenocysteine (U) at position 73 replaced by X\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Selenocysteine (U) at position 64 replaced by X\r\n" ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "#Updating July 24, 2013\n", "#Want to GOslim pie the transcriptome based on Swiss-Prot\n", "#tr ',' \"\\t\"" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "#split the pipe-delimited subject ID of the raw blastx table into tab-separated columns\n", "!tr '|' \"\\t\" < /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout > /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_b\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_b" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_1\tgi\t41018621\tsp\tP60204.2\tCALM_EMENI\t64.71\t34\t11\t1\t2\t103\t90\t122\t6e-08\t48.5\r\n", "Roberts_20100712_CC_F3_trimmed_contig_2\tgi\t218546747\tsp\tB1H3C9.1\tOST48_XENTR\t81.82\t44\t8\t0\t2\t133\t384\t427\t5e-18\t79.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi\t133940\tsp\tP02350.2\tRS31_XENLA\t100.00\t57\t0\t0\t1\t171\t10\t66\t4e-32\t 115\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_7\tgi\t21362398\tsp\tP70097.1\tC560_CRIGR\t46.76\t139\t70\t2\t7\t414\t3\t140\t9e-29\t 108\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi\t302393789\tsp\tP62972.2\tUBIQP_XENLA\t100.00\t69\t0\t0\t2\t208\t8\t76\t9e-42\t 139\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tgi\t74851961\tsp\tQ54GK6.1\tRL222_DICDI\t59.46\t37\t15\t0\t2\t112\t45\t81\t7e-08\t48.1\r\n", "Roberts_20100712_CC_F3_trimmed_contig_19\tgi\t231498\tsp\tP30163.1\tACT2_ONCVO\t98.08\t260\t5\t0\t1\t780\t32\t291\t0.0\t 537\r\n", "Roberts_20100712_CC_F3_trimmed_contig_22\tgi\t158706130\tsp\tQ08CS6.2\tMOXD2_DANRE\t33.73\t83\t51\t2\t1\t237\t448\t530\t1e-08\t54.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_26\tgi\t6226551\tsp\tP29957.3\tAMY_PSEHA\t50.98\t51\t21\t1\t1\t153\t527\t573\t9e-08\t51.2\r\n", "Roberts_20100712_CC_F3_trimmed_contig_28\tgi\t133802\tsp\tP20342.3\tRS15_XENLA\t100.00\t44\t0\t0\t3\t134\t102\t145\t5e-25\t95.1\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "#also need to get rid of version # on Swiss-Prot ID\n", "#note that it will also break bitscore \n", "!tr '.' 
\"\\t\" < /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_b > /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_c" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_c" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_1\tgi\t41018621\tsp\tP60204\t2\tCALM_EMENI\t64\t71\t34\t11\t1\t2\t103\t90\t122\t6e-08\t48\t5\r\n", "Roberts_20100712_CC_F3_trimmed_contig_2\tgi\t218546747\tsp\tB1H3C9\t1\tOST48_XENTR\t81\t82\t44\t8\t0\t2\t133\t384\t427\t5e-18\t79\t7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi\t133940\tsp\tP02350\t2\tRS31_XENLA\t100\t00\t57\t0\t0\t1\t171\t10\t66\t4e-32\t 115\r\n", "Roberts_20100712_CC_F3_trimmed_contig_7\tgi\t21362398\tsp\tP70097\t1\tC560_CRIGR\t46\t76\t139\t70\t2\t7\t414\t3\t140\t9e-29\t 108\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi\t302393789\tsp\tP62972\t2\tUBIQP_XENLA\t100\t00\t69\t0\t0\t2\t208\t8\t76\t9e-42\t 139\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tgi\t74851961\tsp\tQ54GK6\t1\tRL222_DICDI\t59\t46\t37\t15\t0\t2\t112\t45\t81\t7e-08\t48\t1\r\n", "Roberts_20100712_CC_F3_trimmed_contig_19\tgi\t231498\tsp\tP30163\t1\tACT2_ONCVO\t98\t08\t260\t5\t0\t1\t780\t32\t291\t0\t0\t 537\r\n", "Roberts_20100712_CC_F3_trimmed_contig_22\tgi\t158706130\tsp\tQ08CS6\t2\tMOXD2_DANRE\t33\t73\t83\t51\t2\t1\t237\t448\t530\t1e-08\t54\t7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_26\tgi\t6226551\tsp\tP29957\t3\tAMY_PSEHA\t50\t98\t51\t21\t1\t1\t153\t527\t573\t9e-08\t51\t2\r\n", "Roberts_20100712_CC_F3_trimmed_contig_28\tgi\t133802\tsp\tP20342\t3\tRS15_XENLA\t100\t00\t44\t0\t0\t3\t134\t102\t145\t5e-25\t95\t1\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "#rename the contig prefix in the query column and write the final table (_d)\n", "!sed 's/Roberts_20100712_CC_F3_trimmed/Haliotis_cra_v3/g' < /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_c > /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_d\n", "#sed 's/abc/XYZ/g' < infile > 
outfile\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_d" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Haliotis_cra_v3_contig_1\tgi\t41018621\tsp\tP60204\t2\tCALM_EMENI\t64\t71\t34\t11\t1\t2\t103\t90\t122\t6e-08\t48\t5\r\n", "Haliotis_cra_v3_contig_2\tgi\t218546747\tsp\tB1H3C9\t1\tOST48_XENTR\t81\t82\t44\t8\t0\t2\t133\t384\t427\t5e-18\t79\t7\r\n", "Haliotis_cra_v3_contig_6\tgi\t133940\tsp\tP02350\t2\tRS31_XENLA\t100\t00\t57\t0\t0\t1\t171\t10\t66\t4e-32\t 115\r\n", "Haliotis_cra_v3_contig_7\tgi\t21362398\tsp\tP70097\t1\tC560_CRIGR\t46\t76\t139\t70\t2\t7\t414\t3\t140\t9e-29\t 108\r\n", "Haliotis_cra_v3_contig_9\tgi\t302393789\tsp\tP62972\t2\tUBIQP_XENLA\t100\t00\t69\t0\t0\t2\t208\t8\t76\t9e-42\t 139\r\n", "Haliotis_cra_v3_contig_15\tgi\t74851961\tsp\tQ54GK6\t1\tRL222_DICDI\t59\t46\t37\t15\t0\t2\t112\t45\t81\t7e-08\t48\t1\r\n", "Haliotis_cra_v3_contig_19\tgi\t231498\tsp\tP30163\t1\tACT2_ONCVO\t98\t08\t260\t5\t0\t1\t780\t32\t291\t0\t0\t 537\r\n", "Haliotis_cra_v3_contig_22\tgi\t158706130\tsp\tQ08CS6\t2\tMOXD2_DANRE\t33\t73\t83\t51\t2\t1\t237\t448\t530\t1e-08\t54\t7\r\n", "Haliotis_cra_v3_contig_26\tgi\t6226551\tsp\tP29957\t3\tAMY_PSEHA\t50\t98\t51\t21\t1\t1\t153\t527\t573\t9e-08\t51\t2\r\n", "Haliotis_cra_v3_contig_28\tgi\t133802\tsp\tP20342\t3\tRS15_XENLA\t100\t00\t44\t0\t0\t3\t134\t102\t145\t5e-25\t95\t1\r\n" ] } ], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_d" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 1842 34752 186913 /Volumes/web/cnidarian/lft_BlackAbalone_v3_swissprot_blastout_d\r\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "#lets 
try again to see if we can upload to SQLShare from the command line\n", "#use the %cd magic: a !cd runs in a throwaway subshell and does not change the notebook's working directory\n", "%cd /Users/sr320/pythonclient/tools" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "#cp samtools /usr/local/bin\n", "!cp uploadone.py /usr/local/bin" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "cp: /usr/local/bin: Permission denied\r\n" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "#SQLShare direct.\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Update: Joining Blast with more info " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"SQLShare_-_View_Query_18B006DC_png\"/" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -d \"[sr320@washington.edu].[lft_BlackAbalone_v3_SP_GO_pathway]\" -f tsv -o /Volumes/web/cnidarian/lft_BlackAbalone_sp_go_path.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head -2 /Volumes/web/cnidarian/lft_BlackAbalone_sp_go_path.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ContigID\tSPID\tevalue\tEntry\tEntry name\tGene ontology IDs\tInteracts with\tCross-reference (GO)\tGene ontology (GO)\tStatus\tInterPro\tPathway\tProtein names\tGene names\tOrganism\tLength\r", "\r\n", "Haliotis_cra_v3_contig_1\tP60204\t6E-08\tP60204\tCALM_EMENI\tGO:0005509; GO:0001411; GO:0051726; GO:0009847\t\t\tcalcium ion binding; hyphal tip; regulation of cell cycle; spore germination\treviewed\tIPR011992; IPR018247; IPR002048; IPR001125;\t\tCalmodulin (CaM)\tcamA cam AN2047\tEmericella nidulans (strain FGSC A4 / ATCC 38163 / CBS 112.46 / 
NRRL 194 / M139) (Aspergillus nidulans)\t149\r", "\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/lft_BlackAbalone_sp_go_path.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 1843 96683 938139 /Volumes/web/cnidarian/lft_BlackAbalone_sp_go_path.txt\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### SQLShare Joining\n", "Input: \n", "\n", "\n", "```\n", " SELECT distinct * \n", " FROM [sr320@washington.edu].[lft_BlackAbalone_v3_swissprot_blastout_d]d\n", " left join\n", " [sr320@washington.edu].[SPID and GO Numbers]go\n", " on\n", " d.SPID = go.SPID\n", "```\n", "\n", "adding Slim info...\n", "\n", "```\n", " SELECT distinct * \n", " FROM [sr320@washington.edu].[lft_BlackAbalone_v3_swissprot_blastout_d]d\n", " left join\n", " [sr320@washington.edu].[SPID and GO Numbers]go\n", " on\n", " d.SPID = go.SPID\n", " left join \n", " [sr320@washington.edu].[GO_to_GOslim]slim\n", " on \n", " go.GOID = slim.GO_id\n", "``` \n", "\n", "\n", "\n", "\n", "To get GO Slim info \n", " \n", "\n", "```\n", " SELECT Distinct\n", " ContigID,\n", " GOSlim_bin,\n", " evalue\n", " FROM [sr320@washington.edu].[lft_BlackAbalone_v3_GO]\n", " Where aspect like 'P'\n", "```\n", "\n", "_Should probably threshold on e-value as well:_ \n", " \n", " \n", "```\n", "SELECT Distinct\n", " ContigID,\n", " GOSlim_bin,\n", " evalue\n", " FROM [sr320@washington.edu].[lft_BlackAbalone_v3_GO]\n", " Where aspect like 'P'\n", " and \n", " evalue < 1E-10\n", "```\n", " \n", " " ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3GOslim.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ContigID,GOSlim_bin,evalue\r", "\r\n", 
"Haliotis_cra_v3_contig_10026,death,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10026,developmental processes,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10026,other biological processes,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10026,other metabolic processes,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10033,other metabolic processes,3E-15\r", "\r\n", "Haliotis_cra_v3_contig_1005,protein metabolism,2E-15\r", "\r\n", "Haliotis_cra_v3_contig_10051,cell organization and biogenesis,4E-14\r", "\r\n", "Haliotis_cra_v3_contig_10051,developmental processes,4E-14\r", "\r\n", "Haliotis_cra_v3_contig_10051,RNA metabolism,4E-14\r", "\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/lft_BlackAbalone_v3GOslim.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 2352 5731 133085 /Volumes/web/cnidarian/lft_BlackAbalone_v3GOslim.csv\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "#query with no evalue limit to verify accuracy\n", "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3GOslim_jp.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ContigID,GOSlim_bin,evalue\r", "\r\n", "Haliotis_cra_v3_contig_1002,developmental processes,8E-10\r", "\r\n", "Haliotis_cra_v3_contig_1002,other biological processes,8E-10\r", "\r\n", "Haliotis_cra_v3_contig_10026,other metabolic processes,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10026,death,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10026,developmental processes,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10026,other biological processes,1E-33\r", "\r\n", "Haliotis_cra_v3_contig_10033,other metabolic processes,3E-15\r", "\r\n", "Haliotis_cra_v3_contig_10042,protein metabolism,7E-09\r", "\r\n", "Haliotis_cra_v3_contig_1005,protein metabolism,2E-15\r", "\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", 
"collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/lft_BlackAbalone_v3GOslim_jp.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 3430 8417 194682 /Volumes/web/cnidarian/lft_BlackAbalone_v3GOslim_jp.csv\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "raw", "metadata": {}, "source": [ "wwcd - (verified again in exhell)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#will try the R\n", "%pylab inline" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline].\n", "For more information, type 'help(pylab)'." ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "import matplotlib.pylab as plt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "import rpy2" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "ImportError", "evalue": "No module named rpy2", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mrpy2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mImportError\u001b[0m: No module named rpy2" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Gave up and used EXCEL \n", "\n", "\"Screenshot_7_26_13_6_53_AM_17A2B6D8.png\"/\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##iPath \n", 
"getting data into format\n", "\n", "```\n", "SELECT Distinct\n", " ContigID,\n", " SPID\n", " FROM [sr320@washington.edu].[lft_BlackAbalone_v3_GO]\n", " Where \n", " evalue < 1E-10\n", "``` \n", "\n", "\n", "\"iPath2__Metabolic_overview_map_17A2BAF1.png\"/" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "1210 IDs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"iPath2__Metabolic_overview_map_17A2BAF1.png\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "code\n", "`\n", "./blastx -query /Volumes/web-1/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web-1/whale/fish546/blast/db/nr -out /Volumes/web-1/cnidarian/lft_BlackAbalone_v3_nr_blastout -outfmt 6 -evalue 1E-5 -max_target_seqs 1 -num_threads 4 `" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_nr_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_1\tgi|166714376|gb|ABY87953.1|\t67.65\t34\t10\t1\t2\t103\t90\t122\t7e-07\t49.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_2\tgi|109020229|ref|XP_001111903.1|\t81.82\t44\t8\t0\t2\t133\t72\t115\t1e-17\t79.3\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi|149068841|gb|EDM18393.1|\t100.00\t57\t0\t0\t1\t171\t10\t66\t4e-32\t 116\r\n", "Roberts_20100712_CC_F3_trimmed_contig_7\tgi|169153945|emb|CAQ14310.1|\t48.94\t141\t67\t2\t4\t414\t1\t140\t4e-34\t 127\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|342326368|gb|AEL23099.1|\t100.00\t69\t0\t0\t2\t208\t15\t83\t3e-41\t 140\r\n", "Roberts_20100712_CC_F3_trimmed_contig_10\tgi|166406892|gb|ABY87409.1|\t75.76\t33\t8\t0\t6\t104\t43\t75\t4e-09\t56.2\r\n", "Roberts_20100712_CC_F3_trimmed_contig_14\tgi|91992330|gb|ABE72920.1|\t96.43\t56\t2\t0\t43\t210\t371\t426\t2e-30\t 120\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_15\tgi|340382506|ref|XP_003389760.1|\t72.97\t37\t10\t0\t2\t112\t56\t92\t6e-09\t55.5\r\n", "Roberts_20100712_CC_F3_trimmed_contig_17\tgi|340370586|ref|XP_003383827.1|\t60.53\t38\t15\t0\t1\t114\t218\t255\t5e-12\t65.5\r\n", "Roberts_20100712_CC_F3_trimmed_contig_19\tgi|37528876|gb|AAQ92368.1|\t99.62\t260\t1\t0\t1\t780\t32\t291\t0.0\t 542\r\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/lft_BlackAbalone_v3_nr_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_13618\tgi|348544575|ref|XP_003459756.1|\t71.43\t35\t10\t0\t3\t107\t73\t107\t8e-10\t58.2\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13654\tgi|62240178|gb|AAX77257.1|\t100.00\t41\t0\t0\t3\t125\t120\t160\t8e-19\t83.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13686\tgi|340384210|ref|XP_003390607.1|\t66.67\t33\t11\t0\t6\t104\t700\t732\t1e-07\t55.1\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13693\tgi|115767231|ref|XP_782598.2|\t59.09\t44\t18\t0\t5\t136\t1316\t1359\t8e-09\t58.9\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13748\tgi|328793281|ref|XP_395852.4|\t75.00\t36\t9\t0\t6\t113\t209\t244\t7e-10\t60.8\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13760\tgi|32407325|gb|AAP41556.1|\t93.33\t30\t2\t0\t2\t91\t299\t328\t3e-07\t53.1\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13804\tgi|166406789|gb|ABY87358.1|\t70.45\t44\t13\t0\t2\t133\t239\t282\t3e-14\t73.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13845\tgi|156378102|ref|XP_001630983.1|\t54.69\t64\t22\t3\t49\t240\t58\t114\t6e-09\t57.8\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13855\tgi|261754259|ref|ZP_05997968.1|\t80.49\t41\t8\t0\t123\t1\t14\t54\t2e-15\t72.0\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13882\tgi|144899095|emb|CAM75959.1|\t66.67\t30\t10\t0\t13\t102\t1\t30\t1e-05\t48.9\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", 
"collapsed": false, "input": [ "!grep -c \"Roberts\" /Volumes/web/cnidarian/lft_BlackAbalone_v3_nr_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "2525\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "code\n", "`\n", "./blastn -query /Volumes/web-1/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/CLC_blastdatabases/nt -out /Volumes/web-1/cnidarian/lft_BlackAbalone_v3_nt_blastout -outfmt 6 -evalue 1E-5 -max_target_seqs 1 -num_threads 6 -task blastn\n", "`" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_2\tgi|115947556|ref|XM_796732.2|\t84.21\t114\t18\t0\t2\t115\t605\t718\t7e-26\t 125\r\n", "Roberts_20100712_CC_F3_trimmed_contig_4\tgi|177667010|gb|EU595789.1|\t85.97\t221\t31\t0\t3\t223\t9228\t9008\t8e-66\t 259\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi|345329379|ref|XM_001506155.2|\t86.96\t161\t21\t0\t1\t161\t60\t220\t5e-47\t 196\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\t88.67\t203\t23\t0\t5\t207\t364\t566\t6e-67\t 262\r\n", "Roberts_20100712_CC_F3_trimmed_contig_10\tgi|166406891|gb|EU244393.1|\t80.85\t94\t18\t0\t12\t105\t133\t226\t4e-15\t89.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_14\tgi|91992331|gb|DQ453716.1|\t97.65\t213\t5\t0\t1\t213\t955\t1167\t1e-96\t 361\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tgi|345492297|ref|XM_001600219.2|\t75.89\t112\t27\t0\t1\t112\t225\t336\t2e-12\t80.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_17\tgi|260831083|ref|XM_002610443.1|\t86.27\t51\t7\t0\t37\t87\t762\t812\t2e-06\t60.8\r\n", "Roberts_20100712_CC_F3_trimmed_contig_19\tgi|270313645|gb|GU263793.1|\t99.10\t782\t7\t0\t1\t782\t164\t945\t0.0\t1379\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_20\tgi|89331166|dbj|AB234872.1|\t95.93\t369\t15\t0\t1\t369\t544\t912\t1e-167\t 598\r\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_13802\tgi|300386193|gb|GU995619.1|\t81.36\t118\t21\t1\t7\t124\t422\t538\t1e-21\t 111\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13804\tgi|166406788|gb|EU244341.1|\t81.95\t133\t24\t0\t1\t133\t716\t848\t4e-28\t 132\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13813\tgi|300385867|gb|GU995293.1|\t87.30\t63\t5\t2\t3\t63\t229\t290\t9e-10\t71.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13820\tgi|356473005|gb|HQ650445.1|\t88.89\t54\t6\t0\t1\t54\t54\t107\t8e-10\t71.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13828\tgi|60219427|emb|CR388147.13|\t83.61\t61\t6\t2\t95\t155\t89790\t89734\t2e-06\t60.8\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13855\tgi|3930574|gb|AF069062.1|AF069062\t99.23\t130\t1\t0\t1\t130\t289\t418\t2e-57\t 230\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13858\tgi|13195722|gb|AF133090.2|AF133090\t100.00\t115\t0\t0\t1\t115\t73\t187\t5e-51\t 208\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13860\tgi|190356750|emb|AM999887.1|\t84.09\t132\t19\t2\t1\t131\t1236811\t1236941\t3e-29\t 136\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13863\tgi|82541884|gb|DQ291132.1|\t87.76\t49\t6\t0\t13\t61\t102093\t102045\t5e-07\t62.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13871\tgi|13195722|gb|AF133090.2|AF133090\t100.00\t91\t0\t0\t1\t91\t547\t637\t5e-38\t 165\r\n" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "!grep -c \"Roberts\" /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "2492\r\n" ] } ], 
"prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "###Attempt to get taxonomy information using blast version 2.2.28\n", "`\n", "./blastn -query /Volumes/web-1/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/CLC_blastdatabases/nt -out /Volumes/web-1/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa1 -outfmt \"6 qseqid sseqid sallseqid pident length evalue bitscore staxids sscinames scomnames sblastnames qcovs\" -evalue 1E-5 -max_target_seqs 1 -num_threads 10 -task blastn\n", "`\n", "\n", "*without luck*" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa1" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_2\tgi|115947556|ref|XM_796732.2|\tgi|115947556|ref|XM_796732.2|\t84.21\t114\t7e-26\t 125\t7668\tN/A\tN/A\tN/A\t72\r\n", "Roberts_20100712_CC_F3_trimmed_contig_4\tgi|177667010|gb|EU595789.1|\tgi|177667010|gb|EU595789.1|\t85.97\t221\t8e-66\t 259\t42344\tN/A\tN/A\tN/A\t93\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi|345329379|ref|XM_001506155.2|\tgi|345329379|ref|XM_001506155.2|\t86.96\t161\t5e-47\t 196\t9258\tN/A\tN/A\tN/A\t93\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\tgi|306922143|dbj|AB490993.1|\t88.67\t203\t6e-67\t 262\t214486\tN/A\tN/A\tN/A\t89\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\tgi|306922143|dbj|AB490993.1|\t87.25\t204\t1e-62\t 248\t214486\tN/A\tN/A\tN/A\t89\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\tgi|306922143|dbj|AB490993.1|\t85.71\t203\t9e-59\t 235\t214486\tN/A\tN/A\tN/A\t89\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\tgi|306922143|dbj|AB490993.1|\t90.00\t110\t1e-32\t 149\t214486\tN/A\tN/A\tN/A\t89\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_10\tgi|166406891|gb|EU244393.1|\tgi|166406891|gb|EU244393.1|\t80.85\t94\t4e-15\t89.7\t36095\tN/A\tN/A\tN/A\t75\r\n", "Roberts_20100712_CC_F3_trimmed_contig_14\tgi|91992331|gb|DQ453716.1|\tgi|91992331|gb|DQ453716.1|\t97.65\t213\t1e-96\t 361\t6454\tN/A\tN/A\tN/A\t76\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tgi|345492297|ref|XM_001600219.2|\tgi|345492297|ref|XM_001600219.2|\t75.89\t112\t2e-12\t80.6\t7425\tN/A\tN/A\tN/A\t100\r\n" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -help" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "USAGE\r\n", " blastn [-h] [-help] [-import_search_strategy filename]\r\n", " [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n", " [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n", " [-negative_gilist filename] [-entrez_query entrez_query]\r\n", " [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n", " [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n", " [-out output_file] [-evalue evalue] [-word_size int_value]\r\n", " [-gapopen open_penalty] [-gapextend extend_penalty]\r\n", " [-perc_identity float_value] [-xdrop_ungap float_value]\r\n", " [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n", " [-searchsp int_value] [-max_hsps_per_subject int_value] [-penalty penalty]\r\n", " [-reward reward] [-no_greedy] [-min_raw_gapped_score int_value]\r\n", " [-template_type type] [-template_length int_value] [-dust DUST_options]\r\n", " [-filtering_db filtering_database]\r\n", " [-window_masker_taxid window_masker_taxid]\r\n", " [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n", " [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n", " [-best_hit_score_edge float_value] [-window_size int_value]\r\n", " [-off_diagonal_range int_value] [-use_index boolean] 
[-index_name string]\r\n", " [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n", " [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n", " [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n", " [-num_threads int_value] [-remote] [-version]\r\n", "\r\n", "DESCRIPTION\r\n", " Nucleotide-Nucleotide BLAST 2.2.28+\r\n", "\r\n", "OPTIONAL ARGUMENTS\r\n", " -h\r\n", " Print USAGE and DESCRIPTION; ignore all other parameters\r\n", " -help\r\n", " Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters\r\n", " -version\r\n", " Print version number; ignore other arguments\r\n", "\r\n", " *** Input query options\r\n", " -query \r\n", " Input file name\r\n", " Default = `-'\r\n", " -query_loc \r\n", " Location on the query sequence in 1-based offsets (Format: start-stop)\r\n", " -strand \r\n", " Query strand(s) to search against database/subject\r\n", " Default = `both'\r\n", "\r\n", " *** General search options\r\n", " -task \r\n", " Task to execute\r\n", " Default = `megablast'\r\n", " -db \r\n", " BLAST database name\r\n", " * Incompatible with: subject, subject_loc\r\n", " -out \r\n", " Output file name\r\n", " Default = `-'\r\n", " -evalue \r\n", " Expectation value (E) threshold for saving hits \r\n", " Default = `10'\r\n", " -word_size =4>\r\n", " Word size for wordfinder algorithm (length of best perfect match)\r\n", " -gapopen \r\n", " Cost to open a gap\r\n", " -gapextend \r\n", " Cost to extend a gap\r\n", " -penalty \r\n", " Penalty for a nucleotide mismatch\r\n", " -reward =0>\r\n", " Reward for a nucleotide match\r\n", " -use_index \r\n", " Use MegaBLAST database index\r\n", " -index_name \r\n", " MegaBLAST database index name\r\n", "\r\n", " *** BLAST-2-Sequences options\r\n", " -subject \r\n", " Subject sequence(s) to search\r\n", " * Incompatible with: db, gilist, seqidlist, negative_gilist,\r\n", " db_soft_mask, db_hard_mask\r\n", " -subject_loc \r\n", " Location on the subject sequence in 
1-based offsets (Format: start-stop)\r\n", " * Incompatible with: db, gilist, seqidlist, negative_gilist,\r\n", " db_soft_mask, db_hard_mask, remote\r\n", "\r\n", " *** Formatting options\r\n", " -outfmt \r\n", " alignment view options:\r\n", " 0 = pairwise,\r\n", " 1 = query-anchored showing identities,\r\n", " 2 = query-anchored no identities,\r\n", " 3 = flat query-anchored, show identities,\r\n", " 4 = flat query-anchored, no identities,\r\n", " 5 = XML Blast output,\r\n", " 6 = tabular,\r\n", " 7 = tabular with comment lines,\r\n", " 8 = Text ASN.1,\r\n", " 9 = Binary ASN.1,\r\n", " 10 = Comma-separated values,\r\n", " 11 = BLAST archive format (ASN.1) \r\n", " \r\n", " Options 6, 7, and 10 can be additionally configured to produce\r\n", " a custom format specified by space delimited format specifiers.\r\n", " The supported format specifiers are:\r\n", " \t qseqid means Query Seq-id\r\n", " \t qgi means Query GI\r\n", " \t qacc means Query accesion\r\n", " \t qaccver means Query accesion.version\r\n", " \t qlen means Query sequence length\r\n", " \t sseqid means Subject Seq-id\r\n", " \t sallseqid means All subject Seq-id(s), separated by a ';'\r\n", " \t sgi means Subject GI\r\n", " \t sallgi means All subject GIs\r\n", " \t sacc means Subject accession\r\n", " \t saccver means Subject accession.version\r\n", " \t sallacc means All subject accessions\r\n", " \t slen means Subject sequence length\r\n", " \t qstart means Start of alignment in query\r\n", " \t qend means End of alignment in query\r\n", " \t sstart means Start of alignment in subject\r\n", " \t send means End of alignment in subject\r\n", " \t qseq means Aligned part of query sequence\r\n", " \t sseq means Aligned part of subject sequence\r\n", " \t evalue means Expect value\r\n", " \t bitscore means Bit score\r\n", " \t score means Raw score\r\n", " \t length means Alignment length\r\n", " \t pident means Percentage of identical matches\r\n", " \t nident means Number of identical matches\r\n", " 
\t mismatch means Number of mismatches\r\n", " \t positive means Number of positive-scoring matches\r\n", " \t gapopen means Number of gap openings\r\n", " \t gaps means Total number of gaps\r\n", " \t ppos means Percentage of positive-scoring matches\r\n", " \t frames means Query and subject frames separated by a '/'\r\n", " \t qframe means Query frame\r\n", " \t sframe means Subject frame\r\n", " \t btop means Blast traceback operations (BTOP)\r\n", " \t staxids means Subject Taxonomy ID(s), separated by a ';'\r\n", " \t sscinames means Subject Scientific Name(s), separated by a ';'\r\n", " \t scomnames means Subject Common Name(s), separated by a ';'\r\n", " \tsblastnames means Subject Blast Name(s), separated by a ';'\r\n", " \t\t\t (in alphabetical order)\r\n", " \tsskingdoms means Subject Super Kingdom(s), separated by a ';'\r\n", " \t\t\t (in alphabetical order) \r\n", " \t stitle means Subject Title\r\n", " \tsalltitles means All Subject Title(s), separated by a '<>'\r\n", " \t sstrand means Subject Strand\r\n", " \t qcovs means Query Coverage Per Subject\r\n", " \t qcovhsp means Query Coverage Per HSP\r\n", " When not provided, the default value is:\r\n", " 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send\r\n", " evalue bitscore', which is equivalent to the keyword 'std'\r\n", " Default = `0'\r\n", " -show_gis\r\n", " Show NCBI GIs in deflines?\r\n", " -num_descriptions =0>\r\n", " Number of database sequences to show one-line descriptions for\r\n", " Not applicable for outfmt > 4\r\n", " Default = `500'\r\n", " * Incompatible with: max_target_seqs\r\n", " -num_alignments =0>\r\n", " Number of database sequences to show alignments for\r\n", " Default = `250'\r\n", " * Incompatible with: max_target_seqs\r\n", " -html\r\n", " Produce HTML output?\r\n", "\r\n", " *** Query filtering options\r\n", " -dust \r\n", " Filter query sequence with DUST (Format: 'yes', 'level window linker', or\r\n", " 'no' to disable)\r\n", " Default = `20 64 
1'\r\n", " -filtering_db \r\n", " BLAST database containing filtering elements (i.e.: repeats)\r\n", " -window_masker_taxid \r\n", " Enable WindowMasker filtering using a Taxonomic ID\r\n", " -window_masker_db \r\n", " Enable WindowMasker filtering using this repeats database.\r\n", " -soft_masking \r\n", " Apply filtering locations as soft masks\r\n", " Default = `true'\r\n", " -lcase_masking\r\n", " Use lower case filtering in query and subject sequence(s)?\r\n", "\r\n", " *** Restrict search or results\r\n", " -gilist \r\n", " Restrict search of database to list of GI's\r\n", " * Incompatible with: negative_gilist, seqidlist, remote, subject,\r\n", " subject_loc\r\n", " -seqidlist \r\n", " Restrict search of database to list of SeqId's\r\n", " * Incompatible with: gilist, negative_gilist, remote, subject,\r\n", " subject_loc\r\n", " -negative_gilist \r\n", " Restrict search of database to everything except the listed GIs\r\n", " * Incompatible with: gilist, seqidlist, remote, subject, subject_loc\r\n", " -entrez_query \r\n", " Restrict search with the given Entrez query\r\n", " * Requires: remote\r\n", " -db_soft_mask \r\n", " Filtering algorithm ID to apply to the BLAST database as soft masking\r\n", " * Incompatible with: db_hard_mask, subject, subject_loc\r\n", " -db_hard_mask \r\n", " Filtering algorithm ID to apply to the BLAST database as hard masking\r\n", " * Incompatible with: db_soft_mask, subject, subject_loc\r\n", " -perc_identity \r\n", " Percent identity\r\n", " -culling_limit =0>\r\n", " If the query range of a hit is enveloped by that of at least this many\r\n", " higher-scoring hits, delete the hit\r\n", " * Incompatible with: best_hit_overhang, best_hit_score_edge\r\n", " -best_hit_overhang =0 and =<0.5)>\r\n", " Best Hit algorithm overhang value (recommended value: 0.1)\r\n", " * Incompatible with: culling_limit\r\n", " -best_hit_score_edge =0 and =<0.5)>\r\n", " Best Hit algorithm score edge value (recommended value: 0.1)\r\n", " * 
Incompatible with: culling_limit\r\n", " -max_target_seqs =1>\r\n", " Maximum number of aligned sequences to keep \r\n", " Not applicable for outfmt <= 4\r\n", " Default = `500'\r\n", " * Incompatible with: num_descriptions, num_alignments\r\n", "\r\n", " *** Discontiguous MegaBLAST options\r\n", " -template_type \r\n", " Discontiguous MegaBLAST template type\r\n", " * Requires: template_length\r\n", " -template_length \r\n", " Discontiguous MegaBLAST template length\r\n", " * Requires: template_type\r\n", "\r\n", " *** Statistical options\r\n", " -dbsize \r\n", " Effective length of the database \r\n", " -searchsp =0>\r\n", " Effective length of the search space\r\n", " -max_hsps_per_subject =0>\r\n", " Override maximum number of HSPs per subject to save for ungapped searches\r\n", " (0 means do not override)\r\n", " Default = `0'\r\n", "\r\n", " *** Search strategy options\r\n", " -import_search_strategy \r\n", " Search strategy to use\r\n", " * Incompatible with: export_search_strategy\r\n", " -export_search_strategy \r\n", " File name to record the search strategy used\r\n", " * Incompatible with: import_search_strategy\r\n", "\r\n", " *** Extension options\r\n", " -xdrop_ungap \r\n", " X-dropoff value (in bits) for ungapped extensions\r\n", " -xdrop_gap \r\n", " X-dropoff value (in bits) for preliminary gapped extensions\r\n", " -xdrop_gap_final \r\n", " X-dropoff value (in bits) for final gapped alignment\r\n", " -no_greedy\r\n", " Use non-greedy dynamic programming extension\r\n", " -min_raw_gapped_score \r\n", " Minimum raw gapped score to keep an alignment in the preliminary gapped and\r\n", " traceback stages\r\n", " -ungapped\r\n", " Perform ungapped alignment only?\r\n", " -window_size =0>\r\n", " Multiple hits window size, use 0 to specify 1-hit algorithm\r\n", " -off_diagonal_range =0>\r\n", " Number of off-diagonals to search for the 2nd hit, use 0 to turn off\r\n", " Default = `0'\r\n", "\r\n", " *** Miscellaneous options\r\n", " 
-parse_deflines\r\n", " Should the query and subject defline(s) be parsed?\r\n", " -num_threads =1>\r\n", " Number of threads (CPUs) to use in the BLAST search\r\n", " Default = `1'\r\n", " * Incompatible with: remote\r\n", " -remote\r\n", " Execute search remotely?\r\n", " * Incompatible with: gilist, seqidlist, negative_gilist, subject_loc,\r\n", " num_threads\r\n", "\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "raw", "metadata": {}, "source": [ "!blastn -query /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/Bay3/CLC_blastdatabases/nt -out /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa2 -outfmt \"6 std stitle staxids sscinames scomnames sblastnames\" -evalue 1E-10 -max_target_seqs 1 -task blastn" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa2" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_2\tgi|115947556|ref|XM_796732.2|\t84.21\t114\t18\t0\t2\t115\t605\t718\t7e-26\t 125\tPREDICTED: Strongylocentrotus purpuratus similar to MGC80921 protein, transcript variant 2 (LOC578948), partial mRNA\t7668\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_4\tgi|177667010|gb|EU595789.1|\t85.97\t221\t31\t0\t3\t223\t9228\t9008\t8e-66\t 259\tHaliotis discus hannai mitochondrion, partial genome\t42344\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tgi|345329379|ref|XM_001506155.2|\t86.96\t161\t21\t0\t1\t161\t60\t220\t5e-47\t 196\tPREDICTED: Ornithorhynchus anatinus 40S ribosomal protein S3-A-like (LOC100074614), partial mRNA\t9258\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\t88.67\t203\t23\t0\t5\t207\t364\t566\t6e-67\t 262\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n",
"Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\t87.25\t204\t25\t1\t5\t207\t592\t795\t1e-62\t 248\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treat" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "ment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\t85.71\t203\t29\t0\t5\t207\t136\t338\t9e-59\t 235\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tgi|306922143|dbj|AB490993.1|\t90.00\t110\t11\t0\t98\t207\t1\t110\t1e-32\t 149\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_10\tgi|166406891|gb|EU244393.1|\t80.85\t94\t18\t0\t12\t105\t133\t226\t4e-15\t89.7\tHaliotis diversicolor clone HDr4CJ446 CD63 antigen-like protein mRNA, partial cds\t36095\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_14\tgi|91992331|gb|DQ453716.1|\t97.65\t213\t5\t0\t1\t213\t955\t1167\t1e-96\t 361\tHaliotis rufescens vitelline envelope zona pellucida domain 3 (VEZP3) mRNA, complete cds\t6454\tN/A\tN/A\tN/A\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tgi|345492297|ref|XM_001600219.2|\t75.89\t112\t27\t0\t1\t112\t225\t336\t2e-12\t80.6\tPREDICTED: Nasonia vitripennis 60S ribosomal protein L22-like (LOC100115956), mRNA\t7425\tN/A\tN/A\tN/A\r\n" ] } ], "prompt_number": 25 }, { "cell_type": "raw", "metadata": {}, "source": [ "GENE DESCRIPTION AND Tax IDs produced" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Rename the contig ID prefix in the taxa2 BLAST table and write the result to taxa3\n", "!sed 's/Roberts_20100712_CC_F3_trimmed_/BlackAbalone_v3_/g' < /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa2 > /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa3\n", "#sed 's/abc/XYZ/g' < infile > outfile" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_nt_blastout_taxa3" ], "language": "python",
"metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BlackAbalone_v3_contig_2\tgi|115947556|ref|XM_796732.2|\t84.21\t114\t18\t0\t2\t115\t605\t718\t7e-26\t 125\tPREDICTED: Strongylocentrotus purpuratus similar to MGC80921 protein, transcript variant 2 (LOC578948), partial mRNA\t7668\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_4\tgi|177667010|gb|EU595789.1|\t85.97\t221\t31\t0\t3\t223\t9228\t9008\t8e-66\t 259\tHaliotis discus hannai mitochondrion, partial genome\t42344\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_6\tgi|345329379|ref|XM_001506155.2|\t86.96\t161\t21\t0\t1\t161\t60\t220\t5e-47\t 196\tPREDICTED: Ornithorhynchus anatinus 40S ribosomal protein S3-A-like (LOC100074614), partial mRNA\t9258\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_9\tgi|306922143|dbj|AB490993.1|\t88.67\t203\t23\t0\t5\t207\t364\t566\t6e-67\t 262\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_9\tgi|306922143|dbj|AB490993.1|\t87.25\t204\t25\t1\t5\t207\t592\t795\t1e-62\t 248\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_9\tgi|306922143|dbj|AB490993.1|\t85.71\t203\t29\t0\t5\t207\t136\t338\t9e-59\t 235\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_9\tgi|306922143|dbj|AB490993.1|\t90.00\t110\t11\t0\t98\t207\t1\t110\t1e-32\t 149\tSebastes schlegelii mRNA, clone: BRF 39-G6, induced by treatment of LPS\t214486\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_10\tgi|166406891|gb|EU244393.1|\t80.85\t94\t18\t0\t12\t105\t133\t226\t4e-15\t89.7\tHaliotis diversicolor clone HDr4CJ446 CD63 antigen-like protein mRNA, partial cds\t36095\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_14\tgi|91992331|gb|DQ453716.1|\t97.65\t213\t5\t0\t1\t213\t955\t1167\t1e-96\t 361\tHaliotis rufescens vitelline envelope zona pellucida domain 3 (VEZP3) mRNA, complete 
cds\t6454\tN/A\tN/A\tN/A\r\n", "BlackAbalone_v3_contig_15\tgi|345492297|ref|XM_001600219.2|\t75.89\t112\t27\t0\t1\t112\t225\t336\t2e-12\t80.6\tPREDICTED: Nasonia vitripennis 60S ribosomal protein L22-like (LOC100115956), mRNA\t7425\tN/A\tN/A\tN/A\r\n" ] } ], "prompt_number": 8 }, { "cell_type": "raw", "metadata": {}, "source": [ "SELECT \n", " id,\n", " foldchange,\n", " pval,\n", " de.Column3 as SPID,\n", " evalue,\n", " Column13 as NCBI_nt_Des\n", " \n", " FROM [lisa418@washington.edu].[BlkAb_DESeq_SPID]de\n", " left join [sr320@washington.edu].[lft_BlackAbalone_v3_nt_blastout_taxa3]lft\n", " on\n", " de.id = lft.Column1" ] }, { "cell_type": "code", "collapsed": false, "input": [ "SELECT \n", " id,\n", " foldchange,\n", " pval,\n", " Column13 as NCBI_nt_Des\n", " \n", " FROM [lisa418@washington.edu].[BlackAB_DESeq.txt]de\n", " left join [sr320@washington.edu].[lft_BlackAbalone_v3_nt_blastout_taxa3]lft\n", " on\n", " de.id = lft.Column1" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "raw", "metadata": {}, "source": [ "@fu Try to get taxID from SP blast " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## Multi-species Blast" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "code\n", "`\n", "./blastn -query /Volumes/web-1/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web-1/whale/fish546/blast/db/Haliotis_kam_transcriptome -out /Volumes/web-1/cnidarian/lft_BlackAbalone_v3_Hal_kam_blastout -outfmt 6 -evalue 1E-5 -max_target_seqs 1 -num_threads 2 -task blastn`" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_kam_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_2\tHaliotis_kam_contig3505\t98.50\t133\t2\t0\t1\t133\t70\t202\t6e-62\t 232\r\n",
"Roberts_20100712_CC_F3_trimmed_contig_4\tHaliotis_kam_contig17\t90.13\t223\t22\t0\t1\t223\t1408\t1186\t2e-83\t 304\r\n", "Roberts_20100712_CC_F3_trimmed_contig_5\tHaliotis_kam_contig4402\t92.68\t355\t9\t2\t1\t338\t388\t34\t4e-151\t 529\r\n", "Roberts_20100712_CC_F3_trimmed_contig_6\tHaliotis_kam_contig272\t100.00\t173\t0\t0\t1\t173\t38\t210\t2e-86\t 313\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tHaliotis_kam_contig1153\t98.45\t129\t2\t0\t1\t129\t129\t1\t1e-59\t 224\r\n", "Roberts_20100712_CC_F3_trimmed_contig_10\tHaliotis_kam_contig59\t85.42\t96\t14\t0\t10\t105\t1230\t1135\t1e-25\t 111\r\n", "Roberts_20100712_CC_F3_trimmed_contig_15\tHaliotis_kam_contig455\t100.00\t112\t0\t0\t1\t112\t300\t189\t2e-53\t 203\r\n", "Roberts_20100712_CC_F3_trimmed_contig_19\tHaliotis_kam_contig3534\t85.14\t222\t33\t0\t412\t633\t223\t2\t4e-67\t 251\r\n", "Roberts_20100712_CC_F3_trimmed_contig_28\tHaliotis_kam_contig329\t96.36\t165\t6\t0\t1\t165\t337\t501\t8e-74\t 271\r\n", "Roberts_20100712_CC_F3_trimmed_contig_30\tHaliotis_kam_contig854\t98.82\t85\t1\t0\t1\t85\t557\t641\t3e-37\t 149\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_kam_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_13830\tHaliotis_kam_contig2270\t97.06\t136\t3\t1\t1\t135\t240\t105\t8e-60\t 224\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13832\tHaliotis_kam_contig2911\t97.06\t102\t3\t0\t9\t110\t185\t84\t1e-43\t 170\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13838\tHaliotis_kam_contig6166\t94.19\t86\t5\t0\t6\t91\t135\t220\t3e-32\t 132\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13843\tHaliotis_kam_contig4288\t96.15\t130\t5\t0\t45\t174\t10\t139\t6e-56\t 212\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13850\tHaliotis_kam_contig4288\t83.53\t85\t14\t0\t2\t86\t138\t222\t1e-19\t91.5\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_13851\tHaliotis_kam_contig413\t96.67\t60\t2\t0\t1\t60\t510\t569\t2e-22\t 100\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13855\tHaliotis_kam_contig6826\t84.62\t130\t19\t1\t2\t130\t195\t66\t7e-35\t 141\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13860\tHaliotis_kam_contig911\t71.90\t121\t28\t2\t13\t129\t121\t3\t6e-11\t62.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13870\tHaliotis_kam_contig2899\t100.00\t51\t0\t0\t1\t51\t51\t1\t2e-20\t93.3\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13873\tHaliotis_kam_contig1191\t97.20\t107\t3\t0\t1\t107\t821\t715\t2e-46\t 179\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!grep -c \"Roberts\" /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_kam_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "5471\r\n" ] } ], "prompt_number": 13 }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "code \n", "`./blastn -query /Volumes/web-1/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web-1/whale/fish546/blast/db/Haliotis_midae_franchini -out /Volumes/web-1/cnidarian/lft_BlackAbalone_v3_Hal_midae_blastout -outfmt 6 -evalue 1E-5 -max_target_seqs 1 -num_threads 2 -task blastn`" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_midae_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_1\tContig_3052_Coverage_84.98\t90.65\t107\t10\t0\t2\t108\t201\t95\t6e-37\t 149\r\n", "Roberts_20100712_CC_F3_trimmed_contig_2\tContig_6000_Coverage_67.17\t95.33\t107\t5\t0\t1\t107\t109\t3\t3e-43\t 170\r\n", "Roberts_20100712_CC_F3_trimmed_contig_3\tContig_18668_Coverage_137.10\t85.45\t55\t8\t0\t34\t88\t622\t568\t2e-11\t64.4\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_4\tContig_22684_Coverage_350.78\t86.36\t66\t9\t0\t21\t86\t553\t618\t2e-15\t78.8\r\n", "Roberts_20100712_CC_F3_trimmed_contig_5\tContig_5029_Coverage_76.79\t81.44\t194\t32\t3\t48\t240\t282\t92\t4e-45\t 178\r\n", "Roberts_20100712_CC_F3_trimmed_contig_7\tContig_1511_Coverage_54.42\t90.30\t237\t23\t0\t181\t417\t297\t61\t6e-89\t 324\r\n", "Roberts_20100712_CC_F3_trimmed_contig_8\tContig_789_Coverage_53.73\t87.00\t100\t13\t0\t1\t100\t634\t535\t7e-29\t 122\r\n", "Roberts_20100712_CC_F3_trimmed_contig_9\tContig_21030_Coverage_485.53\t91.75\t206\t17\t0\t2\t207\t205\t410\t1e-80\t 295\r\n", "Roberts_20100712_CC_F3_trimmed_contig_10\tContig_290_Coverage_208.28\t82.98\t94\t16\t0\t12\t105\t610\t703\t1e-21\t98.7\r\n", "Roberts_20100712_CC_F3_trimmed_contig_11\tContig_4226_Coverage_62.51\t85.00\t80\t12\t0\t66\t145\t477\t556\t2e-19\t91.5\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_midae_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Roberts_20100712_CC_F3_trimmed_contig_13839\tContig_13235_Coverage_19.54\t87.39\t111\t14\t0\t4\t114\t41\t151\t1e-33\t 138\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13847\tContig_94_Coverage_67.50\t83.65\t104\t17\t0\t1\t104\t726\t623\t2e-25\t 111\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13853\tContig_16227_Coverage_53.40\t82.14\t56\t8\t1\t3\t58\t56\t109\t5e-08\t53.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13855\tContig_6084_Coverage_35.44\t86.67\t120\t16\t0\t1\t120\t350\t231\t9e-36\t 145\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13857\tContig_94_Coverage_67.50\t89.17\t157\t17\t0\t1\t157\t190\t34\t4e-54\t 206\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13859\tContig_17733_Coverage_12.63\t91.18\t34\t3\t0\t47\t80\t18\t51\t2e-06\t48.2\r\n", 
"Roberts_20100712_CC_F3_trimmed_contig_13860\tContig_2605_Coverage_135.73\t71.32\t129\t36\t1\t13\t140\t917\t1045\t9e-11\t62.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13861\tContig_21412_Coverage_92.26\t74.53\t106\t22\t2\t16\t116\t227\t122\t6e-12\t66.2\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13879\tContig_10459_Coverage_16.77\t74.47\t94\t24\t0\t8\t101\t44\t137\t7e-11\t62.6\r\n", "Roberts_20100712_CC_F3_trimmed_contig_13883\tContig_3412_Coverage_65.56\t76.83\t82\t16\t2\t1\t81\t471\t550\t3e-09\t57.2\r\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "!grep -c \"Roberts\" /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_midae_blastout" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "6072\r\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "#running blackAb versus other Abalone at 1e-20" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -query /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web/whale/fish546/blast/db/Haliotis_rufescens_transcriptome -out /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_ruf_blastout -outfmt 6 -evalue 1E-20 -max_target_seqs 1 -num_threads 2 -task blastn" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 46 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -query /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web/whale/fish546/blast/db/Haliotis_midae_franchini -out /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_midae_blastout_b -outfmt 6 -evalue 1E-20 -max_target_seqs 1 -num_threads 2 -task blastn" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 47 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -query /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa -db /Volumes/web/whale/fish546/blast/db/Haliotis_kam_transcriptome 
-out /Volumes/web/cnidarian/lft_BlackAbalone_v3_Hal_kam_blastout_b -outfmt 6 -evalue 1E-20 -max_target_seqs 1 -num_threads 2 -task blastn" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 48 }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Venny\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comparing to OrthoDB - Lottia gigantea \n", "\n", "Modified from Emma's paper.. \n", "To quantify the completeness of the transcriptome, contigs were assessed to determine if they contained orthologs to proteins found in all Metazoa. Specifically, OrthoDB (Waterhouse et al. 2011, http://cegg.unige.ch/orthodb6) was used to obtain a suite of proteins from Lottia gigantea (the giant owl limpet) found as single copy, which have orthologs in all other metazoans in OrthoDB. Sequence comparisons (tBLASTn; Altschul et al. 1997) were performed to find matching contigs. An e-value threshold of 1.0E-10 was used. \n", "\n", "Details at \n", "https://www.evernote.com/shard/s10/sh/8aa69a54-13fd-4bea-8a09-7f6a08c30fe5/63597cc40252a36bc030000d88cf0f1d\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/OrthDB_meta_Lotgi1.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">AAEL010815 AAEL010815-PA Q0IED2 Putative uncharacterized protein IPR001060,IPR018808 EOG62FZF8 AAEGY\r\n", "GEKNNGYEVLYQNMKYGLSATKELAEYFRERSNLEEYNSKLLTKLANKAGSGGGGTFSPLWIILKSTTERLSELHAAKVQ\r\n", "KLTELVKNINKYAEELHKKHKSVKEEESSTQDAVHAMKESTTAVAKAKDVYNTRLQELEKARKDNSAKEIEKSEAKLRKQ\r\n", "QDDYKALVEKHNIIKQEFEKKMTITCKRFQEIEEAHLKQMKEFLTSYMEIVQNNFDLVGQVHSDLKRQFLELTVDKLLEQ\r\n", "FVLNKYTGLEKPEFIELDLVKLGSRSLGTTATAATSNNQLLINTSMPNATSGGSVTTVAEGSVTDSPALSSAAVPTNSPV\r\n", "NLSTSPPASGGRGSLLDALGGSTDRPMSPAAAGDSSASSSAQSTAKTRSRESRDSTTSGGADSVSTSTAGGAGGGGGGSA\r\n", "ISAPTSPNDVHNSNQHGGSNNSNGLASTFIGRNALLRGSKCKCSIDFFSPNSVYLSIWSRREKAKSKKTKKKKDSTENCK\r\n", 
"TFRCVSICNRTSHNRCIEVYSNFYKTVALKFSILIVFSFENITSFESSSFISEDKDETTKASDAASSNLQTTSAVSTGNV\r\n", "APTATPEVDEDGYSIQPRETTWDSTTLTEKSNNFYSSSDSDSEDERGERKIHVEIKPLNNGAAPISASVDELRATVENLS\r\n", "LSPIGALSSRSQSVSQQLGDRPSNGNDPPNASNASTPTTVHPYAPLQSPTLSMSTTSNNRYADLGDIFSEVGDISASAPA\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep -c \">\" /Volumes/web/cnidarian/OrthDB_meta_Lotgi1.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "5684\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa -dbtype nucl -out /Volumes/Bay3/Software/ncbi-blast-2.2.27\\+/db/BlackAbalone_Contigs_v3" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", "Building a new DB, current time: 07/17/2013 08:36:07\r\n", "New DB name: /Volumes/Bay3/Software/ncbi-blast-2.2.27+/db/BlackAbalone_Contigs_v3\r\n", "New DB title: /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa\r\n", "Sequence type: Nucleotide\r\n", "Keep Linkouts: T\r\n", "Keep MBits: T\r\n", "Maximum file size: 1000000000B\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Adding sequences from FASTA; added 13884 sequences in 0.828346 seconds.\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!tblastn -query /Volumes/Bay3/Software/ncbi-blast-2.2.26+/query/OrthDB_meta_Lotgi1.fa -db /Volumes/Bay3/Software/ncbi-blast-2.2.27\\+/db/BlackAbalone_Contigs_v3 -outfmt 6 -out /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn.txt -max_target_seqs 1 -num_threads 2 -evalue 1E-10" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!tblastn -query /Volumes/Bay3/Software/ncbi-blast-2.2.26+/query/OrthDB_meta_Lotgi1.fa -db 
/Volumes/Bay3/Software/ncbi-blast-2.2.27\\+/db/BlackAbalone_Contigs_v3 -outfmt 6 -out /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn_b.txt -max_target_seqs 1 -num_threads 3 -evalue 1E-20" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn_b.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "AAEL010318\tRoberts_20100712_CC_F3_trimmed_contig_3682\t90.00\t60\t5\t1\t73\t132\t1\t177\t5e-32\t 116\r\n", "ENSACAG00000011841\tRoberts_20100712_CC_F3_trimmed_contig_3682\t88.14\t59\t7\t0\t82\t140\t1\t177\t6e-33\t 119\r\n", "ENSACAG00000002544\tRoberts_20100712_CC_F3_trimmed_contig_3682\t94.92\t59\t3\t0\t50\t108\t1\t177\t1e-34\t 123\r\n", "ENSACAG00000016470\tRoberts_20100712_CC_F3_trimmed_contig_3682\t91.53\t59\t5\t0\t82\t140\t1\t177\t1e-34\t 123\r\n", "ACEP20785\tRoberts_20100712_CC_F3_trimmed_contig_3682\t88.33\t60\t6\t1\t53\t112\t1\t177\t1e-31\t 114\r\n", "ACEP20789\tRoberts_20100712_CC_F3_trimmed_contig_3682\t90.00\t60\t5\t1\t82\t141\t1\t177\t2e-32\t 117\r\n", "ADAR004412\tRoberts_20100712_CC_F3_trimmed_contig_3682\t90.00\t60\t5\t1\t82\t141\t1\t177\t8e-32\t 115\r\n", "AECH10224\tRoberts_20100712_CC_F3_trimmed_contig_3682\t88.33\t60\t6\t1\t104\t163\t1\t177\t2e-31\t 115\r\n", "AECH10223\tRoberts_20100712_CC_F3_trimmed_contig_3682\t90.00\t60\t5\t1\t125\t184\t1\t177\t2e-32\t 117\r\n", "g8984\tRoberts_20100712_CC_F3_trimmed_contig_3682\t90.00\t60\t5\t1\t82\t141\t1\t177\t1e-32\t 117\r\n" ] } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "#wc Print byte, word, and line counts\n", "!wc /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 880 10560 87475 
/Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn.txt\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep -c \"Roberts_20100712_CC\" /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "880\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep -c \"Roberts_20100712_CC\" /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn_b.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "828\r\n" ] } ], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn_b.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 828 9936 82229 /Volumes/web/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn_b.txt\r\n" ] } ], "prompt_number": 32 }, { "cell_type": "markdown", "metadata": {}, "source": [ "_Concern: very few black abalone contigs are represented in these tblastn results._\n", "\n", "http://eagle.fish.washington.edu/cnidarian/OrthoDB_Lotgi1_BlackAb_v3_tblastn.txt" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#-A 10 also prints the 10 lines that follow each matching line\n", "!fgrep -A 10 \"Roberts_20100712_CC_F3_trimmed_contig_3682\" /Volumes/web/cnidarian/BlackAbalone_Contigs_v3.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">Roberts_20100712_CC_F3_trimmed_contig_3682 Average coverage: 80.91\r\n", "ATCAGAATAATGTGGTCTCAACGAGACCCTTCCTTGAGAAAGTCTGGAGTGGGCAATGTG\r\n", "TTCATCAAGAATTTGGACAAGAGCATCGACAACAAAGCTCTGTATGACACATTCTCTGCT\r\n", "TTTGGCAACATCCTGTCTTGTAAGATAGCTTCTGATGAAAATGGCTCCAAGGGTTATGG\r\n", ">Roberts_20100712_CC_F3_trimmed_contig_3683 Average coverage: 18.90\r\n", 
"TTGCAATCTAGAAATACGTCCGCTCTTGTACTGTAGCAGTTTTTACAATTACGCCATTGC\r\n", "ATCGAACCCATTAAGACCAGATCATTTACTCCTAGACGAG\r\n", ">Roberts_20100712_CC_F3_trimmed_contig_3684 Average coverage: 97.66\r\n", "GTCTCCTGCCTCTGGTTGGAATTAACAGAAGTGATGAGTTTGTGAAGGAAGTGTGTGATC\r\n", "AGTGCAGCTTCGCCTCCATGCAGAAAAGCAAGTCACCCATGTCCAAGGTCATGTATAGAA\r\n", "AAGGTGAGGTT\r\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "#in the meantime will blast other Haliotis databases" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "!tblastn -query /Volumes/Bay3/Software/ncbi-blast-2.2.26+/query/OrthDB_meta_Lotgi1.fa -db /Volumes/web/whale/fish546/blast/db/Haliotis_kam_transcriptome -outfmt 6 -out /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_kam_tblastn.txt -max_target_seqs 1 -num_threads 1 -evalue 1E-20" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "!tblastn -query /Volumes/Bay3/Software/ncbi-blast-2.2.26+/query/OrthDB_meta_Lotgi1.fa -db /Volumes/web/whale/fish546/blast/db/Haliotis_midae_franchini -outfmt 6 -out /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_midae_franchini_tblastn.txt -max_target_seqs 1 -num_threads 1 -evalue 1E-20" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in /Volumes/web/cnidarian/H.rufescens_contig.fa -dbtype nucl -out /Volumes/web/whale/fish546/blast/db/Haliotis_rufescens_transcriptome" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", "Building a new DB, current time: 07/18/2013 15:46:33\r\n", "New DB name: /Volumes/web/whale/fish546/blast/db/Haliotis_rufescens_transcriptome\r\n", "New DB title: /Volumes/web/cnidarian/H.rufescens_contig.fa\r\n", "Sequence type: Nucleotide\r\n", "Keep Linkouts: 
T\r\n", "Keep MBits: T\r\n", "Maximum file size: 1000000000B\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Adding sequences from FASTA; added 162928 sequences in 19.2224 seconds.\r\n" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "!tblastn -query /Volumes/Bay3/Software/ncbi-blast-2.2.26+/query/OrthDB_meta_Lotgi1.fa -db /Volumes/web/whale/fish546/blast/db/Haliotis_rufescens_transcriptome -outfmt 6 -out /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_rufescens_tblastn.txt -max_target_seqs 1 -num_threads 1 -evalue 1E-20" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_kam_tblastn.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "AAEL010318\tHaliotis_kam_contig350\t76.46\t429\t93\t5\t1\t423\t235\t1515\t0.0\t 679\r\n", "ENSACAG00000011841\tHaliotis_kam_contig350\t60.55\t659\t229\t11\t1\t651\t208\t2115\t0.0\t 723\r\n", "ENSACAG00000002544\tHaliotis_kam_contig350\t65.60\t593\t189\t6\t4\t585\t313\t2079\t0.0\t 740\r\n", "ENSACAG00000016470\tHaliotis_kam_contig350\t66.67\t627\t193\t8\t1\t614\t208\t2079\t0.0\t 800\r\n", "ACEP20785\tHaliotis_kam_contig350\t66.04\t583\t165\t12\t36\t595\t400\t2118\t0.0\t 690\r\n", "ACEP20789\tHaliotis_kam_contig350\t69.40\t647\t175\t10\t1\t634\t208\t2118\t0.0\t 843\r\n", "ADAR004412\tHaliotis_kam_contig350\t62.10\t657\t205\t11\t1\t640\t208\t2097\t0.0\t 734\r\n", "AECH10224\tHaliotis_kam_contig350\t67.41\t669\t173\t12\t1\t656\t208\t2118\t0.0\t 826\r\n", "AECH10223\tHaliotis_kam_contig350\t65.36\t690\t173\t12\t1\t677\t208\t2118\t0.0\t 826\r\n", "g8984\tHaliotis_kam_contig350\t70.59\t646\t164\t10\t1\t629\t208\t2118\t0.0\t 891\r\n" ] } ], "prompt_number": 39 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_kam_tblastn.txt" ], 
"language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 438 5256 35941 /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_kam_tblastn.txt\r\n" ] } ], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_midae_franchini_tblastn.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "gi|221125234|ref|XP_002165797.1|\tContig_3019_Coverage_17.03\t55.00\t80\t36\t0\t1\t80\t274\t35\t5e-25\t97.4\r\n", "g8694\tContig_1661_Coverage_123.10\t37.29\t177\t97\t2\t1714\t1877\t679\t152\t1e-34\t 135\r\n", "AAEL000339\tContig_308_Coverage_33.26\t38.55\t166\t98\t2\t255\t416\t1\t498\t1e-35\t 131\r\n", "ENSACAG00000006114\tContig_308_Coverage_33.26\t37.95\t166\t99\t2\t361\t522\t1\t498\t5e-34\t 127\r\n", "ENSACAG00000006855\tContig_308_Coverage_33.26\t39.16\t166\t97\t2\t145\t306\t1\t498\t2e-37\t 135\r\n", "ENSACAG00000003680\tContig_308_Coverage_33.26\t37.35\t166\t100\t2\t326\t487\t1\t498\t5e-32\t 121\r\n", "ACEP15907\tContig_308_Coverage_33.26\t38.79\t165\t97\t2\t230\t390\t1\t495\t4e-36\t 132\r\n", "ADAR002823\tContig_308_Coverage_33.26\t39.88\t163\t94\t2\t118\t276\t1\t489\t5e-37\t 131\r\n", "AECH18169\tContig_308_Coverage_33.26\t38.79\t165\t96\t3\t347\t506\t1\t495\t9e-34\t 127\r\n", "g1442\tContig_308_Coverage_33.26\t37.95\t166\t99\t2\t319\t480\t1\t498\t9e-36\t 132\r\n" ] } ], "prompt_number": 41 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_midae_franchini_tblastn.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 1432 17184 122489 /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_midae_franchini_tblastn.txt\r\n" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_rufescens_tblastn.txt" ], 
"language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "AAEL010815\tcontig145\t46.44\t267\t138\t3\t1\t262\t90\t890\t1e-61\t 216\r\n", "ENSACAG00000009572\tcontig145\t46.13\t271\t142\t2\t4\t274\t72\t872\t1e-76\t 255\r\n", "ENSACAG00000017803\tcontig145\t46.59\t264\t141\t0\t1\t264\t63\t854\t1e-73\t 232\r\n", "ACEP19080\tcontig145\t48.79\t248\t126\t1\t1\t247\t129\t872\t4e-74\t 249\r\n", "ADAR004115\tcontig145\t48.25\t228\t116\t2\t5\t230\t72\t755\t8e-65\t 224\r\n", "AECH14598\tcontig145\t51.31\t267\t129\t1\t5\t270\t72\t872\t5e-88\t 289\r\n", "g3073\tcontig145\t46.40\t250\t133\t1\t1\t250\t129\t875\t6e-72\t 244\r\n", "AGAP012683\tcontig145\t46.67\t255\t134\t2\t10\t262\t90\t854\t6e-66\t 212\r\n", "AGAP002024\tcontig145\t46.30\t270\t140\t3\t5\t269\t72\t881\t9e-66\t 229\r\n", "ENSAMEG00000010758\tcontig145\t46.27\t268\t144\t0\t1\t268\t63\t866\t9e-73\t 245\r\n" ] } ], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_rufescens_tblastn.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 5311 63732 377427 /Volumes/web/cnidarian/OrthoDB_Lotgi1_Haliotis_rufescens_tblastn.txt\r\n" ] } ], "prompt_number": 44 }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Venny\"" ] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }