{ "metadata": { "name": "", "signature": "sha256:3d864182e40bb88e2effd69360a21f6c8af8b16815d84e02e5fcc79bc64c1e2b" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Annotation (Blast)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#Contigs from Assembly\n", "!head /Volumes/web/cnidarian/SeaStar_transc_v2.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">3291_5903_10007_H94MGADXX_V_CF71_ATCACG_R1_(paired)_trimmed_(paired)_contig_1\r\n", "CAAATATATGAACGGTTGATTGTCAACGATTAGTACATGTTTTCATTGTTCCCCACGCCC\r\n", "GCCCCCCCCCACTCAAACATTTAAAGTGTGAAATATTATTTATCCACAAATTTCCTTAAA\r\n", "CCTGCAAACTTGTCTGCTGTCTCTTATTGGAAGTTATGAAAAAGAACAACGGGTTTTCTT\r\n", "TAAAGGGTCTGCGTGCGATTTTCAACCTTTTGAGTAATAGCAGTTATTTTGATAACCGAT\r\n", "TTTTTTCAAAGCTCAACAGCTTTTTAAAATAAGGAATCCTATAATGGCCAAACGAATACT\r\n", "ATAAAAATAAGGGTTCTCTTAATTGTATAAAACGTATAATTTTATCAATTTTGGGACCGT\r\n", "GTAATTTTTTAAAGACCACAAGAATGTTACATACAACAAATAGACGAAACTCGTAGCTTT\r\n", "GGAAACTACGTCATGGGCGTTTGGTCAAAAGCTGGAGAGAAAGAGAGGTGGGGTGCCAGA\r\n", "CTTAAGTAGTCACGTGATCTGACCAACGCACATCGGAAGCTCGATCGGATGAAATCTTCT\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "#Making the query look better\n", "!sed 's/3291_5903_10007_H94MGADXX_V_CF71_ATCACG_R1_(paired)_trimmed_(paired)/Phel_clc/g' /Users/sr320/Dropbox/Steven/eimd/data/Phel_transcriptome_clc.fa" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Users/sr320/Dropbox/Steven/eimd/data/Phel_transcriptome_clc.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">Phel_clc_contig_1\r\n", "CAAATATATGAACGGTTGATTGTCAACGATTAGTACATGTTTTCATTGTTCCCCACGCCC\r\n", "GCCCCCCCCCACTCAAACATTTAAAGTGTGAAATATTATTTATCCACAAATTTCCTTAAA\r\n", "CCTGCAAACTTGTCTGCTGTCTCTTATTGGAAGTTATGAAAAAGAACAACGGGTTTTCTT\r\n", "TAAAGGGTCTGCGTGCGATTTTCAACCTTTTGAGTAATAGCAGTTATTTTGATAACCGAT\r\n", "TTTTTTCAAAGCTCAACAGCTTTTTAAAATAAGGAATCCTATAATGGCCAAACGAATACT\r\n", "ATAAAAATAAGGGTTCTCTTAATTGTATAAAACGTATAATTTTATCAATTTTGGGACCGT\r\n", "GTAATTTTTTAAAGACCACAAGAATGTTACATACAACAAATAGACGAAACTCGTAGCTTT\r\n", "GGAAACTACGTCATGGGCGTTTGGTCAAAAGCTGGAGAGAAAGAGAGGTGGGGTGCCAGA\r\n", "CTTAAGTAGTCACGTGATCTGACCAACGCACATCGGAAGCTCGATCGGATGAAATCTTCT\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep -c \">\" /Users/sr320/Dropbox/Steven/eimd/data/Phel_transcriptome_clc.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "30578\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "#IMPORTANT the location of files will change depending on your computer\n", "!blastx \\\n", "-query /Users/sr320/Dropbox/Steven/eimd/data/Phel_transcriptome_clc.fa \\\n", "-db /Users/Shared/data/blast/db/uniprot_sprot \\\n", "-out /Users/sr320/Dropbox/Steven/eimd/data/Phel_clc_blastx_uniprot_sprot_1.tab \\\n", "-outfmt 6 \\\n", "-num_threads 8 \\\n", "-max_hsps 1 \\\n", "-max_target_seqs 1 \\\n", "-evalue 1E-20" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Users/sr320/Dropbox/Steven/eimd/data/Phel_clc_blastx_uniprot_sprot_1.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Phel_clc_contig_4\tsp|P25001|COX1_PISOC\t88.39\t517\t48\t1\t7061\t5547\t1\t517\t0.0\t 749\r\n", "Phel_clc_contig_7\tsp|Q33818|CYB_ASTPE\t79.94\t329\t66\t0\t993\t7\t51\t379\t9e-168\t 479\r\n", "Phel_clc_contig_8\tsp|P68037|UB2L3_MOUSE\t76.97\t152\t35\t0\t4862\t4407\t1\t152\t7e-62\t 214\r\n", "Phel_clc_contig_9\tsp|Q0MVN8|QOR_PIG\t45.61\t239\t129\t1\t796\t80\t90\t327\t5e-63\t 210\r\n", "Phel_clc_contig_17\tsp|Q6DGL8|RT15_DANRE\t35.00\t180\t107\t3\t1177\t638\t61\t230\t5e-22\t99.8\r\n", "Phel_clc_contig_18\tsp|P96202|PPSC_MYCTU\t30.81\t714\t438\t15\t5407\t3386\t1414\t2111\t6e-76\t 286\r\n", "Phel_clc_contig_20\tsp|P46058|EDSP_CYNPY\t31.03\t348\t218\t8\t1731\t703\t4\t334\t5e-38\t 148\r\n", "Phel_clc_contig_22\tsp|Q96LI5|CNO6L_HUMAN\t60.78\t357\t128\t5\t1887\t832\t186\t535\t2e-149\t 450\r\n", "Phel_clc_contig_24\tsp|P63245|GBLP_RAT\t80.77\t312\t60\t0\t1032\t97\t1\t312\t0.0\t 540\r\n", "Phel_clc_contig_25\tsp|Q9QYP1|LRP4_RAT\t32.74\t623\t393\t7\t1816\t5\t393\t1008\t1e-99\t 339\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!tr '|' \"\\t\" /Users/sr320/Dropbox/Steven/eimd/data/Phel_clc_blastx_uniprot_sprot_sqlready.tab" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Users/sr320/Dropbox/Steven/eimd/data/Phel_clc_blastx_uniprot_sprot_sqlready.tab" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Phel_clc_contig_4\tsp\tP25001\tCOX1_PISOC\t88.39\t517\t48\t1\t7061\t5547\t1\t517\t0.0\t 749\r\n", "Phel_clc_contig_7\tsp\tQ33818\tCYB_ASTPE\t79.94\t329\t66\t0\t993\t7\t51\t379\t9e-168\t 479\r\n", "Phel_clc_contig_8\tsp\tP68037\tUB2L3_MOUSE\t76.97\t152\t35\t0\t4862\t4407\t1\t152\t7e-62\t 214\r\n", "Phel_clc_contig_9\tsp\tQ0MVN8\tQOR_PIG\t45.61\t239\t129\t1\t796\t80\t90\t327\t5e-63\t 210\r\n", "Phel_clc_contig_17\tsp\tQ6DGL8\tRT15_DANRE\t35.00\t180\t107\t3\t1177\t638\t61\t230\t5e-22\t99.8\r\n", "Phel_clc_contig_18\tsp\tP96202\tPPSC_MYCTU\t30.81\t714\t438\t15\t5407\t3386\t1414\t2111\t6e-76\t 286\r\n", "Phel_clc_contig_20\tsp\tP46058\tEDSP_CYNPY\t31.03\t348\t218\t8\t1731\t703\t4\t334\t5e-38\t 148\r\n", "Phel_clc_contig_22\tsp\tQ96LI5\tCNO6L_HUMAN\t60.78\t357\t128\t5\t1887\t832\t186\t535\t2e-149\t 450\r\n", "Phel_clc_contig_24\tsp\tP63245\tGBLP_RAT\t80.77\t312\t60\t0\t1032\t97\t1\t312\t0.0\t 540\r\n", "Phel_clc_contig_25\tsp\tQ9QYP1\tLRP4_RAT\t32.74\t623\t393\t7\t1816\t5\t393\t1008\t1e-99\t 339\r\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }