{ "metadata": { "name": "", "signature": "sha256:d1ed0bfb2793e95cbd4ae1719cc44f5a026d1478a3f68c8949d65b75f49b56a9" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Visualizing Emma's Proteomic Data in Genome Browser" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!java -jar samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/web/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff -c /Volumes/web/oyster/bioinformatics/Crassostrea_gigas.GCA_000297895.1.21.dna.genome.fa.fa -o /Users/emmatimminsschiffman/Documents/winter_2014/Bioinformatics/Cg_samifier_out20140220 -l /Users/emmatimminsschiffman/Documents/winter_2014/Bioinformatics/log_20140220 -b /Users/emmatimminsschiffman/Documents/winter_2014/Cg_20140220.bed \n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "org.apache.commons.cli.MissingOptionException: Missing required options: r, m, g, c, o\r\n", "Version = 1.0.9\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "usage: samifier [-b ] -c -g [-l\r\n", " ] -m -o -r \r\n", " [-s ]\r\n", " -b Filename to write IGV regions of\r\n", " interest (BED) file to\r\n", " -c Directory containing the chromosome\r\n", " files in FASTA format for the given\r\n", " genome\r\n", " -g Genome file in gff format\r\n", " -l Filename to write the log into\r\n", " -m File mapping protein identifier to\r\n", " ordered locus name\r\n", " -o Filename to write the SAM format file to\r\n", " -r Mascot search results file in txt format\r\n", " -s Minimum confidence score for peptides to\r\n", " 
be included\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# -r Mascot search results file in txt format\n", "!head -50 /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " \r\n", " \r\n", " \r\n" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/we!b/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff -c /Volumes/web/oyster/bioinformatics/Crassostrea_gigas.GCA_000297895.1.21.dna.genome.fa.fa -o /Users/sr320/Desktop/Cg_samifier_out20140220 -l /Users/sr320/Desktop/log_20140220 -b /Users/sr320/Desktop/Cg_20140220.bed " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Version = 1.0.9\r\n", "au.org.intersect.samifier.parser.GenomeFileParsingException: Error in /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff:23 C19392\tGLEAN\tCDS\t452\t610\t.\t+\t0\tParent=CGI_10000015;\r\n", " > Stop of sequence in gene CGI_10000015 overflows gene\r\n", "usage: samifier [-b ] -c -g [-l\r\n", " ] -m -o 
-r \r\n", " [-s ]\r\n", " -b Filename to write IGV regions of\r\n", " interest (BED) file to\r\n", " -c Directory containing the chromosome\r\n", " files in FASTA format for the given\r\n", " genome\r\n", " -g Genome file in gff format\r\n", " -l Filename to write the log into\r\n", " -m File mapping protein identifier to\r\n", " ordered locus name\r\n", " -o Filename to write the SAM format file to\r\n", " -r Mascot search results file in txt format\r\n", " -s Minimum confidence score for peptides to\r\n", " be included\r\n", "au.org.intersect.samifier.parser.GenomeFileParsingException: Error in /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff:23 C19392\tGLEAN\tCDS\t452\t610\t.\t+\t0\tParent=CGI_10000015;\r\n", " > Stop of sequence in gene CGI_10000015 overflows gene\r\n", "\tat au.org.intersect.samifier.parser.GenomeParserImpl.throwParsingException(GenomeParserImpl.java:98)\r\n", "\tat au.org.intersect.samifier.parser.GenomeParserImpl.processSequence(GenomeParserImpl.java:181)\r\n", "\tat au.org.intersect.samifier.parser.GenomeParserImpl.doParsing(GenomeParserImpl.java:84)\r\n", "\tat au.org.intersect.samifier.parser.GenomeParserImpl.parseGenomeFile(GenomeParserImpl.java:46)\r\n", "\tat au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:84)\r\n", "\tat au.org.intersect.samifier.Samifier.main(Samifier.java:125)\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -50 /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tID=CGI_10000001;\r\n", "C16582\tGLEAN\tCDS\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tID=CGI_10000002;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", 
"C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tID=CGI_10000003;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17998\tGLEAN\tmRNA\t196\t387\t1\t-\t.\tID=CGI_10000005;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t.\t-\t0\tParent=CGI_10000005;\r\n", "C18346\tGLEAN\tmRNA\t174\t551\t1\t+\t.\tID=CGI_10000009;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t.\t+\t0\tParent=CGI_10000009;\r\n", "C18428\tGLEAN\tmRNA\t286\t546\t0.555898\t-\t.\tID=CGI_10000010;\r\n", "C18428\tGLEAN\tCDS\t286\t546\t.\t-\t0\tParent=CGI_10000010;\r\n", "C18964\tGLEAN\tmRNA\t203\t658\t0.999572\t-\t.\tID=CGI_10000011;\r\n", "C18964\tGLEAN\tCDS\t203\t658\t.\t-\t0\tParent=CGI_10000011;\r\n", "C18980\tGLEAN\tmRNA\t30\t674\t0.555898\t+\t.\tID=CGI_10000012;\r\n", "C18980\tGLEAN\tCDS\t30\t674\t.\t+\t0\tParent=CGI_10000012;\r\n", "C19100\tGLEAN\tmRNA\t160\t681\t0.999955\t-\t.\tID=CGI_10000013;\r\n", "C19100\tGLEAN\tCDS\t160\t681\t.\t-\t0\tParent=CGI_10000013;\r\n", "C19356\tGLEAN\tmRNA\t355\t597\t1\t+\t.\tID=CGI_10000014;\r\n", "C19356\tGLEAN\tCDS\t355\t597\t.\t+\t0\tParent=CGI_10000014;\r\n", "C19392\tGLEAN\tmRNA\t46\t610\t1\t+\t.\tID=CGI_10000015;\r\n", "C19392\tGLEAN\tCDS\t46\t183\t.\t+\t0\tParent=CGI_10000015;\r\n", "C19392\tGLEAN\tCDS\t452\t610\t.\t+\t0\tParent=CGI_10000015;\r\n", "C19510\tGLEAN\tmRNA\t451\t702\t1\t+\t.\tID=CGI_10000016;\r\n", "C19510\tGLEAN\tCDS\t451\t702\t.\t+\t0\tParent=CGI_10000016;\r\n", "C19532\tGLEAN\tmRNA\t155\t601\t0.575455\t-\t.\tID=CGI_10000017;\r\n", "C19532\tGLEAN\tCDS\t155\t601\t.\t-\t0\tParent=CGI_10000017;\r\n", "C19570\tGLEAN\tmRNA\t208\t411\t0.555898\t-\t.\tID=CGI_10000018;\r\n", "C19570\tGLEAN\tCDS\t208\t411\t.\t-\t0\tParent=CGI_10000018;\r\n", "C19626\tGLEAN\tmRNA\t347\t814\t0.999572\t-\t.\tID=CGI_10000019;\r\n", "C19626\tGLEAN\tCDS\t347\t814\t.\t-\t0\tParent=CGI_10000019;\r\n", "C19672\tGLEAN\tmRNA\t132\t464\t1\t+\t.\tID=CGI_10000021;\r\n", "C19672\tGLEAN\tCDS\t132\t464\t.\t+\t0\tParent=CGI_10000021;\r\n", 
"C20188\tGLEAN\tmRNA\t437\t967\t0.999572\t-\t.\tID=CGI_10000024;\r\n", "C20188\tGLEAN\tCDS\t437\t967\t.\t-\t0\tParent=CGI_10000024;\r\n", "C20262\tGLEAN\tmRNA\t222\t1005\t1\t-\t.\tID=CGI_10000025;\r\n", "C20262\tGLEAN\tCDS\t872\t1005\t.\t-\t0\tParent=CGI_10000025;\r\n", "C20262\tGLEAN\tCDS\t642\t649\t.\t-\t1\tParent=CGI_10000025;\r\n", "C20262\tGLEAN\tCDS\t222\t538\t.\t-\t2\tParent=CGI_10000025;\r\n", "C20282\tGLEAN\tmRNA\t330\t980\t1\t+\t.\tID=CGI_10000026;\r\n", "C20282\tGLEAN\tCDS\t330\t980\t.\t+\t0\tParent=CGI_10000026;\r\n", "scaffold1224\tGLEAN\tmRNA\t107\t775\t0.639435\t-\t.\tID=CGI_10000027;\r\n", "scaffold1224\tGLEAN\tCDS\t107\t775\t.\t-\t0\tParent=CGI_10000027;\r\n", "C20334\tGLEAN\tmRNA\t273\t1027\t1\t-\t.\tID=CGI_10000028;\r\n", "C20334\tGLEAN\tCDS\t868\t1027\t.\t-\t0\tParent=CGI_10000028;\r\n", "C20334\tGLEAN\tCDS\t273\t523\t.\t-\t2\tParent=CGI_10000028;\r\n", "C20412\tGLEAN\tmRNA\t73\t776\t1\t-\t.\tID=CGI_10000029;\r\n", "C20412\tGLEAN\tCDS\t706\t776\t.\t-\t0\tParent=CGI_10000029;\r\n", "C20412\tGLEAN\tCDS\t410\t463\t.\t-\t1\tParent=CGI_10000029;\r\n", "C20412\tGLEAN\tCDS\t73\t214\t.\t-\t1\tParent=CGI_10000029;\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "# Rename CDS features to exon and write the result to ets_v9.gff\n", "# NOTE(review): the input path was missing from this cell and is reconstructed here; confirm it was the .fixed.gff\n", "!sed 's/CDS/exon/g' /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff > /Volumes/web/cnidarian/ets_v9.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tID=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tID=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tID=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", 
"C17998\tGLEAN\tmRNA\t196\t387\t1\t-\t.\tID=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tParent=CGI_10000005;\r\n", "C18346\tGLEAN\tmRNA\t174\t551\t1\t+\t.\tID=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tParent=CGI_10000009;\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "# Rename mRNA features to CDS: ets_v9.gff -> ets_v9_b.gff\n", "!sed 's/mRNA/CDS/g' /Volumes/web/cnidarian/ets_v9.gff > /Volumes/web/cnidarian/ets_v9_b.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_b.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tID=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tID=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tID=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tID=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tParent=CGI_10000005;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t1\t+\t.\tID=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tParent=CGI_10000009;\r\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "# Park the ID= attribute under a temporary PGUUID= key so ID and Parent can be swapped: ets_v9_b.gff -> ets_v9_c.gff\n", "!sed 's/ID=/PGUUID=/g' /Volumes/web/cnidarian/ets_v9_b.gff > /Volumes/web/cnidarian/ets_v9_c.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_c.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tPGUUID=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", 
"C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tPGUUID=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tPGUUID=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tPGUUID=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tParent=CGI_10000005;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t1\t+\t.\tPGUUID=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tParent=CGI_10000009;\r\n" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "# Turn Parent= into ID= on the exon rows: ets_v9_c.gff -> ets_v9_d.gff\n", "!sed 's/Parent=/ID=/g' /Volumes/web/cnidarian/ets_v9_c.gff > /Volumes/web/cnidarian/ets_v9_d.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_d.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tPGUUID=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tID=CGI_10000001;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tPGUUID=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tID=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tPGUUID=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tID=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tPGUUID=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tID=CGI_10000005;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t1\t+\t.\tPGUUID=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tID=CGI_10000009;\r\n" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "# Turn the temporary PGUUID= key into Parent= on the CDS rows: ets_v9_d.gff -> ets_v9_f.gff\n", "!sed 's/PGUUID=/Parent=/g' /Volumes/web/cnidarian/ets_v9_d.gff > /Volumes/web/cnidarian/ets_v9_f.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_f.gff" ], "language": 
"python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tID=CGI_10000001;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tID=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tID=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tParent=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tID=CGI_10000005;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t1\t+\t.\tParent=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tID=CGI_10000009;\r\n" ] } ], "prompt_number": 25 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Fixed gff issue" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/web/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/cnidarian/ets_v9_f.gff -c /Volumes/web/oyster/bioinformatics/Crassostrea_gigas.GCA_000297895.1.21.dna.genome.fa.fa -o /Users/sr320/Desktop/Cg_samifier_out20140220 -l /Users/sr320/Desktop/log_20140220 -b /Users/sr320/Desktop/Cg_20140220.bed " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Start document\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "End document!\r\n", "Run exception thrown\r\n", "java.lang.NumberFormatException: null\r\n", "\tat java.lang.Integer.parseInt(Integer.java:454)\r\n", "\tat java.lang.Integer.parseInt(Integer.java:527)\r\n", "\tat au.org.intersect.samifier.parser.mzidentml.MzidReader.build(MzidReader.java:113)\r\n", "\tat 
au.org.intersect.samifier.parser.mzidentml.MzidReader.processEvidence(MzidReader.java:137)\r\n", "\tat au.org.intersect.samifier.parser.mzidentml.MzidReader.run(MzidReader.java:54)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseMascotPeptideSearchResultsMzidentMLFormat(PeptideSearchResultsParserImpl.java:129)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:79)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56)\r\n", "\tat au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90)\r\n", "\tat au.org.intersect.samifier.Samifier.main(Samifier.java:125)\r\n", "Version = 1.0.9\r\n", "java.lang.NullPointerException\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "usage: samifier [-b ] -c -g [-l\r\n", " ] -m -o -r \r\n", " [-s ]\r\n", " -b Filename to write IGV regions of\r\n", " interest (BED) file to\r\n", " -c Directory containing the chromosome\r\n", " files in FASTA format for the given\r\n", " genome\r\n", " -g Genome file in gff format\r\n", " -l Filename to write the log into\r\n", " -m File mapping protein identifier to\r\n", " ordered locus name\r\n", " -o Filename to write the SAM format file to\r\n", " -r Mascot search results file in txt format\r\n", " -s Minimum confidence score for peptides to\r\n", " be included\r\n", "java.lang.NullPointerException\r\n", "\tat java.util.ArrayList.addAll(ArrayList.java:559)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56)\r\n", "\tat au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90)\r\n", "\tat au.org.intersect.samifier.Samifier.main(Samifier.java:125)\r\n" ] } ], "prompt_number": 27 }, { "cell_type": "raw", "metadata": {}, "source": [ "!java -jar 
/Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/web/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/cnidarian/ets_v9_f.gff -c /Volumes/web/cnidarian/v9_multi -o /Users/sr320/Desktop/Cg_samifier_out20140220 -l /Users/sr320/Desktop/log_20140220 -b /Users/sr320/Desktop/Cg_20140220.bed " ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/oyster.v9.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">scaffold360\r\n", "TATTCATATATCATTGAGAATGACAGTTTAAAACAGGATTTTTAATTGTGTTTAATAGGCGACCAACATTTAAGTGTCATTTGTTGAGTTATAAGCGAGTTAAAAAGCTTGTAGTTCTTCGCTATGGAAACAAAACTTTACATTTTGTTTACATTTCAGTTTTAGACCTCAAATGAATGTTTTTATCGTTAGGAACTGTTTATTTATGCTTTAAATTGATAAGAAGATTGACAAATCAGCTTGAAAAAGATTTTTTACTGGTGTATTGAACCCAGATGTA\r\n", ">scaffold18356\r\n", "TTAAACTGGTACAACAGTATATCCAAGAGAAAAAGGACTTTATAAACAATTTAGAACAACATTGTGGGTCGGATCGGAATTCAAGAACACAGGATCCATTGATGGCTTCTACTATGAAGGGCGATACTACCGCTTGGGGTGACGTCAAATACACTCCGTCATCGACAAAATCTCCTGGATTGTATCCATTACAGGAATTGAAGAAATAACTTTCAATGTAAACAATTTACAAAGATATACATATCTTACGTGAATATCTGGCATGGAAACAACACCTACATGATAGAATAAGACAATATTTCCTACGATACATCCATGGGACATGAAGAAAGTGAAACAGTGTATATCCATATTTAAATTATTATTATAACCGTGTAATCCAAAATGAATTTTATTCCTTCTCATCAGTAAGTACACTGAAAATTGACTACTAAGAAGAGGAAAACACAGAAACGCTAGTTACAACCGATGACCGTTATGAACATAATGCGATTATTTCAGTGAACCGTGTATTAGTGATTTCCTATGTAGGAAGGATTAATATAATTTGCTTCCTATTAGTGTAAT\r\n", ">scaffold20428\r\n", 
"AGAAATTATCCAGGGGTATATTCAGTAATTAAATTTGAAAAAAAAATTGGTTTCAACATATTTTATTGAATAAGGAAAATAGTTTGTATAACTTTAGTTACAAGTTCCTCTTTCTCAATTACTAGAAACTGAATACAACATGATAGAAGAAAATTAGATTTACAGGAATCAATTTTAATTGAGACACACTTGCTATATTGGGGTAGTGACACTTTCTTATAACACAAACTGCTTTTATTTTTTCAGCAGTTCAAAACTTTATTTAAGAGTCTGGAAGATTTTTAAATAATTAATTTTCTTTTTGTTATGGCTATTCTTACACCGTATATCACTCTATTTACCATACAATCACATTTTGTTTTGTTAATGCATGTTTAGGAATATAATTACTATAAAGTCAATATCTACAAATATAATGCATTATCACTATGGCAGTTGTATATATTATACATATTATAATCAAGAGAGAAACATGTAAGTTAAAAGAACTTGTTACCCACCCTCTTGTCCAATAACAATAAAATATGTTCAAATCAAATCAAACAAGAATTGATATACATGTACTAGTATATGGAATTTTTGTTCCTTTTGATGATGTATAGTGAAGAGTTTTTTAACCGCGTCACCTTTCGAATTGAACGTGAAAACCAGCAGGTTGTAAATTTCCTCTACACCATTCTTTTATATTTTTTAATTTTCAAATTTATTGTTGTGATGTTGGCTAAGAATCCTCATTTCAATCCCTTAATTATAATGGTACATGTAAATAAAATGGCGAGTGTCTATCTATGACGTACAAAGGAAGAAGCACTTTCCGGTGACGTAGTTATCACAATGCCCATACAGTGCTTGTCGATTTTATCAACTTTGATAAATAGGTTAAGTTGTATGTATACTGTATAGAATTCATGGATCCAAGGTCATGTAATAATCAACGCAAATTCCATATTGTGTTGAACATCAATTTAGTTTGTGTTATAAATTTGTAAAAGTTTATACATTCATACAAGCACCTGCTGTA\r\n", ">scaffold18720\r\n", "TCATGCCGATGATTTATCATACAAACATACGAGAGAAGTATTTAATAAAGCTGCATTTAGGATAATTATATCATTTTGCCATGCCATGACATATTCTGATCATCTCACATCATTGATAAAAAATTGTGCCTTTAATTCCTACCCTAAAATTAAATATTGAATATTATCATAATATTAAATTCAGTTACCTTAGATGTTCCTTTACTTGAGCCGATAAACTGCATGGTTCTATTCATCTTCACAAATAATATAGGAGACAGCCATGTCACCGAGTTTGAACGTCAGCAGGAGGATGTCTGTACAAAAAAATTGCCTTTAATTTGCTTTCAAATTTTCATTTTAAAGTATTGAGAACATTATTGTATAATAATCACCCATATATAATAAAAATCTGTAAAAGAACATCTGCATACTTACATTATTCCTACAGGCAGCCATGATGATAGCATAAGATCAAGTTACTCTGATGGATTTGTCAAAAATGACATCACAGTTCTTTTGGCGGTAATGTTTCATTCACGAGTGCAGAAAAGTTACTATGCATTGATACTGCATTTCATAAATAGTGTAGACAAGTAAAATGCTTCATGATGGACACACATAATATAGTCATGTACATTTAAAAAAA\r\n", ">scaffold23246\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ 
"GAATTGATTTGCCACAATTTTTTGTGTGGTCAATCTAAACTAGGTTCCTCAGCTCTGTTTCTAAATTAGCATTCTGTCCTTTTTCAGAATAAAATTTTGCACAACTAACCAGGATCTTGTTAACAATTTTGAGTTAAGTCTAAATATAAATTCAAAAAATCATTTAGAGTTATTAGTAACATGATATACTAGTGTAAGTCATTTTTTGAGAATTCCCAAATGTTCTTTCTGTTGAGTGAAGATACATGTAGGTCACGAATCTGGCACGTTTCCATTTTTTGTAAACTTGTAAAATAAAACATTTTAAAAATTTTTAAAGTAAAAAGGTGGCATTTACAAAATCTGTTTTTAGTTATAAAGAACAATCATATTTAAAATATATAAAAATGCTGTCTTTGGACTCCGAATAATATTCCAAATATATTGTCTGAAAGTTGGACAATTTTCTACTTCAAATATATTATAATACGTACGTGTATTTCCGAATCTGTAGTCTAGCAATGGGTTAACATTGTCACATAAAATTGCATGTTATGGGAAAAACCGGACATTCTATATTTTACTAAACTGTACGGTTGCTTCAACCCATATTGTTTAAAAGCAATGCGATTAAATCAAAGGAAAGCATGGTCAAATGGGAATTCAAATATATAAGTTAAAGAGGAAAAAACAAAACCATTTTATAAACTCGTATATATTTCACTATTAATGAAATAAAAAATAAGCAGTCTCACATGCCCACACACCCCATGACTTGGCAATGCTACACATTACAAATGGCAGAGTACCCTTTATATACCCTTAATAATAACATGGACATAACCACAAAAATCAAAGTACTGAAGTACTTAAACCCGGACTCTTTCGGTGTAATTATGCACATAGTGTAGTTAAATATTCTCTCTCTCTCTCTCTCTCTCTCTTATAATGCTTTGTAAAGAAGTTCATTGAATTAAGAAGAAATTAAACAATAGGAAATAAAAGGAAATTTTAAAAGTTCAGCATGATTCTAAGTTGCTCATGGCTACAGAAAAATGTATAAACTTACCTTTTGATGATGTAACGAAAGCACAGACTGCAAATGTAAAGATTACACCCTTCATTCTATATGAATGTCTATAACTAGGAAGACAGGCTATTTAAATACAAACCTGTTTATGTGATTTTATTTTTAGAAATAACTAAAGTTAGAGCAATACGCAAGTTAAACATGTGGGTGATAGAAATGCATGCAAAACACAGTTACGTCTCTTCAAGCCTTATCAGGCCTTCTCGGCAAAGTTTAGAAAGCATTTTAAATGCATCTGGCATATTCACTTGGTTTTACTTTCCACCTCTTGTATACTGACTGTTGTGATACATTGTACAGTTTAAAATTTAGATAAACTTTTCTTATTATTTGGACTTGATTTCATTTCCAAGAATAATTCTATCCTGTTTTGATTAGCACTGCATATTGAATATTTTCACAAAGTGTTTTATTGTTGCGCGTGTATCTATAGTCTGTTCCAAGTTACACTGACTTTTTTTAATGATTTTGAACAAAAACATGTATACACATATACGTATAACAATACAAGTAAAGTAATAAAGCAAACGTTATTGAAATTAATGATAAAAAAGTGCAAAGACTATTTGTAAAAATACATGTAATCATCAAAAGAACACTATACAGTATAGTCATATGCATAAAAAAAATCAATGGTGTTACTCGTTTATTCTGGAATCTGATATGTAAACTTAAAAAGTCATGAATGTTATAAAACTGTTGATTTTTATTGCTTGATTTTTAATTGGACGATAGTATTAACATGCATTATTACTCTGTTATGTTGAGTATTTATTTTATTTATTCATTTGGTCGAAATTTATCGGCATAATTTGTCGTTATTTTCAAATTTCTGGCGGCATTATTATTCAGTGTTTTAAAATGATTATCACATGACCAA\r\n" ] } ], "prompt_number": 29 }, { 
"cell_type": "code", "collapsed": false, "input": [ "!/Users/sr320/Desktop/pl.pl /Volumes/web/cnidarian/oyster.v9.fa /Volumes/web/cnidarian/v9_multi" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", " SplitMultiFasta\r\n", " -------------------------------------------------\r\n", " Splits multi fasta into single sequences\r\n", " \r\n", " Usage:\r\n", " splitMultiFasta.pl InputFile OutputDirectory\r\n", " -------------------------------------------------\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "^C\r\n" ] } ], "prompt_number": 34 }, { "cell_type": "code", "collapsed": false, "input": [ "!head C8443.fna " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">C8443\r\n", "CTGGCATCTGGCAAGTTTCTGTAAAAGTAATTTGCAGTACATGCATTGCAAGTACTGGTAATGCACAGACCTTATTGTGATTATGGTAAATGTACTATCGTCCTGTTTAATACAGTATAGTACGCATCCTGCCTATGAC\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar \\\n", "-r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid \\\n", "-m /Users/sr320/Desktop/103B_251_02_mappingfile.txt \\\n", "-g /Volumes/web/cnidarian/ets_v9_f.gff \\\n", "-c /Users/sr320/Desktop/multi_fasta \\\n", "-o /Users/sr320/Desktop/Cg_samifier_out20140220 \\\n", "-l /Users/sr320/Desktop/log_20140220 \\\n", "-b /Users/sr320/Desktop/Cg_20140220.bed " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Start document\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "End document!\r\n", "Run exception thrown\r\n", "java.lang.NumberFormatException: null\r\n", "\tat java.lang.Integer.parseInt(Integer.java:454)\r\n", "\tat java.lang.Integer.parseInt(Integer.java:527)\r\n", "\tat 
au.org.intersect.samifier.parser.mzidentml.MzidReader.build(MzidReader.java:113)\r\n", "\tat au.org.intersect.samifier.parser.mzidentml.MzidReader.processEvidence(MzidReader.java:137)\r\n", "\tat au.org.intersect.samifier.parser.mzidentml.MzidReader.run(MzidReader.java:54)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseMascotPeptideSearchResultsMzidentMLFormat(PeptideSearchResultsParserImpl.java:129)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:79)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56)\r\n", "\tat au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90)\r\n", "\tat au.org.intersect.samifier.Samifier.main(Samifier.java:125)\r\n", "Version = 1.0.9\r\n", "java.lang.NullPointerException\r\n", "usage: samifier [-b ] -c -g [-l\r\n", " ] -m -o -r \r\n", " [-s ]\r\n", " -b Filename to write IGV regions of\r\n", " interest (BED) file to\r\n", " -c Directory containing the chromosome\r\n", " files in FASTA format for the given\r\n", " genome\r\n", " -g Genome file in gff format\r\n", " -l Filename to write the log into\r\n", " -m File mapping protein identifier to\r\n", " ordered locus name\r\n", " -o Filename to write the SAM format file to\r\n", " -r Mascot search results file in txt format\r\n", " -s Minimum confidence score for peptides to\r\n", " be included\r\n", "java.lang.NullPointerException\r\n", "\tat java.util.ArrayList.addAll(ArrayList.java:559)\r\n", "\tat au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56)\r\n", "\tat au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90)\r\n", "\tat au.org.intersect.samifier.Samifier.main(Samifier.java:125)\r\n" ] } ], "prompt_number": 63 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Let's Try 
Example Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " java -jar samifier.jar \\\n", " -c cconcisus_ref/genome_dir \\\n", " -r Mascot_Result/Glimmer/F188715.dat \\\n", " -g pipeline_glimmer.gff \\\n", " -m pipeline_glimmer.accession \\\n", " -l pipeline_glimmer.log \\\n", " -o pipeline_glimmer.sam \\\n", " -b pipeline_glimmer.bed" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar \\\n", "-c /Users/sr320/Desktop/wiki_test_cases/cconcisus_ref/genome_dir \\\n", "-r /Users/sr320/Desktop/wiki_test_cases/Mascot_Result/Glimmer/F188715.dat \\\n", "-g /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.gff \\\n", "-m /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.accession \\\n", "-l /Users/sr320/Desktop/pipeline_glimmer.log \\\n", "-o /Users/sr320/Desktop/pipeline_glimmer.sam \\\n", "-b /Users/sr320/Desktop/pipeline_glimmer.bed" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.accession" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "orf00002 orf00002 orf00002\r\n", "orf00003 orf00003 orf00003\r\n", "orf00004 orf00004 orf00004\r\n", "orf00005 orf00005 orf00005\r\n", "orf00006 orf00006 orf00006\r\n", "orf00007 orf00007 orf00007\r\n", "orf00008 orf00008 orf00008\r\n", "orf00009 orf00009 orf00009\r\n", "orf00011 orf00011 orf00011\r\n", "orf00012 orf00012 orf00012\r\n" ] } ], "prompt_number": 64 }, { "cell_type": "code", "collapsed": false, "input": [ "!head 103B_251_02_mappingfile.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "CGI_10000050\tCGI_10000050\tCGI_10000050\r\n", "CGI_10000055\tCGI_10000055\tCGI_10000055\r\n", "CGI_10000067\tCGI_10000067\tCGI_10000067\r\n", 
"CGI_10000075\tCGI_10000075\tCGI_10000075\r\n", "CGI_10000077\tCGI_10000077\tCGI_10000077\r\n", "CGI_10000174\tCGI_10000174\tCGI_10000174\r\n", "CGI_10000235\tCGI_10000235\tCGI_10000235\r\n", "CGI_10000237\tCGI_10000237\tCGI_10000237\r\n", "CGI_10000384\tCGI_10000384\tCGI_10000384\r\n", "CGI_10000492\tCGI_10000492\tCGI_10000492\r\n" ] } ], "prompt_number": 62 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Likely -r file ( Mascot search results file in txt format ) is wrong format" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head -50 /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n", " \r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " \r\n", " \r\n", " \r\n" ] } ], "prompt_number": 48 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "example" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head -50 /Users/sr320/Desktop/wiki_test_cases/Mascot_Result/Glimmer/F188715.dat" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "MIME-Version: 1.0 (Generated by Mascot version 1.0)\r\n", "Content-Type: multipart/mixed; boundary=gc0p4Jq0M2Yt08jU534c0p\r\n", "\r\n", "--gc0p4Jq0M2Yt08jU534c0p\r\n", "Content-Type: application/x-Mascot; name=\"parameters\"\r\n", "\r\n", "LICENSE=Licensed to: Australian Proteomics Computational Facility, (51 processors).\r\n", "MP=\r\n", "NM=\r\n", 
"COM=Submitted from Test virtual protein by Mascot Daemon on WIN-V0L2R2U0DVL\r\n", "IATOL=\r\n", "IA2TOL=\r\n", "IASTOL=\r\n", "IBTOL=\r\n", "IB2TOL=\r\n", "IBSTOL=\r\n", "IYTOL=\r\n", "IY2TOL=\r\n", "IYSTOL=\r\n", "SEG=\r\n", "SEGT=\r\n", "SEGTU=\r\n", "LTOL=\r\n", "TOL=4\r\n", "TOLU=ppm\r\n", "ITH=\r\n", "ITOL=0.4\r\n", "ITOLU=Da\r\n", "PFA=1\r\n", "DB=contaminants\r\n", "DB2=Campylobacter_PR\r\n", "MODS=\r\n", "MASS=Monoisotopic\r\n", "CLE=Trypsin\r\n", "FILE=C:\\Users\\ignatius\\Documents\\PostDoc\\2012\\ANDS data integration project\\Sample Data\\Cconcisus Reference proteome\\RawSpectra\\BAA-1457 Reference\\mzML\\Nadeem_29_7_10_1.mzML\r\n", "PEAK=\r\n", "QUE=\r\n", "TWO=\r\n", "SEARCH=MIS\r\n", "USERNAME=Carlos Aya\r\n", "USEREMAIL=carlos@intersect.org.au\r\n", "CHARGE=2+ and 3+\r\n", "INTERMEDIATE=\r\n", "REPORT=AUTO\r\n", "OVERVIEW=\r\n", "FORMAT=mzML (.mzML)\r\n", "FORMVER=1.01\r\n", "FRAG=\r\n", "IT_MODS=Carbamidomethyl (C),Oxidation (M)\r\n", "USER00=\r\n" ] } ], "prompt_number": 41 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.accession" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "orf00002 orf00002 orf00002\r\n", "orf00003 orf00003 orf00003\r\n", "orf00004 orf00004 orf00004\r\n", "orf00005 orf00005 orf00005\r\n", "orf00006 orf00006 orf00006\r\n", "orf00007 orf00007 orf00007\r\n", "orf00008 orf00008 orf00008\r\n", "orf00009 orf00009 orf00009\r\n", "orf00011 orf00011 orf00011\r\n", "orf00012 orf00012 orf00012\r\n" ] } ], "prompt_number": 47 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Fixing GFF" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!wget http://eagle.fish.washington.edu/oyster/bioinformatics/oyster.v9.glean.final.rename.gff" ], "language": "python", "metadata": 
{}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "--2014-03-12 09:08:26-- http://eagle.fish.washington.edu/oyster/bioinformatics/oyster.v9.glean.final.rename.gff\r\n", "Resolving eagle.fish.washington.edu... 128.95.149.81\r\n", "Connecting to eagle.fish.washington.edu|128.95.149.81|:80... connected.\r\n", "HTTP request sent, awaiting response... " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "200 OK\r\n", "Length: 14179523 (14M) [text/plain]\r\n", "Saving to: `oyster.v9.glean.final.rename.gff'\r\n", "\r\n", "\r", " 0% [ ] 0 --.-K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "55% [====================> ] 7,829,120 37.3M/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100%[======================================>] 14,179,523 45.9M/s in 0.3s \r\n", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "2014-03-12 09:08:26 (45.9 MB/s) - `oyster.v9.glean.final.rename.gff' saved [14179523/14179523]\r\n", "\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "cd Desktop" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Users/sr320/Desktop\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_f.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tID=CGI_10000001;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tID=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", 
"C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tID=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tParent=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tID=CGI_10000005;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t1\t+\t.\tParent=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tID=CGI_10000009;\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_g.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tID=CGI_10000001;\r\n", "C16582\tGLEAN\tgene\t35\t385\t0.555898\t-\t.\tName=CGI_10000001;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tID=CGI_10000002;\r\n", "C17212\tGLEAN\tgene\t31\t363\t0.999572\t+\t.\tName=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tID=CGI_10000003;\r\n", "C17316\tGLEAN\tgene\t30\t257\t0.555898\t+\t.\tName=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tParent=CGI_10000005;\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_cgigas_v9_11.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tID=CGI_10000001;\r\n", "C16582\tGLEAN\tCDS\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\tgene\t35\t385\t.\t-\t0\tName=CGI_10000001;\r\n", "C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tID=CGI_10000002;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\tgene\t31\t363\t.\t+\t0\tName=CGI_10000002;\r\n", "C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tID=CGI_10000003;\r\n", 
"C17316\tGLEAN\tCDS\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\tgene\t30\t257\t.\t+\t0\tName=CGI_10000003;\r\n", "C17476\tGLEAN\tmRNA\t34\t257\t0.998947\t-\t.\tID=CGI_10000004;\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/ID/Parent/g' /Volumes/web/cnidarian/ets_cgigas_v9_12.gff\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_cgigas_v9_12.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\tCDS\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\tgene\t35\t385\t.\t-\t0\tName=CGI_10000001;\r\n", "C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\tgene\t31\t363\t.\t+\t0\tName=CGI_10000002;\r\n", "C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\tgene\t30\t257\t.\t+\t0\tName=CGI_10000003;\r\n", "C17476\tGLEAN\tmRNA\t34\t257\t0.998947\t-\t.\tParent=CGI_10000004;\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/CDS/mRNA/g' /Volumes/web/cnidarian/ets_v9_i.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_i.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\tgene\t35\t385\t0.555898\t-\t.\tName=CGI_10000001;\r\n", 
"C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\tgene\t31\t363\t0.999572\t+\t.\tName=CGI_10000002;\r\n", "C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\tgene\t30\t257\t0.555898\t+\t.\tName=CGI_10000003;\r\n", "C17998\tGLEAN\tmRNA\t196\t387\t1\t-\t.\tParent=CGI_10000005;\r\n" ] } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/ID/Parent/g' /Volumes/web/cnidarian/ets_v9_h.gff" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "!head -50 /Volumes/web/cnidarian/ets_v9_f.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tCDS\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tID=CGI_10000001;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tID=CGI_10000002;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tID=CGI_10000003;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t1\t-\t.\tParent=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tID=CGI_10000005;\r\n", "C18346\tGLEAN\tCDS\t174\t551\t1\t+\t.\tParent=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tID=CGI_10000009;\r\n", "C18428\tGLEAN\tCDS\t286\t546\t0.555898\t-\t.\tParent=CGI_10000010;\r\n", "C18428\tGLEAN\texon\t286\t546\t.\t-\t0\tID=CGI_10000010;\r\n", "C18964\tGLEAN\tCDS\t203\t658\t0.999572\t-\t.\tParent=CGI_10000011;\r\n", "C18964\tGLEAN\texon\t203\t658\t.\t-\t0\tID=CGI_10000011;\r\n", "C18980\tGLEAN\tCDS\t30\t674\t0.555898\t+\t.\tParent=CGI_10000012;\r\n", "C18980\tGLEAN\texon\t30\t674\t.\t+\t0\tID=CGI_10000012;\r\n", 
"C19100\tGLEAN\tCDS\t160\t681\t0.999955\t-\t.\tParent=CGI_10000013;\r\n", "C19100\tGLEAN\texon\t160\t681\t.\t-\t0\tID=CGI_10000013;\r\n", "C19356\tGLEAN\tCDS\t355\t597\t1\t+\t.\tParent=CGI_10000014;\r\n", "C19356\tGLEAN\texon\t355\t597\t.\t+\t0\tID=CGI_10000014;\r\n", "C19392\tGLEAN\tCDS\t46\t610\t1\t+\t.\tParent=CGI_10000015;\r\n", "C19392\tGLEAN\texon\t46\t183\t.\t+\t0\tID=CGI_10000015;\r\n", "C19392\tGLEAN\texon\t452\t610\t.\t+\t0\tID=CGI_10000015;\r\n", "C19510\tGLEAN\tCDS\t451\t702\t1\t+\t.\tParent=CGI_10000016;\r\n", "C19510\tGLEAN\texon\t451\t702\t.\t+\t0\tID=CGI_10000016;\r\n", "C19532\tGLEAN\tCDS\t155\t601\t0.575455\t-\t.\tParent=CGI_10000017;\r\n", "C19532\tGLEAN\texon\t155\t601\t.\t-\t0\tID=CGI_10000017;\r\n", "C19570\tGLEAN\tCDS\t208\t411\t0.555898\t-\t.\tParent=CGI_10000018;\r\n", "C19570\tGLEAN\texon\t208\t411\t.\t-\t0\tID=CGI_10000018;\r\n", "C19626\tGLEAN\tCDS\t347\t814\t0.999572\t-\t.\tParent=CGI_10000019;\r\n", "C19626\tGLEAN\texon\t347\t814\t.\t-\t0\tID=CGI_10000019;\r\n", "C19672\tGLEAN\tCDS\t132\t464\t1\t+\t.\tParent=CGI_10000021;\r\n", "C19672\tGLEAN\texon\t132\t464\t.\t+\t0\tID=CGI_10000021;\r\n", "C20188\tGLEAN\tCDS\t437\t967\t0.999572\t-\t.\tParent=CGI_10000024;\r\n", "C20188\tGLEAN\texon\t437\t967\t.\t-\t0\tID=CGI_10000024;\r\n", "C20262\tGLEAN\tCDS\t222\t1005\t1\t-\t.\tParent=CGI_10000025;\r\n", "C20262\tGLEAN\texon\t872\t1005\t.\t-\t0\tID=CGI_10000025;\r\n", "C20262\tGLEAN\texon\t642\t649\t.\t-\t1\tID=CGI_10000025;\r\n", "C20262\tGLEAN\texon\t222\t538\t.\t-\t2\tID=CGI_10000025;\r\n", "C20282\tGLEAN\tCDS\t330\t980\t1\t+\t.\tParent=CGI_10000026;\r\n", "C20282\tGLEAN\texon\t330\t980\t.\t+\t0\tID=CGI_10000026;\r\n", "scaffold1224\tGLEAN\tCDS\t107\t775\t0.639435\t-\t.\tParent=CGI_10000027;\r\n", "scaffold1224\tGLEAN\texon\t107\t775\t.\t-\t0\tID=CGI_10000027;\r\n", "C20334\tGLEAN\tCDS\t273\t1027\t1\t-\t.\tParent=CGI_10000028;\r\n", "C20334\tGLEAN\texon\t868\t1027\t.\t-\t0\tID=CGI_10000028;\r\n", 
"C20334\tGLEAN\texon\t273\t523\t.\t-\t2\tID=CGI_10000028;\r\n", "C20412\tGLEAN\tCDS\t73\t776\t1\t-\t.\tParent=CGI_10000029;\r\n", "C20412\tGLEAN\texon\t706\t776\t.\t-\t0\tID=CGI_10000029;\r\n", "C20412\tGLEAN\texon\t410\t463\t.\t-\t1\tID=CGI_10000029;\r\n", "C20412\tGLEAN\texon\t73\t214\t.\t-\t1\tID=CGI_10000029;\r\n" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/CDS/gene/g' /Volumes/web/cnidarian/ets_v9_j.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/Parent/Name/g' /Volumes/web/cnidarian/ets_v9_k.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_k.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tgene\t35\t385\t0.555898\t-\t.\tName=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tID=CGI_10000001;\r\n", "C17212\tGLEAN\tgene\t31\t363\t0.999572\t+\t.\tName=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tID=CGI_10000002;\r\n", "C17316\tGLEAN\tgene\t30\t257\t0.555898\t+\t.\tName=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tID=CGI_10000003;\r\n", "C17998\tGLEAN\tgene\t196\t387\t1\t-\t.\tName=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tID=CGI_10000005;\r\n", "C18346\tGLEAN\tgene\t174\t551\t1\t+\t.\tName=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tID=CGI_10000009;\r\n" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/ID/Parent/g' /Volumes/web/cnidarian/ets_v9_l.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_l.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": 
"stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tgene\t35\t385\t0.555898\t-\t.\tName=CGI_10000001;\r\n", "C16582\tGLEAN\texon\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C17212\tGLEAN\tgene\t31\t363\t0.999572\t+\t.\tName=CGI_10000002;\r\n", "C17212\tGLEAN\texon\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17316\tGLEAN\tgene\t30\t257\t0.555898\t+\t.\tName=CGI_10000003;\r\n", "C17316\tGLEAN\texon\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17998\tGLEAN\tgene\t196\t387\t1\t-\t.\tName=CGI_10000005;\r\n", "C17998\tGLEAN\texon\t196\t387\t.\t-\t0\tParent=CGI_10000005;\r\n", "C18346\tGLEAN\tgene\t174\t551\t1\t+\t.\tName=CGI_10000009;\r\n", "C18346\tGLEAN\texon\t174\t551\t.\t+\t0\tParent=CGI_10000009;\r\n" ] } ], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "!sed 's/exon/CDS/g' /Volumes/web/cnidarian/ets_v9_m.gff" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -50 /Volumes/web/cnidarian/ets_v9_o.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "scaffold998\tGLEAN\tCDS\t116508\t117557\t.\t-\t0\tParent=CGI_10006465;\r\n", "scaffold998\tGLEAN\tmRNA\t3874\t24837\t0.410057\t+\t.\tParent=CGI_10006458;\r\n", "scaffold998\tGLEAN\tmRNA\t25310\t28141\t0.813029\t+\t.\tParent=CGI_10006459;\r\n", "scaffold998\tGLEAN\tmRNA\t43399\t61271\t0.812368\t+\t.\tParent=CGI_10006460;\r\n", "scaffold998\tGLEAN\tmRNA\t67122\t73547\t0.999674\t+\t.\tParent=CGI_10006461;\r\n", "scaffold998\tGLEAN\tmRNA\t73620\t80841\t0.721465\t+\t.\tParent=CGI_10006462;\r\n", "scaffold998\tGLEAN\tmRNA\t89231\t90958\t0.999202\t+\t.\tParent=CGI_10006463;\r\n", "scaffold998\tGLEAN\tmRNA\t91399\t104169\t0.99895\t-\t.\tParent=CGI_10006464;\r\n", "scaffold998\tGLEAN\tmRNA\t116508\t131912\t0.996071\t-\t.\tParent=CGI_10006465;\r\n", "scaffold999\tGLEAN\tgene\t23147\t24888\t0.654733\t+\t.\tName=CGI_10006969;\r\n", 
"scaffold999\tGLEAN\tCDS\t23147\t23288\t.\t+\t0\tParent=CGI_10006969;\r\n", "scaffold999\tGLEAN\tCDS\t24070\t24204\t.\t+\t2\tParent=CGI_10006969;\r\n", "scaffold999\tGLEAN\tCDS\t24468\t24581\t.\t+\t2\tParent=CGI_10006969;\r\n", "scaffold999\tGLEAN\tCDS\t24695\t24888\t.\t+\t2\tParent=CGI_10006969;\r\n", "scaffold999\tGLEAN\tgene\t39254\t45360\t1\t-\t.\tName=CGI_10006970;\r\n", "scaffold999\tGLEAN\tCDS\t45195\t45360\t.\t-\t0\tParent=CGI_10006970;\r\n", "scaffold999\tGLEAN\tCDS\t39995\t40053\t.\t-\t2\tParent=CGI_10006970;\r\n", "scaffold999\tGLEAN\tCDS\t39701\t39892\t.\t-\t0\tParent=CGI_10006970;\r\n", "scaffold999\tGLEAN\tCDS\t39254\t39460\t.\t-\t0\tParent=CGI_10006970;\r\n", "scaffold999\tGLEAN\tgene\t47971\t57911\t0.937649\t-\t.\tName=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t57819\t57911\t.\t-\t0\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t56427\t56498\t.\t-\t0\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t53710\t53847\t.\t-\t0\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t52115\t52144\t.\t-\t0\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t51101\t51218\t.\t-\t0\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t48506\t48655\t.\t-\t2\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tCDS\t47971\t48086\t.\t-\t2\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tgene\t88395\t99702\t0.482874\t-\t.\tName=CGI_10006972;\r\n", "scaffold999\tGLEAN\tCDS\t99690\t99702\t.\t-\t0\tParent=CGI_10006972;\r\n", "scaffold999\tGLEAN\tCDS\t96192\t96278\t.\t-\t2\tParent=CGI_10006972;\r\n", "scaffold999\tGLEAN\tCDS\t95628\t95712\t.\t-\t2\tParent=CGI_10006972;\r\n", "scaffold999\tGLEAN\tCDS\t93995\t94089\t.\t-\t1\tParent=CGI_10006972;\r\n", "scaffold999\tGLEAN\tCDS\t88395\t88534\t.\t-\t2\tParent=CGI_10006972;\r\n", "scaffold999\tGLEAN\tgene\t107744\t126675\t0.391684\t+\t.\tName=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t107744\t107798\t.\t+\t0\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t115160\t115384\t.\t+\t2\tParent=CGI_10006973;\r\n", 
"scaffold999\tGLEAN\tCDS\t116967\t117221\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t118234\t118362\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t120708\t120818\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t121053\t121184\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t122398\t122535\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t123424\t123555\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t124859\t124996\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t126012\t126143\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tCDS\t126617\t126675\t.\t+\t2\tParent=CGI_10006973;\r\n", "scaffold999\tGLEAN\tmRNA\t23147\t24888\t0.654733\t+\t.\tParent=CGI_10006969;\r\n", "scaffold999\tGLEAN\tmRNA\t39254\t45360\t1\t-\t.\tParent=CGI_10006970;\r\n", "scaffold999\tGLEAN\tmRNA\t47971\t57911\t0.937649\t-\t.\tParent=CGI_10006971;\r\n", "scaffold999\tGLEAN\tmRNA\t88395\t99702\t0.482874\t-\t.\tParent=CGI_10006972;\r\n", "scaffold999\tGLEAN\tmRNA\t107744\t126675\t0.391684\t+\t.\tParent=CGI_10006973;" ] } ], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tID=CGI_10000001;\r\n", "C16582\tGLEAN\tCDS\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tID=CGI_10000002;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tID=CGI_10000003;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17998\tGLEAN\tmRNA\t196\t387\t1\t-\t.\tID=CGI_10000005;\r\n", "C17998\tGLEAN\tCDS\t196\t387\t.\t-\t0\tParent=CGI_10000005;\r\n", "C18346\tGLEAN\tmRNA\t174\t551\t1\t+\t.\tID=CGI_10000009;\r\n", 
"C18346\tGLEAN\tCDS\t174\t551\t.\t+\t0\tParent=CGI_10000009;\r\n" ] } ], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/ets_v9_p.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C16582\tGLEAN\tmRNA\t35\t385\t0.555898\t-\t.\tParent=CGI_10000001;\r\n", "C16582\tGLEAN\tgene\t35\t385\t.\t-\t.\tName=CGI_10000001;\r\n", "C16582\tGLEAN\tCDS\t35\t385\t.\t-\t0\tParent=CGI_10000001;\r\n", "C17212\tGLEAN\tmRNA\t31\t363\t0.999572\t+\t.\tParent=CGI_10000002;\r\n", "C17212\tGLEAN\tgene\t31\t363\t.\t+\t.\tName=CGI_10000002;\r\n", "C17212\tGLEAN\tCDS\t31\t363\t.\t+\t0\tParent=CGI_10000002;\r\n", "C17316\tGLEAN\tmRNA\t30\t257\t0.555898\t+\t.\tParent=CGI_10000003;\r\n", "C17316\tGLEAN\tgene\t30\t257\t.\t+\t.\tName=CGI_10000003;\r\n", "C17316\tGLEAN\tCDS\t30\t257\t.\t+\t0\tParent=CGI_10000003;\r\n", "C17998\tGLEAN\tmRNA\t196\t387\t1\t-\t.\tParent=CGI_10000005;\r\n" ] } ], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/Crassostrea_gigas.GCA_000297895.1.21.gff3" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "##gff-version 3\r\n", "scaffold1611\tprotein_coding\tgene\t1263\t9963\t.\t-\t.\tID=CGI_10014322;Name=CGI_10014322\r\n", "scaffold1611\tprotein_coding\tmRNA\t1263\t9963\t.\t-\t.\tID=EKC25967;Parent=CGI_10014322\r\n", "scaffold1611\tprotein_coding\tstart_codon\t9961\t9963\t.\t-\t0\tID=start_codon:EKC25967:1;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t9922\t9963\t.\t-\t.\tID=exon:EKC25967:1;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t8502\t8667\t.\t-\t.\tID=exon:EKC25967:2;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t7374\t7534\t.\t-\t.\tID=exon:EKC25967:3;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t3861\t4046\t.\t-\t.\tID=exon:EKC25967:4;Parent=EKC25967\r\n", 
"scaffold1611\tprotein_coding\texon\t1635\t1742\t.\t-\t.\tID=exon:EKC25967:5;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t1263\t1268\t.\t-\t.\tID=exon:EKC25967:6;Parent=EKC25967\r\n" ] } ], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/whale/wiki_test_cases/pipeline_glimmer.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "##gff-version 3\r\n", "NC_009802\tGlimmer\tgene\t1\t1311\t16.88\t+\t0\tName=orf00002;ID=orf00002;\r\n", "NC_009802\tGlimmer\tCDS\t1\t1311\t16.88\t+\t0\tName=orf00002;Parent=orf00002;\r\n", "NC_009802\tGlimmer\tgene\t1319\t1465\t1.61\t+\t0\tName=orf00003;ID=orf00003;\r\n", "NC_009802\tGlimmer\tCDS\t1319\t1465\t1.61\t+\t0\tName=orf00003;Parent=orf00003;\r\n", "NC_009802\tGlimmer\tgene\t1465\t2532\t15.89\t+\t0\tName=orf00004;ID=orf00004;\r\n", "NC_009802\tGlimmer\tCDS\t1465\t2532\t15.89\t+\t0\tName=orf00004;Parent=orf00004;\r\n", "NC_009802\tGlimmer\tgene\t2550\t4859\t16.58\t+\t0\tName=orf00005;ID=orf00005;\r\n", "NC_009802\tGlimmer\tCDS\t2550\t4859\t16.58\t+\t0\tName=orf00005;Parent=orf00005;\r\n", "NC_009802\tGlimmer\tgene\t4878\t5474\t16.37\t+\t0\tName=orf00006;ID=orf00006;\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }