{ "metadata": { "name": "", "signature": "sha256:ef4df100e854df96e54d43f7efddee4d73f8e9495de8d62c0a87945f11823cd5" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "cd /Users/sr320/Desktop/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Users/sr320/Desktop\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!/Users/sr320/Dropbox/Steven/ipig_r5/ipig /Users/sr320/Dropbox/Steven/ipig_r5/examples/PSM_example.mzid -c /Users/sr320/Dropbox/Steven/ipig_r5/ipig.conf\n", "# /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid \\" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "error: wrong number of parameters\r\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "!/Users/sr320/Dropbox/Steven/ipig_r5/ipig \\\n", "/Users/sr320/Dropbox/Steven/ipig_r5/examples/PSM_example.mzid \\\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "error: can't find file (/Users/sr320/Desktop/ipig.conf)\r\n" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -50 /Users/sr320/Desktop/Crassostrea_gigas.GCA_000297895.1.21.dat.dat" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ID C12728 standard; DNA; HTG; 200 BP.\r\n", "XX\r\n", "AC supercontig:GCA_000297895.1:C12728:1:200:1\r\n", "XX\r\n", "SV C12728.GCA_000297895.1\r\n", "XX\r\n", "DT 29-NOV-2013\r\n", "XX\r\n", "DE Crassostrea gigas supercontig C12728 GCA_000297895.1 full sequence 1..200\r\n", "DE annotated by Ensembl Genomes\r\n", "XX\r\n", "KW .\r\n", "XX\r\n", "OS Crassostrea gigas (Crassostrea gigas)\r\n", "OC cellular organisms; Eukaryota; Opisthokonta; 
Metazoa; Eumetazoa; Bilateria;\r\n", "OC Protostomia; Lophotrochozoa; Mollusca; Bivalvia; Pteriomorphia; Ostreoida;\r\n", "OC Ostreoidea; Ostreidae; Crassostrea.\r\n", "XX\r\n", "CC This sequence displays annotation from Ensembl Genomes based on underlying\r\n", "CC annotation from GigaDB(http://dx.doi.org/10.5524/100030). See\r\n", "CC http://www.ensemblgenomes.org for more information.\r\n", "XX\r\n", "CC All feature locations are relative to the first (5') base of the sequence\r\n", "CC in this file. The sequence presented is always the forward strand of the\r\n", "CC assembly. Features that lie outside of the sequence contained in this file\r\n", "CC have clonal location coordinates in the format: .:..\r\n", "XX\r\n", "CC The /gene indicates a unique id for a gene, /note=\"transcript_id=...\" a\r\n", "CC unique id for a transcript, /protein_id a unique id for a peptide and\r\n", "CC note=\"exon_id=...\" a unique id for an exon. These ids are maintained\r\n", "CC wherever possible between versions.\r\n", "XX\r\n", "FH Key Location/Qualifiers\r\n", "FT source 1..200\r\n", "FT /organism=\"Crassostrea gigas\"\r\n", "FT /db_xref=\"taxon:29159\"\r\n", "FT misc_feature 1..200\r\n", "FT /note=\"contig AFTI01025347.1 1..200(1)\"\r\n", "XX\r\n", "SQ Sequence 200 BP; 39 A; 37 C; 28 G; 96 T; 0 other;\r\n", " TTTTTCTTAT TTTTATTTGT ACCAGTTAAT CCTGTGAGCC GCTTTTATGT CTCTTTCTGG 60\r\n", " CCGTTTTTAT GTCTCTTTCT GTTGACTTTA TTATTCTCTC ATTTATGATT TAACATCTCA 120\r\n", " TATGAATTAT TATGTGTATC TTGCATGGGA TATTTTGCCA CTTCGTGCCC ATAAAAATCA 180\r\n", " ACACTTGTCT TTCGGTGTAT 200\r\n", "//\r\n", "ID C12722 standard; DNA; HTG; 200 BP.\r\n", "XX\r\n", "AC supercontig:GCA_000297895.1:C12722:1:200:1\r\n", "XX\r\n" ] } ], "prompt_number": 23 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Issue" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "need oyster data in this format \n", "```\n", "Example UCSC Genes:\n", "#name chrom strand txStart txEnd cdsStart cdsEnd 
exonCount exonStarts exonEnds proteinID alignID\n", "uc009vjk.2 chr1 + 322036 326938 324342 325605 3 322036,324287,324438, 322228,324345,326938, C9J4L2 uc009vjk.2\n", "uc001aau.3 chr1 + 323891 328581 324342 325605 3 323891,324287,324438, 324060,324345,328581, C9J4L2 uc001aau.3\n", "\n", "Example Ensembl Genes:\n", "#bin name chrom strand txStart txEnd cdsStart cdsEnd exonCount exonStarts exonEnds score name2 cdsStartStat cdsEndStat exonFrames\n", "9 ENST00000472741 chr1 - 1026425 1051467 1051467 1051467 3 1026425,1027370,1051439, 1026945,1027483,1051467, 0 ENSG00000131591 none none -1,-1,-1,\n", "34 ENST00000478275 chr1 - 212859759 212872097 212872097 212872097 2 212859759,212870302, 212860321,212872097, 0 ENSG00000123685 none none -1,-1,\n", "```" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!tail /Volumes/web/cnidarian/ets_v9_f.gff" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "scaffold22\tGLEAN\texon\t1863760\t1864161\t.\t+\t0\tID=CGI_10028938;\r\n", "scaffold22\tGLEAN\tCDS\t1869336\t1885890\t0.999933\t-\t.\tParent=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1885752\t1885890\t.\t-\t0\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1884184\t1884318\t.\t-\t2\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1882962\t1883093\t.\t-\t2\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1881349\t1881486\t.\t-\t2\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1879212\t1879349\t.\t-\t2\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1870764\t1870843\t.\t-\t2\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1870289\t1870360\t.\t-\t0\tID=CGI_10028939;\r\n", "scaffold22\tGLEAN\texon\t1869336\t1869428\t.\t-\t0\tID=CGI_10028939;\r\n" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -20 /Volumes/web/cnidarian/Crassostrea_gigas.GCA_000297895.1.21.gff3" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": 
"stdout", "text": [ "##gff-version 3\r\n", "scaffold1611\tprotein_coding\tgene\t1263\t9963\t.\t-\t.\tID=CGI_10014322;Name=CGI_10014322\r\n", "scaffold1611\tprotein_coding\tmRNA\t1263\t9963\t.\t-\t.\tID=EKC25967;Parent=CGI_10014322\r\n", "scaffold1611\tprotein_coding\tstart_codon\t9961\t9963\t.\t-\t0\tID=start_codon:EKC25967:1;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t9922\t9963\t.\t-\t.\tID=exon:EKC25967:1;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t8502\t8667\t.\t-\t.\tID=exon:EKC25967:2;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t7374\t7534\t.\t-\t.\tID=exon:EKC25967:3;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t3861\t4046\t.\t-\t.\tID=exon:EKC25967:4;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t1635\t1742\t.\t-\t.\tID=exon:EKC25967:5;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\texon\t1263\t1268\t.\t-\t.\tID=exon:EKC25967:6;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tCDS\t9922\t9963\t.\t-\t0\tID=CDS:EKC25967:1;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tCDS\t8502\t8667\t.\t-\t0\tID=CDS:EKC25967:2;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tCDS\t7374\t7534\t.\t-\t2\tID=CDS:EKC25967:3;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tCDS\t3861\t4046\t.\t-\t0\tID=CDS:EKC25967:4;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tCDS\t1635\t1742\t.\t-\t0\tID=CDS:EKC25967:5;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tCDS\t1263\t1268\t.\t-\t0\tID=CDS:EKC25967:6;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tstop_codon\t1263\t1265\t.\t-\t0\tID=stop_codon:EKC25967:1;Parent=EKC25967\r\n", "scaffold1611\tprotein_coding\tgene\t61530\t69027\t.\t-\t.\tID=CGI_10014325;Name=CGI_10014325\r\n", "scaffold1611\tprotein_coding\tmRNA\t61530\t69027\t.\t-\t.\tID=EKC25970;Parent=CGI_10014325\r\n", "scaffold1611\tprotein_coding\tstart_codon\t69025\t69027\t.\t-\t0\tID=start_codon:EKC25970:1;Parent=EKC25970\r\n" ] } ], "prompt_number": 2 }, { "cell_type": 
"code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }