{ "metadata": { "name": "", "signature": "sha256:2ccf227a1e9af8ba693917fcfec85c74af7fe1cb67ce69de260c8726ae8be274" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Calculating CpG ratio for transcriptome" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#test file\n", "!head -2 /Volumes/web/scaphapoda/Grace/Transcriptomes/mer_tst/query.fa\n", "!echo \n", "!echo number of seqs =\n", "!fgrep -c \">\" /Volumes/web/scaphapoda/Grace/Transcriptomes/mer_tst/query.fa" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ ">Mmercenaria_Contig_1\r\n", "AAGAGTGACTGGTACCACCTGTGTACTACAATGGTTATTTGATACAACTAAATGTAAGCGGTACCACCATGTATTACAATGTGAAATTAGTATCAATAAGTGTGGCTGGTACCTTTATATATTACAGGTGCTGTTATGTTTGACAGGAATACTGATGTGAGATAGTTACTTCCATACTATGTGTAACCTACGGTCCGGCACGTTGAATGGTGGGGTG\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "number of seqs =\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "43\r\n" ] } ], "prompt_number": 78 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Convert fasta to tab\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!perl -e '$count=0; $len=0; while(<>) {s/\\r?\\n//; s/\\t/ /g; if (s/^>//) { if ($. != 1) {print \"\\n\"} s/ |$/\\t/; $count++; $_ .= \"\\t\";} else {s/ //g; $len += length($_)} print $_;} print \"\\n\"; warn \"\\nConverted $count FASTA records in $. 
lines to tabular format\\nTotal sequence length: $len\\n\\n\";' \\\n", "/Volumes/web/scaphapoda/Grace/Transcriptomes/mer_tst/query.fa > /Volumes/web/cnidarian/cpgR_1" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "Converted 43 FASTA records in 86 lines to tabular format\r\n", "Total sequence length: 10226\r\n", "\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -1 /Volumes/web/cnidarian/cpgR_1" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Mmercenaria_Contig_1\t\tAAGAGTGACTGGTACCACCTGTGTACTACAATGGTTATTTGATACAACTAAATGTAAGCGGTACCACCATGTATTACAATGTGAAATTAGTATCAATAAGTGTGGCTGGTACCTTTATATATTACAGGTGCTGTTATGTTTGACAGGAATACTGATGTGAGATAGTTACTTCCATACTATGTGTAACCTACGGTCCGGCACGTTGAATGGTGGGGTG\r\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "#to make sure we don't get description name issues \n", "#reads the tab-converted file cpgR_1 and writes the renamed copy to cpgR_2\n", "!sed 's/Mmercenaria_Contig/999999/g' /Volumes/web/cnidarian/cpgR_1 > /Volumes/web/cnidarian/cpgR_2" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -1 /Volumes/web/cnidarian/cpgR_2" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "999999_1\t\tAAGAGTGACTGGTACCACCTGTGTACTACAATGGTTATTTGATACAACTAAATGTAAGCGGTACCACCATGTATTACAATGTGAAATTAGTATCAATAAGTGTGGCTGGTACCTTTATATATTACAGGTGCTGTTATGTTTGACAGGAATACTGATGTGAGATAGTTACTTCCATACTATGTGTAACCTACGGTCCGGCACGTTGAATGGTGGGGTG\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!fgrep --color \"CG\" /Volumes/web/cnidarian/cpgR_2 " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ 
"999999_1\t\tAAGAGTGACTGGTACCACCTGTGTACTACAATGGTTATTTGATACAACTAAATGTAAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTACCACCATGTATTACAATGTGAAATTAGTATCAATAAGTGTGGCTGGTACCTTTATATATTACAGGTGCTGTTATGTTTGACAGGAATACTGATGTGAGATAGTTACTTCCATACTATGTGTAACCTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGAATGGTGGGGTG\r\n", "999999_2\t\tACAGCTGTCTGATTACTTATACAAAGAACA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGTTTAAAGCAGAAATGATTGATACTCTGTACAACTATGCCAAGTTTCAGTATGAATGTGGTAATTATTCTGCAGCAGCTGAATATCTCTACTTTGTTAGAATCCTGCTACCACCAAATGACAGAAATTACTTGAATGCATTATGGGGGAAGTTAGCTTCAGAGATTCTCATGCAAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACCAGTG\r\n", "999999_3\t\tTGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGACTCTCAAGTTCATTGCAAGAAAGTTTACTGATGCAAAAATGTAATTTATCTCAGTGAAGGTCTATAGGAGTATCCCAGCTTCTTTTGAGGAGTCAACAATTTTCATAGCTGTAGTTAGATGCCAGTCTTCTGTAGAAACTACCCAGGATTCCATTATTTCTTCTGATTGATCAGTGGTTGCCTAGCAATGAAGTGTTTCACAAAAAGCT\r\n", "999999_4\t\tTATTTTGAGCATAACTTATAACC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTCAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTCAAGGTATTGACTTCTGACTGGGAATATAAGTAGGTGGCAATGAAACCATGTGCAGAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTAGGTCAAATGTTAATGTAGTCAGATCTAACTGTCATATTTCATGGTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTACT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACCTCCTTTAATCTAAAACTTTTGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATATTGCAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTTTG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAGATCTTGTTTGATTATAATTTGACTTTGTTATGGCTTTCACTAGTTT\r\n", "999999_5\t\tTAAAAGAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCATACACCCATCAGTTTTGAAACTATTTTAGTTAATTTCATTATACAATTCAGAGTAGGTGTCAAAATTTCATGATAAACTGTCAGAACTGGTAGAAGTCTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACCATTTTACATTTATTTCCCTAACAGATTGTTGTTTATCTCTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTAAATTAAATAGCAACATTTTAAGAACATCTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACAATCA\r\n", 
"999999_6\t\tTTAAAAATTATCATAAGGGTTTAGCAGTACCTTTTGTAATTTATGCTGATTTTGAATCAATAACTAAAAAAGTTTTAACTGCTTTACCATCAACTGAATCTTCATTTACTGAACCATATCAAAATCATATAGATTGTGGTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCTATAAAGTTGTTTGTTGTTATGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATAAATATACTAAACCAACCCAGATTTATAGAGGTCCTAATGCAGTTTATAAGTTTATTGAAAAAATGCTTGAAGAAGAG\r\n", "999999_7\t\tGTGCCACACCAATCTCCAAGGTTATAGTGACCAGTCTCTAAATGCAGGCTTGTGATTGGTCAATGCCAAAATAGCATTCAAAAGCAACCAGTCATATGCTTGAGATTTTAGTCTAGTCTCCAAAGCAGAGGAAGTGCTGTTTGAGTTATATATATGTGTGTACTTTGCAACTTTATA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGTATAATAATGAACAAATATTG\r\n", "999999_8\t\tGGTGATCATTCCATAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTACTAACCAATGGGTGTGCTTTTTTAATATGAATGTTTAAATCTGTTTATGGTCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTATCTGTTTTTGGATGCATCTGTCTGTTTCTGGACATGTCTGTGTTTTGAAATGCATGTATACTCTTATATTTTATGTTTTCATACTTACACTTGTCTTTCATA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTACTAATACA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATGAGATTTGCATACTGATTT\r\n", "999999_9\t\tCAGAGAAAAGCTGACATGTATTATGCTTATCATTCCATCCAGAGATACACTGATGAACCATTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCACATCTGCCAGAGGCATTGTTCAACATTTCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTATTTGTTACATCAGATGCATGATGACATTCCACATGGTATATCTAAAGCTGGTACACTGTATGCATTAGCAAAGCAGAGCAAGAATCTTGGAGCATACCATGAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTCCATA\r\n", "999999_10\t\tTCCACAATTAGCCCACTTAAAGAATCACT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAAAGAAACAGCAATTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGACAAGATTAAAGCAATTAAAACCACACAAGTCAAGTTGGAAACTCTGGCAATATATCCCACTTCTAGGTGTGTCTTCAAAAACAGAGTTCAAACACTTACAAATAGAAGCATCACAGTCTGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTACAGAAACTGAACTCCAAAATCTTGTATA\r\n", 
"999999_11\t\tTGGAAAAGCTCCAACCATTTCTCAGGATAAAGTACTGAAGTTCTCCAACTACTTTTTGAGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAAACATGTCCATCAACTCAAGTCAGCATATGCTCTTCTGTCAGCTGTCAACACACTCACAGCTAATAAGTTCCATGTACCAGTTGCTGTTACTCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCACCTCAGTGTCTGTGAGCTCATCTAAACCAGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTACAGGTGAGAGTAACACCAGGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTG\r\n", "999999_12\t\tCAGAGACTGTTTGCTCAAGACTATACCCAGGAACAGTGTTCATTTTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCCAGAGATTCAAGTTTATGTTTTCACAACAGCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATTGCAGAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAACAAAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAATTTATTTCATTTAAAAGTAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATTTCAGTAAACAAGATTGAAGGTTAAAACACATTTCATACTGTAATGACTTAAAATTATGGTTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGCT\r\n", "999999_13\t\t\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCATCCTCTTTTCTTTCCTTGGATTTGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTGCTGTATGTCTGGTCAGAGCTGCACTGGTTATCTATGCTATATTGATTGGCCTATGTTTAGCCCTGGAACTTGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTATTCTGTTTTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGCTGAGATCTGAGATGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTTAGGCTGGGTATGAAAGAATCTAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGAAGGCTAATGAAGGGAAAGATGATTACCTTAAAGCCACACAGTATATGTTTAAAACTTTTGAATGCTGTCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGATAATGAACACCTGCC\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\u001b[01;31m\u001b[KCG\u001b[m\u001b[KT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAACC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACTCTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTG\r\n", "999999_14\t\tCCATTCTTGTCTTAGAGCCAAAGTTGAAAAGAAATGGTTGAACTGGTCTTGTTGAATTTGCTAAATC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAATGTTATTGCATA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATGTAAAAATCATGCTAATCTTTTGTTATTTACAACTCAATTGGGACTTGATGAATTTATTTGTCTCTGGGATTTGATTGTATAATACTGCATTGAAATTTTGTCATAATCACTA\r\n", 
"999999_15\t\tAGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGGCCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAATAAGAAATCACTAACAGCTGAAGAACTTGATGCACAATTAGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTTACAATGCTAAGATGGACACAGACTGATGATAACAGACATTACATGTGATACAGACATTTCAAAGGGAATTGATATCAGAGGAACAAAGTGAAAAATTGACAGAAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATTGAATGTGTTCTAAGTTGATCACAGGACAAGTGCAACAGTTAGGGAAATACTTCACATAAGGACTAGAATGTTGCCAAGAAAACATTGGGCATAATAACATGTAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCCTCTTGGGGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCCCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGGA\r\n", "999999_16\t\tCCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGAAAGGTCTCAAATAATACCAGTGTGATAATTACATTCCACATATTAAATTAATCAAAGTGTATTTATAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATATGCTGAACTGGCATATTATCCCAGCTCATTAGTAGAATCTTATACTATAATAATTATGTTGTTGTTGTTTCTGTTACTATTGTTGTTATCTTATATGCAGCAAAGATCATTTGAAAGCCATATTTCAATGGAAATACC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCTTAA\r\n", "999999_17\t\tCAACA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATCATATCCTTTGCTTAAGCACCATATTGACCCATGCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGATCCAAGGGAATTAAGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGACAACCAATCAGACAAGCAATCCTATAAGACAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATCATATCCAGATTCTCCTAAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCAAACTGACAGTAAGGAGCAGATATCAGAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGGAGATGTGGACATGATGAAACAAATGAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGTTAGATTTGAAGAGAG\r\n", "999999_18\t\tTGAAAGTGTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGATGAAATACAGAAGAAGGATGCCCTTATACAACAGTTGTCAAGTATGGAGTCAGTTACCAAGGTTCCTGTAAGAAGTCAAGCAGACTCTATT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTTAGAGCATATTTACCTGGGTGATAGACAGAGCTTGGATGCAGCC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAGTGAACTTGCTGCTCTACTTCACTCTTGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCAT\r\n", 
"999999_19\t\tGAAGTGATAGACAATCTAATACAGTGAACATTTTTAATATTTAAAAAATACTATTGATACATCCTAGATATATTAGCTTTTGGTAGAGAATTGTTTTAAGGTTCATACAAAATTTCTATTAGAACTAACTGGAAGAAAGGTCAATGAAATATTTATGTGCATCTCTTACTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTACATAGTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGTTAAGACTATTTGAACAAAAATACAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KT\r\n", "999999_20\t\tATATGGAGCAACAGTATAATCCAGACACAGCTAGAATTTATCACTTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATATATG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGAATACAGAGCTGTTTGTGTACTGTGGGCAGTGTTGACAATTATTTGGGCAACTTTTAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGTTGTCTTCATTCAGCCTCAGTGGATTGGTGATACTCCTGAAAGCATTGGCTATGGACATCATGGTGTTTACAAATACTGTTACC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAATAATATAGAATT\r\n", "999999_21\t\tGACTCAGATTGTAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTTCAGATAGGAAGATTGGTTATTGGGAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATGGCTCAATAATTAGAAGTATTGAAGGTTCCATGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGATCTATCAATGCAATGGATATTTCTCCAGATGGACAATACATTGTTACAGGTGGTGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACAAATTGTTAAAGGTATGGTCATACAAGGAAGGAGAGGTGATAGCAGTAGGAAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGACACAGTGGAAAAATCTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATCAGGATCTG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCACA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCT\r\n", "999999_23\t\tGAGGAAATTCCACACAGCTACTCTGCTATACAATGAAGTAAACACAGTGAAATGTCCACCTTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCAGAATCAATCACTGAAGGAGATATAAGATGGGAGAAAGCTATTGGTGACT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTACAAGTGGATGAGGTTATTGCAGAGGTGGAGACTGACAAGACATCAATTCAGATCCCCAGTCCTGTTGCTGGTATTATAGAATCACTTCTGGTTAATGATGGTGACAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGTC\r\n", 
"999999_24\t\tAGGTTATGTGTAGAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCTTATGGATACAAGGTCTCCAGCTTTAGACACTGTCAAAATGCAAGTGTACTTTGACATGAACTATACAACAAGATCTGATTTCTTGGATGAACATAGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGTGTTGGAATCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTACAGCCAGTACTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGAGATCACAGACAGTAGAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAACA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGAAGAATTAGAGAGCTTGTAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAAGATTGTATCTG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTGTTATT\r\n", "999999_25\t\tGGGGATCCATACATAGACAAAATTTTAGTTCATACATTTCAAACATATTAACTAACAGTGACATGTTATGATATTTTTAACTGACACCATATGAGGGAATATCTGCTTTTCATCAAAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATATTTTAGAATTCTTTCAGTTGTTTTATAATACTGTAGATATATAGCAAAAAAGTTATTTTCTAGCAATGTTTTATAGCAGATAGATGAAATGCAGCTCTTTAATCTT\r\n", "999999_26\t\tGAAGTGGAGGAGGAAGTGTTGCCAAAGAGAAGGGTTGGAAAAGGACTGACACCTGCAGATTTTGCTGATTGTAATGAAAAGATCCTGAGAGTTTTGAATGGAGAACCATATTCCAGTATAACAGAAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGTCCAGACAGTATACAACATGTCACTGATAAAATGTCATCTCTG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATTTATATCAAAAGATAGTGGACACCC\r\n", "999999_28\t\tAGTGGAGAAAATATACTGGAGCAATTTTAAATATATACTGTGATATATCTTGTCTTACATTAAGATGTAAGTCTTCTAAGGTCATTTAAGGATTTGTAATATTCACTCTTAGGCTTTTCAATCAAAAGTGCATGTTATGGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCTGCACAAATAAGCACTAAACTATGCAGACTTGGGATTATCAGACAATAGAAAATATAATATATATA\r\n", "999999_29\t\tAAAGAAATTGTAACTTTTAAGTGTTAACAACCTTGTCTTTGAAAGTAAGTTCTAACTTTAGTATACAAGATATTAACTAACCATTTTCATTGAAGAAATTACT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTATTGAAATGAAGTGTACCAACAGTTTGTGCCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATAACACTGTAGAACAGACAATAATTTTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTATAATGCTTTGATAGCAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTGTTTCTAGTGAAGTTAAGTAGGTAAAGACTGTGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\r\n", 
"999999_30\t\tTAAAAGACAACTTATGAACAAGTGTAAATGGTAATACTTTCCAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTTTACTAAAATGTAATCAAAGATAATGCACAAGTTATTCATTGCTTATTAAATGGTATTGGCATATCATAAATGATAATGTACTACTAGCCTAAGGGCATAGCCAATAAGGTGATGAATAAAGCCTGATGAATTTTCAGTAAGTCTTGTTCAAAAACTAATGACTTGTAAGACATTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTCTTTAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCCTATATCTTTGTGTAGTTTTAAGTTCTATGAACAGTTTTGTTTCATCTATTTTCAGGTTTTTAATAATAGT\r\n", "999999_31\t\tATAGTAAATGTATTATTAGTATGCAATGGATAGAAAGTTTTATTAGTTGAAGTTCTCCTGATCATGTTTGATAACATTTTGGTCTCTGCCTTCTGGATTAATCTAGTTAAGTTTTGACCATTGTTCCATGTGCCCATTCTTCTGAAACATTTCTGCTTGATGTATTCTACACAAAATTACATTGTGATAAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAGGTCTGCAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAATTCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATCTTGTGTCACTTAAATTTCCTTT\r\n", "999999_32\t\tGTGATGCCTATACACTATGGTTCC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTGGACTTCTAGGTTTCCATCACTTTTACTTGAAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACCAGGATTTGGTGTTCTTTACTTGTTTACATTGGGACTTTTTGGCATTGGTTGGCTGGTGGATTTGTTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCCCTTGTCAAGGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAGACCCTAATGTGTTAAAGGATAAGAGTGTTGGAGCAGCCTATGCTCTTGGAATTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCACT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAATCCTTGGCTGTCATCATTTTTACCTGAACAGACCAG\r\n", 
"999999_33\t\tGGTTCTATCTACAATGCCTGGAAACAAGCAAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGATCAGAGTGATTGACATCAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATAACTCTCCCAAATGGTCCATACCCACTTACCCACAACAAGACAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAGTCAAGAACAAGTACTTTGGTCTGTACCAAGACAATATTGCAGCTGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGATGATCCTTACCATCTCTGCTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACATAGACTCTGGAAATCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCTTGGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAC\r\n", "999999_34\t\tCAGACAAATTTGAATAACATTGAAAAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTGACACACTTGATCCTAGAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTTTATAGACCAGCAAATGAAGGTAAAGATGCATTAAATGACAGAGACAATAGAGATGCATACCTTACACCAAGCAGGTTAGATAAACACAACAGGACTTTTATTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGATGCTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATGTGTCTATAAGTAAAGCAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGTGAAGACAGTTTGTCTACAGTCACAAAGCACACTATAATATCAGAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTCTCTAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGA\r\n", "999999_35\t\tCAATATCTCATGGAAATGAGGAGCAGTTATTGCAAAGCAGAAAAAGATAATGACATG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGCAGACATGATTTCAAATTCTTTTAAATGGAGAACAATATTCATTTCATGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTATTTTCAGATGAATGTATCTTCCCTGAATATTTAAAGAACTCTATTTTAAATGTAAATTGATACCCCTGAGACAGGATTCCACTCTGTGAATTTGTAATTTGAAGGGTCATAATGCCTTTAATTAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCCT\r\n", 
"999999_36\t\tAATCATGTCCAGTGGGAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTCTAGTCAGAGACAGTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGATTCCAAATATCTTTACACCATCAGTGTAAAAACTAAAAGAGGACCTACTAGTATA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAATATGCTATGAGCATGGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATTTTCACTAGATG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAAAATCTAAGAGTCAAATGCCAAAGTTTTCTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTATTAGAACTGACAGATTATTACATAAGAAAAAGCAAGGGAAAGAAATCAGAACAATGCAGATTTCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATAAAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGTAAAAAAGATTTGCCAATAGTGATGTCAAAACCAAAACTGAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAGTAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTTAGACATCTAGCCAGGACACTTATAAAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATCCCTGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACAAGTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGAATATCTTCTCTTGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAGATCTACCTCTACCAAAACCTTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAGTTATTTAAAGGACTATCCATTTCTATACTGAGCCAGATCTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGTATGTGAATGGATTAAGTTTGATGTACA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KG\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\r\n", 
"999999_37\t\tAATACATGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAAAAACTGTACCTATTGTTAAAGGGGGAGAACAAACAAAAATGGAGGAAATGGAATTCTATGCTATAGAAACTTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTAGTACTGGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGGTGTTGTTCATGATGATATGGAAACTTCTCACTACATGAAGAATTTTGAAGTTGGACATGTACCTTTAAGGTTACAGAGAGCTAAAACCCTGCTGAACACAATCAATCAGAATTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTAGCATTCTGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTGGTTGGAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTAGGAGAGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAAATACCTCA\r\n", "999999_38\t\tAATGGAATATTGTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTTGGGTGCTATATAGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAGTTGTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTGAGAAGAAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGAAGACTGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCTGAATGTTCAATGTAATTTGACAGTGGGATTCCAAGATCATAATGTATAAACTGATCACACTTCATACACAAGTGATATTTCATTGTCATTGTTTGACTTATCAGCTG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACAGCAGTATTGTTTGAGCAATTTTTG\r\n", "999999_39\t\tTCATATTGTTTGTTGCTGATATTTGTCAAACTTAACACTGACAAAATACTTGTCATGATACACAATTGACTTGATGTTGTAAATTTAATGCTTAGTATCAGTTCTTATCCAATCTTTAATGGAAACATTCATCTTTTTTGGC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCAGTGACTTAATGCAGTATATGATTTTTATGGACTTTTTGTTTTAATTTTACATG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCA\r\n", "999999_40\t\tTGGAGTTTAACTGCTTGACAAGTGAAAATTTGACAATATTTGGTAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGACAACAGTTAACATTTTCCC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATGGTTATTACATGTTGCATCATTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATGGTGGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCTTGAGGTTGACACAGAGGG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGCTGACATACTTCCCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCCAATGAAAAATATGGAGCAGTTGCTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAG\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGAAATACTTAAGATTTGGAACAAGCAAA\r\n", 
"999999_41\t\t\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGAATAATTGAGAAGCTGAGACTTGTTTACATGTTCTAAACTGTTGTTTTGAAACAGGAGATCAAGAGGATC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAATACTAGACTTGTTTAACATACCTTGATATAATGAGATGAAACAAAAAGGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTGGGTGTTTACCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTATAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTCCAACAGGAAGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KACTGT\u001b[01;31m\u001b[KCG\u001b[m\u001b[K\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAACAACATTCCAGTGTAGTG\r\n", "999999_42\t\tTGCATCTGATGAATTTGACTGATATGTTTTTTTTCTGTAAACTCT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCTACAGCA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTGTAAATCTCTATTGAATA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGTATTTCACAAATCAGGTTTTAGATTGAGCTACAAGGGA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTCATACCAA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KATAGCCATTGTTATTTGTAAATATTACACATTCTTCATCCCATGTGTTTGAATTTGGTTGAAATTTTT\r\n", "999999_43\t\tTTGGCATGATAATTGGCAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTAAAACAGCTTGATTTTTC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KCATTAACACTAC\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTATCTCAAAAGCATAAATGTGTTCAAAAAGTCTT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGGAAGGATTAACTTTAAAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KGAAAAATATATTTTAGATACATTCTCATAGAAGTTTAACTGTCATTGAAAATA\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTGAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KAATACTGTACCCATTTAATGGTGATTTGGAAT\u001b[01;31m\u001b[KCG\u001b[m\u001b[KTTTAGATTCATGGTTTTGTTGGAACATTTAAATTGCTCA\r\n" ] } ], "prompt_number": 52 }, { "cell_type": "code", "collapsed": false, "input": [ "#add column with length of sequence\n", "!perl -e '$col = 2;' -e 'while (<>) { s/\\r?\\n//; @F = split /\\t/, $_; $len = length($F[$col]); print \"$_\\t$len\\n\" } warn \"\\nAdded column with length of column $col for $. 
lines.\\n\\n\";' \\\n", "/Volumes/web/cnidarian/cpgR_2 > /Volumes/web/cnidarian/cpgR_2b \n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "Added column with length of column 2 for 43 lines.\r\n", "\r\n" ] } ], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": [ "#this counts CGs\n", "#counts only inside the sequence column (column 3 of the tab-separated file),\n", "#so letters in the ID/description columns can never inflate the count\n", "!awk -F'\\t' '{print gsub(/CG/, \"&\", $3)}' /Volumes/web/cnidarian/cpgR_2 > /Volumes/web/cnidarian/cpgR_CG" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 50 }, { "cell_type": "code", "collapsed": false, "input": [ "#this counts Cs (sequence column only)\n", "!awk -F'\\t' '{print gsub(/C/, \"&\", $3)}' /Volumes/web/cnidarian/cpgR_2 > /Volumes/web/cnidarian/cpgR_C" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 44 }, { "cell_type": "code", "collapsed": false, "input": [ "#this counts Gs (sequence column only)\n", "!awk -F'\\t' '{print gsub(/G/, \"&\", $3)}' /Volumes/web/cnidarian/cpgR_2 > /Volumes/web/cnidarian/cpgR_G" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 45 }, { "cell_type": "code", "collapsed": false, "input": [ "!paste /Volumes/web/cnidarian/cpgR_2b \\\n", "/Volumes/web/cnidarian/cpgR_CG \\\n", "/Volumes/web/cnidarian/cpgR_C \\\n", "/Volumes/web/cnidarian/cpgR_G \\\n", "> /Volumes/web/cnidarian/cpgR_comb\n", "!head -2 /Volumes/web/cnidarian/cpgR_comb" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "999999_1\t\tAAGAGTGACTGGTACCACCTGTGTACTACAATGGTTATTTGATACAACTAAATGTAAGCGGTACCACCATGTATTACAATGTGAAATTAGTATCAATAAGTGTGGCTGGTACCTTTATATATTACAGGTGCTGTTATGTTTGACAGGAATACTGATGTGAGATAGTTACTTCCATACTATGTGTAACCTACGGTCCGGCACGTTGAATGGTGGGGTG\t217\t4\t34\t52\r\n", "999999_2\t\tACAGCTGTCTGATTACTTATACAAAGAACACGGGTTTAAAGCAGAAATGATTGATACTCTGTACAACTATGCCAAGTTTCAGTATGAATGTGGTAATTATTCTGCAGCAGCTGAATATCTCTACTTTGTTAGAATCCTGCTACCACCAAATGACAGAAATTACTTGAATGCATTATGGGGGAAGTTAGCTTCAGAGATTCTCATGCAAACGACCAGTG\t218\t2\t40\t43\r\n" ] } ], "prompt_number": 79 }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"\"cpgR_comb_1A0585AE.png\"/" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#CpG observed/expected = (CG / (C * G)) * (L^2 / (L - 1))\n", "#whitespace-split columns of cpgR_comb: 1 = ID, 2 = sequence, 3 = L (length), 4 = CG count, 5 = C count, 6 = G count\n", "#L^2 is written as a plain product because the ** operator is not POSIX awk;\n", "#records with no C, no G, or length 1 print NA instead of aborting awk with a division by zero\n", "!awk '{print $1, (($5*$6 > 0 && $3 > 1) ? (($4)/($5*$6))*(($3*$3)/($3-1)) : \"NA\")}' /Volumes/web/cnidarian/cpgR_comb" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "999999_1 0.493223\r\n", "999999_2 0.254657\r\n", "999999_3 0.129997\r\n", "999999_4 0.95738\r\n", "999999_5 1.15181\r\n", "999999_6 0.40192\r\n", "999999_7 0.145004\r\n", "999999_8 0.71746\r\n", "999999_9 0.346284\r\n", "999999_10 0.442564\r\n", "999999_11 0.375369\r\n", "999999_12 0.971452\r\n", "999999_13 0.936455\r\n", "999999_14 0.425407\r\n", "999999_15 0.590405\r\n", "999999_16 0.562511\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "999999_17 0.827071\r\n", "999999_18 0.492659\r\n", "999999_19 0.726578\r\n", "999999_20 0.504515\r\n", "999999_21 0.921443\r\n", "999999_22 0\r\n", "999999_23 0.281106\r\n", "999999_24 0.969249\r\n", "999999_25 0.225811\r\n", "999999_26 0.255006\r\n", "999999_27 0\r\n", "999999_28 0.224344\r\n", "999999_29 0.924486\r\n", "999999_30 0.446203\r\n", "999999_31 0.706194\r\n", "999999_32 0.57416\r\n", "999999_33 0.6901\r\n", "999999_34 0.836551\r\n", "999999_35 0.555565\r\n", "999999_36 1.23349\r\n", "999999_37 0.845465\r\n", "999999_38 0.824237\r\n", "999999_39 0.455567\r\n", "999999_40 0.861985\r\n", "999999_41 1.20699\r\n", "999999_42 0.927479\r\n", "999999_43 1.29201\r\n" ] } ], "prompt_number": 69 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }