{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Intersect bed on features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Based on notebook 04 - but separating out to see if there is difference between hypo and hyper methylated." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"half-shell___Lab_notebook_of_Steven_Roberts_1ACECD10.png\"/" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fri Apr 3 06:28:54 PDT 2015\r\n" ] } ], "source": [ "!date" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "source": [ "%pylab inline\n", "import scipy.stats as stats" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Feature (from nb -03) \n", "\n", "**tldr** 4 \"new\" tracks\n", "\"IGV_and_Directory_Listing_of__halfshell_2015-02-hs-bedgraph__1AA51F1B.png\"/\n", "```\n", "/Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf\n", "/Users/sr320/data-genomic/tentacle/rebuilt.gtf\n", "/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff\n", "/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# DEGs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`-wb\tWrite the original entry in B for each overlap. Useful for knowing what A overlaps. 
Restricted by -f and -r.`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "tldr\n", "\n", "\"Screenshot_4_3_15__7_22_AM_1ACED994.png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Separating HYPO and HYPER" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "track type=bedGraph name=\"2M_sig\" description=\"2M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\r\n", "scaffold1\t163391\t163444\t-1.19635354862016\r\n", "scaffold1\t167390\t167448\t-1.34858424227208\r\n", "scaffold1\t177036\t177092\t-1.32513261026528\r\n", "scaffold1\t180263\t180313\t-1.59644601437398\r\n", "scaffold1\t184151\t184202\t-1.36802539236446\r\n", "scaffold1\t207852\t207911\t-1.4489540693628\r\n", "scaffold1\t221645\t221697\t-1.19168816975966\r\n", "scaffold100\t20261\t20311\t-1.38705592724581\r\n", "scaffold100\t43707\t43766\t-1.94554287545546\r\n" ] } ], "source": [ "!head ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7224\r\n" ] } ], "source": [ "!fgrep -c \"-\" ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph\n", "\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scaffold1\t163391\t163444\t-1.19635354862016\r\n", "scaffold1\t167390\t167448\t-1.34858424227208\r\n", "scaffold1\t177036\t177092\t-1.32513261026528\r\n", "scaffold1\t180263\t180313\t-1.59644601437398\r\n", "scaffold1\t184151\t184202\t-1.36802539236446\r\n", "scaffold1\t207852\t207911\t-1.4489540693628\r\n", "scaffold1\t221645\t221697\t-1.19168816975966\r\n", "scaffold100\t20261\t20311\t-1.38705592724581\r\n", 
"scaffold100\t43707\t43766\t-1.94554287545546\r\n", "scaffold100\t46611\t46670\t-1.2435587162076\r\n" ] } ], "source": [ "!fgrep \"-\" ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph | head" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scaffold1\t163391\t163444\t-1.19635354862016\r\n", "scaffold1\t167390\t167448\t-1.34858424227208\r\n", "scaffold1\t177036\t177092\t-1.32513261026528\r\n", "scaffold1\t180263\t180313\t-1.59644601437398\r\n", "scaffold1\t184151\t184202\t-1.36802539236446\r\n", "scaffold1\t207852\t207911\t-1.4489540693628\r\n", "scaffold1\t221645\t221697\t-1.19168816975966\r\n", "scaffold100\t20261\t20311\t-1.38705592724581\r\n", "scaffold100\t43707\t43766\t-1.94554287545546\r\n", "scaffold100\t46611\t46670\t-1.2435587162076\r\n" ] } ], "source": [ "!fgrep \"-\" \\\n", "./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n", "> /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph\n", "!head /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scaffold1\t55723\t55780\t-1.14983078196614\r\n", "scaffold1\t165162\t165215\t-1.24601772855566\r\n", "scaffold1\t171392\t171453\t-1.22260744814979\r\n", "scaffold1\t174287\t174343\t-1.69319890151177\r\n", "scaffold1\t176273\t176334\t-1.72785163633438\r\n", "scaffold1\t183256\t183318\t-1.30551922539134\r\n", "scaffold1\t184661\t184715\t-1.4004518443988\r\n", "scaffold1\t214736\t214786\t-1.21921626270337\r\n", "scaffold1\t215096\t215156\t-1.24410534350034\r\n", "scaffold1\t218534\t218584\t-1.13230161854171\r\n" ] } ], "source": [ "!fgrep \"-\" \\\n", "./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n", "> 
/Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph\n", "!head /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "scaffold1\t54599\t54654\t-1.38187662416007\r\n", "scaffold1\t163536\t163586\t-1.15032035523765\r\n", "scaffold1\t174287\t174343\t-1.62903936976887\r\n", "scaffold1\t184271\t184330\t-1.20699853451878\r\n", "scaffold1\t184661\t184715\t-1.61107459826899\r\n", "scaffold1\t185141\t185192\t-1.19168730137504\r\n", "scaffold1\t210863\t210918\t-1.74282743323306\r\n", "scaffold1\t215839\t215890\t-1.34660189199927\r\n", "scaffold1\t224010\t224070\t-1.29699353038817\r\n", "scaffold1\t227414\t227469\t-1.17931294986337\r\n" ] } ], "source": [ "!fgrep \"-\" \\\n", "./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n", "> /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph\n", "!head /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### HYPO" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 726 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 9, "metadata": { 
"collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 426 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 372 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "117460 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 11 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 177.835, with p=0.000\n", "The uncorrected chi2 value is 178.264, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 2 then Probes\n", "obs = array([[726, 7224], [117460, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 
value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 388.828, with p=0.000\n", "The uncorrected chi2 value is 389.505, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 4 then Probes\n", "obs = array([[426, 6560], [117460, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 616.865, with p=0.000\n", "The uncorrected chi2 value is 617.661, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 6 then Probes\n", "obs = array([[372, 7645], [117460, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### HYPER" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "track type=bedGraph name=\"2M_sig\" description=\"2M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\n", "scaffold100\t250533\t250586\t1.72713841645018\n", "scaffold100\t362779\t362836\t1.24792813025432\n", "scaffold100\t437627\t437684\t1.26968497656438\n", "scaffold100\t439363\t439415\t1.8535900440036\n", "scaffold100\t458442\t458498\t1.33779652899652\n", "scaffold100\t636974\t637034\t1.47573175617257\n", "scaffold100\t637084\t637143\t1.24656795811596\n", "scaffold100\t642110\t642170\t1.32784939242625\n", "scaffold100\t676066\t676117\t2.0744756115782\n", " 2804 /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph\n" ] } ], "source": [ "!fgrep -v \"-\" \\\n", "./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n", "> /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph\n", "!head /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph\n", "!wc -l /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "track type=bedGraph name=\"4M_sig\" description=\"4M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\n", "scaffold1\t162896\t162952\t1.31051906307266\n", "scaffold1\t174020\t174073\t1.13065801555915\n", "scaffold1\t178210\t178267\t1.2199265031441\n", "scaffold1\t208737\t208792\t1.31462945133609\n", "scaffold100\t91713\t91767\t1.16773934117713\n", "scaffold100\t250282\t250335\t1.35652322667099\n", "scaffold100\t300103\t300158\t1.23146709929105\n", "scaffold100\t303374\t303434\t1.44751323196346\n", "scaffold100\t306375\t306430\t1.14267878234681\n", " 3588 /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph\n" ] } ], "source": [ "!fgrep -v \"-\" \\\n", 
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n", "> /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph\n", "!head /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph\n", "!wc -l /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "track type=bedGraph name=\"6M_sig\" description=\"6M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\n", "scaffold1\t162129\t162191\t1.85685479189849\n", "scaffold1\t172654\t172714\t1.33561271440876\n", "scaffold1\t178075\t178128\t1.42323539316231\n", "scaffold1\t178685\t178740\t1.30886296151914\n", "scaffold1\t214231\t214288\t1.23355990867606\n", "scaffold1\t219034\t219092\t1.34001786676384\n", "scaffold1\t223041\t223094\t1.32669837521425\n", "scaffold1\t230131\t230189\t1.41307400393928\n", "scaffold100\t244541\t244592\t2.500239239607\n", " 4045 /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph\n" ] } ], "source": [ "!fgrep -v \"-\" \\\n", "./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n", "> /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph\n", "!head /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph\n", "!wc -l /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 154 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false 
}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 278 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 260 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "117460 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n", "-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n", "| cut -f 11 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 201.876, with p=0.000\n", "The uncorrected chi2 value is 202.623, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 2 then Probes\n", "obs = array([[154, 2803], [117460, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, 
with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 162.143, with p=0.000\n", "The uncorrected chi2 value is 162.728, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 4 then Probes\n", "obs = array([[278, 3587], [117460, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 243.013, with p=0.000\n", "The uncorrected chi2 value is 243.693, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 6 then Probes\n", "obs = array([[260, 4044], [117460, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": 
true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Rebuilt (new gtf based on RNAseq data)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8768 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7694 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6160 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1197818 Cufflinks\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n", "-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n", "| cut -f 11 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", 
"execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 2184.818, with p=0.000\n", "The uncorrected chi2 value is 2185.528, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 2 then Probes\n", "obs = array([[8768, 10028], [1197818, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 3052.863, with p=0.000\n", "The uncorrected chi2 value is 3053.724, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 4 then Probes\n", "obs = array([[7694, 10148], [1197818, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 6233.645, with p=0.000\n", "The uncorrected chi2 value is 
6234.874, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 6 then Probes\n", "obs = array([[6160, 11690], [1197818, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Housekeeping Genes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Separating out hypo and hyper" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Screenshot_4_3_15__7_23_AM_1ACED9DD.png\"/" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2172 GLEAN\n", "1038 GLEAN\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c \n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1988 GLEAN\n", "1381 GLEAN\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c 
\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2367 GLEAN\n", "1452 GLEAN\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c \n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "251970 GLEAN\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n", "| cut -f 11 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 34.806, with p=0.000\n", "The uncorrected chi2 value is 34.923, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 2 then Probes\n", "obs = array([[3210, 10028], [251970, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI 
SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 17.578, with p=0.000\n", "The uncorrected chi2 value is 17.661, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 4 then Probes\n", "obs = array([[3369, 10148], [251970, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 28.378, with p=0.000\n", "The uncorrected chi2 value is 28.476, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster 6 then Probes\n", "obs = array([[3819, 11690], [251970, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Environmental Response Genes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "separating Hypo and Hyper" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "tldr\n", "\"Screenshot_4_3_15__7_28_AM_1ACEDB27.png\"/" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2063 GLEAN\n", " 746 GLEAN\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c \n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1873 GLEAN\n", " 865 GLEAN\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c \n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", 
"execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2175 GLEAN\n", "1041 GLEAN\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c \n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "190475 GLEAN\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n", "-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n", "| cut -f 11 \\\n", "| sort | uniq -c " ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 1.413, with p=0.235\n", "The uncorrected chi2 value is 1.439, with p=0.230\n" ] } ], "source": [ "# Enter the data comparing Oyster 2 then Probes\n", "obs = array([[2809, 10028], [190475, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", 
"execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 0.280, with p=0.597\n", "The uncorrected chi2 value is 0.291, with p=0.589\n" ] } ], "source": [ "# Enter the data comparing Oyster 4 then Probes\n", "obs = array([[2738, 10148], [190475, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 0.141, with p=0.707\n", "The uncorrected chi2 value is 0.149, with p=0.700\n" ] } ], "source": [ "# Enter the data comparing Oyster 6 then Probes\n", "obs = array([[3216, 11690], [190475, 697753]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# TE-Blast" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Screenshot_4_3_15__7_42_AM_1ACEDE69.png\"/" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 368 WUBlastX\n", " 15 WUBlastX\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 251 WUBlastX\n", " 3 WUBlastX\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 141 WUBlastX\n", " 27 WUBlastX\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n", "-b 
/Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n", "| cut -f 6 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Promoter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Screenshot_4_3_15__7_48_AM_1ACEDFD9.png\"/" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 720 flankbed\tpromoter\n", " 256 flankbed\tpromoter\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 684 flankbed\tpromoter\n", " 308 flankbed\tpromoter\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph 
\\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Chi2 test to compare hypo v hyper?" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 70.128, with p=0.000\n", "The uncorrected chi2 value is 70.329, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster ALL hypo versus hyper -HOUSEKEEPING\n", "obs = array([[6527, 21429], [3871, 10434]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 2.106, with p=0.147\n", "The uncorrected chi2 value is 2.174, with p=0.140\n" ] } ], "source": [ "# Enter the data comparing Oyster ALL hypo versus hyper -DEGS\n", "obs = array([[1524, 21429], [692, 10434]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, 
correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CHI SQUARE\n", "The corrected chi2 value is 264.516, with p=0.000\n", "The uncorrected chi2 value is 265.761, with p=0.000\n" ] } ], "source": [ "# Enter the data comparing Oyster ALL hypo versus hyper -TE blast\n", "obs = array([[760, 21429], [45, 10434]])\n", "\n", "# Calculate the chi-square test\n", "chi2_corrected = stats.chi2_contingency(obs, correction=True)\n", "chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n", "\n", "# Print the result\n", "print('CHI SQUARE')\n", "print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n", "print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Screenshot_4_3_15__8_07_AM_1ACEE442.png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Hypo v Hyper on Ensembl gff - gives data on gene body, and repeats" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hypo\n", "1065 GigaDB\tCDS\n", "1065 GigaDB\texon\n", "6094 GigaDB\tgene\n", "6094 GigaDB\ttranscript\n", " 918 dust\trepeat_region\n", " 819 trf\trepeat_region\n", "hyper\n", " 308 GigaDB\tCDS\n", " 308 GigaDB\texon\n", "2374 GigaDB\tgene\n", "2374 GigaDB\ttranscript\n", " 322 dust\trepeat_region\n", " 156 trf\trepeat_region\n" ] } ], "source": [ "!echo \"hypo\"\n", "!intersectbed \\\n", "-wb \\\n", "-a 
/Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!echo \"hyper\"\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hypo\n", " 1 EnsemblGenomes\texon\n", " 1 EnsemblGenomes\tpseudogenic_tRNA\n", " 1 EnsemblGenomes\ttranscript\n", " 715 GigaDB\tCDS\n", " 715 GigaDB\texon\n", "5389 GigaDB\tgene\n", "5389 GigaDB\ttranscript\n", " 907 dust\trepeat_region\n", " 653 trf\trepeat_region\n", "hyper\n", " 1 EnsemblGenomes\texon\n", " 1 EnsemblGenomes\ttRNA_gene\n", " 1 EnsemblGenomes\ttranscript\n", " 462 GigaDB\tCDS\n", " 462 GigaDB\texon\n", "3102 GigaDB\tgene\n", "3102 GigaDB\ttranscript\n", " 413 dust\trepeat_region\n", " 220 trf\trepeat_region\n" ] } ], "source": [ "!echo \"hypo\"\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!echo \"hyper\"\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "hypo\n", " 1 EnsemblGenomes\texon\n", " 1 EnsemblGenomes\tsnRNA\n", " 1 EnsemblGenomes\tsnRNA_gene\n", " 568 GigaDB\tCDS\n", " 568 GigaDB\texon\n", "6295 GigaDB\tgene\n", "6295 GigaDB\ttranscript\n", "1052 dust\trepeat_region\n", " 550 trf\trepeat_region\n", "hyper\n", " 379 GigaDB\tCDS\n", " 380 GigaDB\texon\n", "3394 GigaDB\tgene\n", "3394 GigaDB\ttranscript\n", " 539 dust\trepeat_region\n", " 314 trf\trepeat_region\n" ] } ], "source": [ "!echo \"hypo\"\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'\n", "!echo \"hyper\"\n", "!intersectbed \\\n", "-wb \\\n", "-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 6,7 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5 EnsemblGenomes\tRNA\r\n", " 444 EnsemblGenomes\texon\r\n", " 6 EnsemblGenomes\tgene\r\n", " 2 EnsemblGenomes\tmiRNA\r\n", " 2 EnsemblGenomes\tmiRNA_gene\r\n", " 259 EnsemblGenomes\tpseudogenic_tRNA\r\n", " 14 EnsemblGenomes\tsnRNA\r\n", " 14 EnsemblGenomes\tsnRNA_gene\r\n", " 6 EnsemblGenomes\tsnoRNA\r\n", " 6 EnsemblGenomes\tsnoRNA_gene\r\n", " 152 EnsemblGenomes\ttRNA_gene\r\n", " 422 EnsemblGenomes\ttranscript\r\n", "157279 GigaDB\tCDS\r\n", "157307 GigaDB\texon\r\n", "600445 GigaDB\tgene\r\n", "600445 GigaDB\ttranscript\r\n", "56210 dust\trepeat_region\r\n", "42390 trf\trepeat_region\r\n" ] } ], "source": [ "!intersectbed \\\n", "-wb \\\n", "-a 
/Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n", "-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n", "| cut -f 11,12 \\\n", "| sort | uniq -c | sed '/#/d'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }