{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Intersect bed on gene-centric features\n",
    "\n",
    "For each of four annotation tracks this notebook counts the `intersectbed` overlaps of (a) the per-oyster `2M/4M/6M_sig.bedGraph` tracks and (b) the array probe locations, then compares the oyster and probe counts with a chi-square contingency test.\n",
    "\n",
    "Outputs shown under the `!date` and `!intersectbed` cells are the ones recorded in the original run. The chi-square cells were refactored afterwards and must be re-run to populate their outputs; the statistics printed by the original run are kept in the table at the end of the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fri Mar 13 08:02:12 PDT 2015\r\n"
     ]
    }
   ],
   "source": [
    "!date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit imports instead of `%pylab inline`: nothing is plotted here, and the\n",
    "# star-import hid where `array` came from.\n",
    "from __future__ import print_function\n",
    "\n",
    "import numpy as np\n",
    "import scipy.stats as stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input locations. Values are the ones used in the original run; edit here to relocate the data.\n",
    "SIG_BEDGRAPH_DIR = './data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS'\n",
    "PROBE_GFF = '/Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff'\n",
    "TRACK_DIR = '/Users/sr320/data-genomic/tentacle'\n",
    "DEG_GTF = TRACK_DIR + '/Cuffdiff_geneexp.sig.gtf'\n",
    "REBUILT_GTF = TRACK_DIR + '/rebuilt.gtf'\n",
    "HOUSEKEEPING_GFF = TRACK_DIR + '/Cgigas_v9_gene-housekeeping.gff'\n",
    "ENV_RESPONSE_GFF = TRACK_DIR + '/Cgigas_v9_gene-env-response.gff'\n",
    "\n",
    "# Second-column values used in every contingency table below.\n",
    "# NOTE(review): presumably feature totals for each oyster track and for the probe set;\n",
    "# they are not derived anywhere in this notebook -- confirm their origin.\n",
    "OYSTER_REFERENCE = {'2M': 10028, '4M': 10148, '6M': 11690}\n",
    "PROBE_REFERENCE = 697753"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def chi_square_report(oyster_row, probe_row):\n",
    "    \"\"\"Run a 2x2 chi-square contingency test and print the statistics.\n",
    "\n",
    "    The test is run twice with `scipy.stats.chi2_contingency`: once with\n",
    "    `correction=True` and once with `correction=False`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    oyster_row : sequence of two numbers\n",
    "        First row of the observed table, e.g. ``[880, 10028]``.\n",
    "    probe_row : sequence of two numbers\n",
    "        Second row of the observed table, e.g. ``[117460, 697753]``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        ``{'corrected': (chi2, p), 'uncorrected': (chi2, p)}``.\n",
    "    \"\"\"\n",
    "    observed = np.array([oyster_row, probe_row])\n",
    "    report = {}\n",
    "    print('CHI SQUARE')\n",
    "    for label, use_correction in (('corrected', True), ('uncorrected', False)):\n",
    "        outcome = stats.chi2_contingency(observed, correction=use_correction)\n",
    "        chi2_stat, p_value = outcome[0], outcome[1]\n",
    "        report[label] = (chi2_stat, p_value)\n",
    "        # '.3g' keeps the magnitude of very small p-values; the previous\n",
    "        # '5.3f' format printed all of them as 0.000.\n",
    "        print('The {0} chi2 value is {1:5.3f}, with p={2:.3g}'.format(label, chi2_stat, p_value))\n",
    "    return report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Feature tracks (from nb -03)\n",
    "\n",
    "**tl;dr** 4 \"new\" tracks. Screenshot: `IGV_and_Directory_Listing_of__halfshell_2015-02-hs-bedgraph__1AA51F1B.png` (the image markup appears to be broken -- TODO restore the link).\n",
    "\n",
    "```\n",
    "/Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf\n",
    "/Users/sr320/data-genomic/tentacle/rebuilt.gtf\n",
    "/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff\n",
    "/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DEGs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`-wb\tWrite the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by -f and -r.`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 880 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.2M_sig.bedGraph \\\n",
    "-b {DEG_GTF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 704 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.4M_sig.bedGraph \\\n",
    "-b {DEG_GTF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 632 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.6M_sig.bedGraph \\\n",
    "-b {DEG_GTF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "117460 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {PROBE_GFF} \\\n",
    "-b {DEG_GTF} \\\n",
    "| cut -f 11 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probe overlap count copied from the probe intersect output above.\n",
    "probe_overlap = 117460\n",
    "# Per-oyster overlap counts copied from the three bedGraph intersect outputs above.\n",
    "for oyster, overlap in [('2M', 880), ('4M', 704), ('6M', 632)]:\n",
    "    print('Oyster {0} then Probes'.format(oyster))\n",
    "    chi_square_report([overlap, OYSTER_REFERENCE[oyster]], [probe_overlap, PROBE_REFERENCE])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rebuilt (new gtf based on RNAseq data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8768 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.2M_sig.bedGraph \\\n",
    "-b {REBUILT_GTF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7694 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.4M_sig.bedGraph \\\n",
    "-b {REBUILT_GTF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6160 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.6M_sig.bedGraph \\\n",
    "-b {REBUILT_GTF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1197818 Cufflinks\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {PROBE_GFF} \\\n",
    "-b {REBUILT_GTF} \\\n",
    "| cut -f 11 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probe overlap count copied from the probe intersect output above.\n",
    "probe_overlap = 1197818\n",
    "# Per-oyster overlap counts copied from the three bedGraph intersect outputs above.\n",
    "for oyster, overlap in [('2M', 8768), ('4M', 7694), ('6M', 6160)]:\n",
    "    print('Oyster {0} then Probes'.format(oyster))\n",
    "    chi_square_report([overlap, OYSTER_REFERENCE[oyster]], [probe_overlap, PROBE_REFERENCE])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Housekeeping Genes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3210 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.2M_sig.bedGraph \\\n",
    "-b {HOUSEKEEPING_GFF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3369 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.4M_sig.bedGraph \\\n",
    "-b {HOUSEKEEPING_GFF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3819 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.6M_sig.bedGraph \\\n",
    "-b {HOUSEKEEPING_GFF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "251970 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {PROBE_GFF} \\\n",
    "-b {HOUSEKEEPING_GFF} \\\n",
    "| cut -f 11 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probe overlap count copied from the probe intersect output above.\n",
    "probe_overlap = 251970\n",
    "# Per-oyster overlap counts copied from the three bedGraph intersect outputs above.\n",
    "for oyster, overlap in [('2M', 3210), ('4M', 3369), ('6M', 3819)]:\n",
    "    print('Oyster {0} then Probes'.format(oyster))\n",
    "    chi_square_report([overlap, OYSTER_REFERENCE[oyster]], [probe_overlap, PROBE_REFERENCE])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Environmental Response Genes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2809 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.2M_sig.bedGraph \\\n",
    "-b {ENV_RESPONSE_GFF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2738 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.4M_sig.bedGraph \\\n",
    "-b {ENV_RESPONSE_GFF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3216 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {SIG_BEDGRAPH_DIR}/2014.07.02.6M_sig.bedGraph \\\n",
    "-b {ENV_RESPONSE_GFF} \\\n",
    "| cut -f 6 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "190475 GLEAN\r\n"
     ]
    }
   ],
   "source": [
    "!intersectbed \\\n",
    "-wb \\\n",
    "-a {PROBE_GFF} \\\n",
    "-b {ENV_RESPONSE_GFF} \\\n",
    "| cut -f 11 \\\n",
    "| sort | uniq -c "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probe overlap count copied from the probe intersect output above.\n",
    "probe_overlap = 190475\n",
    "# Per-oyster overlap counts copied from the three bedGraph intersect outputs above.\n",
    "for oyster, overlap in [('2M', 2809), ('4M', 2738), ('6M', 3216)]:\n",
    "    print('Oyster {0} then Probes'.format(oyster))\n",
    "    chi_square_report([overlap, OYSTER_REFERENCE[oyster]], [probe_overlap, PROBE_REFERENCE])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recorded results (original run)\n",
    "\n",
    "Statistics exactly as printed by the original run; `0.000` means the p-value rounded to zero at three decimals.\n",
    "\n",
    "| Track | Oyster | chi2 (corrected) | p | chi2 (uncorrected) | p |\n",
    "|---|---|---|---|---|---|\n",
    "| DEGs | 2M | 352.138 | 0.000 | 352.654 | 0.000 |\n",
    "| DEGs | 4M | 547.532 | 0.000 | 548.178 | 0.000 |\n",
    "| DEGs | 6M | 853.613 | 0.000 | 854.371 | 0.000 |\n",
    "| Rebuilt | 2M | 2184.818 | 0.000 | 2185.528 | 0.000 |\n",
    "| Rebuilt | 4M | 3052.863 | 0.000 | 3053.724 | 0.000 |\n",
    "| Rebuilt | 6M | 6233.645 | 0.000 | 6234.874 | 0.000 |\n",
    "| Housekeeping | 2M | 34.806 | 0.000 | 34.923 | 0.000 |\n",
    "| Housekeeping | 4M | 17.578 | 0.000 | 17.661 | 0.000 |\n",
    "| Housekeeping | 6M | 28.378 | 0.000 | 28.476 | 0.000 |\n",
    "| Env. response | 2M | 1.413 | 0.235 | 1.439 | 0.230 |\n",
    "| Env. response | 4M | 0.280 | 0.597 | 0.291 | 0.589 |\n",
    "| Env. response | 6M | 0.141 | 0.707 | 0.149 | 0.700 |\n",
    "\n",
    "**NOTE(review)**\n",
    "\n",
    "- `intersectbed -wb` writes one line per overlap, so a feature that overlaps several annotations is counted more than once (the probe vs. rebuilt.gtf count, 1197818, is larger than 697753).\n",
    "- If the second column of each table is a feature total, the rows are (overlap, total) rather than (overlap, non-overlap) counts. Confirm the table layout before interpreting the p-values.\n",
    "- **TODO** record where 10028, 10148, 11690 and 697753 come from."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}