{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Intersect bed on features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Based on notebook 04, but with the probes separated to see whether there is a difference between hypo- and hyper-methylated probes."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fri Apr 3 06:28:54 PDT 2015\r\n"
]
}
],
"source": [
"!date"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline\n",
"import scipy.stats as stats"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Feature tracks (from notebook 03)\n",
"\n",
"**tldr** 4 \"new\" tracks\n",
"\n",
"```\n",
"/Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf\n",
"/Users/sr320/data-genomic/tentacle/rebuilt.gtf\n",
"/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff\n",
"/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# DEGs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`-wb\tWrite the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by -f and -r.`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"tldr\n",
"\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Separating HYPO and HYPER"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"track type=bedGraph name=\"2M_sig\" description=\"2M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\r\n",
"scaffold1\t163391\t163444\t-1.19635354862016\r\n",
"scaffold1\t167390\t167448\t-1.34858424227208\r\n",
"scaffold1\t177036\t177092\t-1.32513261026528\r\n",
"scaffold1\t180263\t180313\t-1.59644601437398\r\n",
"scaffold1\t184151\t184202\t-1.36802539236446\r\n",
"scaffold1\t207852\t207911\t-1.4489540693628\r\n",
"scaffold1\t221645\t221697\t-1.19168816975966\r\n",
"scaffold100\t20261\t20311\t-1.38705592724581\r\n",
"scaffold100\t43707\t43766\t-1.94554287545546\r\n"
]
}
],
"source": [
"!head ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7224\r\n"
]
}
],
"source": [
"!fgrep -c \"-\" ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scaffold1\t163391\t163444\t-1.19635354862016\r\n",
"scaffold1\t167390\t167448\t-1.34858424227208\r\n",
"scaffold1\t177036\t177092\t-1.32513261026528\r\n",
"scaffold1\t180263\t180313\t-1.59644601437398\r\n",
"scaffold1\t184151\t184202\t-1.36802539236446\r\n",
"scaffold1\t207852\t207911\t-1.4489540693628\r\n",
"scaffold1\t221645\t221697\t-1.19168816975966\r\n",
"scaffold100\t20261\t20311\t-1.38705592724581\r\n",
"scaffold100\t43707\t43766\t-1.94554287545546\r\n",
"scaffold100\t46611\t46670\t-1.2435587162076\r\n"
]
}
],
"source": [
"!fgrep \"-\" ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph | head"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scaffold1\t163391\t163444\t-1.19635354862016\r\n",
"scaffold1\t167390\t167448\t-1.34858424227208\r\n",
"scaffold1\t177036\t177092\t-1.32513261026528\r\n",
"scaffold1\t180263\t180313\t-1.59644601437398\r\n",
"scaffold1\t184151\t184202\t-1.36802539236446\r\n",
"scaffold1\t207852\t207911\t-1.4489540693628\r\n",
"scaffold1\t221645\t221697\t-1.19168816975966\r\n",
"scaffold100\t20261\t20311\t-1.38705592724581\r\n",
"scaffold100\t43707\t43766\t-1.94554287545546\r\n",
"scaffold100\t46611\t46670\t-1.2435587162076\r\n"
]
}
],
"source": [
"!fgrep \"-\" \\\n",
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"> /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph\n",
"!head /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scaffold1\t55723\t55780\t-1.14983078196614\r\n",
"scaffold1\t165162\t165215\t-1.24601772855566\r\n",
"scaffold1\t171392\t171453\t-1.22260744814979\r\n",
"scaffold1\t174287\t174343\t-1.69319890151177\r\n",
"scaffold1\t176273\t176334\t-1.72785163633438\r\n",
"scaffold1\t183256\t183318\t-1.30551922539134\r\n",
"scaffold1\t184661\t184715\t-1.4004518443988\r\n",
"scaffold1\t214736\t214786\t-1.21921626270337\r\n",
"scaffold1\t215096\t215156\t-1.24410534350034\r\n",
"scaffold1\t218534\t218584\t-1.13230161854171\r\n"
]
}
],
"source": [
"!fgrep \"-\" \\\n",
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"> /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph\n",
"!head /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scaffold1\t54599\t54654\t-1.38187662416007\r\n",
"scaffold1\t163536\t163586\t-1.15032035523765\r\n",
"scaffold1\t174287\t174343\t-1.62903936976887\r\n",
"scaffold1\t184271\t184330\t-1.20699853451878\r\n",
"scaffold1\t184661\t184715\t-1.61107459826899\r\n",
"scaffold1\t185141\t185192\t-1.19168730137504\r\n",
"scaffold1\t210863\t210918\t-1.74282743323306\r\n",
"scaffold1\t215839\t215890\t-1.34660189199927\r\n",
"scaffold1\t224010\t224070\t-1.29699353038817\r\n",
"scaffold1\t227414\t227469\t-1.17931294986337\r\n"
]
}
],
"source": [
"!fgrep \"-\" \\\n",
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"> /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph\n",
"!head /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### HYPO"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 726 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 426 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 372 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"117460 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 177.835, with p=0.000\n",
"The uncorrected chi2 value is 178.264, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[726, 7224], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 388.828, with p=0.000\n",
"The uncorrected chi2 value is 389.505, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[426, 6560], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 616.865, with p=0.000\n",
"The uncorrected chi2 value is 617.661, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[372, 7645], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
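"### HYPER\n",
"\n",
"Note: `fgrep -v \"-\"` keeps the `track` header line, so each `.hyper.bedGraph` file has one more line than it has probes; the chi-square tables below use the `wc -l` count minus 1."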
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"track type=bedGraph name=\"2M_sig\" description=\"2M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\n",
"scaffold100\t250533\t250586\t1.72713841645018\n",
"scaffold100\t362779\t362836\t1.24792813025432\n",
"scaffold100\t437627\t437684\t1.26968497656438\n",
"scaffold100\t439363\t439415\t1.8535900440036\n",
"scaffold100\t458442\t458498\t1.33779652899652\n",
"scaffold100\t636974\t637034\t1.47573175617257\n",
"scaffold100\t637084\t637143\t1.24656795811596\n",
"scaffold100\t642110\t642170\t1.32784939242625\n",
"scaffold100\t676066\t676117\t2.0744756115782\n",
" 2804 /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph\n"
]
}
],
"source": [
"!fgrep -v \"-\" \\\n",
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"> /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph\n",
"!head /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph\n",
"!wc -l /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"track type=bedGraph name=\"4M_sig\" description=\"4M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\n",
"scaffold1\t162896\t162952\t1.31051906307266\n",
"scaffold1\t174020\t174073\t1.13065801555915\n",
"scaffold1\t178210\t178267\t1.2199265031441\n",
"scaffold1\t208737\t208792\t1.31462945133609\n",
"scaffold100\t91713\t91767\t1.16773934117713\n",
"scaffold100\t250282\t250335\t1.35652322667099\n",
"scaffold100\t300103\t300158\t1.23146709929105\n",
"scaffold100\t303374\t303434\t1.44751323196346\n",
"scaffold100\t306375\t306430\t1.14267878234681\n",
" 3588 /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph\n"
]
}
],
"source": [
"!fgrep -v \"-\" \\\n",
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"> /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph\n",
"!head /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph\n",
"!wc -l /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"track type=bedGraph name=\"6M_sig\" description=\"6M_sig\" visibility=full color=100,100,0 altColor=0,100,200 priority=20\n",
"scaffold1\t162129\t162191\t1.85685479189849\n",
"scaffold1\t172654\t172714\t1.33561271440876\n",
"scaffold1\t178075\t178128\t1.42323539316231\n",
"scaffold1\t178685\t178740\t1.30886296151914\n",
"scaffold1\t214231\t214288\t1.23355990867606\n",
"scaffold1\t219034\t219092\t1.34001786676384\n",
"scaffold1\t223041\t223094\t1.32669837521425\n",
"scaffold1\t230131\t230189\t1.41307400393928\n",
"scaffold100\t244541\t244592\t2.500239239607\n",
" 4045 /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph\n"
]
}
],
"source": [
"!fgrep -v \"-\" \\\n",
"./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"> /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph\n",
"!head /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph\n",
"!wc -l /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 154 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 278 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 260 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"117460 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 201.876, with p=0.000\n",
"The uncorrected chi2 value is 202.623, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[154, 2803], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 162.143, with p=0.000\n",
"The uncorrected chi2 value is 162.728, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[278, 3587], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 243.013, with p=0.000\n",
"The uncorrected chi2 value is 243.693, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[260, 4044], [117460, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Rebuilt (new gtf based on RNAseq data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8768 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7694 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6160 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1197818 Cufflinks\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 2184.818, with p=0.000\n",
"The uncorrected chi2 value is 2185.528, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[8768, 10028], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 3052.863, with p=0.000\n",
"The uncorrected chi2 value is 3053.724, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[7694, 10148], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 6233.645, with p=0.000\n",
"The uncorrected chi2 value is 6234.874, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[6160, 11690], [1197818, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Housekeeping Genes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separating out hypo and hyper. Note that the chi-square tests below pool the two counts (e.g. 2172 + 1038 = 3210 for Oyster 2)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2172 GLEAN\n",
"1038 GLEAN\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c \n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1988 GLEAN\n",
"1381 GLEAN\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c \n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2367 GLEAN\n",
"1452 GLEAN\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c \n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"251970 GLEAN\r\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \\\n",
"| cut -f 11 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 34.806, with p=0.000\n",
"The uncorrected chi2 value is 34.923, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 2 then Probes\n",
"obs = array([[3210, 10028], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 17.578, with p=0.000\n",
"The uncorrected chi2 value is 17.661, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 4 then Probes\n",
"obs = array([[3369, 10148], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CHI SQUARE\n",
"The corrected chi2 value is 28.378, with p=0.000\n",
"The uncorrected chi2 value is 28.476, with p=0.000\n"
]
}
],
"source": [
"# Enter the data comparing Oyster 6 then Probes\n",
"obs = array([[3819, 11690], [251970, 697753]])\n",
"\n",
"# Calculate the chi-square test\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Print the result\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Environmental Response Genes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separating hypo and hyper. Note that the chi-square tests below pool the two counts (e.g. 2063 + 746 = 2809 for Oyster 2)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"tldr\n",
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2063 GLEAN\n",
" 746 GLEAN\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c \n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1873 GLEAN\n",
" 865 GLEAN\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c \n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2175 GLEAN\n",
"1041 GLEAN\n"
]
}
],
"source": [
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c \n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 6 \\\n",
"| sort | uniq -c "
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"190475 GLEAN\r\n"
]
}
],
"source": [
"# Background: count array-probe overlaps with env-response genes.\n",
"# The A file is a GFF here, so field 11 is tallied instead of field 6.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \\\n",
"| cut -f 11 | sort | uniq -c"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oyster 2 DMRs versus array probes: overlap with env-response genes.\n",
"# NOTE(review): the second figure of each original pair (10028, 697753) looks like a\n",
"# row total, not a no-overlap count - the hypo/hyper tests further down reuse one\n",
"# second figure for every feature. chi2_contingency expects mutually exclusive cells,\n",
"# so the complement is derived by subtraction. Confirm the totals, then re-run.\n",
"dmr_hits, dmr_total = 2809, 10028\n",
"probe_hits, probe_total = 190475, 697753\n",
"obs = array([[dmr_hits, dmr_total - dmr_hits],\n",
"             [probe_hits, probe_total - probe_hits]])\n",
"\n",
"# Chi-square test of independence, with and without the continuity correction\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Element 0 is the statistic, element 1 the p-value; p is printed with 3 significant\n",
"# digits so that very small values do not collapse to 0.000\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oyster 4 DMRs versus array probes: overlap with env-response genes.\n",
"# NOTE(review): the second figure of each original pair (10148, 697753) looks like a\n",
"# row total, not a no-overlap count - the hypo/hyper tests further down reuse one\n",
"# second figure for every feature. chi2_contingency expects mutually exclusive cells,\n",
"# so the complement is derived by subtraction. Confirm the totals, then re-run.\n",
"dmr_hits, dmr_total = 2738, 10148\n",
"probe_hits, probe_total = 190475, 697753\n",
"obs = array([[dmr_hits, dmr_total - dmr_hits],\n",
"             [probe_hits, probe_total - probe_hits]])\n",
"\n",
"# Chi-square test of independence, with and without the continuity correction\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Element 0 is the statistic, element 1 the p-value; p is printed with 3 significant\n",
"# digits so that very small values do not collapse to 0.000\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oyster 6 DMRs versus array probes: overlap with env-response genes.\n",
"# NOTE(review): the second figure of each original pair (11690, 697753) looks like a\n",
"# row total, not a no-overlap count - the hypo/hyper tests further down reuse one\n",
"# second figure for every feature. chi2_contingency expects mutually exclusive cells,\n",
"# so the complement is derived by subtraction. Confirm the totals, then re-run.\n",
"dmr_hits, dmr_total = 3216, 11690\n",
"probe_hits, probe_total = 190475, 697753\n",
"obs = array([[dmr_hits, dmr_total - dmr_hits],\n",
"             [probe_hits, probe_total - probe_hits]])\n",
"\n",
"# Chi-square test of independence, with and without the continuity correction\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Element 0 is the statistic, element 1 the p-value; p is printed with 3 significant\n",
"# digits so that very small values do not collapse to 0.000\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# TE-Blast"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 368 WUBlastX\n",
" 15 WUBlastX\n"
]
}
],
"source": [
"# 2M sample: count DMR overlaps with the TE-WUBLASTX track, hypo first and hyper second.\n",
"# The trailing sed drops any tally line that contains a '#'.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 | sort | uniq -c | sed '/#/d'\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 251 WUBlastX\n",
" 3 WUBlastX\n"
]
}
],
"source": [
"# 4M sample: count DMR overlaps with the TE-WUBLASTX track, hypo first and hyper second.\n",
"# The trailing sed drops any tally line that contains a '#'.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 | sort | uniq -c | sed '/#/d'\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 141 WUBlastX\n",
" 27 WUBlastX\n"
]
}
],
"source": [
"# 6M sample: count DMR overlaps with the TE-WUBLASTX track, hypo first and hyper second.\n",
"# The trailing sed drops any tally line that contains a '#'.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 | sort | uniq -c | sed '/#/d'\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE-WUBLASTX.gff \\\n",
"| cut -f 6 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Promoter"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 720 flankbed\tpromoter\n",
" 256 flankbed\tpromoter\n"
]
}
],
"source": [
"# 2M sample: count DMR overlaps with the 1k5p gene promoter track, hypo first and hyper second.\n",
"# Fields 6 and 7 of the -wb output are tallied together; sed drops any line containing '#'.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 684 flankbed\tpromoter\n",
" 308 flankbed\tpromoter\n"
]
}
],
"source": [
"# 4M sample: count DMR overlaps with the 1k5p gene promoter track, hypo first and hyper second.\n",
"# Fields 6 and 7 of the -wb output are tallied together; sed drops any line containing '#'.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 6M sample: count DMR overlaps with the 1k5p gene promoter track, hypo first and hyper second.\n",
"# This cell was stored as markdown and carries no recorded output - run it to get the counts.\n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'\n",
"!intersectbed \\\n",
"-wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff \\\n",
"| cut -f 6,7 \\\n",
"| sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Chi2 test to compare hypo v hyper?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oyster ALL, hypo versus hyper: overlap with HOUSEKEEPING genes.\n",
"# NOTE(review): 21429 and 10434 are reused unchanged by the other hypo-versus-hyper\n",
"# tests in this section, so they are read here as row totals rather than no-overlap\n",
"# counts. chi2_contingency expects mutually exclusive cells, so the complement is\n",
"# derived by subtraction. Confirm the totals, then re-run.\n",
"hypo_hits, hypo_total = 6527, 21429\n",
"hyper_hits, hyper_total = 3871, 10434\n",
"obs = array([[hypo_hits, hypo_total - hypo_hits],\n",
"             [hyper_hits, hyper_total - hyper_hits]])\n",
"\n",
"# Chi-square test of independence, with and without the continuity correction\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Element 0 is the statistic, element 1 the p-value; p is printed with 3 significant\n",
"# digits so that very small values do not collapse to 0.000\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oyster ALL, hypo versus hyper: overlap with DEGS.\n",
"# NOTE(review): 21429 and 10434 are reused unchanged by the other hypo-versus-hyper\n",
"# tests in this section, so they are read here as row totals rather than no-overlap\n",
"# counts. chi2_contingency expects mutually exclusive cells, so the complement is\n",
"# derived by subtraction. Confirm the totals, then re-run.\n",
"hypo_hits, hypo_total = 1524, 21429\n",
"hyper_hits, hyper_total = 692, 10434\n",
"obs = array([[hypo_hits, hypo_total - hypo_hits],\n",
"             [hyper_hits, hyper_total - hyper_hits]])\n",
"\n",
"# Chi-square test of independence, with and without the continuity correction\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Element 0 is the statistic, element 1 the p-value; p is printed with 3 significant\n",
"# digits so that very small values do not collapse to 0.000\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oyster ALL, hypo versus hyper: overlap with TE blast hits.\n",
"# NOTE(review): 21429 and 10434 are reused unchanged by the other hypo-versus-hyper\n",
"# tests in this section, so they are read here as row totals rather than no-overlap\n",
"# counts. chi2_contingency expects mutually exclusive cells, so the complement is\n",
"# derived by subtraction. Confirm the totals, then re-run.\n",
"hypo_hits, hypo_total = 760, 21429\n",
"hyper_hits, hyper_total = 45, 10434\n",
"obs = array([[hypo_hits, hypo_total - hypo_hits],\n",
"             [hyper_hits, hyper_total - hyper_hits]])\n",
"\n",
"# Chi-square test of independence, with and without the continuity correction\n",
"chi2_corrected = stats.chi2_contingency(obs, correction=True)\n",
"chi2_uncorrected = stats.chi2_contingency(obs, correction=False)\n",
"\n",
"# Element 0 is the statistic, element 1 the p-value; p is printed with 3 significant\n",
"# digits so that very small values do not collapse to 0.000\n",
"print('CHI SQUARE')\n",
"print('The corrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_corrected[0], chi2_corrected[1]))\n",
"print('The uncorrected chi2 value is {0:5.3f}, with p={1:.3g}'.format(chi2_uncorrected[0], chi2_uncorrected[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hypo v Hyper on Ensembl gff - gives data on gene body, and repeats"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hypo\n",
"1065 GigaDB\tCDS\n",
"1065 GigaDB\texon\n",
"6094 GigaDB\tgene\n",
"6094 GigaDB\ttranscript\n",
" 918 dust\trepeat_region\n",
" 819 trf\trepeat_region\n",
"hyper\n",
" 308 GigaDB\tCDS\n",
" 308 GigaDB\texon\n",
"2374 GigaDB\tgene\n",
"2374 GigaDB\ttranscript\n",
" 322 dust\trepeat_region\n",
" 156 trf\trepeat_region\n"
]
}
],
"source": [
"# 2M sample against the Ensembl GFF3: tally overlaps by fields 6,7 of the -wb output\n",
"# (source and feature type in the stored output). Each half is labelled with echo.\n",
"!echo \"hypo\"\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'\n",
"!echo \"hyper\"\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.2M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hypo\n",
" 1 EnsemblGenomes\texon\n",
" 1 EnsemblGenomes\tpseudogenic_tRNA\n",
" 1 EnsemblGenomes\ttranscript\n",
" 715 GigaDB\tCDS\n",
" 715 GigaDB\texon\n",
"5389 GigaDB\tgene\n",
"5389 GigaDB\ttranscript\n",
" 907 dust\trepeat_region\n",
" 653 trf\trepeat_region\n",
"hyper\n",
" 1 EnsemblGenomes\texon\n",
" 1 EnsemblGenomes\ttRNA_gene\n",
" 1 EnsemblGenomes\ttranscript\n",
" 462 GigaDB\tCDS\n",
" 462 GigaDB\texon\n",
"3102 GigaDB\tgene\n",
"3102 GigaDB\ttranscript\n",
" 413 dust\trepeat_region\n",
" 220 trf\trepeat_region\n"
]
}
],
"source": [
"# 4M sample against the Ensembl GFF3: tally overlaps by fields 6,7 of the -wb output\n",
"# (source and feature type in the stored output). Each half is labelled with echo.\n",
"!echo \"hypo\"\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'\n",
"!echo \"hyper\"\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.4M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hypo\n",
" 1 EnsemblGenomes\texon\n",
" 1 EnsemblGenomes\tsnRNA\n",
" 1 EnsemblGenomes\tsnRNA_gene\n",
" 568 GigaDB\tCDS\n",
" 568 GigaDB\texon\n",
"6295 GigaDB\tgene\n",
"6295 GigaDB\ttranscript\n",
"1052 dust\trepeat_region\n",
" 550 trf\trepeat_region\n",
"hyper\n",
" 379 GigaDB\tCDS\n",
" 380 GigaDB\texon\n",
"3394 GigaDB\tgene\n",
"3394 GigaDB\ttranscript\n",
" 539 dust\trepeat_region\n",
" 314 trf\trepeat_region\n"
]
}
],
"source": [
"# 6M sample against the Ensembl GFF3: tally overlaps by fields 6,7 of the -wb output\n",
"# (source and feature type in the stored output). Each half is labelled with echo.\n",
"!echo \"hypo\"\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hypo.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'\n",
"!echo \"hyper\"\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/data-genomic/tentacle/2014.07.02.6M_sig.hyper.bedGraph \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 6,7 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 5 EnsemblGenomes\tRNA\r\n",
" 444 EnsemblGenomes\texon\r\n",
" 6 EnsemblGenomes\tgene\r\n",
" 2 EnsemblGenomes\tmiRNA\r\n",
" 2 EnsemblGenomes\tmiRNA_gene\r\n",
" 259 EnsemblGenomes\tpseudogenic_tRNA\r\n",
" 14 EnsemblGenomes\tsnRNA\r\n",
" 14 EnsemblGenomes\tsnRNA_gene\r\n",
" 6 EnsemblGenomes\tsnoRNA\r\n",
" 6 EnsemblGenomes\tsnoRNA_gene\r\n",
" 152 EnsemblGenomes\ttRNA_gene\r\n",
" 422 EnsemblGenomes\ttranscript\r\n",
"157279 GigaDB\tCDS\r\n",
"157307 GigaDB\texon\r\n",
"600445 GigaDB\tgene\r\n",
"600445 GigaDB\ttranscript\r\n",
"56210 dust\trepeat_region\r\n",
"42390 trf\trepeat_region\r\n"
]
}
],
"source": [
"# Background: array probes against the Ensembl GFF3, tallied by fields 11,12 of the -wb output.\n",
"# The A file is a GFF here, so the B-record fields sit further right than in the bedGraph cells.\n",
"!intersectbed -wb \\\n",
"-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \\\n",
"-b /Volumes/web-1/trilobite/Crassostrea_gigas_ensembl_tracks/Crassostrea_gigas.GCA_000297895.1.25.sorted.gff3 \\\n",
"| cut -f 11,12 | sort | uniq -c | sed '/#/d'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}