{ "metadata": { "name": "", "signature": "sha256:9a51028e32fa02e3d32f7bd408a7594f00dfc8cad5141087a13a7666caaa67ef" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "I have a DEG list, now what?" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Pacific Oyster Tuxedo DEG list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "All data " ] }, { "cell_type": "code", "collapsed": false, "input": [ "#DEG List\n", "!head /Volumes/web/cnidarian/fish546sort_gene.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "gene_id\tgene_name\tsample1\tsample2\tfold_change\tdirection\ttotal_fpkm\tq-value\tgene_description\r\n", "CGI_10006532\t.\tTroco\tUmbo\t8.37\tUP\t55.29\t0.00049485\t\r\n", "CGI_10023338\t.\tTroco\tUmbo\t31.68\tDOWN\t152.66\t0.00049485\t\r\n", "EKC17242\tCGI_10001446\tTroco\tUmbo\t3.76\tDOWN\t42.09\t0.00049485\t\r\n", "EKC17313\tCGI_10001190\tTroco\tUmbo\t7.90\tUP\t16.74\t0.00049485\t\r\n", "EKC17492\tCGI_10000706\tTroco\tUmbo\t14.59\tDOWN\t328.69\t0.00049485\t\r\n", "EKC17563\tCGI_10000573\tTroco\tUmbo\t3.20\tUP\t3470.43\t0.00049485\t\r\n", "EKC17629\tCGI_10000434\tTroco\tUmbo\t97.59\tDOWN\t416.64\t0.00049485\t\r\n", "EKC17663\tCGI_10000384\tTroco\tUmbo\t3.91\tUP\t1535.08\t0.00049485\t\r\n", "EKC17667\tCGI_10000377\tTroco\tUmbo\t182.69\tDOWN\t1851.02\t0.00049485\t\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "raw", "metadata": {}, "source": [ "In order to get enrichment you need to rely on SPID or other (here we use SPID as we have it). 
This means a simple join to get the SPIDs that correspond to the table above" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -d fish546_DEgene_out /Volumes/web/cnidarian/fish546sort_gene.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "processing chunk line 0 to 2988 (0.863645076752 s elapsed)\r\n", "pushing /Volumes/web/cnidarian/fish546sort_gene.txt...\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "parsing 2D0D0BB8...\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "finished fish546_DEgene_out\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Volumes/web/cnidarian/" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Volumes/web/cnidarian\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT * FROM [sr320@washington.edu].[fish546_DEgene_out]deg left join [sr320@washington.edu].[qDOD Cgigas Gene Descriptions (Swiss-prot)]sp on deg.gene_name = sp.CGI_ID\" -f tsv -o fish546_DEGjoinSP.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!head fish546_DEGjoinSP.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "gene_id\tgene_name\tsample1\tsample2\tfold_change\tdirection\ttotal_fpkm\tq-value\tgene_description\tCGI_ID\tSPID\tevalue\tDescription\r", "\r\n", "EKC17945\tCGI_10000015\tTroco\tUmbo\t2.13\tDOWN\t450.32\t0.0482194\t\tCGI_10000015\tQ9BYW2\t3E-38\tHistone-lysine N-methyltransferase SETD2\r", "\r\n", "EKC17931\tCGI_10000033\tTroco\tUmbo\t2.69\tUP\t1266.47\t0.00205159\t\tCGI_10000033\tP47835\t8E-65\t40S ribosomal protein S3-B\r", 
"\r\n", "EKC17929\tCGI_10000035\tTroco\tUmbo\t7.55\tDOWN\t14.56\t0.00428566\t\tCGI_10000035\tQ99NE9\t9E-12\tPre-B-cell leukemia transcription factor 4\r", "\r\n", "EKC17923\tCGI_10000041\tTroco\tUmbo\t3.06\tUP\t766.73\t0.012281\t\tCGI_10000041\tP10817\t4E-08\tCytochrome c oxidase subunit 6A2, mitochondrial\r", "\r\n", "EKC17914\tCGI_10000051\tTroco\tUmbo\t3.27\tUP\t80.76\t0.0215094\t\tCGI_10000051\tQ5RF33\t1E-49\tDown syndrome critical region protein 3 homolog\r", "\r\n", "EKC17913\tCGI_10000052\tTroco\tUmbo\t3.67\tUP\t30.54\t0.0182539\t\tCGI_10000052\tA7Y2W8\t1E-39\tSodium- and chloride-dependent glycine transporter 1\r", "\r\n", "EKC17910\tCGI_10000055\tTroco\tUmbo\t3.14\tDOWN\t1322.16\t0.00205159\t\tCGI_10000055\tP84245\t3E-90\tHistone H3.3\r", "\r\n", "EKC17900\tCGI_10000066\tTroco\tUmbo\t2.9\tUP\t89.22\t0.0269801\t\tCGI_10000066\tP23038\t6E-08\tMetallothionein\r", "\r\n", "EKC17893\tCGI_10000073\tTroco\tUmbo\t8.29\tUP\t14.78\t0.00571421\t\tCGI_10000073\tP47727\t5E-105\tCarbonyl reductase [NADPH] 1\r", "\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "!awk '{ print $10 }' fish546_DEGjoinSP.txt > fish546_DEGjoinSPonly.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "raw", "metadata": {}, "source": [ "!cat fish546_DEGjoinSPonly.txt" ] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "DAVID" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"DAVID__Functional_Annotation_Tools_18A6ABB3_png\"/" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "Now need to upload Backgroung list (all corresponding IDs in oyster genome)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT SPID FROM [sr320@washington.edu].[qDOD Cgigas Gene Descriptions (Swiss-prot)]\" -f tsv -o Cgigas_allSP.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { 
"cell_type": "raw", "metadata": {}, "source": [ "!cat Cgigas_allSP.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"DAVID__Functional_Annotation_Tools_18A6AD0F_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"pc3_18A6AD70_png\"/" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!wget http://david.abcc.ncifcrf.gov/data/download/chart_B89D12125EAE1391883715950.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "--2014-02-08 10:26:32-- http://david.abcc.ncifcrf.gov/data/download/chart_B89D12125EAE1391883715950.txt\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Resolving david.abcc.ncifcrf.gov... " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "129.43.1.164\r\n", "Connecting to david.abcc.ncifcrf.gov|129.43.1.164|:80... " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "connected.\r\n", "HTTP request sent, awaiting response... 
" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "200 OK\r\n", "Length: 47347 (46K) [text/plain]\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Saving to: `chart_B89D12125EAE1391883715950.txt'\r\n", "\r\n", "\r", " 0% [ ] 0 --.-K/s " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "69% [==========================> ] 33,060 149K/s \r", "100%[======================================>] 47,347 184K/s in 0.3s \r\n", "\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "2014-02-08 10:26:49 (184 KB/s) - `chart_B89D12125EAE1391883715950.txt' saved [47347/47347]\r\n", "\r\n" ] } ], "prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"exclde_18A6AEF4_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"adhajfh_18A6AF5F_png\"/" ] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Revigo " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"ref_88_18A6AFBF_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"REVIGO_Output_18A6B01E_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"REVIGO_Output_18A6B052_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"Session__New_Session_18A6B255_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"REVIGO_Output_18A6B2BA_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Olympia Oyster Example" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#from DESeq \n", "!head /Volumes/web/cnidarian/fish546_0lyMF_DElist" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ 
"\"id\"\t\"baseMean\"\t\"baseMeanA\"\t\"baseMeanB\"\t\"foldChange\"\t\"log2FoldChange\"\t\"pval\"\t\"padj\"\r\n", "\"comp10002_c0_seq1\"\t5.11042261412799\t6.56521445318632\t3.65563077506966\t0.556818181818182\t-0.844721774522088\t0.885218680234281\t1\r\n", "\"comp10005_c0_seq1\"\t4.89904260332464\t4.92391083988974\t4.87417436675954\t0.989898989898991\t-0.0146467759644005\t1\t1\r\n", "\"comp10007_c0_seq1\"\t11525.6691667085\t17587.3888682795\t5463.94946513745\t0.310674285197173\t-1.68652526171154\t0.147533184864772\t1\r\n", "\"comp10008_c0_seq1\"\t6329.84902821593\t6860.6491035797\t5799.04895285216\t0.845262432941859\t-0.242528763050997\t0.833689827988819\t1\r\n", "\"comp10009_c0_seq1\"\t11933.5704169675\t13182.9506219981\t10684.1902119369\t0.810455149100568\t-0.303195746233489\t0.792097629317823\t1\r\n", "\"comp1000_c0_seq1\"\t24.3833059520802\t42.6738939457111\t6.09271795844943\t0.142773892773893\t-2.80819589849697\t0.159574074144965\t1\r\n", "\"comp10010_c0_seq1\"\t11.0290629166217\t12.3097770997243\t9.74834873351908\t0.791919191919192\t-0.336574870851763\t0.936698849449615\t1\r\n", "\"comp10012_c0_seq1\"\t2.25057540914152\t3.28260722659316\t1.21854359168989\t0.371212121212121\t-1.42968427524324\t0.920190447196145\t1\r\n", "\"comp10013_c0_seq1\"\t3.87944490415555\t4.10325903324145\t3.65563077506966\t0.890909090909092\t-0.16664986940945\t1\t1\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "Want pval < .02 \n", "\n", "HOW in commandline " ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "raw", "metadata": {}, "source": [ "comp42267_c0_seq1\n", "comp27506_c0_seq1\n", "comp31078_c0_seq1\n", "comp42994_c1_seq12\n", "comp43376_c0_seq11\n", "comp43684_c0_seq63\n", "comp43850_c0_seq2\n", "comp23647_c0_seq1\n", "comp37750_c2_seq5\n", "comp14218_c0_seq1\n", "comp38365_c0_seq23\n", "comp42326_c1_seq2\n", "comp42897_c0_seq4\n", "comp41990_c0_seq3\n", "comp39888_c0_seq2\n", "comp41088_c0_seq3\n", 
"comp22830_c0_seq1\n", "comp33776_c1_seq2\n", "comp9895_c0_seq2\n", "comp34272_c0_seq1\n", "comp43741_c0_seq9\n", "comp19578_c0_seq1\n", "comp28371_c1_seq1\n", "comp44708_c0_seq2\n", "comp38094_c0_seq2\n", "comp46998_c0_seq1\n", "comp7324_c0_seq1\n", "comp30573_c0_seq1\n", "comp42812_c1_seq2\n", "comp40434_c1_seq2\n", "comp38128_c0_seq1\n", "comp37019_c0_seq1\n", "comp29092_c0_seq2\n", "comp43755_c0_seq29\n", "comp36701_c0_seq7\n", "comp39792_c1_seq4\n", "comp44757_c0_seq12\n", "comp41431_c0_seq1\n", "comp42010_c0_seq2\n", "comp38830_c0_seq3\n", "comp41430_c0_seq1\n", "comp19258_c0_seq1\n", "comp34994_c0_seq1\n", "comp37019_c0_seq9\n", "comp35574_c1_seq2\n", "comp7357_c0_seq1\n", "comp33776_c1_seq3\n", "comp9790_c0_seq2\n", "comp30947_c0_seq1\n", "comp10747_c0_seq1\n", "comp28235_c0_seq1\n", "comp30920_c0_seq1\n", "comp30920_c1_seq1\n", "comp33014_c0_seq7\n", "comp34795_c4_seq2\n", "comp44292_c0_seq1\n", "comp43755_c0_seq14\n", "comp40760_c0_seq3\n", "comp39494_c0_seq7\n", "comp40881_c2_seq3\n", "comp43800_c0_seq2\n", "comp43937_c0_seq7\n", "comp44612_c0_seq13\n", "comp40727_c0_seq1\n", "comp35003_c0_seq2\n", "comp38767_c1_seq4\n", "comp28714_c1_seq1\n", "comp33568_c0_seq2\n", "comp36518_c0_seq7\n", "comp43171_c0_seq2\n", "comp41734_c0_seq1\n", "comp38114_c0_seq2\n", "comp44603_c0_seq5\n", "comp42486_c0_seq1\n", "comp35826_c3_seq1\n", "comp35864_c0_seq3\n", "comp36501_c1_seq2\n", "comp9904_c0_seq1\n", "comp44992_c0_seq1\n", "comp33559_c0_seq2\n", "comp39175_c0_seq4\n", "comp42186_c0_seq7\n", "comp25575_c0_seq1\n", "comp36140_c0_seq1\n", "comp7810_c0_seq1\n", "comp40947_c0_seq2\n", "comp44325_c2_seq1\n", "comp37915_c0_seq2\n", "comp38004_c0_seq1\n", "comp16665_c0_seq1\n", "comp30134_c0_seq1\n", "comp34952_c0_seq1\n", "comp37019_c0_seq14\n", "comp18651_c1_seq1\n", "comp39943_c0_seq3\n", "comp39216_c0_seq1\n", "comp26913_c0_seq4\n", "comp44542_c1_seq2\n", "comp42117_c0_seq2\n", "comp38371_c0_seq1\n", "comp39745_c1_seq10\n", "comp42837_c1_seq2\n", 
"comp37098_c0_seq5\n", "comp23692_c0_seq2\n", "comp40869_c0_seq2\n", "comp11822_c0_seq1\n", "comp40954_c0_seq1\n", "comp30027_c0_seq1\n", "comp36832_c0_seq1\n", "comp46105_c0_seq1\n", "comp41537_c0_seq5\n", "comp38489_c0_seq2\n", "comp38316_c0_seq1\n", "comp39573_c0_seq3\n", "comp37098_c0_seq7\n", "comp37430_c6_seq1\n", "comp44721_c0_seq5\n", "comp30821_c0_seq3\n", "comp36096_c1_seq2\n", "comp41426_c1_seq3\n", "comp43937_c0_seq5\n", "comp44825_c0_seq2\n", "comp38562_c2_seq4\n", "comp29985_c0_seq1\n", "comp44175_c0_seq9\n", "comp28031_c0_seq1\n", "comp42897_c0_seq7\n", "comp24054_c1_seq1\n", "comp39365_c0_seq10\n", "comp40252_c2_seq4\n", "comp42897_c0_seq5\n", "comp37359_c0_seq1\n", "comp20796_c1_seq1\n", "comp22241_c0_seq1\n", "comp22723_c0_seq1\n", "comp31492_c0_seq3\n", "comp36183_c1_seq1\n", "comp36849_c3_seq1\n", "comp10314_c0_seq1\n", "comp23359_c0_seq2\n", "comp38801_c1_seq7\n", "comp46088_c0_seq1\n", "comp38176_c0_seq1\n", "comp39569_c0_seq1\n", "comp42004_c0_seq1\n", "comp27215_c0_seq1\n", "comp26537_c0_seq3\n", "comp43766_c3_seq1\n", "comp36804_c4_seq9\n", "comp34249_c2_seq1\n", "comp46035_c0_seq1\n", "comp44200_c0_seq12\n", "comp31563_c0_seq2\n", "comp46089_c0_seq1\n", "comp27903_c0_seq1\n", "comp42005_c0_seq5\n", "comp20379_c0_seq1\n", "comp41153_c1_seq2\n", "comp41861_c0_seq8\n", "comp44076_c0_seq18\n", "comp41709_c1_seq5\n", "comp40221_c0_seq2\n", "comp30876_c0_seq1\n", "comp37208_c0_seq5\n", "comp42410_c0_seq3\n", "comp37788_c0_seq2\n", "comp7811_c0_seq1\n", "comp44684_c0_seq2\n", "comp19258_c3_seq1\n", "comp39776_c0_seq4\n", "comp32996_c0_seq11\n", "comp31314_c0_seq1\n", "comp36096_c1_seq1\n", "comp41407_c0_seq1\n", "comp40252_c1_seq1\n", "comp41557_c0_seq5\n", "comp38611_c0_seq21\n", "comp41352_c1_seq2\n", "comp31534_c1_seq1\n", "comp42401_c0_seq2\n", "comp34365_c0_seq2\n", "comp35587_c0_seq1\n", "comp33091_c2_seq2\n", "comp35782_c0_seq2\n", "comp25748_c0_seq1\n", "comp38972_c0_seq2\n", "comp10314_c0_seq2\n", "comp15683_c0_seq1\n", 
"comp41118_c0_seq2\n", "comp43196_c0_seq3\n", "comp43376_c1_seq1\n", "comp44545_c1_seq1\n", "comp47715_c0_seq1\n", "comp37822_c0_seq1\n", "comp39193_c0_seq2\n", "comp36859_c0_seq1\n", "comp41972_c4_seq8\n", "comp7160_c0_seq1\n", "comp41218_c1_seq13" ] }, { "cell_type": "code", "collapsed": false, "input": [ "What we want is SPID list for DEG and SPID list for all proteins" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"SQLShare_-_View_Query_189AB723_png\"/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }