{ "metadata": { "name": "", "signature": "sha256:917966849e5109185307c9a10a5e0a6f5f3ccc035d4f5809415960f5ad875ea6" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Converting methratio files for methylkit analysis" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "REDUX- New reformatting (11/2013)" ] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "M1" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_M1] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Users/sr320/Desktop/test.csv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```sql\n", "SELECT \n", " chr as chr,\n", " pos as start,\n", " '+' as strand,\n", " cast (CT_count as float) as CT_count,\n", " cast (C_count as float) as C_count,\n", " cast (C_count as float) / cast (CT_count as float) as freqC,\n", " 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT\n", " \n", "FROM [sr320@washington.edu].[BiGo_lar_M1]\n", " where \n", "context like '__CG_'\n", "and\n", " CT_Count >= 5 \n", "and \n", " ratio <> 'NA'\u200b\n", "``` " ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,C_count,freqC,freqT\r", "\r\n", "C10295,51,+,5,0,0,1\r", "\r\n", "C11141,58,+,5,0,0,1\r", "\r\n", "C11141,73,+,5,0,0,1\r", "\r\n", "C11141,78,+,5,0,0,1\r", "\r\n", "C11848,108,+,6,0,0,1\r", "\r\n", "C12768,103,+,6,1,0.166666666666667,0.833333333333333\r", "\r\n", "C13766,126,+,9,0,0,1\r", "\r\n", "C13766,145,+,8,0,0,1\r", "\r\n", "C14220,96,+,7,0,0,1\r", "\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_input.csv > /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_c.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2_c.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,51,+,5,0,0,1\r", "\r\n", "C11141,58,+,5,0,0,1\r", "\r\n", "C11141,73,+,5,0,0,1\r", "\r\n", "C11141,78,+,5,0,0,1\r", "\r\n", "C11848,108,+,6,0,0,1\r", "\r\n", "C12768,103,+,6,1,0.166666666666667,0.833333333333333\r", "\r\n", "C13766,126,+,9,0,0,1\r", "\r\n", "C13766,145,+,8,0,0,1\r", "\r\n", "C14220,96,+,7,0,0,1\r", "\r\n", "C14220,143,+,6,0,0,1\r", "\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!tr ',' \"\\t\" /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit2.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295\t51\t+\t5\t0\t0\t1\r", "\r\n", "C11141\t58\t+\t5\t0\t0\t1\r", "\r\n", "C11141\t73\t+\t5\t0\t0\t1\r", "\r\n", "C11141\t78\t+\t5\t0\t0\t1\r", "\r\n", "C11848\t108\t+\t6\t0\t0\t1\r", "\r\n", "C12768\t103\t+\t6\t1\t0.166666666666667\t0.833333333333333\r", "\r\n", "C13766\t126\t+\t9\t0\t0\t1\r", "\r\n", "C13766\t145\t+\t8\t0\t0\t1\r", "\r\n", "C14220\t96\t+\t7\t0\t0\t1\r", "\r\n", "C14220\t143\t+\t6\t0\t0\t1\r", "\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Hack to get unique ID" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit3_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr_start,chr,start,strand,CT_count,C_count,freqC,freqT\r", "\r\n", "C10295_51,C10295,51,+,5,0,0,1\r", "\r\n", "C11141_58,C11141,58,+,5,0,0,1\r", "\r\n", "C11141_73,C11141,73,+,5,0,0,1\r", "\r\n", "C11141_78,C11141,78,+,5,0,0,1\r", "\r\n", "C11848_108,C11848,108,+,6,0,0,1\r", "\r\n", "C12768_103,C12768,103,+,6,1,0.166666666666667,0.833333333333333\r", "\r\n", "C13766_126,C13766,126,+,9,0,0,1\r", "\r\n", "C13766_145,C13766,145,+,8,0,0,1\r", "\r\n", "C14220_96,C14220,96,+,7,0,0,1\r", "\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "python tool to download hack" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_M1] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_M1_methylkit4_input.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/Monarch/cnidary/BiGo_lar_M1_methylkit4_input.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr_start\tchr\tstart\tstrand\tCT_count\tC_count\tfreqC\tfreqT\r", "\r\n", "C10295_51\tC10295\t51\t+\t5\t0\t0\t1\r", "\r\n", "C11141_58\tC11141\t58\t+\t5\t0\t0\t1\r", "\r\n", "C11141_73\tC11141\t73\t+\t5\t0\t0\t1\r", "\r\n", "C11141_78\tC11141\t78\t+\t5\t0\t0\t1\r", "\r\n", "C11848_108\tC11848\t108\t+\t6\t0\t0\t1\r", "\r\n", "C12768_103\tC12768\t103\t+\t6\t1\t0.166666666666667\t0.833333333333333\r", "\r\n", "C13766_126\tC13766\t126\t+\t9\t0\t0\t1\r", "\r\n", "C13766_145\tC13766\t145\t+\t8\t0\t0\t1\r", "\r\n", "C14220_96\tC14220\t96\t+\t7\t0\t0\t1\r", "\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "T1D3" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T1D3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T1D3_methylkit4_input.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "T1D5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T1D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T1D5_methylkit4_input.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "M3" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_M3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_M3_methylkit4_input.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "T3D3" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T3D3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T3D3_methylkit4_input.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "T3D5" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr + '_' + (cast (pos as varchar)) as chr_start, chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC, 1 - (cast (C_count as float) / cast (CT_count as float)) as freqT FROM [sr320@washington.edu].[BiGo_lar_T3D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -f tsv -o /Volumes/Monarch/cnidary/BiGo_lar_T3D5_methylkit4_input.txt\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "In excel and textwrangler made a list of non_redundant locis from all 6 files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Left Join in SQLshare on non_redundant ID" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```sql\n", "SELECT * \n", "FROM [sr320@washington.edu].[_BiGo_lar_nonred_ID.txt]id\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_M1_hack]M1\n", "ON id.[chr_start]=M1.[chr_start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D3_hack]T1D3\n", "ON id.[chr_start]=T1D3.[chr_start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D5_hack]T1D5\n", "ON id.[chr_start]=T1D5.[chr_start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_M3_hack]M3\n", "ON id.[chr_start]=M3.[chr_start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D3_hack]T3D3\n", "ON id.[chr_start]=T3D3.[chr_start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D5_hack]T3D5\n", "ON id.[chr_start]=T3D5.[chr_start]\n", "```" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -d \"[sr320@washington.edu].[BiGo_Larvae_joineddata]\" -o /Volumes/Monarch/cnidary/BiGo_lar_joineddata.csv\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -d \"[sr320@washington.edu].[Snapshot of BiGo_Larvae_joineddata]\" -o /Volumes/Monarch/cnidary/BiGo_lar_joineddata3.csv\n" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Re ReDux 11/6/2013" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Will try double column join ? as per Dan's Suggestion" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```sql\n", "SELECT * \n", "FROM [sr320@washington.edu].[BiGo_lar_nonred_ID split]id\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_M1_hack]M1\n", "ON id.[chr]=M1.[chr] AND id.[start]=M1.[start]\u200b\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "this worked fine.." ] }, { "cell_type": "code", "collapsed": false, "input": [ "SELECT * \n", "FROM [sr320@washington.edu].[BiGo_lar_nonred_ID split]id\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_M1_hack]M1\n", "ON id.[chr]=M1.[chr] AND id.[start]=M1.[start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D3_hack]T1D3\n", "ON id.[chr]=T1D3.[chr] AND id.[start]=T1D3.[start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T1D5_hack]T1D5\n", "ON id.[chr]=T1D5.[chr] AND id.[start]=T1D5.[start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_M3_hack]M3\n", "ON id.[chr]=M3.[chr] AND id.[start]=M3.[start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D3_hack]T3D3\n", "ON id.[chr]=T3D3.[chr] AND id.[start]=T3D3.[start]\n", "LEFT JOIN [sr320@washington.edu].[_BiGo_lar_T3D5_hack]T3D5\n", "ON id.[chr]=T3D5.[chr] AND id.[start]=T3D5.[start]" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "![im](files/img/BiGo_lar_joineddata.jpg)\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: -c: line 0: syntax error near unexpected token `files/img/BiGo_lar_joineddata.jpg'\r\n", "/bin/sh: -c: line 0: `[im](files/img/BiGo_lar_joineddata.jpg)'\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "![pearl](files/img/pearl.jpeg)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: -c: line 0: syntax error near unexpected token `files/img/pearl.jpeg'\r\n", "/bin/sh: -c: line 0: `[pearl](files/img/pearl.jpeg)'\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\"BiGo_lar_joineddata.xls\"" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/Monarch/cnidary/BiGo_lar_lociALL0.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr\tstart\tchr_start\tCountingCs\tSummingCs\r\n", "C16318\t177\tC16318_177\t6\t0\r\n", "C16318\t196\tC16318_196\t6\t0\r\n", "C16318\t203\tC16318_203\t6\t0\r\n", "C16318\t302\tC16318_302\t6\t0\r\n", "C19344\t310\tC19344_310\t6\t0\r\n", "C19356\t52\tC19356_52\t6\t0\r\n", "C19356\t107\tC19356_107\t6\t0\r\n", "C19356\t120\tC19356_120\t6\t0\r\n", "C19356\t123\tC19356_123\t6\t0\r\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/Monarch/cnidary/BiGo_lar_lociALL0.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 3230 16155 130260 /Volumes/Monarch/cnidary/BiGo_lar_lociALL0.txt\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Up into SQLshare and generate file where loci covered by all 6 datasets and > 0 C_count\n", "\n", "```\n", "SELECT * FROM [sr320@washington.edu].[_BiGo_lar_joineddata_sums.csv]\n", " where CountingCs = 6\n", " and\n", " SummingCs > 0\n", "```" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/Monarch/cnidary/BiGo_lar_joineddata_mk.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,chr_start,CountingCs,SummingCs,chr1,start1,strand,CT_count,C_count,freqC,freqT,chr_start1,chr2,start2,strand1,CT_count1,C_count1,freqC1,freqT1,chr_start2,chr3,start3,strand2,CT_count2,C_count2,freqC2,freqT2,chr_start3,chr4,start4,strand3,CT_count3,C_count3,freqC3,freqT3,chr_start4,chr5,start5,strand4,CT_count4,C_count4,freqC4,freqT4,chr_start5,chr6,start6,strand5,CT_count5,C_count5,freqC5,freqT5\r", "\r\n", "C16318,206,C16318_206,6,1,C16318,206,+,7,0,0,1,C16318_206,C16318,206,+,9,0,0,1,C16318_206,C16318,206,+,7,1,0.142857143,0.857142857,C16318_206,C16318,206,+,12,0,0,1,C16318_206,C16318,206,+,25,0,0,1,C16318_206,C16318,206,+,23,0,0,1\r", "\r\n", "C19576,631,C19576_631,6,16,C19576,631,+,35,5,0.142857143,0.857142857,C19576_631,C19576,631,+,16,3,0.1875,0.8125,C19576_631,C19576,631,+,7,1,0.142857143,0.857142857,C19576_631,C19576,631,+,19,3,0.157894737,0.842105263,C19576_631,C19576,631,+,18,1,0.055555556,0.944444444,C19576_631,C19576,631,+,19,3,0.157894737,0.842105263\r", "\r\n", "C19576,637,C19576_637,6,29,C19576,637,+,37,8,0.216216216,0.783783784,C19576_637,C19576,637,+,14,6,0.428571429,0.571428571,C19576_637,C19576,637,+,7,1,0.142857143,0.857142857,C19576_637,C19576,637,+,20,6,0.3,0.7,C19576_637,C19576,637,+,16,2,0.125,0.875,C19576_637,C19576,637,+,19,6,0.315789474,0.684210526\r", "\r\n", "C19576,403,C19576_403,6,5,C19576,403,+,35,0,0,1,C19576_403,C19576,403,+,12,0,0,1,C19576_403,C19576,403,+,15,0,0,1,C19576_403,C19576,403,+,28,3,0.107142857,0.892857143,C19576_403,C19576,403,+,22,1,0.045454545,0.954545455,C19576_403,C19576,403,+,16,1,0.0625,0.9375\r", "\r\n", "C19902,815,C19902_815,6,193,C19902,815,+,81,51,0.62962963,0.37037037,C19902_815,C19902,815,+,48,31,0.645833333,0.354166667,C19902_815,C19902,815,+,31,19,0.612903226,0.387096774,C19902_815,C19902,815,+,47,30,0.638297872,0.361702128,C19902_815,C19902,815,+,64,34,0.53125,0.46875,C19902_815,C19902,815,+,49,28,0.571428571,0.428571429\r", "\r\n", "C19902,83,C19902_83,6,314,C19902,83,+,112,73,0.651785714,0.348214286,C19902_83,C19902,83,+,62,39,0.629032258,0.370967742,C19902_83,C19902,83,+,51,27,0.529411765,0.470588235,C19902_83,C19902,83,+,72,39,0.541666667,0.458333333,C19902_83,C19902,83,+,110,73,0.663636364,0.336363636,C19902_83,C19902,83,+,95,63,0.663157895,0.336842105\r", "\r\n", "C19902,141,C19902_141,6,376,C19902,141,+,102,74,0.725490196,0.274509804,C19902_141,C19902,141,+,71,53,0.746478873,0.253521127,C19902_141,C19902,141,+,55,38,0.690909091,0.309090909,C19902_141,C19902,141,+,66,47,0.712121212,0.287878788,C19902_141,C19902,141,+,125,93,0.744,0.256,C19902_141,C19902,141,+,98,71,0.724489796,0.275510204\r", "\r\n", "C19902,775,C19902_775,6,376,C19902,775,+,137,95,0.693430657,0.306569343,C19902_775,C19902,775,+,94,67,0.712765957,0.287234043,C19902_775,C19902,775,+,56,37,0.660714286,0.339285714,C19902_775,C19902,775,+,66,45,0.681818182,0.318181818,C19902_775,C19902,775,+,112,72,0.642857143,0.357142857,C19902_775,C19902,775,+,85,60,0.705882353,0.294117647\r", "\r\n", "C19902,781,C19902_781,6,327,C19902,781,+,123,72,0.585365854,0.414634146,C19902_781,C19902,781,+,85,48,0.564705882,0.435294118,C19902_781,C19902,781,+,56,36,0.642857143,0.357142857,C19902_781,C19902,781,+,64,45,0.703125,0.296875,C19902_781,C19902,781,+,105,72,0.685714286,0.314285714,C19902_781,C19902,781,+,82,54,0.658536585,0.341463415\r", "\r\n" ] } ], "prompt_number": 15 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Creation of new tab delimited text files with following structure \n", "
\n", "`chr\tstart\tstrand\tCT_count\tC_count\tfreqC\tfreqT`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "These files only contain loci covered in all 6 files and at least 1 mCpG" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Quicklist** old \n", "M3 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M3_methylkit.csv` \n", " \n", "T3D3 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T3D3_methylkit.csv` \n", "\n", "T3D5 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T3D5_methylkit.csv` \n", "\n", "M1 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M1_methylkit.csv` \n", "\n", "T1D3 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T1D3_methylkit.csv`\n", "\n", "T1D5 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T1D5_methylkit.csv` \n", "\n", "---\n", "**QC** \n", "M3 \n", "`http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M3_methylkit_oner.csv`\n", "\n", "BiGo(gonad) \n", "`http://eagle.fish.washington.edu/cnidarian/BiGO_betty_plain_methylkit.csv`" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGO_betty_plain_methratio_v1.txt] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGO_betty_plain_methylkit_input.csv\n", "\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGO_betty_plain_methylkit_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,C_count,freqC\r", "\r\n", "C10005,57,+,52,0,0\r", "\r\n", "C10009,70,+,28,0,0\r", "\r\n", "C10009,124,+,13,0,0\r", "\r\n", "C10009,128,+,13,0,0\r", "\r\n", "C10009,136,+,9,0,0\r", "\r\n", "C10011,51,+,14,0,0\r", "\r\n", "C10011,62,+,17,0,0\r", "\r\n", "C10011,101,+,21,0,0\r", "\r\n", "C10011,108,+,21,0,0\r", "\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGO_betty_plain_methylkit_input.csv > /Volumes/web/cnidarian/BiGO_betty_plain_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGO_betty_plain_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10005,57,+,52,0,0\r", "\r\n", "C10009,70,+,28,0,0\r", "\r\n", "C10009,124,+,13,0,0\r", "\r\n", "C10009,128,+,13,0,0\r", "\r\n", "C10009,136,+,9,0,0\r", "\r\n", "C10011,51,+,14,0,0\r", "\r\n", "C10011,62,+,17,0,0\r", "\r\n", "C10011,101,+,21,0,0\r", "\r\n", "C10011,108,+,21,0,0\r", "\r\n", "C10011,115,+,19,0,0\r", "\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "M3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```sql\n", "SELECT \n", " chr as chr,\n", " pos as start,\n", " '+' as strand,\n", " cast (CT_count as float) as CT_count,\n", " cast (C_count as float) as C_count\n", "FROM [sr320@washington.edu].[BiGo_lar_M3]\n", " where \n", "context like '__CG_'\n", "and\n", " CT_Count >= 5 \n", "and \n", " ratio <> 'NA'\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```\n", "SELECT \n", " chr as chr,\n", " start as start,\n", " strand as strand,\n", " CT_count as CT_count,\n", " C_count as C_count,\n", " C_count/CT_count as freqC \n", " \n", " \n", " FROM [sr320@washington.edu].[_M3methylkit_s1]\u200b\n", "``` " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```sql\n", "SELECT \n", " chr as chr,\n", " start as start,\n", " strand as strand,\n", " CT_count as CT_count,\n", " freqC as freqC\n", " \n", " FROM [sr320@washington.edu].[_M3mehthykit_step2]\u200b\n", "```" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,freqC\r", "\r\n", "C10295,38,+,15,0\r", "\r\n", "C10295,51,+,21,0\r", "\r\n", "C10295,142,+,9,0\r", "\r\n", "C10845,110,+,6,0\r", "\r\n", "C10845,145,+,6,0\r", "\r\n", "C11248,71,+,5,0\r", "\r\n", "C11870,109,+,6,0\r", "\r\n", "C12272,65,+,5,1\r", "\r\n", "C14220,96,+,6,0\r", "\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,38,+,15,0\r", "\r\n", "C10295,51,+,21,0\r", "\r\n", "C10295,142,+,9,0\r", "\r\n", "C10845,110,+,6,0\r", "\r\n", "C10845,145,+,6,0\r", "\r\n", "C11248,71,+,5,0\r", "\r\n", "C11870,109,+,6,0\r", "\r\n", "C12272,65,+,5,1\r", "\r\n", "C14220,96,+,6,0\r", "\r\n", "C14450,60,+,6,0.333333333333333\r", "\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 182475 182475 5044686 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit.csv\r\n" ] } ], "prompt_number": 18 }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#with one line of code... \n", "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input_oner.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,C_count,freqC\r", "\r\n", "C10295,38,+,15,0,0\r", "\r\n", "C10295,51,+,21,0,0\r", "\r\n", "C10295,142,+,9,0,0\r", "\r\n", "C10845,110,+,6,0,0\r", "\r\n", "C10845,145,+,6,0,0\r", "\r\n", "C11248,71,+,5,0,0\r", "\r\n", "C11870,109,+,6,0,0\r", "\r\n", "C12272,65,+,5,5,1\r", "\r\n", "C14220,96,+,6,0,0\r", "\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_input_oner.csv > /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,38,+,15,0,0\r", "\r\n", "C10295,51,+,21,0,0\r", "\r\n", "C10295,142,+,9,0,0\r", "\r\n", "C10845,110,+,6,0,0\r", "\r\n", "C10845,145,+,6,0,0\r", "\r\n", "C11248,71,+,5,0,0\r", "\r\n", "C11870,109,+,6,0,0\r", "\r\n", "C12272,65,+,5,5,1\r", "\r\n", "C14220,96,+,6,0,0\r", "\r\n", "C14450,60,+,6,2,0.333333333333333\r", "\r\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "!wc /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ " 182475 182475 5410598 /Volumes/web/cnidarian/BiGo_lar_M3_methylkit_oner.csv\r\n" ] } ], "prompt_number": 17 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "T3D3" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,freqC\r", "\r\n", "C10295,38,+,14,0\r", "\r\n", "C10295,51,+,21,0\r", "\r\n", "C10295,142,+,7,0\r", "\r\n", "C10845,110,+,9,0\r", "\r\n", "C10845,145,+,6,0\r", "\r\n", "C11870,109,+,8,0\r", "\r\n", "C12052,112,+,7,0\r", "\r\n", "C12570,118,+,5,0\r", "\r\n", "C13546,84,+,5,0\r", "\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_T3D3_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,38,+,14,0\r", "\r\n", "C10295,51,+,21,0\r", "\r\n", "C10295,142,+,7,0\r", "\r\n", "C10845,110,+,9,0\r", "\r\n", "C10845,145,+,6,0\r", "\r\n", "C11870,109,+,8,0\r", "\r\n", "C12052,112,+,7,0\r", "\r\n", "C12570,118,+,5,0\r", "\r\n", "C13546,84,+,5,0\r", "\r\n", "C13878,147,+,5,0\r", "\r\n" ] } ], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "T3D5" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGo_lar_T3D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit_input.csv" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,C_count,freqC\r", "\r\n", "C10295,38,+,15,0,0\r", "\r\n", "C10295,51,+,18,0,0\r", "\r\n", "C10295,142,+,9,0,0\r", "\r\n", "C14220,96,+,5,0,0\r", "\r\n", "C14450,60,+,5,0,0\r", "\r\n", "C14796,69,+,12,10,0.833333333333333\r", "\r\n", "C15066,82,+,7,0,0\r", "\r\n", "C15066,89,+,5,0,0\r", "\r\n", "C15066,193,+,6,0,0\r", "\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,38,+,15,0,0\r", "\r\n", "C10295,51,+,18,0,0\r", "\r\n", "C10295,142,+,9,0,0\r", "\r\n", "C14220,96,+,5,0,0\r", "\r\n", "C14450,60,+,5,0,0\r", "\r\n", "C14796,69,+,12,10,0.833333333333333\r", "\r\n", "C15066,82,+,7,0,0\r", "\r\n", "C15066,89,+,5,0,0\r", "\r\n", "C15066,193,+,6,0,0\r", "\r\n", "C15066,197,+,6,0,0\r", "\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "FILE \n", "/Volumes/web/cnidarian/BiGo_lar_T3D5_methylkit.csv" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "M1" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit_input.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "chr,start,strand,CT_count,C_count,freqC\r", "\r\n", "C10295,51,+,5,0,0\r", "\r\n", "C11141,58,+,5,0,0\r", "\r\n", "C11141,73,+,5,0,0\r", "\r\n", "C11141,78,+,5,0,0\r", "\r\n", "C11848,108,+,6,0,0\r", "\r\n", "C12768,103,+,6,1,0.166666666666667\r", "\r\n", "C13766,126,+,9,0,0\r", "\r\n", "C13766,145,+,8,0,0\r", "\r\n", "C14220,96,+,7,0,0\r", "\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!tail -n +2 /Volumes/web/cnidarian/BiGo_lar_M1_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_M1_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_M1_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,51,+,5,0,0\r", "\r\n", "C11141,58,+,5,0,0\r", "\r\n", "C11141,73,+,5,0,0\r", "\r\n", "C11141,78,+,5,0,0\r", "\r\n", "C11848,108,+,6,0,0\r", "\r\n", "C12768,103,+,6,1,0.166666666666667\r", "\r\n", "C13766,126,+,9,0,0\r", "\r\n", "C13766,145,+,8,0,0\r", "\r\n", "C14220,96,+,7,0,0\r", "\r\n", "C14220,143,+,6,0,0\r", "\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "FILE \n", "/Volumes/web/cnidarian/BiGo_lar_M1_methylkit.csv" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "T1D3" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGo_lar_T1D3] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit_input.csv" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit.csv" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C10295,38,+,7,0,0\r", "\r\n", "C10295,51,+,10,0,0\r", "\r\n", "C12960,123,+,7,0,0\r", "\r\n", "C13766,126,+,6,0,0\r", "\r\n", "C13766,145,+,8,0,0\r", "\r\n", "C13874,46,+,5,0,0\r", "\r\n", "C14796,69,+,11,8,0.727272727272727\r", "\r\n", "C14944,199,+,6,0,0\r", "\r\n", "C15352,208,+,5,0,0\r", "\r\n", "C15814,143,+,6,0,0\r", "\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "FILE \n", "/Volumes/web/cnidarian/BiGo_lar_T1D3_methylkit.csv" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "T1D5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "`python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s \"SELECT chr as chr, pos as start, '+' as strand, cast (CT_count as float) as CT_count, cast (C_count as float) as C_count, cast (C_count as float) / cast (CT_count as float) as freqC FROM [sr320@washington.edu].[BiGo_lar_T1D5] where context like '__CG_' and CT_Count >= 5 and ratio <> 'NA'\" -o /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit_input.csv`" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "tail -n +2 /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit_input.csv > /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit.csv" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head /Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "C13766,145,+,5,0,0\r", "\r\n", "C16318,32,+,7,0,0\r", "\r\n", "C16318,177,+,11,0,0\r", "\r\n", "C16318,196,+,7,0,0\r", "\r\n", "C16318,203,+,7,0,0\r", "\r\n", "C16318,206,+,7,1,0.142857142857143\r", "\r\n", "C16318,302,+,16,0,0\r", "\r\n", "C17734,242,+,5,2,0.4\r", "\r\n", "C18932,429,+,6,0,0\r", "\r\n", "C19344,310,+,6,0,0\r", "\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "FILE \n", "/Volumes/web/cnidarian/BiGo_lar_T1D5_methylkit.csv" ] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }