{
 "metadata": {
  "name": "",
  "signature": "sha256:761d87985a3f607000f91c3913b2df44bbbdf8b01a5e554844bd27f354c42461"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Post Manuscript Review - data crunch"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Run configuration: paths and identifiers that the cells below splice into\n",
      "# shell commands through IPython's {name} interpolation.\n",
      "import time\n",
      "\n",
      "#file ID\n",
      "#fid=\"CgM1\"\n",
      "\n",
      "#TIMESTAMP (MMDD_HHMM, local time) used to tag output file names.\n",
      "# Built in Python so that `date` is a plain string: `date=!date ...` captures a\n",
      "# list-like SList, which {date} does not interpolate as the bare timestamp.\n",
      "date=time.strftime(\"%m%d_%H%M\")\n",
      "\n",
      "#working directory (parent)\n",
      "#wd=\"/Volumes/web/cnidarian/BiGo_larvae_merge/\"\n",
      "\n",
      "#where is bsmap (keep the trailing slash: the program name is appended directly)\n",
      "#bsmap=\"/Users/Shared/Apps/bsmap-2.73/\"\n",
      "bsmap=\"/Volumes/Bay3/Software/BSMAP/bsmap-2.74/\"\n",
      "\n",
      "#reference genome (FASTA) handed to the aligner with -d\n",
      "genome=\"/Volumes/web/cnidarian/oyster.v9.fa\"\n",
      "\n",
      "#fastq files location R1 location\n",
      "R1=\"/Volumes/web/trilobite/Crassostrea_gigas_HTSdata/filtered_174gm_A_BiGosperm_L006_R1.fastq\"\n",
      "\n",
      "#fastq files location R2 location\n",
      "#comment out if SE\n",
      "R2=\"/Volumes/web/trilobite/Crassostrea_gigas_HTSdata/filtered_174gm_A_BiGosperm_L006_R2.fastq\"\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Align the paired-end reads (R1/R2) against the reference genome with bsmap;\n",
      "# the SAM output name carries the run timestamp.\n",
      "#option - number of processes (-p)\n",
      "!{bsmap}bsmap -a {R1} -b {R2} -d {genome} -o /Volumes/web/cnidarian/BiGo_bsmap_v9_{date}.sam -p 3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### bsmap on iplant\n",
      "```\n",
      "Start at: Fri Apr 4 06:42:38 2014\n",
      "\n",
      "Input reference file: oyster.v9.fa.gz \t(format: gzipped FASTA)\n",
      "Load in 11969 db seqs, total size 558601156 bp. 13 secs passed\n",
      "total_kmers: 43046721\n",
      "Create seed table. 
36 secs passed\n",
      "max number of mismatches: read_length * 8% \tmax gap size: 0\n",
      "kmer cut-off ratio: 5e-07\n",
      "max multi-hits: 100\tmax Ns: 5\tseed size: 16\tindex interval: 4\n",
      "quality cutoff: 0\tbase quality char: '!'\n",
      "min fragment size:28\tmax fragemt size:500\n",
      "start from read #1\tend at read #4294967295\n",
      "additional alignment: T in reads => C in reference\n",
      "mapping strand (read_1): ++,-+\n",
      "mapping strand (read_2): +-,--\n",
      "Pair-end alignment(8 threads)\n",
      "Input read file #1: filtered_174gm_A_BiGosperm_L006_R1.fastq \t(format: FASTQ)\n",
      "Input read file #2: filtered_174gm_A_BiGosperm_L006_R2.fastq \t(format: FASTQ)\n",
      "Output file: bsmap_out.sam\t (format: SAM)\n",
      "Thread #2: \t100000 read pairs finished. 52 secs passed\n",
      "Total number of aligned reads: \n",
      "pairs: 90102067 (53%)\n",
      "single a: 17033002 (9.9%)\n",
      "single b: 15975991 (9.3%)\n",
      "Done.\n",
      "Finished at Fri Apr 4 09:02:07 2014\n",
      "Total time consumed: 8369 secs\n",
      "\n",
      "```"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Download bsmap_out.sam from the iPlant data store into the working directory.\n",
      "# --fail: exit non-zero on an HTTP error instead of saving the error page as the .sam file\n",
      "!curl --fail -o 'BiGo_sperm_v9bsmap_out.sam' 'http://de.iplantcollaborative.org/dl/d/AE429A54-2B38-44B0-8AD7-E92DCDA2DEE8/bsmap_out.sam'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        " % Total % Received % Xferd Average Speed Time Time Time Current\r\n",
        " Dload Upload Total Spent Left Speed\r\n",
        "\r",
        " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 1149 0 1149 0 0 937 0 --:--:-- 0:00:01 --:--:-- 1354"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 637k 0 637k 0 0 334k 0 --:--:-- 0:00:01 --:--:-- 417k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 2104k 0 2104k 0 0 722k 0 --:--:-- 0:00:02 --:--:-- 831k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 3610k 0 3610k 0 0 921k 0 --:--:-- 0:00:03 --:--:-- 1019k"
       ]
      },
      {
"output_type": "stream", "stream": "stdout", "text": [ "\r", "100 4990k 0 4990k 0 0 1013k 0 --:--:-- 0:00:04 --:--:-- 1098k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 6488k 0 6488k 0 0 1094k 0 --:--:-- 0:00:05 --:--:-- 1379k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 7934k 0 7934k 0 0 1145k 0 --:--:-- 0:00:06 --:--:-- 1453k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 9352k 0 9352k 0 0 1179k 0 --:--:-- 0:00:07 --:--:-- 1444k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 10.6M 0 10.6M 0 0 1218k 0 --:--:-- 0:00:08 --:--:-- 1452k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 12.0M 0 12.0M 0 0 1244k 0 --:--:-- 0:00:09 --:--:-- 1471k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 13.5M 0 13.5M 0 0 1268k 0 --:--:-- 0:00:10 --:--:-- 1474k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 14.8M 0 14.8M 0 0 1277k 0 --:--:-- 0:00:11 --:--:-- 1462k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 16.1M 0 16.1M 0 0 1285k " ] }, { "output_type": "stream", "stream": "stdout", "text": [ " 0 --:--:-- 0:00:12 --:--:-- 1453k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 17.1M 0 17.1M 0 0 1266k 0 --:--:-- 0:00:13 --:--:-- 1351k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 18.3M 0 18.3M 0 0 1259k 0 --:--:-- 0:00:14 --:--:-- 1289k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 19.6M 0 19.6M 0 0 1264k 0 --:--:-- 0:00:15 --:--:-- 1256k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 20.8M 0 20.8M 0 0 1264k 0 --:--:-- 0:00:16 --:--:-- 1232k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 22.0M 0 22.0M 0 0 1262k 0 --:--:-- 0:00:17 --:--:-- 1205k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\r", "100 23.4M 0 23.4M 0 0 
1268k 0 --:--:-- 0:00:18 --:--:-- 1273k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 24.7M 0 24.7M 0 0 1274k 0 --:--:-- 0:00:19 --:--:-- 1319k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 26.2M 0 26.2M 0 0 1286k 0 --:--:-- 0:00:20 --:--:-- 1358k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 27.6M 0 27.6M 0 0 1293k 0 --:--:-- 0:00:21 --:--:-- 1390k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 29.0M 0 29.0M 0 0 1298k 0 --:--:-- 0:00:22 --:--:-- 1426k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 30.5M 0 30.5M 0 0 1307k 0 --:--:-- 0:00:23 --:--:-- 1453k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 31.8M 0 31.8M 0 0 1309k 0 --:--:-- 0:00:24 --:--:-- 1450k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 33.1M 0 33.1M 0 0 1310k 0 --:--:-- 0:00:25 --:--:-- 1410k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 34.3M 0 34.3M 0 0 1306k 0 --:--:-- 0:00:26 --:--:-- 1367k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 35.5M 0 35.5M 0 0 1303k 0 --:--:-- 0:00:27 --:--:-- 1326k"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r",
        "100 36.7M 0 36.7M 0 0 1301k 0 --:--:-- 0:00:28 --:--:-- 1275k"
       ]
      }
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "running fastqc on iplant"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Call methylation ratios, keep CG-context loci, then write a >=5x-coverage IGV track.\n",
      "# Inputs are set in this cell so it does not depend on leftover kernel state\n",
      "# (only `bsmap`, the install directory from the configuration cell, is reused).\n",
      "# NOTE(review): confirm all three values for this sample before running.\n",
      "\n",
      "#reference genome (same FASTA the bsmap alignment command points at)\n",
      "genome=\"/Volumes/web/cnidarian/oyster.v9.fa\"\n",
      "#SAM file as saved by the curl download cell\n",
      "sam=\"BiGo_sperm_v9bsmap_out.sam\"\n",
      "#file ID used in the final output name (placeholder - TODO confirm)\n",
      "fid=\"BiGo_sperm\"\n",
      "\n",
      "!python {bsmap}methratio.py -d {genome} -u -z -g -o methratio_out.txt -s {bsmap}samtools {sam}\n",
      "#command for only obtaining the context '__CG_'\n",
      "!grep \"[A-Z][A-Z]CG[A-Z]\" methratio_out.txt > methratio_out_CG.txt\n",
      "#5x coverage\n",
      "#keep {name} interpolation off the awk line: its braces and $N fields must reach the shell untouched\n",
      "!awk '{if ($8 >= 5) print $1,$2-1,$2+1,\"CpG\",$5}' methratio_out_CG.txt > filt_methratio_out_CG.igv\n",
      "#spaces -> tabs for the final track file\n",
      "!tr ' ' \"\\t\" < filt_methratio_out_CG.igv > filt_methratio_{fid}.igv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": 
"code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }