{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mon Apr 11 07:47:32 PDT 2016\r\n" ] } ], "source": [ "!date" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Software:\n", "\n", " System Software Overview:\n", "\n", " System Version: OS X 10.9.5 (13F34)\n", " Kernel Version: Darwin 13.4.0\n", " Boot Volume: Hummingbird\n", " Boot Mode: Normal\n", " Computer Name: hummingbird\n", " User Name: Sam (Sam)\n", " Secure Virtual Memory: Enabled\n", " Time since boot: 142 days 16:38\n", "\n" ] } ], "source": [ "%%bash\n", "system_profiler SPSoftwareDataType" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Volumes/nightingales/O_lurida/20160203_mbdseq\n" ] } ], "source": [ "cd /Volumes/nightingales/O_lurida/20160203_mbdseq/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Concatenate entire set of FASTQ files for each individual" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### The commands below use a for loop to:\n", "1. process each FASTQ file corresponding to an individual (`zr1394_n_*`)\n", "2. concatenate each file (`cat $file`) to the designated output file (`${file/_s*_R1/}`)\n", "\n", "The output file is named using bash parameter expansion. It takes the file name (`$file`) and replaces the first match of the designated pattern (`_s*_R1`) with whatever follows the last slash - in this case, it's replaced with an empty string (i.e. the matched text is deleted)."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "%%bash\n",
 "# Merge all lane-level FASTQ files of each individual (1-18) into one file per individual.\n",
 "# Output name = input name with the first match of _s*_R1 deleted (bash pattern substitution).\n",
 "set -e\n",
 "shopt -s nullglob  # an unmatched glob expands to nothing instead of to itself\n",
 "\n",
 "for n in {1..18}; do\n",
 "    # Only names containing _s*_R1 are inputs. For any other name the substitution is a\n",
 "    # no-op, so the file would be appended to itself.\n",
 "    inputs=(zr1394_${n}_s*_R1*)\n",
 "    if [ \"${#inputs[@]}\" -eq 0 ]; then\n",
 "        echo \"No input FASTQ files found for zr1394_${n}\" >&2\n",
 "        exit 1\n",
 "    fi\n",
 "\n",
 "    # Remove earlier results first: '>>' appends, so re-running the cell would otherwise\n",
 "    # store every read twice.\n",
 "    for file in \"${inputs[@]}\"; do\n",
 "        rm -f \"${file/_s*_R1/}\"\n",
 "    done\n",
 "\n",
 "    for file in \"${inputs[@]}\"; do\n",
 "        cat \"$file\" >> \"${file/_s*_R1/}\"\n",
 "    done\n",
 "done"
 ] }, { "cell_type": "code", "execution_count": 147, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-rw-rw- 1 Sam staff 2.4G Apr 11 14:33 zr1394_1.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 3.7G Apr 11
14:50 zr1394_10.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.8G Apr 11 14:52 zr1394_11.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.8G Apr 11 14:55 zr1394_12.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 3.0G Apr 11 14:59 zr1394_13.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 4.6G Apr 11 15:03 zr1394_14.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 3.2G Apr 11 15:06 zr1394_15.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 3.5G Apr 11 15:09 zr1394_16.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.2G Apr 11 15:11 zr1394_17.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.5G Apr 11 2016 zr1394_18.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.4G Apr 11 14:35 zr1394_2.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.5G Apr 11 14:37 zr1394_3.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.1G Apr 11 14:38 zr1394_4.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 3.0G Apr 11 14:40 zr1394_5.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.3G Apr 11 14:42 zr1394_6.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.6G Apr 11 14:43 zr1394_7.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 2.0G Apr 11 14:45 zr1394_8.fastq.gz\n", "-rw-rw-rw- 1 Sam staff 3.9G Apr 11 14:47 zr1394_9.fastq.gz\n" ] } ], "source": [ "%%bash\n", "\n", "ls -lh zr1394_{1..18}.fastq.gz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Concatenate the s4, s5, & s6 FASTQ files for each individual" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### The commands below use a for loop to:\n", "1. process the s4, s5, & s6 FASTQ files corresponding to an individual (`zr1394_n_s{4..6}*`)\n", "2. concatenate each file (`cat $file`) to the designated output file (`${file/_s*_R1/_s456}`)\n", "\n", "The output file is named using bash parameter expansion. It takes the file name (`$file`) and replaces the first match of the designated pattern (`_s*_R1`) with whatever follows the last slash - in this case, it's replaced with \"_s456\"."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [
 "%%bash\n",
 "# Merge only the s4, s5 and s6 FASTQ files of each individual (1-18).\n",
 "# Output name = input name with the first match of _s*_R1 replaced by _s456.\n",
 "set -e\n",
 "shopt -s nullglob  # an unmatched glob expands to nothing instead of to itself\n",
 "\n",
 "for n in {1..18}; do\n",
 "    # Requiring _R1 in the name keeps an already written zr1394_<n>_s456* file out of the\n",
 "    # input list; a bare s4* glob would match it and the file would be appended to itself.\n",
 "    inputs=(zr1394_${n}_s{4..6}*_R1*)\n",
 "    if [ \"${#inputs[@]}\" -eq 0 ]; then\n",
 "        echo \"No s4-s6 FASTQ files found for zr1394_${n}\" >&2\n",
 "        exit 1\n",
 "    fi\n",
 "\n",
 "    # Remove earlier results first: '>>' appends, so re-running the cell would otherwise\n",
 "    # store every read twice.\n",
 "    for file in \"${inputs[@]}\"; do\n",
 "        rm -f \"${file/_s*_R1/_s456}\"\n",
 "    done\n",
 "\n",
 "    for file in \"${inputs[@]}\"; do\n",
 "        cat \"$file\" >> \"${file/_s*_R1/_s456}\"\n",
 "    done\n",
 "done"
 ] }, { "cell_type": "code", "execution_count":
166, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-rw-rw- 1 Sam staff 1.2G Apr 11 15:22 zr1394_8_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.4G Apr 11 2016 zr1394_17_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.4G Apr 11 15:18 zr1394_4_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.5G Apr 11 15:14 zr1394_1_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.5G Apr 11 15:16 zr1394_2_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.5G Apr 11 15:17 zr1394_3_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.5G Apr 11 15:19 zr1394_5_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.5G Apr 11 15:20 zr1394_6_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.6G Apr 11 15:22 zr1394_7_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.7G Apr 11 2016 zr1394_18_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.7G Apr 11 15:27 zr1394_11_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 1.9G Apr 11 15:29 zr1394_12_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 2.0G Apr 11 15:30 zr1394_13_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 2.2G Apr 11 15:35 zr1394_15_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 2.4G Apr 11 15:24 zr1394_9_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 2.4G Apr 11 15:37 zr1394_16_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 2.5G Apr 11 15:26 zr1394_10_s456.fastq.gz\r\n", "-rw-rw-rw- 1 Sam staff 2.8G Apr 11 15:32 zr1394_14_s456.fastq.gz\r\n" ] } ], "source": [ "ls -lh zr1394_*_s456.fastq.gz | sort" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }