{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Archiving of Ostrea lurida (Olympia oyster) GBS data from BGI" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mon Mar 14 16:36:33 PDT 2016\r\n" ] } ], "source": [ "!date" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Print system info" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Software:\n", "\n", " System Software Overview:\n", "\n", " System Version: OS X 10.9.5 (13F34)\n", " Kernel Version: Darwin 13.4.0\n", " Boot Volume: Hummingbird\n", " Boot Mode: Normal\n", " Computer Name: hummingbird\n", " User Name: Sam (Sam)\n", " Secure Virtual Memory: Enabled\n", " Time since boot: 115 days 1:27\n", "\n" ] } ], "source": [ "%%bash\n", "system_profiler SPSoftwareDataType" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### List files provided by BGI" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1HL_10A_1.fq.gz\n", "1HL_10A_2.fq.gz\n", "1HL_11A_1.fq.gz\n", "1HL_11A_2.fq.gz\n", "1HL_12A_1.fq.gz\n", "1HL_12A_2.fq.gz\n", "1HL_13A_1.fq.gz\n", "1HL_13A_2.fq.gz\n", "1HL_14A_1.fq.gz\n", "1HL_14A_2.fq.gz\n", "1HL_15A_1.fq.gz\n", "1HL_15A_2.fq.gz\n", "1HL_16A_1.fq.gz\n", "1HL_16A_2.fq.gz\n", "1HL_17A_1.fq.gz\n", "1HL_17A_2.fq.gz\n", "1HL_19A_1.fq.gz\n", "1HL_19A_2.fq.gz\n", "1HL_1A_1.fq.gz\n", "1HL_1A_2.fq.gz\n", "1HL_20A_1.fq.gz\n", "1HL_20A_2.fq.gz\n", "1HL_21A_1.fq.gz\n", "1HL_21A_2.fq.gz\n", "1HL_22A_1.fq.gz\n", "1HL_22A_2.fq.gz\n", "1HL_23A_1.fq.gz\n", "1HL_23A_2.fq.gz\n", "1HL_24A_1.fq.gz\n", "1HL_24A_2.fq.gz\n", "1HL_25A_1.fq.gz\n", "1HL_25A_2.fq.gz\n", "1HL_26A_1.fq.gz\n", "1HL_26A_2.fq.gz\n", "1HL_27A_1.fq.gz\n", 
"1HL_27A_2.fq.gz\n", "1HL_28A_1.fq.gz\n", "1HL_28A_2.fq.gz\n", "1HL_29A_1.fq.gz\n", "1HL_29A_2.fq.gz\n", "1HL_2A_1.fq.gz\n", "1HL_2A_2.fq.gz\n", "1HL_31A_1.fq.gz\n", "1HL_31A_2.fq.gz\n", "1HL_33A_1.fq.gz\n", "1HL_33A_2.fq.gz\n", "1HL_34A_1.fq.gz\n", "1HL_34A_2.fq.gz\n", "1HL_35A_1.fq.gz\n", "1HL_35A_2.fq.gz\n", "1HL_3A_1.fq.gz\n", "1HL_3A_2.fq.gz\n", "1HL_4A_1.fq.gz\n", "1HL_4A_2.fq.gz\n", "1HL_5A_1.fq.gz\n", "1HL_5A_2.fq.gz\n", "1HL_6A_1.fq.gz\n", "1HL_6A_2.fq.gz\n", "1HL_7A_1.fq.gz\n", "1HL_7A_2.fq.gz\n", "1HL_8A_1.fq.gz\n", "1HL_8A_2.fq.gz\n", "1HL_9A_1.fq.gz\n", "1HL_9A_2.fq.gz\n", "1NF_10A_1.fq.gz\n", "1NF_10A_2.fq.gz\n", "1NF_11A_1.fq.gz\n", "1NF_11A_2.fq.gz\n", "1NF_12A_1.fq.gz\n", "1NF_12A_2.fq.gz\n", "1NF_13A_1.fq.gz\n", "1NF_13A_2.fq.gz\n", "1NF_14A_1.fq.gz\n", "1NF_14A_2.fq.gz\n", "1NF_15A_1.fq.gz\n", "1NF_15A_2.fq.gz\n", "1NF_16A_1.fq.gz\n", "1NF_16A_2.fq.gz\n", "1NF_17A_1.fq.gz\n", "1NF_17A_2.fq.gz\n", "1NF_18A_1.fq.gz\n", "1NF_18A_2.fq.gz\n", "1NF_19A_1.fq.gz\n", "1NF_19A_2.fq.gz\n", "1NF_1A_1.fq.gz\n", "1NF_1A_2.fq.gz\n", "1NF_20A_1.fq.gz\n", "1NF_20A_2.fq.gz\n", "1NF_21A_1.fq.gz\n", "1NF_21A_2.fq.gz\n", "1NF_22A_1.fq.gz\n", "1NF_22A_2.fq.gz\n", "1NF_23A_1.fq.gz\n", "1NF_23A_2.fq.gz\n", "1NF_24A_1.fq.gz\n", "1NF_24A_2.fq.gz\n", "1NF_25A_1.fq.gz\n", "1NF_25A_2.fq.gz\n", "1NF_26A_1.fq.gz\n", "1NF_26A_2.fq.gz\n", "1NF_27A_1.fq.gz\n", "1NF_27A_2.fq.gz\n", "1NF_28A_1.fq.gz\n", "1NF_28A_2.fq.gz\n", "1NF_29A_1.fq.gz\n", "1NF_29A_2.fq.gz\n", "1NF_2A_1.fq.gz\n", "1NF_2A_2.fq.gz\n", "1NF_30A_1.fq.gz\n", "1NF_30A_2.fq.gz\n", "1NF_31A_1.fq.gz\n", "1NF_31A_2.fq.gz\n", "1NF_32A_1.fq.gz\n", "1NF_32A_2.fq.gz\n", "1NF_33A_1.fq.gz\n", "1NF_33A_2.fq.gz\n", "1NF_4A_1.fq.gz\n", "1NF_4A_2.fq.gz\n", "1NF_5A_1.fq.gz\n", "1NF_5A_2.fq.gz\n", "1NF_6A_1.fq.gz\n", "1NF_6A_2.fq.gz\n", "1NF_7A_1.fq.gz\n", "1NF_7A_2.fq.gz\n", "1NF_8A_1.fq.gz\n", "1NF_8A_2.fq.gz\n", "1NF_9A_1.fq.gz\n", "1NF_9A_2.fq.gz\n", "1SN_10A_1.fq.gz\n", "1SN_10A_2.fq.gz\n", "1SN_11A_1.fq.gz\n", 
"1SN_11A_2.fq.gz\n", "1SN_12A_1.fq.gz\n", "1SN_12A_2.fq.gz\n", "1SN_13A_1.fq.gz\n", "1SN_13A_2.fq.gz\n", "1SN_14A_1.fq.gz\n", "1SN_14A_2.fq.gz\n", "1SN_15A_1.fq.gz\n", "1SN_15A_2.fq.gz\n", "1SN_16A_1.fq.gz\n", "1SN_16A_2.fq.gz\n", "1SN_17A_1.fq.gz\n", "1SN_17A_2.fq.gz\n", "1SN_18A_1.fq.gz\n", "1SN_18A_2.fq.gz\n", "1SN_19A_1.fq.gz\n", "1SN_19A_2.fq.gz\n", "1SN_1A_1.fq.gz\n", "1SN_1A_2.fq.gz\n", "1SN_20A_1.fq.gz\n", "1SN_20A_2.fq.gz\n", "1SN_21A_1.fq.gz\n", "1SN_21A_2.fq.gz\n", "1SN_22A_1.fq.gz\n", "1SN_22A_2.fq.gz\n", "1SN_23A_1.fq.gz\n", "1SN_23A_2.fq.gz\n", "1SN_24A_1.fq.gz\n", "1SN_24A_2.fq.gz\n", "1SN_25A_1.fq.gz\n", "1SN_25A_2.fq.gz\n", "1SN_26A_1.fq.gz\n", "1SN_26A_2.fq.gz\n", "1SN_27A_1.fq.gz\n", "1SN_27A_2.fq.gz\n", "1SN_28A_1.fq.gz\n", "1SN_28A_2.fq.gz\n", "1SN_29A_1.fq.gz\n", "1SN_29A_2.fq.gz\n", "1SN_2A_1.fq.gz\n", "1SN_2A_2.fq.gz\n", "1SN_30A_1.fq.gz\n", "1SN_30A_2.fq.gz\n", "1SN_31A_1.fq.gz\n", "1SN_31A_2.fq.gz\n", "1SN_32A_1.fq.gz\n", "1SN_32A_2.fq.gz\n", "1SN_3A_1.fq.gz\n", "1SN_3A_2.fq.gz\n", "1SN_4A_1.fq.gz\n", "1SN_4A_2.fq.gz\n", "1SN_5A_1.fq.gz\n", "1SN_5A_2.fq.gz\n", "1SN_6A_1.fq.gz\n", "1SN_6A_2.fq.gz\n", "1SN_7A_1.fq.gz\n", "1SN_7A_2.fq.gz\n", "1SN_8A_1.fq.gz\n", "1SN_8A_2.fq.gz\n", "1SN_9A_1.fq.gz\n", "1SN_9A_2.fq.gz\n", "UnKnow_1.fq.gz\n", "UnKnow_2.fq.gz\n", "Upload.tar.gz\n", "md5.check\n", "md5.txt\n" ] } ], "source": [ "%%bash\n", "ls /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Count the number of FASTQ files" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 194\n" ] } ], "source": [ "%%bash\n", "ls -1 /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/*.fq.gz | wc -l" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Generate md5 checksums file" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, 
"outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t14m6.296s\n", "user\t1m33.226s\n", "sys\t0m27.272s\n" ] } ], "source": [ "%%bash\n", "\n", "#Directory holding the FASTQ files delivered by BGI.\n", "data_dir=/Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG\n", "\n", "#For loop generates a md5 checksum hash value for each FASTQ file.\n", "#The output of the whole loop is redirected once with \">\", so checksums.md5\n", "#is rebuilt from scratch on every run. Appending with \">>\" inside the loop would\n", "#leave duplicate entries in the file each time this cell is re-run.\n", "time for file in \"$data_dir\"/*.fq.gz\n", "  do\n", "    md5 \"$file\"\n", "  done > \"$data_dir\"/checksums.md5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Calculate total number of reads generated by this project" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "557596520\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t31m30.449s\n", "user\t16m21.149s\n", "sys\t1m54.328s\n" ] } ], "source": [ "%%bash\n", "\n", "#Initializes variable.\n", "totalreads=0\n", "\n", "#For loop counts the lines in each file and divides them by four. 
This is performed because\n", "#Illumina sequencing files are composed of four lines per read.\n", "#A running total of the total number of reads is generated [totalreads=$((readcount+totalreads))]\n", "#and is printed after the for loop completes.\n", "time for file in /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/*.fq.gz\n", " do linecount=`gunzip -c \"$file\" | wc -l`\n", " readcount=$((linecount/4))\n", " totalreads=$((readcount+totalreads))\n", "done\n", "echo $totalreads" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Calculate number of reads per file and append filename and corresponding number of reads to readme file" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1HL_10A_1.fq.gz\t3186565\n", "\n", "1HL_10A_2.fq.gz\t3186565\n", "\n", "1HL_11A_1.fq.gz\t3568503\n", "\n", "1HL_11A_2.fq.gz\t3568503\n", "\n", "1HL_12A_1.fq.gz\t2524813\n", "\n", "1HL_12A_2.fq.gz\t2524813\n", "\n", "1HL_13A_1.fq.gz\t2676425\n", "\n", "1HL_13A_2.fq.gz\t2676425\n", "\n", "1HL_14A_1.fq.gz\t2015611\n", "\n", "1HL_14A_2.fq.gz\t2015611\n", "\n", "1HL_15A_1.fq.gz\t2196324\n", "\n", "1HL_15A_2.fq.gz\t2196324\n", "\n", "1HL_16A_1.fq.gz\t2333158\n", "\n", "1HL_16A_2.fq.gz\t2333158\n", "\n", "1HL_17A_1.fq.gz\t3653761\n", "\n", "1HL_17A_2.fq.gz\t3653761\n", "\n", "1HL_19A_1.fq.gz\t3617984\n", "\n", "1HL_19A_2.fq.gz\t3617984\n", "\n", "1HL_1A_1.fq.gz\t2260430\n", "\n", "1HL_1A_2.fq.gz\t2260430\n", "\n", "1HL_20A_1.fq.gz\t2908938\n", "\n", "1HL_20A_2.fq.gz\t2908938\n", "\n", "1HL_21A_1.fq.gz\t2591845\n", "\n", "1HL_21A_2.fq.gz\t2591845\n", "\n", "1HL_22A_1.fq.gz\t2074229\n", "\n", "1HL_22A_2.fq.gz\t2074229\n", "\n", "1HL_23A_1.fq.gz\t3609794\n", "\n", "1HL_23A_2.fq.gz\t3609794\n", "\n", "1HL_24A_1.fq.gz\t2721808\n", "\n", "1HL_24A_2.fq.gz\t2721808\n", "\n", "1HL_25A_1.fq.gz\t2957874\n", "\n", "1HL_25A_2.fq.gz\t2957874\n", "\n", 
"1HL_26A_1.fq.gz\t3142369\n", "\n", "1HL_26A_2.fq.gz\t3142369\n", "\n", "1HL_27A_1.fq.gz\t3199649\n", "\n", "1HL_27A_2.fq.gz\t3199649\n", "\n", "1HL_28A_1.fq.gz\t3770238\n", "\n", "1HL_28A_2.fq.gz\t3770238\n", "\n", "1HL_29A_1.fq.gz\t3071205\n", "\n", "1HL_29A_2.fq.gz\t3071205\n", "\n", "1HL_2A_1.fq.gz\t2758150\n", "\n", "1HL_2A_2.fq.gz\t2758150\n", "\n", "1HL_31A_1.fq.gz\t2453753\n", "\n", "1HL_31A_2.fq.gz\t2453753\n", "\n", "1HL_33A_1.fq.gz\t2708943\n", "\n", "1HL_33A_2.fq.gz\t2708943\n", "\n", "1HL_34A_1.fq.gz\t2899019\n", "\n", "1HL_34A_2.fq.gz\t2899019\n", "\n", "1HL_35A_1.fq.gz\t2630313\n", "\n", "1HL_35A_2.fq.gz\t2630313\n", "\n", "1HL_3A_1.fq.gz\t2211401\n", "\n", "1HL_3A_2.fq.gz\t2211401\n", "\n", "1HL_4A_1.fq.gz\t2571031\n", "\n", "1HL_4A_2.fq.gz\t2571031\n", "\n", "1HL_5A_1.fq.gz\t1980666\n", "\n", "1HL_5A_2.fq.gz\t1980666\n", "\n", "1HL_6A_1.fq.gz\t2070051\n", "\n", "1HL_6A_2.fq.gz\t2070051\n", "\n", "1HL_7A_1.fq.gz\t2793544\n", "\n", "1HL_7A_2.fq.gz\t2793544\n", "\n", "1HL_8A_1.fq.gz\t2544013\n", "\n", "1HL_8A_2.fq.gz\t2544013\n", "\n", "1HL_9A_1.fq.gz\t2832945\n", "\n", "1HL_9A_2.fq.gz\t2832945\n", "\n", "1NF_10A_1.fq.gz\t2149301\n", "\n", "1NF_10A_2.fq.gz\t2149301\n", "\n", "1NF_11A_1.fq.gz\t2266647\n", "\n", "1NF_11A_2.fq.gz\t2266647\n", "\n", "1NF_12A_1.fq.gz\t2179923\n", "\n", "1NF_12A_2.fq.gz\t2179923\n", "\n", "1NF_13A_1.fq.gz\t3890118\n", "\n", "1NF_13A_2.fq.gz\t3890118\n", "\n", "1NF_14A_1.fq.gz\t2719198\n", "\n", "1NF_14A_2.fq.gz\t2719198\n", "\n", "1NF_15A_1.fq.gz\t2554324\n", "\n", "1NF_15A_2.fq.gz\t2554324\n", "\n", "1NF_16A_1.fq.gz\t2682948\n", "\n", "1NF_16A_2.fq.gz\t2682948\n", "\n", "1NF_17A_1.fq.gz\t3119328\n", "\n", "1NF_17A_2.fq.gz\t3119328\n", "\n", "1NF_18A_1.fq.gz\t1750070\n", "\n", "1NF_18A_2.fq.gz\t1750070\n", "\n", "1NF_19A_1.fq.gz\t2036640\n", "\n", "1NF_19A_2.fq.gz\t2036640\n", "\n", "1NF_1A_1.fq.gz\t1355760\n", "\n", "1NF_1A_2.fq.gz\t1355760\n", "\n", "1NF_20A_1.fq.gz\t2457026\n", "\n", "1NF_20A_2.fq.gz\t2457026\n", "\n", 
"1NF_21A_1.fq.gz\t3046937\n", "\n", "1NF_21A_2.fq.gz\t3046937\n", "\n", "1NF_22A_1.fq.gz\t2895176\n", "\n", "1NF_22A_2.fq.gz\t2895176\n", "\n", "1NF_23A_1.fq.gz\t3206742\n", "\n", "1NF_23A_2.fq.gz\t3206742\n", "\n", "1NF_24A_1.fq.gz\t3038626\n", "\n", "1NF_24A_2.fq.gz\t3038626\n", "\n", "1NF_25A_1.fq.gz\t2203229\n", "\n", "1NF_25A_2.fq.gz\t2203229\n", "\n", "1NF_26A_1.fq.gz\t2934794\n", "\n", "1NF_26A_2.fq.gz\t2934794\n", "\n", "1NF_27A_1.fq.gz\t2272652\n", "\n", "1NF_27A_2.fq.gz\t2272652\n", "\n", "1NF_28A_1.fq.gz\t2344763\n", "\n", "1NF_28A_2.fq.gz\t2344763\n", "\n", "1NF_29A_1.fq.gz\t3829981\n", "\n", "1NF_29A_2.fq.gz\t3829981\n", "\n", "1NF_2A_1.fq.gz\t3452931\n", "\n", "1NF_2A_2.fq.gz\t3452931\n", "\n", "1NF_30A_1.fq.gz\t2701047\n", "\n", "1NF_30A_2.fq.gz\t2701047\n", "\n", "1NF_31A_1.fq.gz\t3561574\n", "\n", "1NF_31A_2.fq.gz\t3561574\n", "\n", "1NF_32A_1.fq.gz\t3477986\n", "\n", "1NF_32A_2.fq.gz\t3477986\n", "\n", "1NF_33A_1.fq.gz\t2602951\n", "\n", "1NF_33A_2.fq.gz\t2602951\n", "\n", "1NF_4A_1.fq.gz\t3582899\n", "\n", "1NF_4A_2.fq.gz\t3582899\n", "\n", "1NF_5A_1.fq.gz\t2765984\n", "\n", "1NF_5A_2.fq.gz\t2765984\n", "\n", "1NF_6A_1.fq.gz\t2423888\n", "\n", "1NF_6A_2.fq.gz\t2423888\n", "\n", "1NF_7A_1.fq.gz\t2590507\n", "\n", "1NF_7A_2.fq.gz\t2590507\n", "\n", "1NF_8A_1.fq.gz\t2772580\n", "\n", "1NF_8A_2.fq.gz\t2772580\n", "\n", "1NF_9A_1.fq.gz\t3380498\n", "\n", "1NF_9A_2.fq.gz\t3380498\n", "\n", "1SN_10A_1.fq.gz\t2558667\n", "\n", "1SN_10A_2.fq.gz\t2558667\n", "\n", "1SN_11A_1.fq.gz\t2583992\n", "\n", "1SN_11A_2.fq.gz\t2583992\n", "\n", "1SN_12A_1.fq.gz\t3423902\n", "\n", "1SN_12A_2.fq.gz\t3423902\n", "\n", "1SN_13A_1.fq.gz\t2956991\n", "\n", "1SN_13A_2.fq.gz\t2956991\n", "\n", "1SN_14A_1.fq.gz\t3544290\n", "\n", "1SN_14A_2.fq.gz\t3544290\n", "\n", "1SN_15A_1.fq.gz\t2506325\n", "\n", "1SN_15A_2.fq.gz\t2506325\n", "\n", "1SN_16A_1.fq.gz\t2616966\n", "\n", "1SN_16A_2.fq.gz\t2616966\n", "\n", "1SN_17A_1.fq.gz\t2983401\n", "\n", "1SN_17A_2.fq.gz\t2983401\n", 
"\n", "1SN_18A_1.fq.gz\t3248512\n", "\n", "1SN_18A_2.fq.gz\t3248512\n", "\n", "1SN_19A_1.fq.gz\t3036463\n", "\n", "1SN_19A_2.fq.gz\t3036463\n", "\n", "1SN_1A_1.fq.gz\t3040537\n", "\n", "1SN_1A_2.fq.gz\t3040537\n", "\n", "1SN_20A_1.fq.gz\t2155177\n", "\n", "1SN_20A_2.fq.gz\t2155177\n", "\n", "1SN_21A_1.fq.gz\t3540618\n", "\n", "1SN_21A_2.fq.gz\t3540618\n", "\n", "1SN_22A_1.fq.gz\t3060365\n", "\n", "1SN_22A_2.fq.gz\t3060365\n", "\n", "1SN_23A_1.fq.gz\t3696257\n", "\n", "1SN_23A_2.fq.gz\t3696257\n", "\n", "1SN_24A_1.fq.gz\t1885265\n", "\n", "1SN_24A_2.fq.gz\t1885265\n", "\n", "1SN_25A_1.fq.gz\t3037089\n", "\n", "1SN_25A_2.fq.gz\t3037089\n", "\n", "1SN_26A_1.fq.gz\t2758873\n", "\n", "1SN_26A_2.fq.gz\t2758873\n", "\n", "1SN_27A_1.fq.gz\t2607809\n", "\n", "1SN_27A_2.fq.gz\t2607809\n", "\n", "1SN_28A_1.fq.gz\t2841587\n", "\n", "1SN_28A_2.fq.gz\t2841587\n", "\n", "1SN_29A_1.fq.gz\t2257274\n", "\n", "1SN_29A_2.fq.gz\t2257274\n", "\n", "1SN_2A_1.fq.gz\t3080107\n", "\n", "1SN_2A_2.fq.gz\t3080107\n", "\n", "1SN_30A_1.fq.gz\t3840203\n", "\n", "1SN_30A_2.fq.gz\t3840203\n", "\n", "1SN_31A_1.fq.gz\t3353715\n", "\n", "1SN_31A_2.fq.gz\t3353715\n", "\n", "1SN_32A_1.fq.gz\t2552635\n", "\n", "1SN_32A_2.fq.gz\t2552635\n", "\n", "1SN_3A_1.fq.gz\t2442489\n", "\n", "1SN_3A_2.fq.gz\t2442489\n", "\n", "1SN_4A_1.fq.gz\t3294871\n", "\n", "1SN_4A_2.fq.gz\t3294871\n", "\n", "1SN_5A_1.fq.gz\t3000852\n", "\n", "1SN_5A_2.fq.gz\t3000852\n", "\n", "1SN_6A_1.fq.gz\t3354310\n", "\n", "1SN_6A_2.fq.gz\t3354310\n", "\n", "1SN_7A_1.fq.gz\t2964594\n", "\n", "1SN_7A_2.fq.gz\t2964594\n", "\n", "1SN_8A_1.fq.gz\t3269810\n", "\n", "1SN_8A_2.fq.gz\t3269810\n", "\n", "1SN_9A_1.fq.gz\t3177205\n", "\n", "1SN_9A_2.fq.gz\t3177205\n", "\n", "UnKnow_1.fq.gz\t7344729\n", "\n", "UnKnow_2.fq.gz\t7344729\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t35m56.559s\n", "user\t16m21.347s\n", "sys\t1m55.365s\n" ] } ], "source": [ "%%bash\n", "\n", "#For loop counts the lines in each file and 
divides them by four. This is performed because\n", "#Illumina sequencing files are composed of four lines per read.\n", "#Format the output (printf) to print the filename, followed by a tab, followed by the readcount.\n", "#The command \"tee -a\" is used to both print the output to the screen and append the output to the readme.md file.\n", "time for file in /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/*.fq.gz\n", " do linecount=`gunzip -c \"$file\" | wc -l`\n", " readcount=$(($linecount/4))\n", " printf \"%s\\t%s\\n\\n\" \"${file##*/}\" \"$readcount\" | tee -a /Volumes/nightingales/O_lurida/F15FTSUSAT0768_OYSzenG/readme.md\n", "done" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }