{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mon May 16 08:27:10 PDT 2016\r\n" ] } ], "source": [ "!date" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Software:\n", "\n", " System Software Overview:\n", "\n", " System Version: Mac OS X 10.7.5 (11G63)\n", " Kernel Version: Darwin 11.4.2\n", " Boot Volume: SSD2\n", " Boot Mode: Normal\n", " Computer Name: greenbird (2)\n", " User Name: Sam (Sam)\n", " Secure Virtual Memory: Enabled\n", " 64-bit Kernel and Extensions: No\n", " Time since boot: 5 days 1:46\n", "\n" ] } ], "source": [ "%%bash\n", "system_profiler SPSoftwareDataType" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " Model Name: Mac Pro\n", " Model Identifier: MacPro1,1\n", " Processor Name: Dual-Core Intel Xeon\n", " Processor Speed: 3 GHz\n", " Number of Processors: 2\n", " Total Number of Cores: 4\n", " L2 Cache (per Processor): 4 MB\n", " Memory: 14 GB\n", " Bus Speed: 1.33 GHz\n", " Boot ROM Version: MP11.005C.B08\n", " SMC Version (system): 1.7f10\n", "\n" ] } ], "source": [ "%%bash\n", "#Uses grep to exclude lines that display serial number and hardware UUID\n", "system_profiler SPHardwareDataType | grep -v [SH][ea]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### List files provided by BGI" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_1.fq.gz.clean.dup.clean.gz\n", "151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_2.fq.gz.clean.dup.clean.gz\n", "151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_1.fq.gz.clean.dup.clean.gz\n", 
"151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_2.fq.gz.clean.dup.clean.gz\n", "151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_1.fq.gz.clean.dup.clean.gz\n", "151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_2.fq.gz.clean.dup.clean.gz\n", "20160512_F15FTSUSAT0327_genome_survey.pdf\n", "Ostrea_lurida.GC_content_vs_depth.png\n", "Ostrea_lurida.scafSeq\n", "README\n", "md5.txt\n" ] } ], "source": [ "%%bash\n", "ls /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create checksums file" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t43m39.240s\n", "user\t1m32.906s\n", "sys\t2m27.982s\n" ] } ], "source": [ "%%bash\n", "\n", "#For loop generates a md5 checksum has value for each file\n", "#and appends the output to the checksums.md5 file.\n", "time for file in /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/*.gz\n", " do\n", " md5 \"$file\" >> /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/checksums.md5\n", " done" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Calculate total number of reads generated by this project.ΒΆ\n", "#### Calculate number of reads per file, append filename and corresponding number of reads to readme file." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_1.fq.gz.clean.dup.clean.gz\t61253141\n", "151114_I191_FCH3Y35BCXX_L1_wHAIPI023992-37_2.fq.gz.clean.dup.clean.gz\t61253141\n", "151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_1.fq.gz.clean.dup.clean.gz\t58755925\n", "151114_I191_FCH3Y35BCXX_L2_wHAMPI023991-66_2.fq.gz.clean.dup.clean.gz\t58755925\n", "151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_1.fq.gz.clean.dup.clean.gz\t43938762\n", "151118_I137_FCH3KNJBBXX_L5_wHAXPI023905-96_2.fq.gz.clean.dup.clean.gz\t43938762\n", "327895656\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t59m24.186s\n", "user\t19m15.868s\n", "sys\t3m29.110s\n" ] } ], "source": [ "%%bash\n", "\n", "#Initializes variable.\n", "totalreads=0\n", "\n", "#For loop counts the lines in each file and divides them by four. This is performed because\n", "#Illumina sequencing files are composed of four lines per read.\n", "#A running total of the total number of reads is generated [totalreads=$((readcount+totalreads))]\n", "#and is printed after the for loop completes.\n", "\n", "#Format the output (printf) to print the filename, followed by a tab, followed by the readcount.\n", "#The command \"tee -a\" is used to both print the output to the screen and append the output to the readme.md file.\n", "#Command substitution uses the $(...) form rather than legacy backticks.\n", "time for file in /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/*.gz\n", " do linecount=$(gunzip -c \"$file\" | wc -l)\n", " readcount=$((linecount/4))\n", " totalreads=$((readcount+totalreads))\n", " printf \"%s\\t%s\\n\" \"${file##*/}\" \"$readcount\" | tee -a /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/readme.md\n", "done\n", "echo \"$totalreads\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Count the number of sequences in the scafSeq (FASTA format) file" ] }, { "cell_type": "code", "execution_count": 8,
"metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 765755\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t1m7.420s\n", "user\t0m0.919s\n", "sys\t0m2.288s\n" ] } ], "source": [ "%%bash\n", "time grep \">\" /Volumes/web/O_lurida_genome_assemblies_BGI/20160512/Ostrea_lurida.scafSeq | wc -l" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }