{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [
  "# Concatenate FastQ files from FASTQ-100219 and FASTQ-102319\n",
  "\n",
  "For each FastQ file in `FASTQ-100219`, this notebook concatenates it with the file at the same position in `FASTQ-102319`, writes the result to the working directory under the original filename, and then copies the concatenated files to `owl` with rsync." ] },
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "TODAY'S DATE:\n", "Thu Oct 24 15:42:18 PDT 2019\n", "------------\n", "\n",
  "Distributor ID:\tUbuntu\n", "Description:\tUbuntu 16.04.6 LTS\n", "Release:\t16.04\n", "Codename:\txenial\n", "\n",
  "------------\n", "HOSTNAME: \n", "swoose\n", "\n",
  "------------\n", "Computer Specs:\n", "\n",
  "Architecture: x86_64\n", "CPU op-mode(s): 32-bit, 64-bit\n", "Byte Order: Little Endian\n", "CPU(s): 24\n", "On-line CPU(s) list: 0-23\n", "Thread(s) per core: 2\n", "Core(s) per socket: 6\n", "Socket(s): 2\n", "NUMA node(s): 1\n", "Vendor ID: GenuineIntel\n", "CPU family: 6\n", "Model: 44\n", "Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz\n", "Stepping: 2\n", "CPU MHz: 2925.798\n", "BogoMIPS: 5851.95\n", "Virtualization: VT-x\n", "L1d cache: 32K\n", "L1i cache: 32K\n", "L2 cache: 256K\n", "L3 cache: 12288K\n", "NUMA node0 CPU(s): 0-23\n",
  "Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb ssbd ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat flush_l1d\n",
  "\n", "------------\n", "\n", "Memory Specs\n", "\n",
  " total used free shared buff/cache available\n", "Mem: 70G 16G 494M 479M 53G 53G\n", "Swap: 4.7G 1.7G 3.0G\n" ] },
  { "name": "stderr", "output_type": "stream", "text": [ "No LSB modules are available.\n" ] } ],
  "source": [
  "%%bash\n",
  "# Record when and on which machine this notebook was run\n",
  "echo \"TODAY'S DATE:\"\n",
  "date\n",
  "echo \"------------\"\n",
  "echo \"\"\n",
  "# Display operating system info\n",
  "lsb_release -a\n",
  "echo \"\"\n",
  "echo \"------------\"\n",
  "echo \"HOSTNAME: \"\n",
  "hostname\n",
  "echo \"\"\n",
  "echo \"------------\"\n",
  "echo \"Computer Specs:\"\n",
  "echo \"\"\n",
  "# Display CPU info\n",
  "lscpu\n",
  "echo \"\"\n",
  "echo \"------------\"\n",
  "echo \"\"\n",
  "echo \"Memory Specs\"\n",
  "echo \"\"\n",
  "# Display memory and swap usage in human-readable units\n",
  "free -mh" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "### Set variables\n",
  "`%env` variables are exported to the kernel's environment, so they can be read inside `%%bash` cells.\n",
  "\n",
  "NOTE: Paths need to be absolute. A path starting with `~` did not work here: `%env` stores the value as written, and bash does not tilde-expand the contents of a variable." ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "env: wd=/home/sam/analyses/20191024_cbai_fastq_concatenation\n",
  "env: fastq_100219_dir=/home/sam/Downloads/FASTQ-100219/\n",
  "env: fastq_102319_dir=/home/sam/Downloads/FASTQ-102319/\n" ] } ],
  "source": [
  "# Working directory: kept as a Python variable (used by %cd below) and\n",
  "# exported from that variable so the path is written in exactly one place.\n",
  "wd = \"/home/sam/analyses/20191024_cbai_fastq_concatenation\"\n",
  "%env wd=$wd\n",
  "# Directories holding the two sets of FastQ files to be concatenated\n",
  "%env fastq_100219_dir=/home/sam/Downloads/FASTQ-100219/\n",
  "%env fastq_102319_dir=/home/sam/Downloads/FASTQ-102319/" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Make directory if it doesn't exist" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
  "%%bash\n",
  "# --parents creates missing parent directories and is a no-op if the directory already exists\n",
  "mkdir --parents \"${wd}\"" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Change to working directory" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "/home/sam/analyses/20191024_cbai_fastq_concatenation\n" ] } ],
  "source": [
  "# Explicit %cd magic so this works even when automagic is disabled\n",
  "%cd {wd}" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "### Concatenate FastQ files\n",
  "Files from the two directories are paired by their position in the sorted `*.gz` listing. Each pair is concatenated (`FASTQ-100219` file first, then `FASTQ-102319` file) into a file of the same name in the current directory.\n",
  "\n",
  "gzip files can be concatenated directly; the result is a valid multi-member gzip file." ] },
 { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "Adding /home/sam/Downloads/FASTQ-100219/329774_S1_L001_R1_001.fastq.gz to fastq_100219_array\n",
  "Adding /home/sam/Downloads/FASTQ-100219/329774_S1_L001_R2_001.fastq.gz to fastq_100219_array\n",
  "Adding /home/sam/Downloads/FASTQ-100219/329774_S1_L002_R1_001.fastq.gz to fastq_100219_array\n",
  "Adding /home/sam/Downloads/FASTQ-100219/329774_S1_L002_R2_001.fastq.gz to fastq_100219_array\n", 
"Adding /home/sam/Downloads/FASTQ-100219/329775_S2_L001_R1_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329775_S2_L001_R2_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329775_S2_L002_R1_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329775_S2_L002_R2_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329776_S3_L001_R1_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329776_S3_L001_R2_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329776_S3_L002_R1_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329776_S3_L002_R2_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329777_S4_L001_R1_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329777_S4_L001_R2_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329777_S4_L002_R1_001.fastq.gz to fastq_100219_array\n", "Adding /home/sam/Downloads/FASTQ-100219/329777_S4_L002_R2_001.fastq.gz to fastq_100219_array\n", "\n", "Adding /home/sam/Downloads/FASTQ-102319/329774_S1_L001_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329774_S1_L001_R2_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329774_S1_L002_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329774_S1_L002_R2_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329775_S2_L001_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329775_S2_L001_R2_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329775_S2_L002_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329775_S2_L002_R2_001.fastq.gz to fastq_102319_array\n", "Adding 
/home/sam/Downloads/FASTQ-102319/329776_S3_L001_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329776_S3_L001_R2_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329776_S3_L002_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329776_S3_L002_R2_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329777_S4_L001_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329777_S4_L001_R2_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329777_S4_L002_R1_001.fastq.gz to fastq_102319_array\n", "Adding /home/sam/Downloads/FASTQ-102319/329777_S4_L002_R2_001.fastq.gz to fastq_102319_array\n", "\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329774_S1_L001_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329774_S1_L001_R1_001.fastq.gz to 329774_S1_L001_R1_001.fastq.gz...\n", "2300543 /home/sam/Downloads/FASTQ-100219/329774_S1_L001_R1_001.fastq.gz\n", "4282884 /home/sam/Downloads/FASTQ-102319/329774_S1_L001_R1_001.fastq.gz\n", "6583427 329774_S1_L001_R1_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329774_S1_L001_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329774_S1_L001_R2_001.fastq.gz to 329774_S1_L001_R2_001.fastq.gz...\n", "2257049 /home/sam/Downloads/FASTQ-100219/329774_S1_L001_R2_001.fastq.gz\n", "4209231 /home/sam/Downloads/FASTQ-102319/329774_S1_L001_R2_001.fastq.gz\n", "6466280 329774_S1_L001_R2_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329774_S1_L002_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329774_S1_L002_R1_001.fastq.gz to 329774_S1_L002_R1_001.fastq.gz...\n", "2265690 /home/sam/Downloads/FASTQ-100219/329774_S1_L002_R1_001.fastq.gz\n", "3483571 /home/sam/Downloads/FASTQ-102319/329774_S1_L002_R1_001.fastq.gz\n", "5749261 329774_S1_L002_R1_001.fastq.gz\n", "Now concatenating 
/home/sam/Downloads/FASTQ-100219/329774_S1_L002_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329774_S1_L002_R2_001.fastq.gz to 329774_S1_L002_R2_001.fastq.gz...\n", "2227691 /home/sam/Downloads/FASTQ-100219/329774_S1_L002_R2_001.fastq.gz\n", "3427731 /home/sam/Downloads/FASTQ-102319/329774_S1_L002_R2_001.fastq.gz\n", "5655422 329774_S1_L002_R2_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329775_S2_L001_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329775_S2_L001_R1_001.fastq.gz to 329775_S2_L001_R1_001.fastq.gz...\n", "1750105 /home/sam/Downloads/FASTQ-100219/329775_S2_L001_R1_001.fastq.gz\n", "3270584 /home/sam/Downloads/FASTQ-102319/329775_S2_L001_R1_001.fastq.gz\n", "5020689 329775_S2_L001_R1_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329775_S2_L001_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329775_S2_L001_R2_001.fastq.gz to 329775_S2_L001_R2_001.fastq.gz...\n", "1773269 /home/sam/Downloads/FASTQ-100219/329775_S2_L001_R2_001.fastq.gz\n", "3285295 /home/sam/Downloads/FASTQ-102319/329775_S2_L001_R2_001.fastq.gz\n", "5058564 329775_S2_L001_R2_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329775_S2_L002_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329775_S2_L002_R1_001.fastq.gz to 329775_S2_L002_R1_001.fastq.gz...\n", "1725318 /home/sam/Downloads/FASTQ-100219/329775_S2_L002_R1_001.fastq.gz\n", "2672797 /home/sam/Downloads/FASTQ-102319/329775_S2_L002_R1_001.fastq.gz\n", "4398115 329775_S2_L002_R1_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329775_S2_L002_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329775_S2_L002_R2_001.fastq.gz to 329775_S2_L002_R2_001.fastq.gz...\n", "1757707 /home/sam/Downloads/FASTQ-100219/329775_S2_L002_R2_001.fastq.gz\n", "2712314 /home/sam/Downloads/FASTQ-102319/329775_S2_L002_R2_001.fastq.gz\n", "4470021 329775_S2_L002_R2_001.fastq.gz\n", "Now concatenating 
/home/sam/Downloads/FASTQ-100219/329776_S3_L001_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329776_S3_L001_R1_001.fastq.gz to 329776_S3_L001_R1_001.fastq.gz...\n", "2135788 /home/sam/Downloads/FASTQ-100219/329776_S3_L001_R1_001.fastq.gz\n", "4277244 /home/sam/Downloads/FASTQ-102319/329776_S3_L001_R1_001.fastq.gz\n", "6413032 329776_S3_L001_R1_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329776_S3_L001_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329776_S3_L001_R2_001.fastq.gz to 329776_S3_L001_R2_001.fastq.gz...\n", "2103280 /home/sam/Downloads/FASTQ-100219/329776_S3_L001_R2_001.fastq.gz\n", "4238439 /home/sam/Downloads/FASTQ-102319/329776_S3_L001_R2_001.fastq.gz\n", "6341719 329776_S3_L001_R2_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329776_S3_L002_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329776_S3_L002_R1_001.fastq.gz to 329776_S3_L002_R1_001.fastq.gz...\n", "2108386 /home/sam/Downloads/FASTQ-100219/329776_S3_L002_R1_001.fastq.gz\n", "3455578 /home/sam/Downloads/FASTQ-102319/329776_S3_L002_R1_001.fastq.gz\n", "5563964 329776_S3_L002_R1_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329776_S3_L002_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329776_S3_L002_R2_001.fastq.gz to 329776_S3_L002_R2_001.fastq.gz...\n", "2081109 /home/sam/Downloads/FASTQ-100219/329776_S3_L002_R2_001.fastq.gz\n", "3427550 /home/sam/Downloads/FASTQ-102319/329776_S3_L002_R2_001.fastq.gz\n", "5508659 329776_S3_L002_R2_001.fastq.gz\n", "Now concatenating /home/sam/Downloads/FASTQ-100219/329777_S4_L001_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329777_S4_L001_R1_001.fastq.gz to 329777_S4_L001_R1_001.fastq.gz...\n", "2300923 /home/sam/Downloads/FASTQ-100219/329777_S4_L001_R1_001.fastq.gz\n", "3170048 /home/sam/Downloads/FASTQ-102319/329777_S4_L001_R1_001.fastq.gz\n", "5470971 329777_S4_L001_R1_001.fastq.gz\n", "Now concatenating 
/home/sam/Downloads/FASTQ-100219/329777_S4_L001_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329777_S4_L001_R2_001.fastq.gz to 329777_S4_L001_R2_001.fastq.gz...\n",
  "2298157 /home/sam/Downloads/FASTQ-100219/329777_S4_L001_R2_001.fastq.gz\n",
  "3136767 /home/sam/Downloads/FASTQ-102319/329777_S4_L001_R2_001.fastq.gz\n",
  "5434924 329777_S4_L001_R2_001.fastq.gz\n",
  "Now concatenating /home/sam/Downloads/FASTQ-100219/329777_S4_L002_R1_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329777_S4_L002_R1_001.fastq.gz to 329777_S4_L002_R1_001.fastq.gz...\n",
  "2262007 /home/sam/Downloads/FASTQ-100219/329777_S4_L002_R1_001.fastq.gz\n",
  "2621485 /home/sam/Downloads/FASTQ-102319/329777_S4_L002_R1_001.fastq.gz\n",
  "4883492 329777_S4_L002_R1_001.fastq.gz\n",
  "Now concatenating /home/sam/Downloads/FASTQ-100219/329777_S4_L002_R2_001.fastq.gz and /home/sam/Downloads/FASTQ-102319/329777_S4_L002_R2_001.fastq.gz to 329777_S4_L002_R2_001.fastq.gz...\n",
  "2274328 /home/sam/Downloads/FASTQ-100219/329777_S4_L002_R2_001.fastq.gz\n",
  "2619848 /home/sam/Downloads/FASTQ-102319/329777_S4_L002_R2_001.fastq.gz\n",
  "4894176 329777_S4_L002_R2_001.fastq.gz\n" ] } ],
  "source": [
  "%%bash\n",
  "# Stop at the first error, unset variable, or failed pipeline stage\n",
  "set -euo pipefail\n",
  "\n",
  "# Write output into the working directory even if the kernel's cwd has changed\n",
  "cd \"${wd}\"\n",
  "\n",
  "# Let an unmatched glob expand to nothing instead of the literal pattern\n",
  "shopt -s nullglob\n",
  "\n",
  "# Create array of original FastQ files\n",
  "# (\"${dir%/}/\" accepts the directory path with or without a trailing slash)\n",
  "fastq_100219_array=()\n",
  "for file in \"${fastq_100219_dir%/}/\"*.gz\n",
  "  do\n",
  "    # Quoted append keeps each path as one element, with no word splitting\n",
  "    fastq_100219_array+=(\"${file}\")\n",
  "    echo \"Adding ${file} to fastq_100219_array\"\n",
  "done\n",
  "\n",
  "echo \"\"\n",
  "\n",
  "# Create array of second FastQ files\n",
  "fastq_102319_array=()\n",
  "for file in \"${fastq_102319_dir%/}/\"*.gz\n",
  "  do\n",
  "    fastq_102319_array+=(\"${file}\")\n",
  "    echo \"Adding ${file} to fastq_102319_array\"\n",
  "done\n",
  "\n",
  "echo \"\"\n",
  "\n",
  "# Files are paired by array index, so both directories must hold the same number of files\n",
  "if [ \"${#fastq_100219_array[@]}\" -eq 0 ] || [ \"${#fastq_100219_array[@]}\" -ne \"${#fastq_102319_array[@]}\" ]\n",
  "  then\n",
  "    echo \"ERROR: expected the same non-zero number of .gz files in both FastQ directories\" >&2\n",
  "    exit 1\n",
  "fi\n",
  "\n",
  "# Concatenate each pair of same-named FastQ files\n",
  "for index in \"${!fastq_100219_array[@]}\"\n",
  "  do\n",
  "    first_fastq=\"${fastq_100219_array[index]}\"\n",
  "    second_fastq=\"${fastq_102319_array[index]}\"\n",
  "    # Strip leading path from filename\n",
  "    fastq_file=\"${first_fastq##*/}\"\n",
  "    # Refuse to merge files whose names differ (the index pairing would be wrong)\n",
  "    if [ \"${fastq_file}\" != \"${second_fastq##*/}\" ]\n",
  "      then\n",
  "        echo \"ERROR: ${first_fastq} and ${second_fastq} do not have the same filename\" >&2\n",
  "        exit 1\n",
  "    fi\n",
  "    echo \"Now concatenating ${first_fastq} and ${second_fastq} to ${fastq_file}...\"\n",
  "    # Truncate (>) instead of append (>>) so re-running this cell does not\n",
  "    # add a second copy of the data to an existing output file\n",
  "    cat \"${first_fastq}\" \"${second_fastq}\" > \"${fastq_file}\"\n",
  "    # NOTE: on gzip data, wc -l counts newline bytes in the compressed stream,\n",
  "    # not FastQ lines. The third count should equal the sum of the first two.\n",
  "    wc -l \"${first_fastq}\"\n",
  "    wc -l \"${second_fastq}\"\n",
  "    wc -l \"${fastq_file}\"\n",
  "done" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "### Copy concatenated files to owl\n",
  "Transfers every `.gz` file in the working directory to `owl:/volume1/web/nightingales/C_bairdi/` with rsync." ] },
 { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "sending incremental file list\n",
  "329774_S1_L001_R1_001.fastq.gz\n",
  "329774_S1_L001_R2_001.fastq.gz\n",
  "329774_S1_L002_R1_001.fastq.gz\n",
  "329774_S1_L002_R2_001.fastq.gz\n",
  "329775_S2_L001_R1_001.fastq.gz\n",
  "329775_S2_L001_R2_001.fastq.gz\n",
  "329775_S2_L002_R1_001.fastq.gz\n",
  "329775_S2_L002_R2_001.fastq.gz\n",
  "329776_S3_L001_R1_001.fastq.gz\n",
  "329776_S3_L001_R2_001.fastq.gz\n",
  "329776_S3_L002_R1_001.fastq.gz\n",
  "329776_S3_L002_R2_001.fastq.gz\n",
  "329777_S4_L001_R1_001.fastq.gz\n",
  "329777_S4_L001_R2_001.fastq.gz\n",
  "329777_S4_L002_R1_001.fastq.gz\n",
  "329777_S4_L002_R2_001.fastq.gz\n",
  "\n",
  "sent 19,533,901,406 bytes received 323 bytes 28,412,947.97 bytes/sec\n",
  "total size is 19,529,132,417 speedup is 1.00\n" ] } ],
  "source": [
  "%%bash\n",
  "# Stop if the directory change or the transfer fails\n",
  "set -euo pipefail\n",
  "\n",
  "# Transfer from the working directory regardless of the kernel's cwd\n",
  "cd \"${wd}\"\n",
  "\n",
  "# --archive preserves permissions and timestamps; --verbose lists each file sent\n",
  "rsync \\\n",
  "--archive \\\n",
  "--verbose \\\n",
  "*.gz \\\n",
  "owl:/volume1/web/nightingales/C_bairdi/" ] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }