{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TODAY'S DATE:\n", "Tue May 29 12:23:57 PDT 2018\n", "------------\n", "\n", "Distributor ID:\tUbuntu\n", "Description:\tUbuntu 16.04.4 LTS\n", "Release:\t16.04\n", "Codename:\txenial\n", "\n", "------------\n", "HOSTNAME: \n", "roadrunner\n", "\n", "------------\n", "Computer Specs:\n", "\n", "Architecture: x86_64\n", "CPU op-mode(s): 32-bit, 64-bit\n", "Byte Order: Little Endian\n", "CPU(s): 16\n", "On-line CPU(s) list: 0-15\n", "Thread(s) per core: 2\n", "Core(s) per socket: 4\n", "Socket(s): 2\n", "NUMA node(s): 1\n", "Vendor ID: GenuineIntel\n", "CPU family: 6\n", "Model: 26\n", "Model name: Intel(R) Xeon(R) CPU E5520 @ 2.27GHz\n", "Stepping: 5\n", "CPU MHz: 1596.000\n", "CPU max MHz: 2394.0000\n", "CPU min MHz: 1596.0000\n", "BogoMIPS: 4521.78\n", "Virtualization: VT-x\n", "L1d cache: 32K\n", "L1i cache: 32K\n", "L2 cache: 256K\n", "L3 cache: 8192K\n", "NUMA node0 CPU(s): 0-15\n", "Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology nonstop_tsc aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt lahf_lm kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida\n", "\n", "------------\n", "\n", "Memory Specs\n", "\n", " total used free shared buff/cache available\n", "Mem: 47G 472M 15G 274M 31G 45G\n", "Swap: 47G 0B 47G\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No LSB modules are available.\n" ] } ], "source": [ "%%bash\n", "echo \"TODAY'S DATE:\"\n", "date\n", "echo \"------------\"\n", "echo \"\"\n", "lsb_release -a #Display operating system info\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"HOSTNAME: \"; hostname \n", "echo \"\"\n", "echo \"------------\"\n", "echo \"Computer Specs:\"\n", "echo \"\"\n", 
"lscpu\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"\"\n", "echo \"Memory Specs\"\n", "echo \"\"\n", "free -mh" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "# -p keeps this cell re-runnable: no error if the analysis directory already exists.\n", "mkdir -p /home/sam/analyses/20180529_virginica_repeatmasker" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCF_002022765.2_C_virginica-3.0_genomic.fna.gz\n", "wget_stderr.txt\n", "wget_stout.txt\n" ] } ], "source": [ "%%bash\n", "# Download the C. virginica genome FASTA from NCBI; wget's stdout/stderr are sent to log files.\n", "# The download is a plain gzip file (.fna.gz), not a tarball, so it must not be passed to tar;\n", "# it is decompressed with gunzip in the next cell.\n", "cd /home/sam/analyses/20180529_virginica_repeatmasker\n", "wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/022/765/GCF_002022765.2_C_virginica-3.0/GCF_002022765.2_C_virginica-3.0_genomic.fna.gz \\\n", "1> wget_stout.txt \\\n", "2> wget_stderr.txt\n", "ls" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20150313_LSU_Oil_Spill_IndexID_Comparisons.ipynb\n", "20150316_LSU_OilSpill_Adapter_ID.ipynb\n", "20150317_LSU_OilSpill_EpinextAdaptor1_ID.ipynb\n", "20150408_Install_Bismark_bisulfite_mapper.ipynb\n", "20150414_C_gigas_Larvae_OA_Trimmomatic_FASTQC.ipynb\n", "20150414_C_virginica_LSU_Oil_Spill_Trimmomatic_FASTQC.ipynb\n", "20150429_Gigas_larvae_OA_BLASTn.ipynb\n", "20150501_Cgigas_larvae_OA_BLASTn_nt.ipynb\n", "20150506_Cgigas_larvae_OA_trimmomatic_FASTQC.ipynb\n", "20150521_Cgigas_larvae_OA_Trimmomatic_FASTQC.ipynb\n", "20160114_wasted_space_synologies.ipynb\n", "20160126_Olurida_BGI_data_handling.ipynb\n", "20160126_Pgenerosa_BGI_data_handling.ipynb\n", "20160203_Olurida_Zymo_Data_Handling.ipynb\n", "20160308_find_rename_2bRAD_undetermined_fastqs.ipynb\n", 
"20160314_Olurida_GBS_data_management.ipynb\n", "20160406_Oly_GBS_STACKS.ipynb\n", "20160406_STACKS_install.ipynb\n", "20160411_Concatenate_Oly_MBDseq.ipynb\n", "20160418_Oly_GBS_PE-Pyrad_populations.ipynb\n", "20160418_pyrad_oly_PE-GBS.ipynb\n", "20160427_Oly_GBS_data_management.ipynb\n", "20160427_speed_comparison.ipynb\n", "20160428_Oly_GBS_STACKS.ipynb\n", "20160502_Oly_GBS_barcode_repair.ipynb\n", "20160516_Oly_Small_Insert_Library_Genome_Read_Counts.ipynb\n", "20160523_Oly_GBS_Stacks.ipynb\n", "20160525_pyrad_oly_gbs_bgi.ipynb\n", "20160609_pyrad_oly_gbs_bgi.ipynb\n", "20160714_EC2_Oly_GBS_stacks_analysis.ipynb\n", "20160715_ec2_oly_gbs_pyrad.ipynb\n", "20160816_oly_gbs_fst_calcs.ipynb\n", "20161025_Pgenerosa_Small_Library_Genome_Read_Counts.ipynb\n", "20161117_docker_oly_genome_fastq_corruption.ipynb\n", "20161117_docker_oly_vcf_analysis.ipynb\n", "20161129_docker_R_magics_failure.ipynb\n", "20161206_docker_BGI_genome_downloads.ipynb\n", "20161214_docker_BGI_data_integrity_check.ipynb\n", "20161214_docker_notebook_trimming.ipynb\n", "20161229_docker_genewiz_geoduck_RRBS_data.ipynb\n", "20161230_docker_geoduck_RRBS_md5_checks.ipynb\n", "20170104_docker_oly_BGI_genome_corruption_solved.ipynb\n", "20170227_docker_jay_ngs_data_retrieval.ipynb\n", "20170301_docker_fastqc_nondemultiplexed_bgi_oly_gbs.ipynb\n", "20170306_docker_fastqc_demultiplexed_bgi_oly_gbs.ipynb\n", "20170314_docker_Oly_BGI_GBS_demultiplexing_reproducibility.ipynb\n", "20170320_docker_Oly_BGI_GBS_demultiplexing_reproducibility.ipynb\n", "20170622_oly_pacbio_data_management.ipynb\n", "20170907_docker_pacbio_oly_minimap2.ipynb\n", "20170918_docker_pacbio_oly_miniasm0.2.ipynb\n", "20170918_docker_pacbio_oly_racon0.5.0.ipynb\n", "20171003_docker_oly_assembly_comparisons.ipynb\n", "20171004_docker_oly_redundans.ipynb\n", "20171005_docker_oly_redundans.ipynb\n", "20171018_docker_oly_canu.ipynb\n", "20171023_docker_oly_pacbio_canu_comparisons.ipynb\n", "20171023_docker_oly_redundans.ipynb\n", 
"20171023_restore_scaphapoda_data.ipynb\n", "20171113_emu_pbjelly_22mer_plat.ipynb\n", "20171114_emu_pbjelly_BGI_scaffold.ipynb\n", "20171114_swoose_oly_assembly_comparisons_quast.ipynb\n", "20171130_emu_pbjelly.ipynb\n", "20180103_emu_pbjelly.ipynb\n", "20180116_swoose_oly_assembly_comparisons_quast.ipynb\n", "20180125_roadrunner_trimming_geoduck_novaseq.ipynb\n", "20180205_roadrunner_meraculous_geoduck_novaseq.ipynb\n", "20180301_roadrunner_assembly_meraculous_geoduck_novaseq_subset.ipynb\n", "20180503_emu_oly_methylation_mapping.ipynb\n", "20180507_roadrunner_geoduck_genome_mapbacks.ipynb\n", "20180508_roadrunner_geoduck_bowtie2_genome_mapping.ipynb\n", "20180514_roadrunner_geoduck_RRBS_trimming.ipynb\n", "20180516_roadrunner_geoduck_EPI_fastqc.ipynb\n", "20180516_roadrunner_geoduck_RRBS_trimming.ipynb\n", "20180523_roadrunner_oly_TEs_repeatmasker.ipynb\n", "20180529_roadrunner_virginica_TEs_repeatmasker.ipynb\n", "InstallingBLAST.ipynb\n", "multiqc_data\n", "multiqc_report.html\n", "PE-GBS_empirical.ipynb\n", "PhageNGS_ID.ipynb\n", "PhageNGS.ipynb\n", "README.md\n", "stdin_fastqc.zip\n", "template_linux.ipynb\n", "Untitled.ipynb\n" ] } ], "source": [ "%%bash\n", "gunzip /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fna.gz\n", "ls" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCF_002022765.2_C_virginica-3.0_genomic.fna\n", "wget_stderr.txt\n", "wget_stout.txt\n" ] } ], "source": [ "%%bash\n", "ls /home/sam/analyses/20180529_virginica_repeatmasker/" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "wget_stderr.txt\n", "wget_stout.txt\n" ] } ], "source": [ "%%bash\n", "mv /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fna \\\n", "GCF_002022765.2_C_virginica-3.0_genomic.fasta\n", "ls 
/home/sam/analyses/20180529_virginica_repeatmasker/" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCF_002022765.2_C_virginica-3.0_genomic.fasta\n", "wget_stderr.txt\n", "wget_stout.txt\n" ] } ], "source": [ "%%bash\n", "mv GCF_002022765.2_C_virginica-3.0_genomic.fasta \\\n", "/home/sam/analyses/20180529_virginica_repeatmasker\n", "ls /home/sam/analyses/20180529_virginica_repeatmasker/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Run RepeatMasker with _Crassostrea virginica_ species setting and the following options:\n", "\n", "- ```-species \"crassostrea virginica\"``` : Sets species to Crassostrea virginica\n", "\n", "- ```-par 15``` : Use 15 CPU threads\n", "\n", "- ```-gff``` : Create GFF output file (in addition to default files)\n", "\n", "- ```-excln``` : Adjusts output table calculations to exclude sequence runs of >=20 Ns/Xs. Useful for draft genome assemblies.\n", "\n", "- ```1>``` : Send stdout to file instead of printing to notebook.\n", "\n", "- ```2>``` : Send stderr to file instead of printing to notebook." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t85m42.321s\n", "user\t907m8.920s\n", "sys\t130m12.600s\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20180529_virginica_repeatmasker/\n", "time \\\n", "/home/shared/RepeatMasker-4.0.7/RepeatMasker \\\n", "/home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta \\\n", "-species \"crassostrea virginica\" \\\n", "-par 15 \\\n", "-gff \\\n", "-excln \\\n", "1> /home/sam/analyses/20180529_virginica_repeatmasker/rm_stdout.out \\\n", "2> /home/sam/analyses/20180529_virginica_repeatmasker/rm_stderr.err\n", "\n", "sed '/^Subject:/ s/ / repeatmasker JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------\n", "STANDARD OUT\n", "------------\n", "cycle 4 .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................\n", "cycle 5 \n", "cycle 6 .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................\n", "cycle 7 
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................\n", "cycle 8 ...........................................................................................................................................................................................................................................................................................................................................................................................................\n", "cycle 9 ...........................................................................................................................................................................................................................................................................................................................................................................................................\n", "cycle 10 ............................................................................................................................................................................................................................................................................................................................................................................................................\n", "Generating output... 
.........................................................................................................................................................................................................................................................................................................................................................................................................\n", "masking\n", "done\n", "\n", "\n", "------------\n", "STANDARD ERROR\n", "------------\n", "\n", "\n", "------------\n", "REPEATMASKER C.VIRGINICA TABLE\n", "------------\n", "==================================================\n", "file name: GCF_002022765.2_C_virginica-3.0_genomic.fasta\n", "sequences: 11\n", "total length: 684741128 bp (684675328 bp excl N/X-runs)\n", "GC level: 34.83 %\n", "bases masked: 46637065 bp ( 6.81 %)\n", "==================================================\n", " number of length percentage\n", " elements* occupied of sequence\n", "--------------------------------------------------\n", "Retroelements 43139 8952068 bp 1.31 %\n", " SINEs: 43139 8952068 bp 1.31 %\n", " Penelope 0 0 bp 0.00 %\n", " LINEs: 0 0 bp 0.00 %\n", " CRE/SLACS 0 0 bp 0.00 %\n", " L2/CR1/Rex 0 0 bp 0.00 %\n", " R1/LOA/Jockey 0 0 bp 0.00 %\n", " R2/R4/NeSL 0 0 bp 0.00 %\n", " RTE/Bov-B 0 0 bp 0.00 %\n", " L1/CIN4 0 0 bp 0.00 %\n", " LTR elements: 0 0 bp 0.00 %\n", " BEL/Pao 0 0 bp 0.00 %\n", " Ty1/Copia 0 0 bp 0.00 %\n", " Gypsy/DIRS1 0 0 bp 0.00 %\n", " Retroviral 0 0 bp 0.00 %\n", "\n", "DNA transposons 3538 1564942 bp 0.23 %\n", " hobo-Activator 0 0 bp 0.00 %\n", " Tc1-IS630-Pogo 0 0 bp 0.00 %\n", " En-Spm 0 0 bp 0.00 %\n", " MuDR-IS905 0 0 bp 0.00 %\n", " PiggyBac 0 0 bp 0.00 %\n", " Tourist/Harbinger 0 0 bp 0.00 %\n", " Other (Mirage, 0 0 bp 0.00 %\n", " P-element, Transib)\n", "\n", "Rolling-circles 0 0 bp 0.00 %\n", "\n", "Unclassified: 65151 23982146 bp 3.50 %\n", "\n", "Total interspersed repeats: 34499156 bp 5.04 %\n", "\n", "\n", "Small RNA: 43353 8992879 bp 1.31 %\n", 
"\n", "Satellites: 1 222 bp 0.00 %\n", "Simple repeats: 232627 10544162 bp 1.54 %\n", "Low complexity: 29762 1561018 bp 0.23 %\n", "==================================================\n", "\n", "* most repeats fragmented by insertions or deletions\n", " have been counted as one element\n", " Runs of >=20 X/Ns in query were excluded in % calcs\n", "\n", "\n", "The query species was assumed to be crassostrea virginica\n", "RepeatMasker Combined Database: Dfam_Consensus-20170127, RepBase-20170127\n", " \n", "run with rmblastn version 2.6.0+\n", "\n" ] } ], "source": [ "%%bash\n", "echo \"------------\"\n", "echo \"STANDARD OUT\"\n", "echo \"------------\"\n", "tail /home/sam/analyses/20180529_virginica_repeatmasker/rm_stdout.out\n", "\n", "echo \"\"\n", "echo \"\"\n", "\n", "echo \"------------\"\n", "echo \"STANDARD ERROR\"\n", "echo \"------------\"\n", "tail /home/sam/analyses/20180529_virginica_repeatmasker/rm_stderr.err\n", "\n", "echo \"\"\n", "echo \"\"\n", "\n", "echo \"------------\"\n", "echo \"REPEATMASKER C.VIRGINICA TABLE\"\n", "echo \"------------\"\n", "cat /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### SUMMARY TABLE (_Crassostrea virginica_)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "bash: line 1: /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl: Permission denied\n" ] } ], "source": [ "%%bash\n", "/home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "==================================================\n", "file name: GCF_002022765.2_C_virginica-3.0_genomic.fasta\n", "sequences: 11\n", "total length: 684741128 
bp (684675328 bp excl N/X-runs)\n", "GC level: 34.83 %\n", "bases masked: 46637065 bp ( 6.81 %)\n", "==================================================\n", " number of length percentage\n", " elements* occupied of sequence\n", "--------------------------------------------------\n", "Retroelements 43139 8952068 bp 1.31 %\n", " SINEs: 43139 8952068 bp 1.31 %\n", " Penelope 0 0 bp 0.00 %\n", " LINEs: 0 0 bp 0.00 %\n", " CRE/SLACS 0 0 bp 0.00 %\n", " L2/CR1/Rex 0 0 bp 0.00 %\n", " R1/LOA/Jockey 0 0 bp 0.00 %\n", " R2/R4/NeSL 0 0 bp 0.00 %\n", " RTE/Bov-B 0 0 bp 0.00 %\n", " L1/CIN4 0 0 bp 0.00 %\n", " LTR elements: 0 0 bp 0.00 %\n", " BEL/Pao 0 0 bp 0.00 %\n", " Ty1/Copia 0 0 bp 0.00 %\n", " Gypsy/DIRS1 0 0 bp 0.00 %\n", " Retroviral 0 0 bp 0.00 %\n", "\n", "DNA transposons 3538 1564942 bp 0.23 %\n", " hobo-Activator 0 0 bp 0.00 %\n", " Tc1-IS630-Pogo 0 0 bp 0.00 %\n", " En-Spm 0 0 bp 0.00 %\n", " MuDR-IS905 0 0 bp 0.00 %\n", " PiggyBac 0 0 bp 0.00 %\n", " Tourist/Harbinger 0 0 bp 0.00 %\n", " Other (Mirage, 0 0 bp 0.00 %\n", " P-element, Transib)\n", "\n", "Rolling-circles 0 0 bp 0.00 %\n", "\n", "Unclassified: 65151 23982146 bp 3.50 %\n", "\n", "Total interspersed repeats: 34499156 bp 5.04 %\n", "\n", "\n", "Small RNA: 43353 8992879 bp 1.31 %\n", "\n", "Satellites: 1 222 bp 0.00 %\n", "Simple repeats: 232627 10544162 bp 1.54 %\n", "Low complexity: 29762 1561018 bp 0.23 %\n", "==================================================\n", "\n", "* most repeats fragmented by insertions or deletions\n", " have been counted as one element\n", " Runs of >=20 X/Ns in query were excluded in % calcs\n", "\n", "\n", "The query species was assumed to be crassostrea virginica\n", "RepeatMasker Combined Database: Dfam_Consensus-20170127, RepBase-20170127\n", " \n", "run with rmblastn version 2.6.0+\n", "\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl" ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ "##### Copied to Owl outside of notebook, due to use of ```sudo```" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 1530150\n", "-rw-rw-r-- 1 sam users 0 May 29 12:28 wget_stout.txt\n", "-rw-rw-r-- 1 sam users 693301635 May 29 12:28 GCF_002022765.2_C_virginica-3.0_genomic.fasta\n", "-rw-rw-r-- 1 sam users 316589 May 29 12:28 wget_stderr.txt\n", "-rw-rw-r-- 1 sam users 0 May 29 12:33 rm_stderr.err\n", "-rw-rw-r-- 1 sam users 2593647 May 29 13:59 rm_stdout.out\n", "-rw-rw-r-- 1 sam users 52793192 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.out\n", "-rw-rw-r-- 1 sam users 2449 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl\n", "-rw-rw-r-- 1 sam users 83461675 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.cat.gz\n", "-rw-rw-r-- 1 sam users 35963379 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.out.gff\n", "-rw-rw-r-- 1 sam users 698437192 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.masked\n", "-rw-rw-r-- 1 sam users 1984 May 29 14:46 readme.txt\n" ] } ], "source": [ "%%bash\n", "ls -ltr /mnt/owl/Athaliana/20180529_virginica_repeatmasker/" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }