{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TODAY'S DATE:\n", "Mon Oct 22 14:15:28 PDT 2018\n", "------------\n", "\n", "Distributor ID:\tUbuntu\n", "Description:\tUbuntu 16.04.5 LTS\n", "Release:\t16.04\n", "Codename:\txenial\n", "\n", "------------\n", "HOSTNAME: \n", "emu\n", "\n", "------------\n", "Computer Specs:\n", "\n", "Architecture: x86_64\n", "CPU op-mode(s): 32-bit, 64-bit\n", "Byte Order: Little Endian\n", "CPU(s): 16\n", "On-line CPU(s) list: 0-15\n", "Thread(s) per core: 2\n", "Core(s) per socket: 4\n", "Socket(s): 2\n", "NUMA node(s): 1\n", "Vendor ID: GenuineIntel\n", "CPU family: 6\n", "Model: 26\n", "Model name: Intel(R) Xeon(R) CPU E5520 @ 2.27GHz\n", "Stepping: 5\n", "CPU MHz: 2394.000\n", "CPU max MHz: 2394.0000\n", "CPU min MHz: 1596.0000\n", "BogoMIPS: 4521.80\n", "Virtualization: VT-x\n", "L1d cache: 32K\n", "L1i cache: 32K\n", "L2 cache: 256K\n", "L3 cache: 8192K\n", "NUMA node0 CPU(s): 0-15\n", "Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology nonstop_tsc aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt lahf_lm tpr_shadow vnmi flexpriority ept vpid dtherm ida\n", "\n", "------------\n", "\n", "Memory Specs\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No LSB modules are available.\n" ] } ], "source": [ "%%bash\n", "echo \"TODAY'S DATE:\"\n", "date\n", "echo \"------------\"\n", "echo \"\"\n", "#Display operating system info\n", "lsb_release -a\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"HOSTNAME: \"; hostname \n", "echo \"\"\n", "echo \"------------\"\n", "echo \"Computer Specs:\"\n", "echo \"\"\n", "lscpu\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"\"\n", "echo \"Memory Specs\"\n", 
"echo \"\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " total used free shared buff/cache available\n", "Mem: 47G 1.5G 28G 597M 17G 44G\n", "Swap: 11G 0B 11G\n" ] } ], "source": [ "%%bash\n", "free -mh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create working directories for both genome versions" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "mkdir /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "mkdir /home/sam/analyses/20181022_Olurida_v081_repeatmodeler" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create RepeatModeler databases" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler\n", "------------------------------------------------------------------------\n", "\n", "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler\n", "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t1m12.758s\n", "user\t1m4.244s\n", "sys\t0m3.320s\n", "\n", "real\t0m45.485s\n", "user\t0m37.752s\n", "sys\t0m2.672s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Genome paths\n", "Olurida_v080=/home/sam/data/genomes/oly/Olurida_v080.fa\n", "Olurida_v081=/home/sam/data/genomes/oly/Olurida_v081.fa\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler\"\n", "cd 
/home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v080 \\\n", "${Olurida_v080} \\\n", "1> stdout.out \\\n", "2> stderr.err\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v081 RepeatModeler\"\n", "\n", "# Run on v081\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v081 \\\n", "${Olurida_v081} \\\n", "1> stdout.out \\\n", "2> stderr.err\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Ostrea_lurida_v080.nhr\n", "Ostrea_lurida_v080.nin\n", "Ostrea_lurida_v080.nnd\n", "Ostrea_lurida_v080.nni\n", "Ostrea_lurida_v080.nog\n", "Ostrea_lurida_v080.nsq\n", "Ostrea_lurida_v080.translation\n", "stderr.err\n", "stdout.out\n" ] } ], "source": [ "%%bash\n", "ls /home/sam/analyses/20181022_Olurida_v080_repeatmodeler" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 358M\n", "-rw-rw-r-- 1 sam sam 0 Oct 22 14:51 stderr.err\n", "-rw-rw-r-- 1 sam sam 14M Oct 22 14:51 Ostrea_lurida_v080.translation\n", "-rw-rw-r-- 1 sam sam 303M Oct 22 14:52 Ostrea_lurida_v080.nsq\n", "-rw-rw-r-- 1 sam sam 8.0M Oct 22 14:52 Ostrea_lurida_v080.nin\n", "-rw-rw-r-- 1 sam sam 26M Oct 22 14:52 Ostrea_lurida_v080.nhr\n", 
"-rw-rw-r-- 1 sam sam 22K Oct 22 14:52 Ostrea_lurida_v080.nni\n", "-rw-rw-r-- 1 sam sam 5.4M Oct 22 14:52 Ostrea_lurida_v080.nnd\n", "-rw-rw-r-- 1 sam sam 2.7M Oct 22 14:52 Ostrea_lurida_v080.nog\n", "-rw-rw-r-- 1 sam sam 157 Oct 22 14:52 stdout.out\n" ] } ], "source": [ "%%bash\n", "ls -ltrh /home/sam/analyses/20181022_Olurida_v080_repeatmodeler" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Building database Ostrea_lurida_v080:\n", " Adding /home/sam/data/genomes/oly/Olurida_v080.fa to database\n", "Number of sequences (bp) added to database: 0 ( 0 bp )\n" ] } ], "source": [ "%%bash\n", "tail /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/stdout.out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Well, that doesn't seem right... Let's try this again. Maybe need to add ```-engine```? Although, NCBI is already configured as default..." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler\n", "------------------------------------------------------------------------\n", "\n", "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t1m11.241s\n", "user\t1m5.240s\n", "sys\t0m3.044s\n", "\n", "real\t0m44.712s\n", "user\t0m37.732s\n", "sys\t0m2.732s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Genome paths\n", "Olurida_v080=/home/sam/data/genomes/oly/Olurida_v080.fa\n", "Olurida_v081=/home/sam/data/genomes/oly/Olurida_v081.fa\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo 
\"Begin v080 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v080 \\\n", "-engine ncbi \\\n", "${Olurida_v080} \\\n", "1> stdout.out \\\n", "2> stderr.err\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "\n", "# Run on v081\n", "echo \"Begin v081 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "${Olurida_v081} \\\n", "1> stdout.out \\\n", "2> stderr.err\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 358M\n", "-rw-rw-r-- 1 sam sam 0 Oct 22 15:03 stderr.err\n", "-rw-rw-r-- 1 sam sam 14M Oct 22 15:04 Ostrea_lurida_v080.translation\n", "-rw-rw-r-- 1 sam sam 303M Oct 22 15:05 Ostrea_lurida_v080.nsq\n", "-rw-rw-r-- 1 sam sam 8.0M Oct 22 15:05 Ostrea_lurida_v080.nin\n", "-rw-rw-r-- 1 sam sam 26M Oct 22 15:05 Ostrea_lurida_v080.nhr\n", "-rw-rw-r-- 1 sam sam 22K Oct 22 15:05 Ostrea_lurida_v080.nni\n", "-rw-rw-r-- 1 sam sam 5.4M Oct 22 15:05 Ostrea_lurida_v080.nnd\n", "-rw-rw-r-- 1 sam sam 2.7M Oct 22 15:05 Ostrea_lurida_v080.nog\n", "-rw-rw-r-- 1 sam sam 157 Oct 22 15:05 stdout.out\n", "Building database Ostrea_lurida_v080:\n", " Adding /home/sam/data/genomes/oly/Olurida_v080.fa to database\n", "Number of sequences (bp) added to database: 0 ( 0 bp )\n" ] } ], "source": [ "%%bash\n", "ls -ltrh /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "tail 
/home/sam/analyses/20181022_Olurida_v080_repeatmodeler/stdout.out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That didn't seem to change anything. Let's just try to run RepeatModeler and see what happens." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler\n", "------------------------------------------------------------------------\n", "\n", "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t0m0.148s\n", "user\t0m0.132s\n", "sys\t0m0.008s\n", "\n", "real\t0m0.113s\n", "user\t0m0.104s\n", "sys\t0m0.008s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}RepeatModeler \\\n", "-database /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/Ostrea_lurida_v080 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "\n", "# Run on v081\n", "echo \"Begin v081 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-database /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& 
run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Re-configured RepeatModeler installation to point to entire BLAST suite of tools." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Process is interrupted.\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}RepeatModeler \\\n", "-database /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/Ostrea_lurida_v080 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& database_build_run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "\n", "# Run on v081\n", "echo \"Begin v081 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-database /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& database_build_run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Whoops! Ran the wrong program!!" 
] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler database builder\n", "------------------------------------------------------------------------\n", "\n", "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler database builder\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t1m4.328s\n", "user\t0m59.520s\n", "sys\t0m3.116s\n", "\n", "real\t0m41.983s\n", "user\t0m36.252s\n", "sys\t0m2.716s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Genome paths\n", "Olurida_v080=/home/sam/data/genomes/oly/Olurida_v080.fa\n", "Olurida_v081=/home/sam/data/genomes/oly/Olurida_v081.fa\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler database builder\"\n", "cd /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v080 \\\n", "-engine ncbi \\\n", "${Olurida_v080} \\\n", ">& database_build_run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "\n", "# Run on v081\n", "echo \"Begin v081 RepeatModeler database builder\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "${Olurida_v081} \\\n", ">& database_build_run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' 
~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (, line 1)", "output_type": "error", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m $$bash\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "source": [ "$$bash\n", "cat /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/database_build_run.out\n", "echo\"\"\n", "cat /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/database_build_run.out" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Building database Ostrea_lurida_v080:\n", " Adding /home/sam/data/genomes/oly/Olurida_v080.fa to database\n", "Number of sequences (bp) added to database: 696946 ( 1253001795 bp )\n", "\n", "Building database Ostrea_lurida_v080:\n", " Adding /home/sam/data/genomes/oly/Olurida_v080.fa to database\n", "Number of sequences (bp) added to database: 696946 ( 1253001795 bp )\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/database_build_run.out\n", "echo\"\"\n", "cat /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/database_build_run.out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Whoops! Printed the same output file twice. Let's actually look at the two different output files..."
] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Building database Ostrea_lurida_v080:\n", " Adding /home/sam/data/genomes/oly/Olurida_v080.fa to database\n", "Number of sequences (bp) added to database: 696946 ( 1253001795 bp )\n", "\n", "Building database Ostrea_lurida_v081:\n", " Adding /home/sam/data/genomes/oly/Olurida_v081.fa to database\n", "Number of sequences (bp) added to database: 159429 ( 1140787867 bp )\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/database_build_run.out\n", "echo\"\"\n", "cat /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/database_build_run.out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We're in business!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Run RepeatModeler" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler database builder\n", "------------------------------------------------------------------------\n", "\n", "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler database builder\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t1m4.566s\n", "user\t0m59.616s\n", "sys\t0m2.968s\n", "\n", "real\t0m40.985s\n", "user\t0m36.208s\n", "sys\t0m2.732s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Genome paths\n", "Olurida_v080=/home/sam/data/genomes/oly/Olurida_v080.fa\n", "Olurida_v081=/home/sam/data/genomes/oly/Olurida_v081.fa\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler 
database builder\"\n", "cd /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v080 \\\n", "-engine ncbi \\\n", "${Olurida_v080} \\\n", ">& database_build_run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "\n", "# Run on v081\n", "echo \"Begin v081 RepeatModeler database builder\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-name Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "${Olurida_v081} \\\n", ">& database_build_run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Yeesh, did it again! Ran the wrong program. Copying and pasting is killing me today."
] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v080 RepeatModeler\n", "------------------------------------------------------------------------\n", "\n", "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t2720m32.633s\n", "user\t26367m40.240s\n", "sys\t2371m25.528s\n", "\n", "real\t0m0.138s\n", "user\t0m0.092s\n", "sys\t0m0.008s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Run on v080\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v080 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v080_repeatmodeler\n", "time \\\n", "perl ${rptm}RepeatModeler \\\n", "-database /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/Ostrea_lurida_v080 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v080 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"\n", "\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"\"\n", "echo \"------------------------------------------------------------------------\"\n", "\n", "# Run on v081\n", "echo \"Begin v081 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}BuildDatabase \\\n", "-database /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Yeesh! 
Forgot to change v081 code to actually run RepeatModeler!" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t891m3.411s\n", "user\t9443m43.164s\n", "sys\t146m48.432s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Run on v081\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v081 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}RepeatModeler \\\n", "-database /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " - Saving elements to a file...\n", " - 16 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-1505 model...\n", " - numRounds = 4\n", " - Consensus Length = 234 ( orig = 234 )\n", " - Avg Kimura Divergence = 0.01\n", " - Unaligned sequences = 0 ( orig = 0 )\n", " Build Consensus: 0:0:2 Elapsed Time\n", "Refinement: 00:00:04 (hh:mm:ss) Elapsed Time\n", "\n", "Processing RECON family: 3926\n", " - Saving elements to a file...\n", " - 16 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-3926 model...\n", " - numRounds = 8\n", " - Consensus Length = 1939 ( orig = 1902 )\n", " - Avg Kimura Divergence = 0.00\n", " - Unaligned sequences = 0 ( orig = 0 )\n", " Build Consensus: 0:0:5 
Elapsed Time\n", "Refinement: 00:00:07 (hh:mm:ss) Elapsed Time\n", "Family Refinement: 00:38:15 (hh:mm:ss) Elapsed Time\n", "Round Time: 21:13:16 (hh:mm:ss) Elapsed Time\n", "\n", "Discovery complete: 2273 families found\n", "Classifying Repeats...\n", "RepeatClassifier Version open-1.0.11\n", "===============================\n", "Search Engine = ncbi\n", " - Looking for Simple and Low Complexity sequences..\n", " - Looking for similarity to known repeat proteins..\n", " - Looking for similarity to known repeat consensi..\n", "Classification Time: 01:24:58 (hh:mm:ss) Elapsed Time\n", "Program Time: 45:20:16 (hh:mm:ss) Elapsed Time\n", "Working directory: /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/RM_14392.TueOct231056022018\n", "may be deleted unless there were problems with the run.\n", "\n", "The results have been saved to:\n", " /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/Ostrea_lurida_v080-families.fa - Consensus sequences for each family identified.\n", " /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/Ostrea_lurida_v080-families.stk - Seed alignments for each family identified.\n", "\n", "This version of RepeatModeler can upload families directly to the\n", "open repeat database - Dfam_consensus. 
Please consider uploading your final\n", "curated library using the RepeatModeler \"util/dfamConsensusTool.pl\" script\n", "( details at http://www.repeatmasker.org/RepeatModeler/dfamConsensusTool ) or\n", "posting your raw (uncurated) RepeatModeler results to the TE Raw Dataset\n", "Repository ( http://www.repeatmasker.org/Dfam_consensus/#/public/repository ).\n", "\n", "\n" ] } ], "source": [ "%%bash\n", "tail -n 50 /home/sam/analyses/20181022_Olurida_v080_repeatmodeler/run.out" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " - 62 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-945 model...\n", " - numRounds = 6\n", " - Consensus Length = 1553 ( orig = 1544 )\n", " - Avg Kimura Divergence = 0.00\n", " - Unaligned sequences = 19 ( orig = 21 )\n", " Build Consensus: 0:0:6 Elapsed Time\n", "Refinement: 00:00:07 (hh:mm:ss) Elapsed Time\n", "\n", "Processing RECON family: 1599\n", " - Saving elements to a file...\n", " - 62 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-1599 model...\n", " - numRounds = 6\n", " - Consensus Length = 1132 ( orig = 1136 )\n", " - Avg Kimura Divergence = 0.00\n", " - Unaligned sequences = 33 ( orig = 33 )\n", " Build Consensus: 0:0:5 Elapsed Time\n", "Refinement: 00:00:06 (hh:mm:ss) Elapsed Time\n", "\n", "Processing RECON family: 2008\n", " - Saving elements to a file...\n", " - 61 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-2008 model...\n", " - numRounds = 6\n", " - Consensus Length = 303 ( orig = 301 )\n", " - Avg Kimura Divergence = 0.00\n", " - Unaligned sequences = 7 ( orig = 17 )\n", " Build Consensus: 0:0:3 Elapsed Time\n", "Refinement: 00:00:04 (hh:mm:ss) Elapsed Time\n", "\n", "Processing RECON family: 1142\n", " - Saving elements to a file...\n", " - 61 elements found.\n",
"Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-1142 model...\n", " - numRounds = 9\n", " - Consensus Length = 969 ( orig = 969 )\n", " - Avg Kimura Divergence = 0.00\n", " - Unaligned sequences = 16 ( orig = 23 )\n", " Build Consensus: 0:0:11 Elapsed Time\n", "Refinement: 00:00:12 (hh:mm:ss) Elapsed Time\n", "\n", "Processing RECON family: 17\n", " - Saving elements to a file...\n", "FastaDB::getSubstr - Error index out of bounds! (SeqID=gi|10431, offset=6970, length=918 actualSeqLen=7719)\n", " at /home/shared/RepeatModeler-open-1.0.11/RepeatModeler line 2069.\n" ] } ], "source": [ "%%bash\n", "tail -n 50 /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/run.out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Well, based on this [SeqAnswers thread](http://seqanswers.com/forums/showthread.php?t=25832), it looks like the best resolution to this error is to run RepeatModeler repeatedly until it works??!!\n", "\n", "So, here we go..." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------------------------\n", "Begin v081 RepeatModeler\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t1946m38.614s\n", "user\t20614m12.632s\n", "sys\t276m15.616s\n" ] } ], "source": [ "%%bash\n", "# RepeatModeler path\n", "rptm=/home/shared/RepeatModeler-open-1.0.11/\n", "\n", "# Run on v081\n", "echo \"------------------------------------------------------------------------\"\n", "echo \"Begin v081 RepeatModeler\"\n", "cd /home/sam/analyses/20181022_Olurida_v081_repeatmodeler\n", "time \\\n", "perl ${rptm}RepeatModeler \\\n", "-database /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081 \\\n", "-engine ncbi \\\n", "-pa 16 \\\n", ">& run.out\n", "\n", "sed '/^Subject:/ s/ / repeatmodeler v081 JOB COMPLETE/' ~/.default-subject.mail | msmtp
\"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " - Saving elements to a file...\n", " - 16 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-3957 model...\n", " - numRounds = 6\n", " - Consensus Length = 1289 ( orig = 1286 )\n", " - Avg Kimura Divergence = 0.00\n", " - Unaligned sequences = 0 ( orig = 1 )\n", " Build Consensus: 0:0:6 Elapsed Time\n", "Refinement: 00:00:06 (hh:mm:ss) Elapsed Time\n", "\n", "Processing RECON family: 8930\n", " - Saving elements to a file...\n", " - 16 elements found.\n", "Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time\n", "Refining family-8930 model...\n", " - numRounds = 6\n", " - Consensus Length = 382 ( orig = 403 )\n", " - Avg Kimura Divergence = 0.01\n", " - Unaligned sequences = 3 ( orig = 3 )\n", " Build Consensus: 0:0:1 Elapsed Time\n", "Refinement: 00:00:02 (hh:mm:ss) Elapsed Time\n", "Family Refinement: 00:24:11 (hh:mm:ss) Elapsed Time\n", "Round Time: 15:35:32 (hh:mm:ss) Elapsed Time\n", "\n", "Discovery complete: 2223 families found\n", "Classifying Repeats...\n", "RepeatClassifier Version open-1.0.11\n", "===============================\n", "Search Engine = ncbi\n", " - Looking for Simple and Low Complexity sequences..\n", " - Looking for similarity to known repeat proteins..\n", " - Looking for similarity to known repeat consensi..\n", "Classification Time: 01:23:17 (hh:mm:ss) Elapsed Time\n", "Program Time: 32:26:34 (hh:mm:ss) Elapsed Time\n", "Working directory: /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/RM_19440.MonOct290805562018\n", "may be deleted unless there were problems with the run.\n", "\n", "The results have been saved to:\n", " /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081-families.fa - Consensus sequences for each family identified.\n", " 
/home/sam/analyses/20181022_Olurida_v081_repeatmodeler/Ostrea_lurida_v081-families.stk - Seed alignments for each family identified.\n", "\n", "This version of RepeatModeler can upload families directly to the\n", "open repeat database - Dfam_consensus. Please consider uploading your final\n", "curated library using the RepeatModeler \"util/dfamConsensusTool.pl\" script\n", "( details at http://www.repeatmasker.org/RepeatModeler/dfamConsensusTool ) or\n", "posting your raw (uncurated) RepeatModeler results to the TE Raw Dataset\n", "Repository ( http://www.repeatmasker.org/Dfam_consensus/#/public/repository ).\n", "\n", "\n" ] } ], "source": [ "%%bash\n", "tail -n 50 /home/sam/analyses/20181022_Olurida_v081_repeatmodeler/run.out" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%%bash\n", "cd /home/sam/analyses/\n", "rsync --archive --relative ./20181022_Olurida_v080_repeatmodeler gannet:/volume1/web/Atumefaciens\n", "rsync --archive --relative ./20181022_Olurida_v081_repeatmodeler gannet:/volume1/web/Atumefaciens\n", "\n", "sed '/^Subject:/ s/ / rsync JOBS COMPLETE/' ~/.default-subject.mail | msmtp \"$EMAIL\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 2 }