{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TODAY'S DATE:\n", "Wed Nov 28 12:01:27 PST 2018\n", "------------\n", "\n", "Distributor ID:\tUbuntu\n", "Description:\tUbuntu 16.04.5 LTS\n", "Release:\t16.04\n", "Codename:\txenial\n", "\n", "------------\n", "HOSTNAME: \n", "swoose\n", "\n", "------------\n", "Computer Specs:\n", "\n", "Architecture: x86_64\n", "CPU op-mode(s): 32-bit, 64-bit\n", "Byte Order: Little Endian\n", "CPU(s): 24\n", "On-line CPU(s) list: 0-23\n", "Thread(s) per core: 2\n", "Core(s) per socket: 6\n", "Socket(s): 2\n", "NUMA node(s): 1\n", "Vendor ID: GenuineIntel\n", "CPU family: 6\n", "Model: 44\n", "Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz\n", "Stepping: 2\n", "CPU MHz: 2925.866\n", "BogoMIPS: 5851.93\n", "Virtualization: VT-x\n", "L1d cache: 32K\n", "L1i cache: 32K\n", "L2 cache: 256K\n", "L3 cache: 12288K\n", "NUMA node0 CPU(s): 0-23\n", "Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat\n", "\n", "------------\n", "\n", "Memory Specs\n", "\n", " total used free shared buff/cache available\n", "Mem: 70G 3.0G 65G 112M 2.5G 67G\n", "Swap: 4.7G 0B 4.7G\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No LSB modules are available.\n" ] } ], "source": [ "%%bash\n", "echo \"TODAY'S DATE:\"\n", "date\n", "echo \"------------\"\n", "echo \"\"\n", "#Display operating system info\n", "lsb_release -a\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"HOSTNAME: \"; hostname \n", "echo \"\"\n", "echo \"------------\"\n", "echo \"Computer Specs:\"\n", "echo \"\"\n", 
"lscpu\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"\"\n", "echo \"Memory Specs\"\n", "echo \"\"\n", "free -mh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Make directories" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "mkdir /home/sam/data/geoduck\n", "mkdir /home/sam/data/geoduck/transcriptomes\n", "mkdir /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Transfer Transdecoder coding sequences FastA file" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-r--r-- 283,967,118 2018/11/23 15:42:16 20180827_trinity_geoduck.fasta.transdecoder.cds\n", "total 4.0K\n", "drwxrwxr-x 2 sam sam 4.0K Nov 28 12:04 transdecoder_fasta_splits\n" ] } ], "source": [ "%%bash\n", "\n", "cd /home/sam/data/geoduck/transcriptomes\n", "\n", "# Uncomment following line to retrieve file using wget\n", "# wget http://gannet.fish.washington.edu/Atumefaciens/20181121_geo_transdecoder/20180827_trinity_geoduck.fasta.transdecoder.cds\n", "rsync gannet:/volume1/web/Atumefaciens/20181121_geo_transdecoder/20180827_trinity_geoduck.fasta.transdecoder.cds\n", "ls -lh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Split mulit-FastA file in to individual FastA files with PyFaidx" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-------------------\n", "NUMBER OF SEQUENCES IN ORIGINAL FASTA\n", "-------------------\n", "\n", "\n", "-------------------\n", "NUMBER OF INDIVIDUAL FASTA FILES\n", "0\n", "-------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "grep: 20180827_trinity_geoduck.fasta.transdecoder.cds: No such file or directory\n", "Traceback (most recent call last):\n", " File 
\"/home/sam/software/bin/pyfaidx-0.5.5.2\", line 9, in \n", " load_entry_point('pyfaidx==0.5.5.2', 'console_scripts', 'faidx')()\n", " File \"build/bdist.linux-x86_64/egg/pyfaidx/cli.py\", line 197, in main\n", " File \"build/bdist.linux-x86_64/egg/pyfaidx/cli.py\", line 21, in write_sequence\n", " File \"build/bdist.linux-x86_64/egg/pyfaidx/__init__.py\", line 996, in __init__\n", " File \"build/bdist.linux-x86_64/egg/pyfaidx/__init__.py\", line 368, in __init__\n", "pyfaidx.FastaNotFoundError: Cannot read FASTA file ../20180827_trinity_geoduck.fasta.transdecoder.cds\n", "\n", "real\t0m0.150s\n", "user\t0m0.104s\n", "sys\t0m0.020s\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/geoduck/transcriptomes/\n", "\n", "# Count sequences in FastA\n", "echo \"-------------------\"\n", "echo \"NUMBER OF SEQUENCES IN ORIGINAL FASTA\"\n", "grep -c \">\" 20180827_trinity_geoduck.fasta.transdecoder.cds\n", "echo \"-------------------\"\n", "echo \"\"\n", "echo \"\"\n", "\n", "cd /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/\n", "\n", "# Split FastA\n", "time \\\n", "/home/sam/software/bin/pyfaidx-0.5.5.2 \\\n", "--split-files \\\n", "../20180827_trinity_geoduck.fasta.transdecoder.cds\n", "\n", "# Count number of individual FastA files\n", "echo \"-------------------\"\n", "echo \"NUMBER OF INDIVIDUAL FASTA FILES\"\n", "ls -1 | wc -l\n", "echo \"-------------------\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Whoops! Ran ```rsync``` command incorrectly. Duh!" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Transfer Transdecoder coding sequences FastA file (again)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-------------------\n", "total 271M\n", "-rw-r--r-- 1 sam users 271M Nov 23 15:42 20180827_trinity_geoduck.fasta.transdecoder.cds\n", "drwxrwxr-x 2 sam sam 4.0K Nov 28 12:04 transdecoder_fasta_splits\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/geoduck/transcriptomes\n", "rsync \\\n", "--archive \\\n", "gannet:/volume1/web/Atumefaciens/20181121_geo_transdecoder/20180827_trinity_geoduck.fasta.transdecoder.cds .\n", "\n", "echo \"-------------------\"\n", "\n", "ls -lh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Split multi-FastA file into individual FastA files with PyFaidx" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-------------------\n", "NUMBER OF SEQUENCES IN ORIGINAL FASTA\n", "210586\n", "-------------------\n", "\n", "\n", "-------------------\n", "NUMBER OF INDIVIDUAL FASTA FILES\n", "210586\n", "-------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t0m43.789s\n", "user\t0m36.340s\n", "sys\t0m6.656s\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/geoduck/transcriptomes/\n", "\n", "# Count sequences in FastA\n", "echo \"-------------------\"\n", "echo \"NUMBER OF SEQUENCES IN ORIGINAL FASTA\"\n", "grep -c \">\" 20180827_trinity_geoduck.fasta.transdecoder.cds\n", "echo \"-------------------\"\n", "echo \"\"\n", "echo \"\"\n", "\n", "cd /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/\n", "\n", "# Split FastA\n", "time \\\n", "/home/sam/software/bin/pyfaidx-0.5.5.2 \\\n", "--split-files \\\n", "../20180827_trinity_geoduck.fasta.transdecoder.cds\n", "\n", "# Count number of individual FastA files\n", "echo
\"-------------------\"\n", "echo \"NUMBER OF INDIVIDUAL FASTA FILES\"\n", "ls -1 | wc -l\n", "echo \"-------------------\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Find complete coding sequences that contain typical terms for vitellogenin\n", "\n", "##### The code below does the following:\n", "\n", "- uses ```grep``` to search for vitellogenin terms and the word \"complete\"\n", "\n", "- translates commas to spaces to aid in parsing/formatting for sort command\n", "\n", "- sorts in reverse order, using the \"version\" flag to help sort strings that contain numbers and sorts on column 7 (i.e. the \"score\" column\n", "\n", "- awk prints a header line to provide column descriptions and then prints out desired columns\n", "\n", "- ```column -t``` formats output into nicely spaced columns\n", "\n", "- ```tee``` prints output to file and to screen" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "transcript_ID transcript_type transcript_length strand score annotation\n", ">TRINITY_DN51983_c0_g1_i8.p1 type:complete len:4828 (-) score=1005.69 sp|Q9U943|APLP_LOCMI|27.089|3.28e-109\n", ">TRINITY_DN51983_c0_g1_i4.p1 type:complete len:4680 (-) score=971.32 sp|Q9U943|APLP_LOCMI|27.089|2.82e-109\n", ">TRINITY_DN3302_c0_g1_i4.p1 type:complete len:4277 (+) score=941.33 sp|Q9U943|APLP_LOCMI|29.133|9.20e-117\n", ">TRINITY_DN3302_c0_g1_i8.p1 type:complete len:4277 (+) score=941.04 sp|Q9U943|APLP_LOCMI|29.133|1.08e-116\n", ">TRINITY_DN3302_c0_g1_i1.p1 type:complete len:4277 (+) score=941.04 sp|Q9U943|APLP_LOCMI|29.133|1.08e-116\n", ">TRINITY_DN1469_c0_g4_i1.p1 type:complete len:6053 (-) score=859.49 sp|Q2PZL6|FAT4_MOUSE|38.356|1.43e-08\n", ">TRINITY_DN1469_c0_g3_i1.p1 type:complete len:6053 (+) score=859.49 sp|Q2PZL6|FAT4_MOUSE|38.356|1.43e-08\n", ">TRINITY_DN20178_c0_g1_i3.p1 type:complete len:2501 (-) score=554.86 sp|P18948|VIT6_CAEEL|21.949|2.79e-40\n", 
">TRINITY_DN20178_c0_g1_i1.p1 type:complete len:2501 (-) score=554.86 sp|P18948|VIT6_CAEEL|21.949|2.79e-40\n", ">TRINITY_DN15643_c1_g1_i3.p1 type:complete len:2487 (-) score=475.86 sp|E9Q414|APOB_MOUSE|20.790|2.08e-25\n", ">TRINITY_DN15643_c1_g1_i4.p1 type:complete len:2487 (-) score=475.26 sp|E9Q414|APOB_MOUSE|20.790|2.08e-25\n", ">TRINITY_DN15643_c1_g1_i2.p1 type:complete len:2487 (-) score=475.08 sp|E9Q414|APOB_MOUSE|20.790|2.02e-25\n", ">TRINITY_DN108893_c0_g1_i2.p1 type:complete len:1576 (-) score=272.48 Vitellogenin_N|PF01347.21|4.2e-08\n", ">TRINITY_DN108893_c0_g1_i7.p1 type:complete len:1576 (-) score=271.82 Vitellogenin_N|PF01347.21|4e-08\n", ">TRINITY_DN6438_c0_g1_i13.p1 type:complete len:887 (-) score=178.80 sp|Q865F1|MTP_PIG|28.196|4.04e-112\n", ">TRINITY_DN6438_c0_g1_i10.p1 type:complete len:887 (-) score=178.80 sp|Q865F1|MTP_PIG|28.196|4.04e-112\n", ">TRINITY_DN6438_c0_g1_i9.p1 type:complete len:887 (-) score=178.80 sp|Q865F1|MTP_PIG|28.196|4.04e-112\n", ">TRINITY_DN6438_c0_g1_i8.p1 type:complete len:887 (-) score=178.80 sp|Q865F1|MTP_PIG|28.196|4.04e-112\n", ">TRINITY_DN6438_c0_g1_i5.p1 type:complete len:887 (-) score=178.80 sp|Q865F1|MTP_PIG|28.196|4.04e-112\n", ">TRINITY_DN6438_c0_g1_i4.p1 type:complete len:887 (-) score=178.80 sp|Q865F1|MTP_PIG|28.196|4.04e-112\n", ">TRINITY_DN108893_c0_g1_i10.p2 type:complete len:541 (-) score=83.27 Vitellogenin_N|PF01347.21|2e-07\n", ">TRINITY_DN108893_c0_g1_i4.p2 type:complete len:541 (-) score=82.43 Vitellogenin_N|PF01347.21|2.3e-07\n", ">TRINITY_DN461352_c0_g1_i1.p1 type:complete len:169 (+) score=36.69 sp|O94390|ATP7_SCHPO|26.923|3.10e-07\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/geoduck/transcriptomes/\n", "grep --ignore-case \"Vitellogenin\" 20180827_trinity_geoduck.fasta.transdecoder.cds \\\n", "| grep \"complete\" \\\n", "| tr \",\" \" \" \\\n", "| sort -Vr -k7 \\\n", "| awk 'BEGIN{print \"transcript_ID\", \"transcript_type\", \"transcript_length\", \"strand\", \"score\", \"annotation\"}; 
\\\n", "{print $1, $4, $5, $6, $7 , $8}' \\\n", "| column -t \\\n", "| tee 20181127_geoduck_Vitellogenin_cds_matches.txt" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "transcript_ID transcript_type transcript_length strand score annotation\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/geoduck/transcriptomes/\n", "grep --ignore-case \"Vtg\" 20180827_trinity_geoduck.fasta.transdecoder.cds \\\n", "| grep \"complete\" \\\n", "| tr \",\" \" \" \\\n", "| sort -Vr -k7 \\\n", "| awk 'BEGIN{print \"transcript_ID\", \"transcript_type\", \"transcript_length\", \"strand\", \"score\", \"annotation\"}; \\\n", "{print $1, $4, $5, $6, $7 , $8}' \\\n", "| column -t \\\n", "| tee 20181127_geoduck_Vtg_cds_matches.txt" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "transcript_ID transcript_type transcript_length strand score annotation\n", ">TRINITY_DN306895_c2_g2_i2.p1 type:complete len:106 (-) score=-21.67 zf-LITAF-like|PF10601.8|2.4e+03\n", ">TRINITY_DN306895_c2_g2_i1.p1 type:complete len:106 (-) score=-21.67 zf-LITAF-like|PF10601.8|2.4e+03\n", ">TRINITY_DN48461_c1_g1_i1.p1 type:complete len:119 (-) score=-4.86 VGCC_alpha2|PF08473.10|0.02\n", ">TRINITY_DN42237_c0_g1_i8.p3 type:complete len:153 (+) score=-3.34 VGCC_beta4Aa_N|PF12052.7|1\n", ">TRINITY_DN100845_c0_g2_i1.p1 type:complete len:180 (+) score=-3.08 sp|Q6VGS5|DAPLE_MOUSE|27.350|5.18e-09\n", ">TRINITY_DN29517_c1_g1_i2.p1 type:complete len:2177 (-) score=515.83 sp|Q0VGT4|ZGRF1_MOUSE|46.659|0.0\n", ">TRINITY_DN29517_c1_g1_i5.p1 type:complete len:2188 (-) score=515.51 sp|Q0VGT4|ZGRF1_MOUSE|46.659|0.0\n", ">TRINITY_DN29517_c1_g1_i8.p1 type:complete len:2187 (-) score=514.52 sp|Q0VGT4|ZGRF1_MOUSE|46.476|0.0\n", ">TRINITY_DN27045_c0_g1_i6.p1 type:complete len:1760 (+) score=449.51 sp|Q4VGL6|RC3H1_MOUSE|70.386|0.0\n", ">TRINITY_DN27045_c0_g1_i5.p2 
type:complete len:1740 (+) score=442.48 sp|Q4VGL6|RC3H1_MOUSE|70.386|0.0\n", ">TRINITY_DN27045_c0_g1_i2.p1 type:complete len:1740 (+) score=442.48 sp|Q4VGL6|RC3H1_MOUSE|70.386|0.0\n", ">TRINITY_DN27045_c0_g1_i6.p2 type:complete len:1741 (-) score=439.59 sp|Q4VGL6|RC3H1_MOUSE|70.386|0.0\n", ">TRINITY_DN27045_c0_g1_i5.p1 type:complete len:1741 (-) score=439.59 sp|Q4VGL6|RC3H1_MOUSE|70.386|0.0\n", ">TRINITY_DN27045_c0_g1_i2.p2 type:complete len:1721 (-) score=432.56 sp|Q4VGL6|RC3H1_MOUSE|70.386|0.0\n", ">TRINITY_DN29517_c1_g1_i9.p1 type:complete len:1782 (-) score=414.11 sp|Q0VGT4|ZGRF1_MOUSE|46.659|0.0\n", ">TRINITY_DN10176_c6_g1_i5.p1 type:complete len:1894 (-) score=385.96 sp|Q9VGG5|CAD87_DROME|29.038|1.08e-141\n", ">TRINITY_DN10176_c6_g1_i4.p1 type:complete len:1890 (-) score=382.65 sp|Q9VGG5|CAD87_DROME|29.106|3.43e-142\n", ">TRINITY_DN28397_c0_g1_i16.p1 type:complete len:1204 (-) score=343.96 sp|Q9BVG8|KIFC3_HUMAN|49.348|2.39e-169\n", ">TRINITY_DN28397_c0_g1_i12.p1 type:complete len:1204 (-) score=343.96 sp|Q9BVG8|KIFC3_HUMAN|49.348|2.39e-169\n", ">TRINITY_DN28397_c0_g1_i8.p1 type:complete len:1197 (-) score=341.08 sp|Q9BVG8|KIFC3_HUMAN|47.943|8.46e-170\n", ">TRINITY_DN28397_c0_g1_i3.p1 type:complete len:1197 (-) score=341.08 sp|Q9BVG8|KIFC3_HUMAN|47.943|8.46e-170\n", ">TRINITY_DN28397_c0_g1_i19.p1 type:complete len:1149 (-) score=322.22 sp|Q9BVG8|KIFC3_HUMAN|50.398|8.22e-167\n", ">TRINITY_DN28397_c0_g1_i6.p1 type:complete len:1149 (-) score=322.22 sp|Q9BVG8|KIFC3_HUMAN|50.398|8.22e-167\n", ">TRINITY_DN21793_c0_g1_i13.p1 type:complete len:1456 (-) score=265.28 sp|P35969|VGFR1_MOUSE|32.039|3.86e-164\n", ">TRINITY_DN21793_c0_g1_i12.p1 type:complete len:1456 (-) score=265.28 sp|P35969|VGFR1_MOUSE|32.039|3.86e-164\n", ">TRINITY_DN29517_c1_g1_i3.p1 type:complete len:925 (-) score=222.35 sp|Q0VGT4|ZGRF1_MOUSE|47.563|0.0\n", ">TRINITY_DN29517_c1_g1_i10.p1 type:complete len:925 (-) score=220.07 sp|Q0VGT4|ZGRF1_MOUSE|47.563|0.0\n", ">TRINITY_DN2197_c0_g1_i9.p1 
type:complete len:736 (-) score=204.95 sp|F1R4Y7|CEP83_DANRE|36.143|6.98e-122\n", ">TRINITY_DN2197_c0_g1_i6.p1 type:complete len:738 (-) score=204.33 sp|F1R4Y7|CEP83_DANRE|36.286|7.50e-123\n", ">TRINITY_DN2197_c0_g1_i4.p1 type:complete len:738 (-) score=204.33 sp|F1R4Y7|CEP83_DANRE|36.286|7.50e-123\n", ">TRINITY_DN2197_c0_g1_i1.p1 type:complete len:738 (-) score=204.33 sp|F1R4Y7|CEP83_DANRE|36.286|7.50e-123\n", ">TRINITY_DN21793_c0_g1_i10.p1 type:complete len:1155 (-) score=200.21 sp|P17948|VGFR1_HUMAN|34.033|2.20e-151\n", ">TRINITY_DN21793_c0_g1_i5.p1 type:complete len:1155 (-) score=200.21 sp|P17948|VGFR1_HUMAN|34.033|2.20e-151\n", ">TRINITY_DN64188_c0_g1_i2.p1 type:complete len:1644 (-) score=194.69 sp|Q8C5K5|CX038_MOUSE|25.514|2.59e-10\n", ">TRINITY_DN64188_c0_g1_i4.p1 type:complete len:1644 (-) score=194.04 sp|Q8C5K5|CX038_MOUSE|25.514|2.55e-10\n", ">TRINITY_DN16250_c0_g1_i7.p1 type:complete len:849 (-) score=186.97 sp|Q3UVG3|F91A1_MOUSE|55.454|0.0\n", ">TRINITY_DN36889_c0_g1_i2.p1 type:complete len:1380 (+) score=182.20 sp|Q8IVG5|SAM9L_HUMAN|22.931|1.51e-09\n", ">TRINITY_DN2363_c3_g1_i10.p1 type:complete len:820 (+) score=178.17 sp|Q0VGE8|ZN816_HUMAN|28.452|1.83e-14\n", ">TRINITY_DN2363_c3_g1_i8.p1 type:complete len:820 (+) score=178.17 sp|Q0VGE8|ZN816_HUMAN|28.452|1.83e-14\n", ">TRINITY_DN16250_c0_g1_i11.p1 type:complete len:774 (-) score=166.94 sp|Q3UVG3|F91A1_MOUSE|54.271|0.0\n", ">TRINITY_DN8048_c1_g1_i10.p1 type:complete len:946 (+) score=160.87 sp|Q5JVG2|ZN484_HUMAN|34.375|2.49e-35\n", ">TRINITY_DN8048_c1_g1_i7.p1 type:complete len:946 (+) score=160.87 sp|Q5JVG2|ZN484_HUMAN|34.375|2.49e-35\n", ">TRINITY_DN8048_c1_g1_i5.p1 type:complete len:946 (+) score=160.87 sp|Q5JVG2|ZN484_HUMAN|34.375|2.49e-35\n", ">TRINITY_DN8048_c1_g1_i4.p1 type:complete len:946 (+) score=158.93 sp|Q5JVG2|ZN484_HUMAN|33.984|3.50e-34\n", ">TRINITY_DN4220_c1_g1_i4.p1 type:complete len:1377 (-) score=156.18 sp|Q8IVG5|SAM9L_HUMAN|26.226|2.38e-19\n", ">TRINITY_DN4220_c1_g1_i2.p1 
type:complete len:1377 (-) score=156.18 sp|Q8IVG5|SAM9L_HUMAN|26.226|2.38e-19\n", ">TRINITY_DN24751_c0_g1_i4.p1 type:complete len:1313 (+) score=154.36 sp|Q8IVG5|SAM9L_HUMAN|24.645|2.24e-15\n", ">TRINITY_DN24751_c0_g1_i9.p1 type:complete len:1313 (+) score=153.65 sp|Q8IVG5|SAM9L_HUMAN|24.645|2.46e-15\n", ">TRINITY_DN24751_c0_g1_i8.p1 type:complete len:1313 (+) score=153.65 sp|Q8IVG5|SAM9L_HUMAN|24.645|2.46e-15\n", ">TRINITY_DN24751_c0_g1_i2.p1 type:complete len:1313 (+) score=153.65 sp|Q8IVG5|SAM9L_HUMAN|24.645|2.46e-15\n", ">TRINITY_DN16582_c0_g1_i5.p1 type:complete len:1249 (-) score=146.06 sp|Q8IVG5|SAM9L_HUMAN|24.525|7.25e-10\n", ">TRINITY_DN16582_c0_g1_i3.p1 type:complete len:1249 (-) score=146.06 sp|Q8IVG5|SAM9L_HUMAN|24.525|7.25e-10\n", ">TRINITY_DN16582_c0_g1_i2.p1 type:complete len:1249 (-) score=143.25 sp|Q8IVG5|SAM9L_HUMAN|24.525|8.09e-10\n", ">TRINITY_DN16582_c0_g1_i4.p1 type:complete len:1249 (-) score=140.58 sp|Q8IVG5|SAM9L_HUMAN|24.178|7.75e-10\n", ">TRINITY_DN27885_c0_g1_i1.p1 type:complete len:446 (+) score=138.91 sp|Q86VG3|CK074_HUMAN|35.652|1.19e-07\n", ">TRINITY_DN17404_c0_g1_i7.p1 type:complete len:917 (-) score=134.65 sp|Q0VGW6|S12A9_XENLA|47.552|0.0\n", ">TRINITY_DN17404_c0_g1_i6.p1 type:complete len:917 (-) score=134.65 sp|Q0VGW6|S12A9_XENLA|47.552|0.0\n", ">TRINITY_DN3518_c0_g1_i10.p1 type:complete len:662 (+) score=127.58 sp|Q6PHS9|CA2D2_MOUSE|29.799|2.84e-76\n", ">TRINITY_DN3518_c0_g1_i5.p1 type:complete len:630 (+) score=125.39 sp|Q6PHS9|CA2D2_MOUSE|30.618|6.96e-80\n", ">TRINITY_DN14465_c0_g2_i2.p1 type:complete len:630 (-) score=125.24 sp|Q0VGK3|GLCTK_RAT|42.054|1.43e-125\n", ">TRINITY_DN14465_c0_g2_i1.p1 type:complete len:630 (-) score=125.24 sp|Q0VGK3|GLCTK_RAT|42.054|1.43e-125\n", ">TRINITY_DN3588_c1_g1_i3.p1 type:complete len:611 (+) score=123.14 sp|Q9HGN1|GCN2_SCHPO|31.720|7.99e-40\n", ">TRINITY_DN3588_c1_g1_i6.p1 type:complete len:611 (+) score=122.20 sp|Q9HGN1|GCN2_SCHPO|31.720|7.49e-40\n", ">TRINITY_DN3588_c1_g1_i7.p1 
type:complete len:611 (+) score=122.11 sp|Q9HGN1|GCN2_SCHPO|31.720|7.49e-40\n", ">TRINITY_DN3588_c1_g1_i2.p1 type:complete len:594 (+) score=117.20 sp|Q9HGN1|GCN2_SCHPO|31.720|4.68e-40\n", ">TRINITY_DN5529_c0_g1_i9.p1 type:complete len:482 (-) score=117.11 sp|Q80V24|VGLL4_MOUSE|31.841|2.07e-11\n", ">TRINITY_DN5529_c0_g1_i4.p1 type:complete len:482 (-) score=117.11 sp|Q80V24|VGLL4_MOUSE|31.841|2.07e-11\n", ">TRINITY_DN3588_c1_g1_i8.p1 type:complete len:594 (+) score=116.25 sp|Q9HGN1|GCN2_SCHPO|31.720|5.47e-40\n", ">TRINITY_DN3588_c1_g1_i4.p1 type:complete len:594 (+) score=116.17 sp|Q9HGN1|GCN2_SCHPO|31.720|5.47e-40\n", ">TRINITY_DN17404_c0_g1_i1.p1 type:complete len:820 (-) score=115.98 sp|Q0VGW6|S12A9_XENLA|48.010|0.0\n", ">TRINITY_DN29773_c0_g1_i6.p1 type:complete len:533 (+) score=115.30 sp|Q9VGZ5|CWO_DROME|39.336|1.74e-29\n", ">TRINITY_DN5529_c0_g1_i8.p1 type:complete len:418 (-) score=111.04 sp|Q80V24|VGLL4_MOUSE|32.124|9.29e-12\n", ">TRINITY_DN5529_c0_g1_i6.p1 type:complete len:418 (-) score=111.04 sp|Q80V24|VGLL4_MOUSE|32.124|9.29e-12\n", ">TRINITY_DN47836_c0_g1_i18.p1 type:complete len:573 (-) score=106.21 sp|Q9MZL5|CACB2_BOVIN|64.516|2.92e-177\n", ">TRINITY_DN47836_c0_g1_i17.p1 type:complete len:573 (-) score=106.21 sp|Q9MZL5|CACB2_BOVIN|64.516|2.92e-177\n", ">TRINITY_DN47836_c0_g1_i9.p1 type:complete len:573 (-) score=106.21 sp|Q9MZL5|CACB2_BOVIN|64.516|2.92e-177\n", ">TRINITY_DN47836_c0_g1_i4.p1 type:complete len:573 (-) score=106.21 sp|Q9MZL5|CACB2_BOVIN|64.516|2.92e-177\n", ">TRINITY_DN47836_c0_g1_i3.p1 type:complete len:573 (-) score=106.21 sp|Q9MZL5|CACB2_BOVIN|64.516|2.92e-177\n", ">TRINITY_DN47836_c0_g1_i1.p1 type:complete len:573 (-) score=106.21 sp|Q9MZL5|CACB2_BOVIN|64.516|2.92e-177\n", ">TRINITY_DN47836_c0_g1_i20.p1 type:complete len:575 (-) score=101.92 sp|P54288|CACB2_RABIT|60.811|4.45e-180\n", ">TRINITY_DN47836_c0_g1_i13.p1 type:complete len:575 (-) score=101.92 sp|P54288|CACB2_RABIT|60.811|4.45e-180\n", ">TRINITY_DN47836_c0_g1_i10.p1 
type:complete len:575 (-) score=101.92 sp|P54288|CACB2_RABIT|60.811|4.45e-180\n", ">TRINITY_DN47836_c0_g1_i19.p1 type:complete len:550 (-) score=95.91 sp|P54288|CACB2_RABIT|63.221|4.48e-179\n", ">TRINITY_DN47836_c0_g1_i11.p1 type:complete len:550 (-) score=95.91 sp|P54288|CACB2_RABIT|63.221|4.48e-179\n", ">TRINITY_DN47836_c0_g1_i6.p1 type:complete len:550 (-) score=95.91 sp|P54288|CACB2_RABIT|63.221|4.48e-179\n", ">TRINITY_DN2363_c3_g1_i9.p1 type:complete len:524 (+) score=93.90 sp|Q0VGE8|ZN816_HUMAN|28.452|8.96e-15\n", ">TRINITY_DN997_c0_g1_i8.p1 type:complete len:573 (+) score=87.07 sp|A2AVA0|SVEP1_MOUSE|23.906|2.67e-08\n", ">TRINITY_DN138924_c0_g3_i5.p1 type:complete len:503 (+) score=85.59 sp|Q9JI12|VGLU2_RAT|42.085|2.89e-139\n", ">TRINITY_DN2443_c0_g1_i2.p1 type:complete len:452 (+) score=85.33 MIEAP|PF16026.4|1.4e-22\n", ">TRINITY_DN135629_c0_g1_i5.p1 type:complete len:619 (+) score=76.12 sp|Q1L8X9|VGLU3_DANRE|33.948|1.25e-107\n", ">TRINITY_DN135629_c0_g1_i3.p1 type:complete len:619 (+) score=76.12 sp|Q1L8X9|VGLU3_DANRE|33.948|1.25e-107\n", ">TRINITY_DN111_c0_g1_i13.p1 type:complete len:373 (+) score=74.54 sp|Q9WVG9|MS3L1_MOUSE|42.574|1.03e-14\n", ">TRINITY_DN111_c0_g1_i10.p1 type:complete len:373 (+) score=74.54 sp|Q9WVG9|MS3L1_MOUSE|42.574|1.03e-14\n", ">TRINITY_DN3518_c0_g1_i3.p1 type:complete len:375 (+) score=74.03 sp|Q6PHS9|CA2D2_MOUSE|30.914|3.29e-42\n", ">TRINITY_DN2014_c3_g1_i9.p2 type:complete len:316 (+) score=68.65 sp|P27980|POL_SIVVG|28.125|1.36e-09\n", ">TRINITY_DN294228_c0_g1_i4.p2 type:complete len:184 (+) score=68.13 EPTP|PF03736.16|1.2e+02\n", ">TRINITY_DN36853_c0_g1_i13.p1 type:complete len:384 (-) score=66.64 sp|Q6X0I2|VGR_SOLIN|32.168|4.25e-14\n", ">TRINITY_DN36853_c0_g1_i12.p1 type:complete len:384 (-) score=66.64 sp|Q6X0I2|VGR_SOLIN|32.168|4.25e-14\n", ">TRINITY_DN36853_c0_g1_i5.p1 type:complete len:384 (-) score=66.64 sp|Q6X0I2|VGR_SOLIN|32.168|4.25e-14\n", ">TRINITY_DN6486_c1_g1_i1.p1 type:complete len:762 (+) score=66.21 
sp|P52583|VGFR2_COTJA|30.556|1.49e-08\n", ">TRINITY_DN6486_c1_g1_i2.p1 type:complete len:762 (+) score=65.92 sp|P52583|VGFR2_COTJA|30.556|1.49e-08\n", ">TRINITY_DN75615_c0_g1_i17.p1 type:complete len:457 (+) score=65.36 sp|Q8CFG5|CA2D3_RAT|25.225|2.56e-28\n", ">TRINITY_DN36853_c0_g1_i3.p1 type:complete len:384 (-) score=64.58 sp|Q6X0I2|VGR_SOLIN|32.168|4.25e-14\n", ">TRINITY_DN75615_c0_g1_i18.p1 type:complete len:433 (+) score=62.01 sp|Q8CFG5|CA2D3_RAT|25.765|4.15e-26\n", ">TRINITY_DN75615_c0_g1_i14.p1 type:complete len:433 (+) score=62.01 sp|Q8CFG5|CA2D3_RAT|25.765|4.15e-26\n", ">TRINITY_DN75615_c0_g1_i10.p1 type:complete len:433 (+) score=62.01 sp|Q8CFG5|CA2D3_RAT|25.765|4.15e-26\n", ">TRINITY_DN75615_c0_g1_i3.p1 type:complete len:433 (+) score=62.01 sp|Q8CFG5|CA2D3_RAT|25.765|4.15e-26\n", ">TRINITY_DN75615_c0_g1_i1.p1 type:complete len:433 (+) score=62.01 sp|Q8CFG5|CA2D3_RAT|25.765|4.15e-26\n", ">TRINITY_DN11016_c1_g3_i3.p1 type:complete len:327 (+) score=61.77 sp|Q5FVG6|PRR5_RAT|42.781|1.08e-43\n", ">TRINITY_DN143974_c2_g1_i1.p1 type:complete len:531 (+) score=60.92 sp|A4FV52|VGLU1_BOVIN|34.524|2.27e-92\n", ">TRINITY_DN14092_c0_g1_i23.p2 type:complete len:156 (-) score=46.70 LZ_Tnp_IS66|PF13007.6|0.024\n", ">TRINITY_DN14092_c0_g1_i12.p2 type:complete len:156 (-) score=46.70 LZ_Tnp_IS66|PF13007.6|0.024\n", ">TRINITY_DN14092_c0_g1_i6.p2 type:complete len:156 (-) score=46.70 LZ_Tnp_IS66|PF13007.6|0.024\n", ">TRINITY_DN138924_c0_g2_i1.p1 type:complete len:293 (-) score=46.58 sp|Q9JI12|VGLU2_RAT|42.667|2.30e-78\n", ">TRINITY_DN209_c1_g1_i1.p1 type:complete len:293 (+) score=44.73 sp|Q9BXJ4|C1QT3_HUMAN|30.405|2.78e-12\n", ">TRINITY_DN41639_c0_g2_i6.p1 type:complete len:742 (-) score=44.50 sp|Q9UT27|PVG1_SCHPO|35.535|4.17e-38\n", ">TRINITY_DN23922_c0_g1_i13.p2 type:complete len:638 (-) score=40.62 sp|Q0VGY8|TANC1_MOUSE|24.176|1.18e-09\n", ">TRINITY_DN23922_c0_g1_i12.p2 type:complete len:638 (-) score=40.62 sp|Q0VGY8|TANC1_MOUSE|24.176|1.18e-09\n", 
">TRINITY_DN23922_c0_g1_i8.p2 type:complete len:638 (-) score=40.62 sp|Q0VGY8|TANC1_MOUSE|24.176|1.18e-09\n", ">TRINITY_DN23922_c0_g1_i6.p2 type:complete len:638 (-) score=40.62 sp|Q0VGY8|TANC1_MOUSE|24.176|1.18e-09\n", ">TRINITY_DN23922_c0_g1_i5.p2 type:complete len:638 (-) score=40.62 sp|Q0VGY8|TANC1_MOUSE|24.176|1.18e-09\n", ">TRINITY_DN23922_c0_g1_i2.p2 type:complete len:638 (-) score=40.62 sp|Q0VGY8|TANC1_MOUSE|24.176|1.18e-09\n", ">TRINITY_DN138924_c0_g3_i1.p1 type:complete len:221 (+) score=36.00 sp|A6QLI1|VGLU2_BOVIN|47.087|9.73e-63\n", ">TRINITY_DN41639_c0_g2_i1.p1 type:complete len:744 (-) score=34.17 sp|Q9UT27|PVG1_SCHPO|32.416|3.02e-33\n", ">TRINITY_DN27205_c0_g1_i1.p1 type:complete len:135 (+) score=33.57 sp|P35917|VGFR3_MOUSE|100.000|3.46e-20\n", ">TRINITY_DN120833_c0_g1_i2.p1 type:complete len:296 (-) score=31.98 sp|Q8BGW8|VGLL2_MOUSE|46.739|8.45e-17\n", ">TRINITY_DN41639_c0_g2_i4.p1 type:complete len:744 (-) score=29.44 sp|Q9UT27|PVG1_SCHPO|32.416|1.35e-33\n", ">TRINITY_DN120833_c0_g1_i1.p1 type:complete len:272 (-) score=28.17 sp|Q8BGW8|VGLL2_MOUSE|46.739|5.13e-17\n", ">TRINITY_DN232640_c0_g1_i3.p2 type:complete len:136 (-) score=27.74 sp|Q9NVG8|TBC13_HUMAN|60.150|2.22e-50\n", ">TRINITY_DN5529_c0_g1_i2.p1 type:complete len:108 (-) score=27.66 sp|Q80V24|VGLL4_MOUSE|50.877|4.78e-09\n", ">TRINITY_DN5529_c0_g1_i1.p1 type:complete len:108 (-) score=27.66 sp|Q80V24|VGLL4_MOUSE|50.877|4.78e-09\n", ">TRINITY_DN42213_c0_g1_i3.p1 type:complete len:228 (+) score=26.44 DivIC|PF04977.14|0.00019\n", ">TRINITY_DN138924_c0_g3_i1.p2 type:complete len:125 (+) score=21.32 sp|Q9JI12|VGLU2_RAT|44.800|9.00e-27\n", ">TRINITY_DN41639_c0_g2_i5.p1 type:complete len:429 (-) score=18.80 sp|Q9UT27|PVG1_SCHPO|32.416|2.12e-34\n", ">TRINITY_DN41639_c0_g2_i3.p1 type:complete len:429 (-) score=18.80 sp|Q9UT27|PVG1_SCHPO|32.416|2.12e-34\n", ">TRINITY_DN120833_c0_g1_i3.p1 type:complete len:184 (-) score=17.34 sp|Q8BGW8|VGLL2_MOUSE|50.000|4.61e-10\n", ">TRINITY_DN5608_c0_g2_i7.p1 
type:complete len:249 (-) score=16.95 sp|Q9SVG0|AVT3C_ARATH|27.143|2.88e-17\n", ">TRINITY_DN374945_c0_g1_i2.p1 type:complete len:200 (-) score=16.68 sp|A4FV52|VGLU1_BOVIN|60.479|1.69e-67\n", ">TRINITY_DN374945_c0_g1_i5.p1 type:complete len:274 (-) score=15.76 sp|Q05B21|VGLU1_XENTR|55.187|2.20e-89\n", ">TRINITY_DN100416_c0_g1_i8.p1 type:complete len:120 (-) score=13.59 sp|Q1L8X9|VGLU3_DANRE|34.426|9.89e-16\n", ">TRINITY_DN100416_c0_g1_i6.p1 type:complete len:120 (-) score=13.59 sp|Q1L8X9|VGLU3_DANRE|34.426|9.89e-16\n", ">TRINITY_DN100416_c0_g1_i5.p1 type:complete len:120 (-) score=13.59 sp|Q1L8X9|VGLU3_DANRE|34.426|9.89e-16\n", ">TRINITY_DN100416_c0_g1_i3.p1 type:complete len:120 (-) score=13.59 sp|Q1L8X9|VGLU3_DANRE|34.426|9.89e-16\n", ">TRINITY_DN5608_c0_g2_i1.p1 type:complete len:135 (-) score=13.37 sp|Q9SVG0|AVT3C_ARATH|30.097|9.81e-08\n", ">TRINITY_DN356433_c0_g1_i1.p1 type:complete len:122 (+) score=11.24 sp|Q6INC8|VGLU1_XENLA|54.701|7.48e-38\n", ">TRINITY_DN133015_c0_g1_i11.p1 type:complete len:171 (+) score=9.55 sp|Q00130|VG50_ICHVA|35.714|1.49e-07\n", ">TRINITY_DN30354_c0_g2_i1.p1 type:complete len:125 (+) score=7.76 SpoVG|PF04026.11|10\n", ">TRINITY_DN133015_c0_g1_i10.p1 type:complete len:111 (+) score=4.68 sp|Q00130|VG50_ICHVA|36.170|3.85e-06\n", ">TRINITY_DN374945_c0_g1_i1.p1 type:complete len:170 (-) score=0.22 sp|Q5W8I8|VGL2A_DANRE|53.691|6.74e-49\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/geoduck/transcriptomes/\n", "grep --ignore-case \"Vg\" 20180827_trinity_geoduck.fasta.transdecoder.cds \\\n", "| grep \"complete\" \\\n", "| tr \",\" \" \" \\\n", "| sort -Vr -k7 \\\n", "| awk 'BEGIN{print \"transcript_ID\", \"transcript_type\", \"transcript_length\", \"strand\", \"score\", \"annotation\"}; \\\n", "{print $1, $4, $5, $6, $7 , $8}' \\\n", "| column -t \\\n", "| tee 20181127_geoduck_Vg_cds_matches.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looks like the only search that produced viable results is the \"vitellogenin\". 
Will use the top-scoring match (i.e. the highest value in the \"score\" column) for primer design." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create directories" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "mkdir /home/sam/analyses\n", "mkdir /home/sam/analyses/20181129_geoduck_vtg_primers" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Run Primer3 to design primers\n", "\n", "##### Quick explanation:\n", "\n", "- Primer3 requires a specially formatted input file. The file must be formatted like this:\n", "\n", "```\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "=\n", "```\n", "\n", "Values after the \"=\" on each line can be changed to whatever values the user decides. The ```${sequence}``` must be a nucleotide sequence on a single line, with no line breaks.\n", "\n", "The code below uses a ```heredoc``` to write this information to a file. Everything between the following two lines gets printed (via ```cat```) as shown and then redirected to the indicated file (```20181129_primer3_params.txt```):\n", "\n", "```\n", "cat << EOF > /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt\n", "EOF\n", "```\n", "\n", "- Primer3 is run with the ```--format_output``` option to make a nice, human-readable output format."
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers\n", "\n", "# Store sequence only from desired FastA.\n", "# Print all lines after the first line and then delete newlines\n", "# Creates a sequence that exists on a single line, which is necessary for Primer3\n", "sequence=$(tail -n +2 /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds | tr -d '\\n')\n", "\n", "# Store file name of targeted FastA file.\n", "seq_id=$(echo \"$(head -n 1 /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds | tr -d '>').cds\")\n", "\n", "# Use heredoc to create Primer3 parameters file\n", "cat << EOF > /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "=\n", "EOF\n", "\n", "# Run Primer3\n", "/home/sam/software/primer3-2.4.0/src/primer3_core \\\n", "--format_output \\\n", "--output=/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers.txt \\\n", "/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PRIMER_ERROR=thermodynamic approach chosen, but path to thermodynamic parameters not specified\n", "=\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers.txt" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "cd 
/home/sam/analyses/20181129_geoduck_vtg_primers\n", "\n", "# Store sequence only from desired FastA.\n", "# Print all lines after the first line and then delete newlines\n", "sequence=$(tail -n +2 /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds | tr -d '\\n')\n", "\n", "# Store sequence ID: FastA header line with '>' removed plus a .cds suffix (same as the FastA file name).\n", "seq_id=$(echo \"$(head -n 1 /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds | tr -d '>').cds\")\n", "\n", "# Use heredoc to create Primer3 parameters file\n", "cat << EOF > /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/\n", "=\n", "EOF\n", "\n", "# Run Primer3\n", "/home/sam/software/primer3-2.4.0/src/primer3_core \\\n", "--format_output \\\n", "--output=/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers.txt \\\n", "/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PRIMER PICKING RESULTS FOR TRINITY_DN51983_c0_g1_i8.p1.cds\n", "\n", "No mispriming library specified\n", "Using 0-based sequence positions\n", "OLIGO start len tm gc% any_th 3'_th hairpin seq\n", "LEFT PRIMER 1347 18 59.89 55.56 9.11 0.13 42.06 TTACGCCACGGCAACTGT\n", "RIGHT PRIMER 1471 18 60.05 61.11 10.11 0.00 0.00 CGCAGTGCCAACAAGCTG\n", "SEQUENCE SIZE: 14484\n", "INCLUDED REGION SIZE: 14484\n", "\n", "PRODUCT SIZE: 125, PAIR ANY_TH COMPL: 10.66, PAIR 3'_TH COMPL: 0.00\n", "\n", " 0
ATGGAGCGATTGGTTTTAGCGCTAATCGTTTTGGCGGTCTGTGTCCACGCAGGTCCGATA\n", " \n", "\n", " 60 CAGAAAATCTCAGTTGATACCTGTGCAAGAACTTGTACAGGCAACAGCAAGTTTAATTAT\n", " \n", "\n", " 120 GTTGCTGGTAAGACTTACCAGTACAAGTATGATGCCGACATCAAGACTGGGGTCCAGGGA\n", " \n", "\n", " 180 GCATCAGAAGATAATGCGAAAATACACATGTCGGCAACAGTCGAGATGGAGGTCATATCA\n", " \n", "\n", " 240 AAGTGTGACCTTGTCATGCGGCTGAATGATGTCACATTGACAGAATATGACCCCATTGAC\n", " \n", "\n", " 300 TTGATAATGAAGGAAACGTCATCTGAATTCCGCAGTGGTCTTGAGAAGGCTCCCATCCGA\n", " \n", "\n", " 360 ATCTCTTTCCAAGATGGAAACATAGAGGAACTTTGTCTGAAACGAGAGGAGCCAAATTGG\n", " \n", "\n", " 420 GTGCTGAATGCCAAACGTGGAATCCTGTCAATGTTCCAGAACAACATGGACGATTTCTCA\n", " \n", "\n", " 480 ACCAATAAAACAGTGTCTGAGACTGATGTGTCTGGTGTGTGTGATACAGAGTACCACGTG\n", " \n", "\n", " 540 AGTAAATCTGGCTGGTACAAGACCACAGTCAGCAAGTCTAAGAATCTCCTAGGTTGTACT\n", " \n", "\n", " 600 GACAGGACCGGGTACAACACTGCCGTCCAGGGAGTGCCATATACCGCAGCTTCTGGCATC\n", " \n", "\n", " 660 CAGTCACTGCCAGTTGTGAAGAGCAACCATAACTGTGAACATGAGATTGACATTAAGTCG\n", " \n", "\n", " 720 CACATTCTCCATTCGGCAACTTGTAATGAACAGCACATGTTCCTGCCATTCTCACGCTCA\n", " \n", "\n", " 780 GACAGTGGAGCTGTAACATACAACACTCAGACTCTCAAGTTCGTGAAAGTAACAACTGGA\n", " \n", "\n", " 840 ATCAAGTCACCATTTGATACCAGTACTCGCAAATCAATGAAATTCGATCACATCAATGAT\n", " \n", "\n", " 900 GCAGGCAGAGAAGCAAAATCGAGGAAAGATATTACACGTAAGCTTATTGAAATCTGTGAG\n", " \n", "\n", " 960 AAGACGAAGTCTGGTGTTCGCCCTGAAACCCCACGTCTGTTCACAGACCTCGTGTTTATG\n", " \n", "\n", " 1020 ATGAAGACTGTTGATTCTGACACTCTAGCAGACACGTACAAACTGCTCCAAGAAGGTGCC\n", " \n", "\n", " 1080 ATCTGCACTGACAATAAAGAAAGAACCAGGAAGTTTTTCCTTGATGCCTTACCGATTGCT\n", " \n", "\n", " 1140 GGTTCCAGTTCCTCACTGCACTTGATGACCCAACTCTTGACCACAAAGGCTGTGACTGGA\n", " \n", "\n", " 1200 ATTGAGGCAGACATGTGGCTGTCGGCCATCTCTTTTCTCAAGAACCCAACTAAAGACATG\n", " \n", "\n", " 1260 CTGAAGGAAGTCAAGCCTTTGATCAACATGAAGGACACAACTGACAAGGCCCTGCTTTCT\n", " \n", "\n", " 1320 GTCGGTACCCTGATCCACAGTTATTGTTTACGCCACGGCAACTGTGAACGTGATGACATG\n", " >>>>>>>>>>>>>>>>>> \n", "\n", " 1380 
GTGAAGGCTGTCATCTCCGCAATGCAGAACAAGATCGCTAAAGGATGCAAGATCAAGCCA\n", " \n", "\n", " 1440 GACAACTTCAAAACCAGCTTGTTGGCACTGCGAGGCATCGGCAATGCCGGATATGCCGTA\n", " <<<<<<<<<<<<<<<<<< \n", "\n", " 1500 TCAGCAATCCCCACCCTGGAGATTTGTGTGAAGATCACCTCGAACCCCATCGAAATTCGT\n", " \n", "\n", " 1560 CTGTCCGCTATTGAGGCTTTCAGGAGGATGCCATGTGATGCAAGCAGAAAAGCTTTGGTA\n", " \n", "\n", " 1620 AAGACATTCTTACAGAAGGAGGAAGATTCCGAAGTGAGGATCGCAGCATACAGGGTCTTG\n", " \n", "\n", " 1680 ATGGAATGTCCATCACCTCAGTTCCTCAGCCTAGTCCGCTCCACGTTAGAGAGTGAGGAA\n", " \n", "\n", " 1740 ATCAATCAAGTTGGGTCTTACATCTGGTCGCATCTTACAAACCTGATGGAGACATCTGAC\n", " \n", "\n", " 1800 AGACATAAGCAGGACATCAGAGCAATCCTTGAGGATGAAACATTTAAGAAGGAATTTGAC\n", " \n", "\n", " 1860 CTTGACAAAAGGAAGTTCTCACGTAACTACGAAGGTTCCATGTTTATTGAGAAACTCAAT\n", " \n", "\n", " 1920 GCCGGCGCGAAAGTTGAAGGTGATCTTGTCTGGTCATCAAGATCTTTTGTTCCAAGATCG\n", " \n", "\n", " 1980 GCCATGATGAATTTCACTGTTGATCTTTTTGGACATTCTATAAATCTTCTGGAATTCGGT\n", " \n", "\n", " 2040 GGTCGAGCTGAAGGAATAGAATACTTCTTGGAATCATTCTTTGGACCCAATGGCTACTTC\n", " \n", "\n", " 2100 AGTGGACAGGACACAAAGGGTGGAAATGATATGTTGGCCAATGGAATCAAACAGGAGAAA\n", " \n", "\n", " 2160 ATGAAGAAGATTGATAACAGGTACGGCAGCAAGATGGACGAGCTCCGTGGCTCTATGTAC\n", " \n", "\n", " 2220 ATGAGGGTGTTTGGCAATGAATTGCGTTACAAGAGCTTCCAGGGAGTGGAGGATCTGATG\n", " \n", "\n", " 2280 TCTGGTCCTAACTTCAACCTGTTCGATATGCTGATACAGCTGGCCAAGGACAACGATTAT\n", " \n", "\n", " 2340 ACTTTCAGCCATTCTACTATGTTCCTGGACACCAGCATCATTATCCCAACTGGAGCAGGT\n", " \n", "\n", " 2400 TTCCCAATGAATCTAACAGTCAACGGAACTGCAACAATTGATCTGAGAATGAAGGGAAAG\n", " \n", "\n", " 2460 ATGGATCTTAGGTCCCCGCCAGCTGTCAACATAGCAGGACTTGTACAACCAAGTGCGGCT\n", " \n", "\n", " 2520 ATCGACATATCATCAATGATGAGCGTGGATGCATTTGTAACCAAGTCCGGAATTAAGATG\n", " \n", "\n", " 2580 GTGTCCACCCTACACTCCAGTACAGCTGTCCAGGGCAAGTTCCAGGTGAACGAGGCTGGT\n", " \n", "\n", " 2640 GTGATAAGCGCAGAATACGAGATGCCACAACCCAAGATGGAGATCATTGATGTCAAGTCA\n", " \n", "\n", " 2700 TCATTCTTCACTGTTCATCGAGACATGGAGAAGGAACAAAAAATGATTGCTGATAACATT\n", " \n", "\n", " 2760 
CATACAAAGAAATTCTGCTCCCCAGAGAAACTTGTCACCATCACAGGACTTGAACTATGC\n", " \n", "\n", " 2820 GCAGAGGTTACATACCCGAATGCATCTCTGAAATCAGATGCCCCATACTTCCCCCTTACA\n", " \n", "\n", " 2880 GGCCCAGTCACTGCTGGTGTCTATCTCTACAACAGAGATACCCACAAGAAATACAAGATG\n", " \n", "\n", " 2940 GAGGCCAGATCCACACATAACAAGGCAAGCAACATCCTGCATTTTACCTTTGACACACCA\n", " \n", "\n", " 3000 GAATCTCGCATCGACAGACATATTGTCCTAGATCTCAACCTCAACAAACGAGCAACAACC\n", " \n", "\n", " 3060 ATTGATATGACTTTGAACTCTCCATGGAAGAAGGCAAATCTGCAAGGTGCTATTACAAAT\n", " \n", "\n", " 3120 GACAAAGGTATCATGGGTATTAACAGCAAGATGTCGGTGGATGGCAAATCTGGTTTCTCG\n", " \n", "\n", " 3180 GTCAACTCCAAGTTTGTGAAATCCCGTGTCGGTAACAGCATCAAGTATGTTCCTTCTCTG\n", " \n", "\n", " 3240 GAAATCAGTATACCGCAGATGAAGCCCGTGAGCATCCGAGGCATGTTGATGCATAATGGT\n", " \n", "\n", " 3300 TACCGGCAGCTGGATTTGGAATATGACATGTCCGGACTGACTGCCGCACCAGTCTCCGCT\n", " \n", "\n", " 3360 CAGTTGACCATTAACAACAAACCGGCCCTAAAAGGCGTACGAGGAAGTTTGTCTTTGCAA\n", " \n", "\n", " 3420 AAGGACAAGGCCTACTCATTTGACACAAGGGTCATGATTTCATCTGACAACAACAAGATT\n", " \n", "\n", " 3480 GGATATAAGCCATTTGTCTCCCTCCGTTCTCCAACAGGGGAGGTCCTTGCATTTGGCGGA\n", " \n", "\n", " 3540 TCTGCTAATATTATCTTCAACAAGAAGGTTGTTGTAGACCTTGTCCTTGATAAGGCTTTT\n", " \n", "\n", " 3600 GCCAAAGCAGTAACACTCAAAGGTTTCGTAACCAAGACAACGAAGAACCGAGGTCGTGTT\n", " \n", "\n", " 3660 ATATACAACACTAAGATGACTTATACATCCAGTCCAGTGGATCTTCTGCTGAAAGCCAAG\n", " \n", "\n", " 3720 ATAGACAACAGAAACTCTCGTGCAGTTGCGACAGTTGTAGACTTCACCTACGTCATTCGC\n", " \n", "\n", " 3780 AAAATTGCCCGTAACAACATCAAACTTGTGTCAAAGGTCACGAACCTCAGCAGTAAATAC\n", " \n", "\n", " 3840 CTTACCAAGGCCAAAGCTTCAGCGAATGTACTAGTAAAAAGGAATCCTGAGTTGAACTTG\n", " \n", "\n", " 3900 AAATTTATGGGTAATCTAGAGCACAACAAGAAACACTCCGAGTTTGACGTCGATGTTCGC\n", " \n", "\n", " 3960 TATGGTCGTGACTTCAAAGATGAAAGCAAACATATCGACATGAGTTTTGAGATGAATAGA\n", " \n", "\n", " 4020 AAGTTCAAGGACATTACGGCAGCATCTGCCAATCTCCAGACAAGATTAACATTCCCTGGA\n", " \n", "\n", " 4080 CAGGGTCTTAATATAATCGTAAAGGGTAATCACAAACACAGCAGTAAGGAACTTGATTCC\n", " \n", "\n", " 4140 
AACATCTTCCTGCGTCTTGGTAAAGGACAACCTATTGAGAGCAGTTTGCTCATACAGGAC\n", " \n", "\n", " 4200 AAAACAGAACAATTGGTGAAGGTCGTCAGTGAATACAAGCTTAGTTATCCAGGCAGGGAA\n", " \n", "\n", " 4260 GTCATCTTCAATCATACATTAGCACAGACAGATAAAACCACTTTTGTTAGTTCCTTCAAA\n", " \n", "\n", " 4320 TCTCAACTGGAGAAAAATGGAAAGAATACCATTATTACAACACTGAAAACTTACCCTGAC\n", " \n", "\n", " 4380 AAGGAAAGGGTTTCCATCTCATCAGATTTGAAACTAAGCAAACAAACACCAGTCCATTTG\n", " \n", "\n", " 4440 CAAGGAGACTTTGATTTCAATCCAGCAGACTTTATCACCAGTATGTCATATGAGAAAGAT\n", " \n", "\n", " 4500 TTAAAAAAATATTCTGGATCATTGACATCTCTGACCAAGGGAACTGAGTCATCAAGCTTA\n", " \n", "\n", " 4560 ACAATTGATTTGCAACACCCAGAAAGACGCATTGTCATGGACCTTAGTGGCAAGAGCATG\n", " \n", "\n", " 4620 ATGAAGAAATACCAGGTTGAGGCAGATTTAAAGTGGAATGCAGGCAGAAATGATTCTGAA\n", " \n", "\n", " 4680 AATGTGCATATATCATCTTGGATTGAGCCACCGACAGAAGAAAAATTGAATGGCTCCATA\n", " \n", "\n", " 4740 ACCATCACTTATCCGACAAGAACACTCACTTTGAACGCCCATCAATTACTATCAGACAAA\n", " \n", "\n", " 4800 TATAACATGCATGCTGATTTTACATGGGAAACTGACAAGAAAGTTACTATGGATACAATC\n", " \n", "\n", " 4860 GTCTTATACAAAAACAACATGATGATAAACACCTTAAAGTTGACCTCTCCATTTAGTAAG\n", " \n", "\n", " 4920 ATGCGTAGACTTGACCTAGCTGTAAACCATAAAGATGACCACGAAGAATATGCAACTAAC\n", " \n", "\n", " 4980 GTTGAGGTCAAGTGGAACAAAGGTGAATCTGTAACTTCTGAGCTCATACTTAAAAAGCCC\n", " \n", "\n", " 5040 GCCAGTCTTAAAACCGTTGTTGGTAGTGTCTTCATGAAAACAAGCTTCAAGGTTCTGAAG\n", " \n", "\n", " 5100 AAGATAAGATTAGACGTAAATCATAAATTAAGTGACTCCCTGGCTTCGTCTGTGAAATTT\n", " \n", "\n", " 5160 GCATGGAACAGGCAACTCATTAATGTAGATGTAAACCTGAAAAACACAACCAAAGGTAAG\n", " \n", "\n", " 5220 AAGATGGGATTTAATGGAAATGTTGATGTTAAGACCTCCTTTGCCTACTTGAAGAAGGGA\n", " \n", "\n", " 5280 CAACTTAAAATCTCCCATGATAACAATGGTAAGACATTCAACACTTTGACGACTTTGATG\n", " \n", "\n", " 5340 AAGAACAAGAAAACATACAAAATCGACAGTAAGATAACTCACATCCCCACAGCTAATAAA\n", " \n", "\n", " 5400 TATGAAAACACTGGAGCTATTTCTATTTCTGCACCTACTGGAAAGTCTGATATAGTATGG\n", " \n", "\n", " 5460 AGTCATATGCACACTTCAGAAAAAGTCACGTCCATGTTCGCTTCATCACAGGGAAAGAAG\n", " \n", "\n", " 5520 
GGTGATAAGAATATAACAGTGAGACTAAATGGAGAATTTAAAGACCTATATGCTGCCAGT\n", " \n", "\n", " 5580 CTTAGCATTCAAACACCATATAAGTCTGCTCGTGATGCCTTGCTGGAGGTTAGTATGAAG\n", " \n", "\n", " 5640 CATGATGGGTATGTTATGGTTGACAGCAAGGCTAACGTAAATGTAAATGGTGCGAAAGTT\n", " \n", "\n", " 5700 GCTGCTGCTACCATTAGCTACAACTTGCGGAGCCCTGCTACCATTACTACACTTAATATT\n", " \n", "\n", " 5760 CCTGCCCTTGATATCAACTGCAGGATTCAAGGTAATGCAACATCAGAGAAAAACTCAGAA\n", " \n", "\n", " 5820 ACTGTTATAATGGAGGTTGTTCTTACACCAGAAATTAGTATGGCCCTCTCAGCATCACAG\n", " \n", "\n", " 5880 AAATTTTTTGATGTCACTGGTAACCCTGATGCTTTTGCCCAGTCTCTCATTTCATGGAAA\n", " \n", "\n", " 5940 TCTTCCTTCCCAGGATATGAGCATGCCAAATTCGTTTACGAAATTAGTGATAACAAAGAC\n", " \n", "\n", " 6000 GAGTCCAGTGTTACAACACATACAAAACTTGAGTATAGCCAGGGTAAAGTCATAGAAGTT\n", " \n", "\n", " 6060 AAATCTGAATTAGATAAAAACAGTTACTATTCATCACTGGCTACACCTTATGAATCGTTG\n", " \n", "\n", " 6120 CCATTTATGGAAGGAAAAATCAGTTTCGTTGGCAAACCAGAATCTTTCAGAAGCTCTGCA\n", " \n", "\n", " 6180 TTTATCAAGATACTACCTGTATTGGAGAAAACGTCTGCATCTGTATCATGGAGCACCCTT\n", " \n", "\n", " 6240 GATGGATTGAAAACTGAACTGAGAGTGGATACACCATCTACCCAGTATCCTTATATTCAG\n", " \n", "\n", " 6300 GTTAACTTGGAGGCAAAGAAAACGGAAAACGGAAAAACAACCGGTGATCTTGTAGTTGAA\n", " \n", "\n", " 6360 TATCTTCCCAGAGAAACTGTCCGAGTGGAGGTGATGAGCGATATTAGTGATATTGTCAAC\n", " \n", "\n", " 6420 CTCTCTGCAAAGGTGACATCACCATTTGTAATGATTGATCTATACCACAGTGGAAACCTA\n", " \n", "\n", " 6480 CAATCATTTAAATCCAGTGCAGAGATTGAGGGGACACCTGGCAAGAAATATGGTCTCATT\n", " \n", "\n", " 6540 GTAGGCTATACAAATGGTACAACGATTGAAGGCTATGCAACTATATCTGTGCCAGGAAGA\n", " \n", "\n", " 6600 AGAGACATAAATGCAGTCTTCTCACACCAAGGATCTGCAATGAACTTTATCACTCATGCA\n", " \n", "\n", " 6660 GAAATAACACACAACAGAATGAACCAATTCACCTCTGACTTTAAATTTGCAGCAGGCAAC\n", " \n", "\n", " 6720 TCAATAACTGTGTCTGCATCCACTTCACTTAGGTCTTTGCTCCTCATATCAGAAGACAAA\n", " \n", "\n", " 6780 TATAGAGCAGCTTTTACTACGAAGGTCATCCCATTGAAAAAGATGTCTGCTCATGGAGAA\n", " \n", "\n", " 6840 TTTGTGACAACAACCATAGGAAAGTCTGAAGCAGATGTGTCTTTTGACCTGACTAAAGAT\n", " \n", "\n", " 6900 
ATTGAGGGTAGTCTTACTGTTAAATCACCTTTGATTAAAAACATTGAAGCTTCTTTTAAT\n", " \n", "\n", " 6960 CATCATCAGAGTGAAAAATATATAAACAGTAGAGCAGAAATCGTCCATGACGGTAAAAAA\n", " \n", "\n", " 7020 AATATTGATGTGCAGGCCTCTCTAAATGTTGATGAAGATTCTGTTGTTGGTGAAATAGGT\n", " \n", "\n", " 7080 ATCAAATCCCCATTGTCTGATGATATCCAGGTCCTGTCTAGGTTTGATGGTGGTAAAGAC\n", " \n", "\n", " 7140 AACTTTGTTGTTCATTTTGAAGGCTCTGTCGGTGTTAACAAATCTGAAATTGATTTATCC\n", " \n", "\n", " 7200 CACAAATGGGTTGAGAGAAACAGCCAGCAGACGATAAGTGTGAGATCGACAGGGATGAAG\n", " \n", "\n", " 7260 GACGTGACAGCAGAGATAAGTAACACCGGTGATTACTATAACTTAAGATCATATGCTGAG\n", " \n", "\n", " 7320 TTAAATCATGGACTTGAAAAACATAGAATTGATTCTACATTTAAACATCTGATAGGCAAC\n", " \n", "\n", " 7380 ACTGAAGGAAGTATTTCCATATCTTCTCCCCTGGGTGATCCACTATCCACAAACTTCTTT\n", " \n", "\n", " 7440 GTAAAGGTCACTGAGGCAAATTTAGAGTCAAATGTTGAACTGAACATAGGCTCAGACAAG\n", " \n", "\n", " 7500 AGTGAAGTTCATGTTTCTGTTGGTTCAAAACCAAATCTGGAAGGAAGCATTCGTGTCAAT\n", " \n", "\n", " 7560 TCACCTTACGTCACTGATGTCTCAGCTGGATTTGATCACACTGGAGAGTTTCCAAATATT\n", " \n", "\n", " 7620 GTTTCTCACGCACAAGTGAACATAGCCAATAAAGACATTGTTGATGTTAAGTTGATGTCA\n", " \n", "\n", " 7680 CAATCCACAGAGAGGGGAATAGCTGGATCATTTGCACTGCAGACACCATTGAAAAACTAT\n", " \n", "\n", " 7740 TCAAGTATTGAGACATCTTTCTTCCACCAAGGCATGTCTAACAATTTCAGAAGTCATGGA\n", " \n", "\n", " 7800 GATTTTGCCATTAATTCTGATAAGTCTGAAATTAATCTTTTCTACAGTTCAATGTACAAG\n", " \n", "\n", " 7860 TATGAGGGTATGGTCTCCATCAAATCTCATCTCATTGATGACCTTAAGATTGGCTTTGAA\n", " \n", "\n", " 7920 CATGATGGAACAAAAACTAACTTTGACTCTCATGGGGAAATCAGTGTTGGATTTGAGAAA\n", " \n", "\n", " 7980 TATGAACTCGATGTTAATGCAAATGTAGATTCAGATGTCATATTGAGCATGATTGTAAAA\n", " \n", "\n", " 8040 TCTCCCAGAAATGAAGATATCATTGCTAGGTTTTCACACTCGGGTAATTTCTCAGATTTT\n", " \n", "\n", " 8100 AGATGCCATGCAGAACTAATGCATGATAAAGACAGCAAATGGGAGGGAGATGTCAGTTTT\n", " \n", "\n", " 8160 TCTGCTGGTGATGAAATAAGAGGAGAGATTGTTTTCAAACCATGTCATCATCTTCACAAA\n", " \n", "\n", " 8220 GTTTTCATTTTCGATTCTAATATCAAGTTTGAATTCAGTAAATACCATGCTGAAATAAAT\n", " \n", "\n", " 8280 
GGAGATTTACCATTTTTAGGAGAAGTACTTCTGAATTTCAAACGCACAGGAGAACTAAAT\n", " \n", "\n", " 8340 GACTTTAACATACATGCAGATGCTAGTTTCAGAAATAAGAAATCAGATGCAGATGTCACT\n", " \n", "\n", " 8400 TTCCAGCTTGCAAAGAAATTACTCTCTTCATCTGTAAACCTTAGATTTCCACATATGGAA\n", " \n", "\n", " 8460 GATGTGTCTTTTAGTTTCAATCACAATGGAGGTTTGAAAAACTTCAACACAAAAACTGAA\n", " \n", "\n", " 8520 ATGACAATTGGACAGAATAGAACCCAAGGAGCTGTAAGGGTGAGAATCACTGGTTCATCC\n", " \n", "\n", " 8580 ATTTATTCAAACTTAGCAATTCAATCCCATTACATGGACAGGATAAATGTTAACTTCAAC\n", " \n", "\n", " 8640 CACAATGGACAATTTGGAGATTTCACCACCAGTGGAGATGTCACTATTGGCAACATGAAA\n", " \n", "\n", " 8700 AATGAAGCAGACATGGCATTCCACCTTTCTGGCAAAGTTATGTCATCATCAGCCAGTGTC\n", " \n", "\n", " 8760 AAATCGCCTTTAATGGATAGTGTCAGCATAGACTTTAACCACAATGGAGCACCAACAGAC\n", " \n", "\n", " 8820 TTTACTACCAAAGCTTCTGTGACCATAGGTGATAAAAAAAATGAAGGAGACATTACATTC\n", " \n", "\n", " 8880 CAGATTACAGAAAAGGCTGTCATCTCTTCACTTAATATCAAATCACCATACATGGACAAC\n", " \n", "\n", " 8940 ATCAATGCTAATTTCAATCACGAAGGATTTGCTACGAAATTCACTACCAAGGCTGATGTA\n", " \n", "\n", " 9000 ACAATTGGTGTTGAAAAAAGTGAAGGTGATGTTTCATTCCAGTGGTCCAACACTTCTACG\n", " \n", "\n", " 9060 ACAATCTCTGGGAACCTGAAATCGCCATCAATGGAAGATATTACTTTTAACCAGTACACC\n", " \n", "\n", " 9120 TTATCTATGCCAACAAAGTATACGTATCATCAGGATTACTCGGTTGGTGATGCAAAAACT\n", " \n", "\n", " 9180 GAAACTGATATGATGCTGGACTACACCAACCTGAAGGCCACCTTTGCTTTGACTTCGCCA\n", " \n", "\n", " 9240 AAAACTTCCCTAAATTTTGACCACTCTGGTGATATTAAGAATTTCATCTCCGCATTGGAC\n", " \n", "\n", " 9300 CTAACAACTGAAATGGGAAGATATTCTTCTGAAGCAAAATTCACACTGAATGATAATGTA\n", " \n", "\n", " 9360 GAAGCAAGCTTCAAACTCACTTCCCCTGTTGAAGGAATGGAGTCAATTTCAGCACTTTAT\n", " \n", "\n", " 9420 ACCCACAATGGCAAACCATCCAACTTCAGATGCCATGGGGAGTTATCTCTTCCAAATAAA\n", " \n", "\n", " 9480 GCTAATGTTGCTGATTTAATTATCAATGTTGAGCAACTAACAAATATGAAAGGAAGTCTT\n", " \n", "\n", " 9540 CAAATTAAATCTGAAATCATTCCAGACTTCACAGCGAAATTCTCCAACATACTCACAGAA\n", " \n", "\n", " 9600 ACATCTCTGCAGTCAATGTCTGAGATCAGTGTTGATGATGAAGTACTCTATGGTATAGCT\n", " \n", "\n", " 9660 
GTCTCTTTCACCAACAATTCATTCCTTAAAGCTGAACTGGAAGTGAAAACACCAGTAACA\n", " \n", "\n", " 9720 GAATATCGTGTTATTAGGCTGTCAGGAATTAATCAAATGGACAAAGACCAGGTAAAAGCA\n", " \n", "\n", " 9780 TCTGCAGAGCTTCTTATTGGAAGTGATAGCAGTGTAATGGAACTTTCATTTTCTTCTAAA\n", " \n", "\n", " 9840 CCAAATGTTGCGGGCGTTTTCGAACTCAGATCTGTATACCTACCTCATCTTAAGGCTACC\n", " \n", "\n", " 9900 CTAGATGCTACTGGAACACCAACTAATTTTAGAGGAAACTCAGAACTTATTTTTGATGGT\n", " \n", "\n", " 9960 GAAAAGAAAGGAAGTGTTGATGCTTCTTTCAATATGGAAGGTATATTAGATGCAAATCTC\n", " \n", "\n", "10020 AAGATGGAACTGCCCCTGGAAAATTACACAAGGATAAATGGAGACATCTCAATTAGGAGC\n", " \n", "\n", "10080 AGTAGTGACTTACTTGACATAAAATCTGCACTGAGTATTGAAGATAGAGAGTTAGTTGAC\n", " \n", "\n", "10140 GGCAGTGTTCGAATTACAAACCAAAATCAGCTCTCAGGAGAAATATCTCTGAAATCTATT\n", " \n", "\n", "10200 TACATTAACCCTTTGACTGTGAGCTTTGATTTATCTGGGGATCTTAAAGACTTCAGTACA\n", " \n", "\n", "10260 ACAGCCGACATAAGCTACAATAATGAGAAACTGGCTGCAAGTAGTATGAAATTCAATTTC\n", " \n", "\n", "10320 AACCCAACTCCTTTGAAAACATCTGGGTTTGCATCGTTCAACAAGATGTCTTTTGCATAC\n", " \n", "\n", "10380 AGCTTAGATAATGCCCCAAAAGATTTTAAATCCCATGCAGAGGTAAAAGTAGAAGAAAAG\n", " \n", "\n", "10440 ATGTGGTCTCTTGATGCAACCTGCACTAGAAAGCAAGACATTGATGCTTCATTTAAGGTC\n", " \n", "\n", "10500 ATTATTCCAGAGATTGATCCAATTTCTGGAAAATTGTCTCATAGCCAGAGACAGTTCAGA\n", " \n", "\n", "10560 TGTGCATCTCATGTCGAGCTTAAAACAGATTCCTCAACAAAATACCAGTATGACATGCTT\n", " \n", "\n", "10620 CTCAAGTGGAGACGTGCCTTGGAAGGAACTTTCAGTGTCAAAACACCCATCAAAGGAATG\n", " \n", "\n", "10680 GAAATTACAAAAGTTGTCCTCAATCATGATGGCACTTTCCCAAACATCAAATCTTCAGCT\n", " \n", "\n", "10740 GAAGTCAATTTCAACAAGACAGACATTTCAGCCGCTGTCAGTCTGATGCATGGTGATTTA\n", " \n", "\n", "10800 ACCAAAGGAGATGTTTCCATTAAAACTCCATTTGAAGGATTGGAAGATGTCAGTGTAACA\n", " \n", "\n", "10860 TTCTATTCTGAAGGTGATATTGAAAAATTCCAAGCAGGTGGAAAGGTTTTGTATGCTCCA\n", " \n", "\n", "10920 GGAAAAGATATTGAGGCAGGAGTGGAACATAGTTTGTCTGACAATAAGCTGCAGTCCCGC\n", " \n", "\n", "10980 ATCCATTTTATGTCACCCTTCACCGATGAAATCGTTGTCAGTGCTAACCATACTGGGGTT\n", " \n", "\n", "11040 
CCTATGAATTTTGTGTCAAGTTATGAATTAGCCATGGGTTCAGATAATAGAATCAAATCT\n", " \n", "\n", "11100 GAGACATCTTTGAAAACTGGATTCCAGTATGTTCAATTCAAATCTTCTGTCACTACAACA\n", " \n", "\n", "11160 ATGAGTGGGGAAACATTTGTACAAAAGGTTGAGCTAAAGCATGATGGCGCTATAGAGAAG\n", " \n", "\n", "11220 TTCAAAACAGAGGCAATGGTACAAGCAATGGAGAAATTAGTAAGAGTTGATACATCCTTC\n", " \n", "\n", "11280 CAGCTTGAACCAGTCATTGAAGGATCAGCCTCAATCATTACACCATTCTTTGAAAAGTCA\n", " \n", "\n", "11340 GAGGAAATAAAGGTAATCTTTTCACATAGTGGTAGTACTAGTGGATTCCTCTCAACTGGA\n", " \n", "\n", "11400 GAGTTACAGTATTCTCCAAAGAAAAAGATAAGTGGAAAAATTGATTTCCTCAATAATGGT\n", " \n", "\n", "11460 TGGAGGAGGAGAGTTGCAACTGTCGAAGTAAGAACGCCATTTAAGGGCATGGAGGTGAAC\n", " \n", "\n", "11520 AAGGTTACCTACCAGCATACAGGCAACTCGGACAGCTTTGATTGTGATGCAGATATTTTT\n", " \n", "\n", "11580 GTTGCTTCCCAGCCACTCAGCGCTACTTTCAAAGGATCTAAAGCACCTTTAAGCATGGAT\n", " \n", "\n", "11640 CTGAACATCAAAACACCATTTGAAGGATATGAGAGTTTTGGACTTAACGGGAACTTCCTA\n", " \n", "\n", "11700 AACAACACAATGGGCAGATATAGCGGCAGAATGGAAGCCTCATGGCACCCAAATCAGGCA\n", " \n", "\n", "11760 GTCATTTTTGAAAGTTCATTTGCTGCATTGAGAAATAAAATTGAAGGAAGTGCCTCACTA\n", " \n", "\n", "11820 ACAACTCCATTTGTCATGCTCCAACGGTCCTCAATTGAATTCTCACACCAGGAGCTCTCT\n", " \n", "\n", "11880 CAAAAATTCATTGAAAATCTAAAAGCAACTTTCAATGGAAGTAGAATTGTGGATGTTGAG\n", " \n", "\n", "11940 GTTGATCACAGCCTGATGAATGAACGCAAACATGCCATTATTACAATGCGTGCTCCACGC\n", " \n", "\n", "12000 TCCATGAAATTTGACATGAGTGGTGATTTCACTCTGGAAAGTTCAGCAGTTGATGCATCT\n", " \n", "\n", "12060 GCCAACTGGAACACTGAAGATAGTAAAAGTAATCTGAAACTTGAGGGAGTTTATGACATA\n", " \n", "\n", "12120 AGAACAGACAAGAGAATGAACTTCAAACTGACCCGACCTGGACAGGTGATTTCCTATTCC\n", " \n", "\n", "12180 GGATTATTGGACAGAAGACACAGTAAATCTGACCTTGTCTGGGGTATAGGATCAACACAA\n", " \n", "\n", "12240 AAAGCAGGCTATGAAGTCGCAATCAATGATTATGACTCCAGACTGAAACTTATACTTCCA\n", " \n", "\n", "12300 ACACGAAGCCTAGAGCTAACCGGGTCCCATCGATCAAAGGTCACAGAAGGGTCATTTCTA\n", " \n", "\n", "12360 TGGGATGCTGATGTGGATGAAACAAAGAAGGTTGGCTTCCGTTCTGTGATAGTGCCATCA\n", " \n", "\n", "12420 
GATGATGCTATTAAGGCCGATGTTACATTCCAGCTTCCAAGCTTAGGAAAGAGAGTACAA\n", " \n", "\n", "12480 GTCGGTTCAGAGGTTGTGGTCAATAGAGGACGTGTAATTTTCGACGGCAAGACAGAGTTC\n", " \n", "\n", "12540 TCTTACTCTCCTGATGAGAGAAAGAAGATTGTGTTTTCATCAAGGCTTGAGGATAAGTCC\n", " \n", "\n", "12600 CTTGGAGCCACGAAAAACTACAGCTTCAGCATGGGTCTAAGTCATCCATACACGACTGTT\n", " \n", "\n", "12660 GATATCAAACTGGCATCACATGTTGGCAAATCCAAGAACAGATATTCCGCTGGTATGGGG\n", " \n", "\n", "12720 CTTGAGTACCTTAACACACACAGACAGACCAAGACCTTCCAAGTCAATGGACAGATTAAC\n", " \n", "\n", "12780 AAACTCAGGAAAACCATCAGCTTCGAGCTGCAGAGTCCAGTCAAATCTATCAGTATTTCG\n", " \n", "\n", "12840 GGCAGTGCTCAGACTGAAGACAAGTTCCGTGTGTCTATCCTCAACATCTATGACCAGAAG\n", " \n", "\n", "12900 ATGCCACTGACGACTGTTTTCACCGTCGACCCTGCCACCAGATCACTTGACTTTTACATT\n", " \n", "\n", "12960 AATTATGACCTTGAGAACCCAAGTAGTGAGTTCCACATAAATGCCAAGTATGTGAACAGT\n", " \n", "\n", "13020 TCTGCATTGGCTGCCGAGGTGTTCCATATCGTCAACCGTGAACGAGTGACTGACATTCTG\n", " \n", "\n", "13080 GTTACTGCCAGACTCAACAACTCTCACCTCCTTCACACAAGAGTTCACTGGAGACCGGAA\n", " \n", "\n", "13140 ATGATCAAGGAATACGAGACTGTTCGAGAGAGGAAACTTGAGGACTACAAGAGACGCCTG\n", " \n", "\n", "13200 GAAATTTCAGCAGTTAAAGTAGAGGCTGCTGTAATTGAGGAAACTGTTGAGAAGTTTAAG\n", " \n", "\n", "13260 ATGATCTGGGAAGCTGTTGTAGAGGAGATTGGCCAAGACAATGCTCTAGCTATAGAGGCT\n", " \n", "\n", "13320 GCTGTTATCCCCATGTTTGAAGATGGACTGAGGACATATGCACAAGTGCAGGGACATGTG\n", " \n", "\n", "13380 CAGGCAAACTTCCTTCCATTTGCCTTGGATTTGGCCAACATAACACAGTACAAGTACAAC\n", " \n", "\n", "13440 AGATTTGTACAAACATACAGAGAGTTTTCACAGGCAGTAAAGCTTCCAGAAACAATGAAG\n", " \n", "\n", "13500 GATCTTGACTTCCTGTCGTCTGCCAGACAACCATTGGAGAAGCCCTTTAATTATGCACTG\n", " \n", "\n", "13560 GATCTGATCAAGATCCCTGCGAAGCACCTTCCAACCCAGGTGACCGGAGCACTGAAATAC\n", " \n", "\n", "13620 CCGGCCAAATATATGTTTTCTAATATGGATACTGCTATGTTGGAGCAGCTTAACAAGAAC\n", " \n", "\n", "13680 TGGGAACTTGAACACCATCTAGAATCTGTCATTGAAACCATTCTTCTTGCAATTGAGGAG\n", " \n", "\n", "13740 GAAGCTGAGGAAGGCATACTTAAATTGAAGAGTATCATTCTCAACTCGGCACAATCCCCA\n", " \n", "\n", "13800 
ATCACTGTGTACGACCCAACCCACGGGGAGATCCAGGCAGAATTTCACTCACCCATACCA\n", " \n", "\n", "13860 CTGAAGTCTCTGGATGAGCTACCTGTTATCAATATTGAAAGATATATTAACATGGTTATG\n", " \n", "\n", "13920 AAAACTCGCAGTAGAAAGACAGAAAATGTTCCCAGCAAGACATCAGAATGGTTGCCACCA\n", " \n", "\n", "13980 TTTACTGGTGTTGCCAGTATTACCAGCGGTCATAAAATCACAACTTTCGACGGGTATATC\n", " \n", "\n", "14040 TATGACCTTGATGCTGACTGCACGTTTGTGCTTGTCCGTGATTTCAGGAATGGCAACTTC\n", " \n", "\n", "14100 AGTATTATTCTGAACAACAATGATGGGACAACTCTGTCAATCCTTTCACATGGAAAACCA\n", " \n", "\n", "14160 ATAGAAATCAAGCAGAGCGGAGAGATCTCTGTTGCCATGGAGCCTGTCACTCTCCCATAC\n", " \n", "\n", "14220 AAAGAAAACAACATCACAATTACATCATCAGATGACGGAATCATCAGCATTGATGGGGCT\n", " \n", "\n", "14280 AACCACTTCAAGGTAGATTATGACCTCTCAGTCGATCACATCGATGTAGAGATCAATGGA\n", " \n", "\n", "14340 TGGTATTACGGCAAAACTGCAGGACTTCTTGGTACCTTTGACAACGAGCCCTCAAATGAC\n", " \n", "\n", "14400 CTTATGACCTCCTTTGGAAAAATCATCAATAATCCTAAACGTTTCGCCAGAACTTGGGAT\n", " \n", "\n", "14460 GTTGGAAACACAAATTGCCGTTAA\n", " \n", "\n", "KEYS (in order of precedence):\n", ">>>>>> left primer\n", "<<<<<< right primer\n", "\n", "ADDITIONAL OLIGOS\n", " start len tm gc% any_th 3'_th hairpin seq\n", "\n", " 1 LEFT PRIMER 3266 18 59.89 61.11 7.91 0.00 31.82 CGTGAGCATCCGAGGCAT\n", " RIGHT PRIMER 3362 18 60.13 66.67 0.00 0.00 0.00 CTGAGCGGAGACTGGTGC\n", " PRODUCT SIZE: 97, PAIR ANY_TH COMPL: 0.78, PAIR 3'_TH COMPL: 0.78\n", "\n", " 2 LEFT PRIMER 3253 18 60.13 61.11 0.00 0.00 0.00 CGCAGATGAAGCCCGTGA\n", " RIGHT PRIMER 3362 18 60.13 66.67 0.00 0.00 0.00 CTGAGCGGAGACTGGTGC\n", " PRODUCT SIZE: 110, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00\n", "\n", " 3 LEFT PRIMER 1922 18 59.83 61.11 12.10 0.00 0.00 CGGCGCGAAAGTTGAAGG\n", " RIGHT PRIMER 2053 18 60.13 66.67 0.00 0.00 40.25 CCTTCAGCTCGACCACCG\n", " PRODUCT SIZE: 132, PAIR ANY_TH COMPL: 11.23, PAIR 3'_TH COMPL: 14.04\n", "\n", " 4 LEFT PRIMER 1921 18 59.83 61.11 12.10 0.00 0.00 CCGGCGCGAAAGTTGAAG\n", " RIGHT PRIMER 2053 18 60.13 66.67 0.00 0.00 40.25 CCTTCAGCTCGACCACCG\n", " PRODUCT SIZE: 133, 
PAIR ANY_TH COMPL: 5.41, PAIR 3'_TH COMPL: 9.34\n", "\n", "Statistics\n", " con too in in not no tm tm high high high high \n", " sid many tar excl ok bad GC too too any_th 3'_th hair- poly end \n", " ered Ns get reg reg GC% clamp low high compl compl pin X stab ok\n", "libprimer3 release 2.4.0\n", "\n", "\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers.txt" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "cd /home/sam/analyses/\n", "rsync \\\n", "--archive \\\n", "--relative \\\n", "./20181129_geoduck_vtg_primers/ gannet:/volume1/web/Atumefaciens" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Run again to generate default output format, just for curiosity" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SEQUENCE_ID=TRINITY_DN51983_c0_g1_i8.p1.cds\n", "SEQUENCE_TEMPLATE=ATGGAGCGATTGGTTTTAGCGCTAATCGTTTTGGCGGTCTGTGTCCACGCAGGTCCGATACAGAAAATCTCAGTTGATACCTGTGCAAGAACTTGTACAGGCAACAGCAAGTTTAATTATGTTGCTGGTAAGACTTACCAGTACAAGTATGATGCCGACATCAAGACTGGGGTCCAGGGAGCATCAGAAGATAATGCGAAAATACACATGTCGGCAACAGTCGAGATGGAGGTCATATCAAAGTGTGACCTTGTCATGCGGCTGAATGATGTCACATTGACAGAATATGACCCCATTGACTTGATAATGAAGGAAACGTCATCTGAATTCCGCAGTGGTCTTGAGAAGGCTCCCATCCGAATCTCTTTCCAAGATGGAAACATAGAGGAACTTTGTCTGAAACGAGAGGAGCCAAATTGGGTGCTGAATGCCAAACGTGGAATCCTGTCAATGTTCCAGAACAACATGGACGATTTCTCAACCAATAAAACAGTGTCTGAGACTGATGTGTCTGGTGTGTGTGATACAGAGTACCACGTGAGTAAATCTGGCTGGTACAAGACCACAGTCAGCAAGTCTAAGAATCTCCTAGGTTGTACTGACAGGACCGGGTACAACACTGCCGTCCAGGGAGTGCCATATACCGCAGCTTCTGGCATCCAGTCACTGCCAGTTGTGAAGAGCAACCATAACTGTGAACATGAGATTGACATTAAGTCGCACATTCTCCATTCGGCAACTTGTAATGAACAGCACATGTTCCTGCCATTCTCACGCTCAGACAGTGGAGCTGTAACATACAACACTCAGACTCTCAAGTTCGTGAAAGTAACAACTGGAATCAAGTCACCATTTGATACCAGTACTCGCAAATCAATGAAATTCGATCACATCAATGATGCAGGCAGAGAAGCAAAATCGAGGAAAGATATTACACGTAAGCTTATTGAAATCTGTGAGAAGACGAAGTCTGGTGTTCGCCCTGAAAC
CCCACGTCTGTTCACAGACCTCGTGTTTATGATGAAGACTGTTGATTCTGACACTCTAGCAGACACGTACAAACTGCTCCAAGAAGGTGCCATCTGCACTGACAATAAAGAAAGAACCAGGAAGTTTTTCCTTGATGCCTTACCGATTGCTGGTTCCAGTTCCTCACTGCACTTGATGACCCAACTCTTGACCACAAAGGCTGTGACTGGAATTGAGGCAGACATGTGGCTGTCGGCCATCTCTTTTCTCAAGAACCCAACTAAAGACATGCTGAAGGAAGTCAAGCCTTTGATCAACATGAAGGACACAACTGACAAGGCCCTGCTTTCTGTCGGTACCCTGATCCACAGTTATTGTTTACGCCACGGCAACTGTGAACGTGATGACATGGTGAAGGCTGTCATCTCCGCAATGCAGAACAAGATCGCTAAAGGATGCAAGATCAAGCCAGACAACTTCAAAACCAGCTTGTTGGCACTGCGAGGCATCGGCAATGCCGGATATGCCGTATCAGCAATCCCCACCCTGGAGATTTGTGTGAAGATCACCTCGAACCCCATCGAAATTCGTCTGTCCGCTATTGAGGCTTTCAGGAGGATGCCATGTGATGCAAGCAGAAAAGCTTTGGTAAAGACATTCTTACAGAAGGAGGAAGATTCCGAAGTGAGGATCGCAGCATACAGGGTCTTGATGGAATGTCCATCACCTCAGTTCCTCAGCCTAGTCCGCTCCACGTTAGAGAGTGAGGAAATCAATCAAGTTGGGTCTTACATCTGGTCGCATCTTACAAACCTGATGGAGACATCTGACAGACATAAGCAGGACATCAGAGCAATCCTTGAGGATGAAACATTTAAGAAGGAATTTGACCTTGACAAAAGGAAGTTCTCACGTAACTACGAAGGTTCCATGTTTATTGAGAAACTCAATGCCGGCGCGAAAGTTGAAGGTGATCTTGTCTGGTCATCAAGATCTTTTGTTCCAAGATCGGCCATGATGAATTTCACTGTTGATCTTTTTGGACATTCTATAAATCTTCTGGAATTCGGTGGTCGAGCTGAAGGAATAGAATACTTCTTGGAATCATTCTTTGGACCCAATGGCTACTTCAGTGGACAGGACACAAAGGGTGGAAATGATATGTTGGCCAATGGAATCAAACAGGAGAAAATGAAGAAGATTGATAACAGGTACGGCAGCAAGATGGACGAGCTCCGTGGCTCTATGTACATGAGGGTGTTTGGCAATGAATTGCGTTACAAGAGCTTCCAGGGAGTGGAGGATCTGATGTCTGGTCCTAACTTCAACCTGTTCGATATGCTGATACAGCTGGCCAAGGACAACGATTATACTTTCAGCCATTCTACTATGTTCCTGGACACCAGCATCATTATCCCAACTGGAGCAGGTTTCCCAATGAATCTAACAGTCAACGGAACTGCAACAATTGATCTGAGAATGAAGGGAAAGATGGATCTTAGGTCCCCGCCAGCTGTCAACATAGCAGGACTTGTACAACCAAGTGCGGCTATCGACATATCATCAATGATGAGCGTGGATGCATTTGTAACCAAGTCCGGAATTAAGATGGTGTCCACCCTACACTCCAGTACAGCTGTCCAGGGCAAGTTCCAGGTGAACGAGGCTGGTGTGATAAGCGCAGAATACGAGATGCCACAACCCAAGATGGAGATCATTGATGTCAAGTCATCATTCTTCACTGTTCATCGAGACATGGAGAAGGAACAAAAAATGATTGCTGATAACATTCATACAAAGAAATTCTGCTCCCCAGAGAAACTTGTCACCATCACAGGACTTGAACTATGCGCAGAGGTTACATACCCGAATGCATCTCTGAAATCAGATGCCCCATACTTCCCCCTTACAGGCCCAGTCACTGCTGGTGTCTATCTCTACAACAGAGATACCCACAAGAAATACAAGATGGAGGCCAGATCCACACATAACAAGGCAAGCAACATCCTGCATTTTACCT
TTGACACACCAGAATCTCGCATCGACAGACATATTGTCCTAGATCTCAACCTCAACAAACGAGCAACAACCATTGATATGACTTTGAACTCTCCATGGAAGAAGGCAAATCTGCAAGGTGCTATTACAAATGACAAAGGTATCATGGGTATTAACAGCAAGATGTCGGTGGATGGCAAATCTGGTTTCTCGGTCAACTCCAAGTTTGTGAAATCCCGTGTCGGTAACAGCATCAAGTATGTTCCTTCTCTGGAAATCAGTATACCGCAGATGAAGCCCGTGAGCATCCGAGGCATGTTGATGCATAATGGTTACCGGCAGCTGGATTTGGAATATGACATGTCCGGACTGACTGCCGCACCAGTCTCCGCTCAGTTGACCATTAACAACAAACCGGCCCTAAAAGGCGTACGAGGAAGTTTGTCTTTGCAAAAGGACAAGGCCTACTCATTTGACACAAGGGTCATGATTTCATCTGACAACAACAAGATTGGATATAAGCCATTTGTCTCCCTCCGTTCTCCAACAGGGGAGGTCCTTGCATTTGGCGGATCTGCTAATATTATCTTCAACAAGAAGGTTGTTGTAGACCTTGTCCTTGATAAGGCTTTTGCCAAAGCAGTAACACTCAAAGGTTTCGTAACCAAGACAACGAAGAACCGAGGTCGTGTTATATACAACACTAAGATGACTTATACATCCAGTCCAGTGGATCTTCTGCTGAAAGCCAAGATAGACAACAGAAACTCTCGTGCAGTTGCGACAGTTGTAGACTTCACCTACGTCATTCGCAAAATTGCCCGTAACAACATCAAACTTGTGTCAAAGGTCACGAACCTCAGCAGTAAATACCTTACCAAGGCCAAAGCTTCAGCGAATGTACTAGTAAAAAGGAATCCTGAGTTGAACTTGAAATTTATGGGTAATCTAGAGCACAACAAGAAACACTCCGAGTTTGACGTCGATGTTCGCTATGGTCGTGACTTCAAAGATGAAAGCAAACATATCGACATGAGTTTTGAGATGAATAGAAAGTTCAAGGACATTACGGCAGCATCTGCCAATCTCCAGACAAGATTAACATTCCCTGGACAGGGTCTTAATATAATCGTAAAGGGTAATCACAAACACAGCAGTAAGGAACTTGATTCCAACATCTTCCTGCGTCTTGGTAAAGGACAACCTATTGAGAGCAGTTTGCTCATACAGGACAAAACAGAACAATTGGTGAAGGTCGTCAGTGAATACAAGCTTAGTTATCCAGGCAGGGAAGTCATCTTCAATCATACATTAGCACAGACAGATAAAACCACTTTTGTTAGTTCCTTCAAATCTCAACTGGAGAAAAATGGAAAGAATACCATTATTACAACACTGAAAACTTACCCTGACAAGGAAAGGGTTTCCATCTCATCAGATTTGAAACTAAGCAAACAAACACCAGTCCATTTGCAAGGAGACTTTGATTTCAATCCAGCAGACTTTATCACCAGTATGTCATATGAGAAAGATTTAAAAAAATATTCTGGATCATTGACATCTCTGACCAAGGGAACTGAGTCATCAAGCTTAACAATTGATTTGCAACACCCAGAAAGACGCATTGTCATGGACCTTAGTGGCAAGAGCATGATGAAGAAATACCAGGTTGAGGCAGATTTAAAGTGGAATGCAGGCAGAAATGATTCTGAAAATGTGCATATATCATCTTGGATTGAGCCACCGACAGAAGAAAAATTGAATGGCTCCATAACCATCACTTATCCGACAAGAACACTCACTTTGAACGCCCATCAATTACTATCAGACAAATATAACATGCATGCTGATTTTACATGGGAAACTGACAAGAAAGTTACTATGGATACAATCGTCTTATACAAAAACAACATGATGATAAACACCTTAAAGTTGACCTCTCCATTTAGTAAGATGCGTAGACTTGACCTAGCTGTAAACCATAAAGATGACCACGAAGAATATGCAACTAACGTTGAGGTC
AAGTGGAACAAAGGTGAATCTGTAACTTCTGAGCTCATACTTAAAAAGCCCGCCAGTCTTAAAACCGTTGTTGGTAGTGTCTTCATGAAAACAAGCTTCAAGGTTCTGAAGAAGATAAGATTAGACGTAAATCATAAATTAAGTGACTCCCTGGCTTCGTCTGTGAAATTTGCATGGAACAGGCAACTCATTAATGTAGATGTAAACCTGAAAAACACAACCAAAGGTAAGAAGATGGGATTTAATGGAAATGTTGATGTTAAGACCTCCTTTGCCTACTTGAAGAAGGGACAACTTAAAATCTCCCATGATAACAATGGTAAGACATTCAACACTTTGACGACTTTGATGAAGAACAAGAAAACATACAAAATCGACAGTAAGATAACTCACATCCCCACAGCTAATAAATATGAAAACACTGGAGCTATTTCTATTTCTGCACCTACTGGAAAGTCTGATATAGTATGGAGTCATATGCACACTTCAGAAAAAGTCACGTCCATGTTCGCTTCATCACAGGGAAAGAAGGGTGATAAGAATATAACAGTGAGACTAAATGGAGAATTTAAAGACCTATATGCTGCCAGTCTTAGCATTCAAACACCATATAAGTCTGCTCGTGATGCCTTGCTGGAGGTTAGTATGAAGCATGATGGGTATGTTATGGTTGACAGCAAGGCTAACGTAAATGTAAATGGTGCGAAAGTTGCTGCTGCTACCATTAGCTACAACTTGCGGAGCCCTGCTACCATTACTACACTTAATATTCCTGCCCTTGATATCAACTGCAGGATTCAAGGTAATGCAACATCAGAGAAAAACTCAGAAACTGTTATAATGGAGGTTGTTCTTACACCAGAAATTAGTATGGCCCTCTCAGCATCACAGAAATTTTTTGATGTCACTGGTAACCCTGATGCTTTTGCCCAGTCTCTCATTTCATGGAAATCTTCCTTCCCAGGATATGAGCATGCCAAATTCGTTTACGAAATTAGTGATAACAAAGACGAGTCCAGTGTTACAACACATACAAAACTTGAGTATAGCCAGGGTAAAGTCATAGAAGTTAAATCTGAATTAGATAAAAACAGTTACTATTCATCACTGGCTACACCTTATGAATCGTTGCCATTTATGGAAGGAAAAATCAGTTTCGTTGGCAAACCAGAATCTTTCAGAAGCTCTGCATTTATCAAGATACTACCTGTATTGGAGAAAACGTCTGCATCTGTATCATGGAGCACCCTTGATGGATTGAAAACTGAACTGAGAGTGGATACACCATCTACCCAGTATCCTTATATTCAGGTTAACTTGGAGGCAAAGAAAACGGAAAACGGAAAAACAACCGGTGATCTTGTAGTTGAATATCTTCCCAGAGAAACTGTCCGAGTGGAGGTGATGAGCGATATTAGTGATATTGTCAACCTCTCTGCAAAGGTGACATCACCATTTGTAATGATTGATCTATACCACAGTGGAAACCTACAATCATTTAAATCCAGTGCAGAGATTGAGGGGACACCTGGCAAGAAATATGGTCTCATTGTAGGCTATACAAATGGTACAACGATTGAAGGCTATGCAACTATATCTGTGCCAGGAAGAAGAGACATAAATGCAGTCTTCTCACACCAAGGATCTGCAATGAACTTTATCACTCATGCAGAAATAACACACAACAGAATGAACCAATTCACCTCTGACTTTAAATTTGCAGCAGGCAACTCAATAACTGTGTCTGCATCCACTTCACTTAGGTCTTTGCTCCTCATATCAGAAGACAAATATAGAGCAGCTTTTACTACGAAGGTCATCCCATTGAAAAAGATGTCTGCTCATGGAGAATTTGTGACAACAACCATAGGAAAGTCTGAAGCAGATGTGTCTTTTGACCTGACTAAAGATATTGAGGGTAGTCTTACTGTTAAATCACCTTTGATTAAAAACATTGAAGCTTCTTTTAATCATCATCAGAGTGAAAAATATATAAACAG
TAGAGCAGAAATCGTCCATGACGGTAAAAAAAATATTGATGTGCAGGCCTCTCTAAATGTTGATGAAGATTCTGTTGTTGGTGAAATAGGTATCAAATCCCCATTGTCTGATGATATCCAGGTCCTGTCTAGGTTTGATGGTGGTAAAGACAACTTTGTTGTTCATTTTGAAGGCTCTGTCGGTGTTAACAAATCTGAAATTGATTTATCCCACAAATGGGTTGAGAGAAACAGCCAGCAGACGATAAGTGTGAGATCGACAGGGATGAAGGACGTGACAGCAGAGATAAGTAACACCGGTGATTACTATAACTTAAGATCATATGCTGAGTTAAATCATGGACTTGAAAAACATAGAATTGATTCTACATTTAAACATCTGATAGGCAACACTGAAGGAAGTATTTCCATATCTTCTCCCCTGGGTGATCCACTATCCACAAACTTCTTTGTAAAGGTCACTGAGGCAAATTTAGAGTCAAATGTTGAACTGAACATAGGCTCAGACAAGAGTGAAGTTCATGTTTCTGTTGGTTCAAAACCAAATCTGGAAGGAAGCATTCGTGTCAATTCACCTTACGTCACTGATGTCTCAGCTGGATTTGATCACACTGGAGAGTTTCCAAATATTGTTTCTCACGCACAAGTGAACATAGCCAATAAAGACATTGTTGATGTTAAGTTGATGTCACAATCCACAGAGAGGGGAATAGCTGGATCATTTGCACTGCAGACACCATTGAAAAACTATTCAAGTATTGAGACATCTTTCTTCCACCAAGGCATGTCTAACAATTTCAGAAGTCATGGAGATTTTGCCATTAATTCTGATAAGTCTGAAATTAATCTTTTCTACAGTTCAATGTACAAGTATGAGGGTATGGTCTCCATCAAATCTCATCTCATTGATGACCTTAAGATTGGCTTTGAACATGATGGAACAAAAACTAACTTTGACTCTCATGGGGAAATCAGTGTTGGATTTGAGAAATATGAACTCGATGTTAATGCAAATGTAGATTCAGATGTCATATTGAGCATGATTGTAAAATCTCCCAGAAATGAAGATATCATTGCTAGGTTTTCACACTCGGGTAATTTCTCAGATTTTAGATGCCATGCAGAACTAATGCATGATAAAGACAGCAAATGGGAGGGAGATGTCAGTTTTTCTGCTGGTGATGAAATAAGAGGAGAGATTGTTTTCAAACCATGTCATCATCTTCACAAAGTTTTCATTTTCGATTCTAATATCAAGTTTGAATTCAGTAAATACCATGCTGAAATAAATGGAGATTTACCATTTTTAGGAGAAGTACTTCTGAATTTCAAACGCACAGGAGAACTAAATGACTTTAACATACATGCAGATGCTAGTTTCAGAAATAAGAAATCAGATGCAGATGTCACTTTCCAGCTTGCAAAGAAATTACTCTCTTCATCTGTAAACCTTAGATTTCCACATATGGAAGATGTGTCTTTTAGTTTCAATCACAATGGAGGTTTGAAAAACTTCAACACAAAAACTGAAATGACAATTGGACAGAATAGAACCCAAGGAGCTGTAAGGGTGAGAATCACTGGTTCATCCATTTATTCAAACTTAGCAATTCAATCCCATTACATGGACAGGATAAATGTTAACTTCAACCACAATGGACAATTTGGAGATTTCACCACCAGTGGAGATGTCACTATTGGCAACATGAAAAATGAAGCAGACATGGCATTCCACCTTTCTGGCAAAGTTATGTCATCATCAGCCAGTGTCAAATCGCCTTTAATGGATAGTGTCAGCATAGACTTTAACCACAATGGAGCACCAACAGACTTTACTACCAAAGCTTCTGTGACCATAGGTGATAAAAAAAATGAAGGAGACATTACATTCCAGATTACAGAAAAGGCTGTCATCTCTTCACTTAATATCAAATCACCATACATGGACAACATCAATGCTAATTTCAATCACGAAGGATTTGCTACGAAATTCACTACCA
AGGCTGATGTAACAATTGGTGTTGAAAAAAGTGAAGGTGATGTTTCATTCCAGTGGTCCAACACTTCTACGACAATCTCTGGGAACCTGAAATCGCCATCAATGGAAGATATTACTTTTAACCAGTACACCTTATCTATGCCAACAAAGTATACGTATCATCAGGATTACTCGGTTGGTGATGCAAAAACTGAAACTGATATGATGCTGGACTACACCAACCTGAAGGCCACCTTTGCTTTGACTTCGCCAAAAACTTCCCTAAATTTTGACCACTCTGGTGATATTAAGAATTTCATCTCCGCATTGGACCTAACAACTGAAATGGGAAGATATTCTTCTGAAGCAAAATTCACACTGAATGATAATGTAGAAGCAAGCTTCAAACTCACTTCCCCTGTTGAAGGAATGGAGTCAATTTCAGCACTTTATACCCACAATGGCAAACCATCCAACTTCAGATGCCATGGGGAGTTATCTCTTCCAAATAAAGCTAATGTTGCTGATTTAATTATCAATGTTGAGCAACTAACAAATATGAAAGGAAGTCTTCAAATTAAATCTGAAATCATTCCAGACTTCACAGCGAAATTCTCCAACATACTCACAGAAACATCTCTGCAGTCAATGTCTGAGATCAGTGTTGATGATGAAGTACTCTATGGTATAGCTGTCTCTTTCACCAACAATTCATTCCTTAAAGCTGAACTGGAAGTGAAAACACCAGTAACAGAATATCGTGTTATTAGGCTGTCAGGAATTAATCAAATGGACAAAGACCAGGTAAAAGCATCTGCAGAGCTTCTTATTGGAAGTGATAGCAGTGTAATGGAACTTTCATTTTCTTCTAAACCAAATGTTGCGGGCGTTTTCGAACTCAGATCTGTATACCTACCTCATCTTAAGGCTACCCTAGATGCTACTGGAACACCAACTAATTTTAGAGGAAACTCAGAACTTATTTTTGATGGTGAAAAGAAAGGAAGTGTTGATGCTTCTTTCAATATGGAAGGTATATTAGATGCAAATCTCAAGATGGAACTGCCCCTGGAAAATTACACAAGGATAAATGGAGACATCTCAATTAGGAGCAGTAGTGACTTACTTGACATAAAATCTGCACTGAGTATTGAAGATAGAGAGTTAGTTGACGGCAGTGTTCGAATTACAAACCAAAATCAGCTCTCAGGAGAAATATCTCTGAAATCTATTTACATTAACCCTTTGACTGTGAGCTTTGATTTATCTGGGGATCTTAAAGACTTCAGTACAACAGCCGACATAAGCTACAATAATGAGAAACTGGCTGCAAGTAGTATGAAATTCAATTTCAACCCAACTCCTTTGAAAACATCTGGGTTTGCATCGTTCAACAAGATGTCTTTTGCATACAGCTTAGATAATGCCCCAAAAGATTTTAAATCCCATGCAGAGGTAAAAGTAGAAGAAAAGATGTGGTCTCTTGATGCAACCTGCACTAGAAAGCAAGACATTGATGCTTCATTTAAGGTCATTATTCCAGAGATTGATCCAATTTCTGGAAAATTGTCTCATAGCCAGAGACAGTTCAGATGTGCATCTCATGTCGAGCTTAAAACAGATTCCTCAACAAAATACCAGTATGACATGCTTCTCAAGTGGAGACGTGCCTTGGAAGGAACTTTCAGTGTCAAAACACCCATCAAAGGAATGGAAATTACAAAAGTTGTCCTCAATCATGATGGCACTTTCCCAAACATCAAATCTTCAGCTGAAGTCAATTTCAACAAGACAGACATTTCAGCCGCTGTCAGTCTGATGCATGGTGATTTAACCAAAGGAGATGTTTCCATTAAAACTCCATTTGAAGGATTGGAAGATGTCAGTGTAACATTCTATTCTGAAGGTGATATTGAAAAATTCCAAGCAGGTGGAAAGGTTTTGTATGCTCCAGGAAAAGATATTGAGGCAGGAGTGGAACATAGTTTGTCTGACAATAAGCTGCAGTCCCGCATCCATTTT
ATGTCACCCTTCACCGATGAAATCGTTGTCAGTGCTAACCATACTGGGGTTCCTATGAATTTTGTGTCAAGTTATGAATTAGCCATGGGTTCAGATAATAGAATCAAATCTGAGACATCTTTGAAAACTGGATTCCAGTATGTTCAATTCAAATCTTCTGTCACTACAACAATGAGTGGGGAAACATTTGTACAAAAGGTTGAGCTAAAGCATGATGGCGCTATAGAGAAGTTCAAAACAGAGGCAATGGTACAAGCAATGGAGAAATTAGTAAGAGTTGATACATCCTTCCAGCTTGAACCAGTCATTGAAGGATCAGCCTCAATCATTACACCATTCTTTGAAAAGTCAGAGGAAATAAAGGTAATCTTTTCACATAGTGGTAGTACTAGTGGATTCCTCTCAACTGGAGAGTTACAGTATTCTCCAAAGAAAAAGATAAGTGGAAAAATTGATTTCCTCAATAATGGTTGGAGGAGGAGAGTTGCAACTGTCGAAGTAAGAACGCCATTTAAGGGCATGGAGGTGAACAAGGTTACCTACCAGCATACAGGCAACTCGGACAGCTTTGATTGTGATGCAGATATTTTTGTTGCTTCCCAGCCACTCAGCGCTACTTTCAAAGGATCTAAAGCACCTTTAAGCATGGATCTGAACATCAAAACACCATTTGAAGGATATGAGAGTTTTGGACTTAACGGGAACTTCCTAAACAACACAATGGGCAGATATAGCGGCAGAATGGAAGCCTCATGGCACCCAAATCAGGCAGTCATTTTTGAAAGTTCATTTGCTGCATTGAGAAATAAAATTGAAGGAAGTGCCTCACTAACAACTCCATTTGTCATGCTCCAACGGTCCTCAATTGAATTCTCACACCAGGAGCTCTCTCAAAAATTCATTGAAAATCTAAAAGCAACTTTCAATGGAAGTAGAATTGTGGATGTTGAGGTTGATCACAGCCTGATGAATGAACGCAAACATGCCATTATTACAATGCGTGCTCCACGCTCCATGAAATTTGACATGAGTGGTGATTTCACTCTGGAAAGTTCAGCAGTTGATGCATCTGCCAACTGGAACACTGAAGATAGTAAAAGTAATCTGAAACTTGAGGGAGTTTATGACATAAGAACAGACAAGAGAATGAACTTCAAACTGACCCGACCTGGACAGGTGATTTCCTATTCCGGATTATTGGACAGAAGACACAGTAAATCTGACCTTGTCTGGGGTATAGGATCAACACAAAAAGCAGGCTATGAAGTCGCAATCAATGATTATGACTCCAGACTGAAACTTATACTTCCAACACGAAGCCTAGAGCTAACCGGGTCCCATCGATCAAAGGTCACAGAAGGGTCATTTCTATGGGATGCTGATGTGGATGAAACAAAGAAGGTTGGCTTCCGTTCTGTGATAGTGCCATCAGATGATGCTATTAAGGCCGATGTTACATTCCAGCTTCCAAGCTTAGGAAAGAGAGTACAAGTCGGTTCAGAGGTTGTGGTCAATAGAGGACGTGTAATTTTCGACGGCAAGACAGAGTTCTCTTACTCTCCTGATGAGAGAAAGAAGATTGTGTTTTCATCAAGGCTTGAGGATAAGTCCCTTGGAGCCACGAAAAACTACAGCTTCAGCATGGGTCTAAGTCATCCATACACGACTGTTGATATCAAACTGGCATCACATGTTGGCAAATCCAAGAACAGATATTCCGCTGGTATGGGGCTTGAGTACCTTAACACACACAGACAGACCAAGACCTTCCAAGTCAATGGACAGATTAACAAACTCAGGAAAACCATCAGCTTCGAGCTGCAGAGTCCAGTCAAATCTATCAGTATTTCGGGCAGTGCTCAGACTGAAGACAAGTTCCGTGTGTCTATCCTCAACATCTATGACCAGAAGATGCCACTGACGACTGTTTTCACCGTCGACCCTGCCACCAGATCACTTGACTTTTACATTAATTATGACCTTGAGAACCCAAGTAGTGA
GTTCCACATAAATGCCAAGTATGTGAACAGTTCTGCATTGGCTGCCGAGGTGTTCCATATCGTCAACCGTGAACGAGTGACTGACATTCTGGTTACTGCCAGACTCAACAACTCTCACCTCCTTCACACAAGAGTTCACTGGAGACCGGAAATGATCAAGGAATACGAGACTGTTCGAGAGAGGAAACTTGAGGACTACAAGAGACGCCTGGAAATTTCAGCAGTTAAAGTAGAGGCTGCTGTAATTGAGGAAACTGTTGAGAAGTTTAAGATGATCTGGGAAGCTGTTGTAGAGGAGATTGGCCAAGACAATGCTCTAGCTATAGAGGCTGCTGTTATCCCCATGTTTGAAGATGGACTGAGGACATATGCACAAGTGCAGGGACATGTGCAGGCAAACTTCCTTCCATTTGCCTTGGATTTGGCCAACATAACACAGTACAAGTACAACAGATTTGTACAAACATACAGAGAGTTTTCACAGGCAGTAAAGCTTCCAGAAACAATGAAGGATCTTGACTTCCTGTCGTCTGCCAGACAACCATTGGAGAAGCCCTTTAATTATGCACTGGATCTGATCAAGATCCCTGCGAAGCACCTTCCAACCCAGGTGACCGGAGCACTGAAATACCCGGCCAAATATATGTTTTCTAATATGGATACTGCTATGTTGGAGCAGCTTAACAAGAACTGGGAACTTGAACACCATCTAGAATCTGTCATTGAAACCATTCTTCTTGCAATTGAGGAGGAAGCTGAGGAAGGCATACTTAAATTGAAGAGTATCATTCTCAACTCGGCACAATCCCCAATCACTGTGTACGACCCAACCCACGGGGAGATCCAGGCAGAATTTCACTCACCCATACCACTGAAGTCTCTGGATGAGCTACCTGTTATCAATATTGAAAGATATATTAACATGGTTATGAAAACTCGCAGTAGAAAGACAGAAAATGTTCCCAGCAAGACATCAGAATGGTTGCCACCATTTACTGGTGTTGCCAGTATTACCAGCGGTCATAAAATCACAACTTTCGACGGGTATATCTATGACCTTGATGCTGACTGCACGTTTGTGCTTGTCCGTGATTTCAGGAATGGCAACTTCAGTATTATTCTGAACAACAATGATGGGACAACTCTGTCAATCCTTTCACATGGAAAACCAATAGAAATCAAGCAGAGCGGAGAGATCTCTGTTGCCATGGAGCCTGTCACTCTCCCATACAAAGAAAACAACATCACAATTACATCATCAGATGACGGAATCATCAGCATTGATGGGGCTAACCACTTCAAGGTAGATTATGACCTCTCAGTCGATCACATCGATGTAGAGATCAATGGATGGTATTACGGCAAAACTGCAGGACTTCTTGGTACCTTTGACAACGAGCCCTCAAATGACCTTATGACCTCCTTTGGAAAAATCATCAATAATCCTAAACGTTTCGCCAGAACTTGGGATGTTGGAAACACAAATTGCCGTTAA\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/\n", "PRIMER_LEFT_NUM_RETURNED=5\n", "PRIMER_RIGHT_NUM_RETURNED=5\n", "PRIMER_INTERNAL_NUM_RETURNED=0\n", 
"PRIMER_PAIR_NUM_RETURNED=5\n", "PRIMER_PAIR_0_PENALTY=0.160101\n", "PRIMER_LEFT_0_PENALTY=0.109387\n", "PRIMER_RIGHT_0_PENALTY=0.050714\n", "PRIMER_LEFT_0_SEQUENCE=TTACGCCACGGCAACTGT\n", "PRIMER_RIGHT_0_SEQUENCE=CGCAGTGCCAACAAGCTG\n", "PRIMER_LEFT_0=1347,18\n", "PRIMER_RIGHT_0=1471,18\n", "PRIMER_LEFT_0_TM=59.891\n", "PRIMER_RIGHT_0_TM=60.051\n", "PRIMER_LEFT_0_GC_PERCENT=55.556\n", "PRIMER_RIGHT_0_GC_PERCENT=61.111\n", "PRIMER_LEFT_0_SELF_ANY_TH=9.11\n", "PRIMER_RIGHT_0_SELF_ANY_TH=10.11\n", "PRIMER_LEFT_0_SELF_END_TH=0.13\n", "PRIMER_RIGHT_0_SELF_END_TH=0.00\n", "PRIMER_LEFT_0_HAIRPIN_TH=42.06\n", "PRIMER_RIGHT_0_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_0_END_STABILITY=3.5500\n", "PRIMER_RIGHT_0_END_STABILITY=4.2400\n", "PRIMER_PAIR_0_COMPL_ANY_TH=10.66\n", "PRIMER_PAIR_0_COMPL_END_TH=0.00\n", "PRIMER_PAIR_0_PRODUCT_SIZE=125\n", "PRIMER_PAIR_1_PENALTY=0.233503\n", "PRIMER_LEFT_1_PENALTY=0.107750\n", "PRIMER_RIGHT_1_PENALTY=0.125753\n", "PRIMER_LEFT_1_SEQUENCE=CGTGAGCATCCGAGGCAT\n", "PRIMER_RIGHT_1_SEQUENCE=CTGAGCGGAGACTGGTGC\n", "PRIMER_LEFT_1=3266,18\n", "PRIMER_RIGHT_1=3362,18\n", "PRIMER_LEFT_1_TM=59.892\n", "PRIMER_RIGHT_1_TM=60.126\n", "PRIMER_LEFT_1_GC_PERCENT=61.111\n", "PRIMER_RIGHT_1_GC_PERCENT=66.667\n", "PRIMER_LEFT_1_SELF_ANY_TH=7.91\n", "PRIMER_RIGHT_1_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_1_SELF_END_TH=0.00\n", "PRIMER_RIGHT_1_SELF_END_TH=0.00\n", "PRIMER_LEFT_1_HAIRPIN_TH=31.82\n", "PRIMER_RIGHT_1_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_1_END_STABILITY=4.4000\n", "PRIMER_RIGHT_1_END_STABILITY=5.0100\n", "PRIMER_PAIR_1_COMPL_ANY_TH=0.78\n", "PRIMER_PAIR_1_COMPL_END_TH=0.78\n", "PRIMER_PAIR_1_PRODUCT_SIZE=97\n", "PRIMER_PAIR_2_PENALTY=0.251305\n", "PRIMER_LEFT_2_PENALTY=0.125552\n", "PRIMER_RIGHT_2_PENALTY=0.125753\n", "PRIMER_LEFT_2_SEQUENCE=CGCAGATGAAGCCCGTGA\n", "PRIMER_RIGHT_2_SEQUENCE=CTGAGCGGAGACTGGTGC\n", "PRIMER_LEFT_2=3253,18\n", "PRIMER_RIGHT_2=3362,18\n", "PRIMER_LEFT_2_TM=60.126\n", "PRIMER_RIGHT_2_TM=60.126\n", "PRIMER_LEFT_2_GC_PERCENT=61.111\n", 
"PRIMER_RIGHT_2_GC_PERCENT=66.667\n", "PRIMER_LEFT_2_SELF_ANY_TH=0.00\n", "PRIMER_RIGHT_2_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_2_SELF_END_TH=0.00\n", "PRIMER_RIGHT_2_SELF_END_TH=0.00\n", "PRIMER_LEFT_2_HAIRPIN_TH=0.00\n", "PRIMER_RIGHT_2_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_2_END_STABILITY=4.3500\n", "PRIMER_RIGHT_2_END_STABILITY=5.0100\n", "PRIMER_PAIR_2_COMPL_ANY_TH=0.00\n", "PRIMER_PAIR_2_COMPL_END_TH=0.00\n", "PRIMER_PAIR_2_PRODUCT_SIZE=110\n", "PRIMER_PAIR_3_PENALTY=0.299294\n", "PRIMER_LEFT_3_PENALTY=0.173382\n", "PRIMER_RIGHT_3_PENALTY=0.125912\n", "PRIMER_LEFT_3_SEQUENCE=CGGCGCGAAAGTTGAAGG\n", "PRIMER_RIGHT_3_SEQUENCE=CCTTCAGCTCGACCACCG\n", "PRIMER_LEFT_3=1922,18\n", "PRIMER_RIGHT_3=2053,18\n", "PRIMER_LEFT_3_TM=59.827\n", "PRIMER_RIGHT_3_TM=60.126\n", "PRIMER_LEFT_3_GC_PERCENT=61.111\n", "PRIMER_RIGHT_3_GC_PERCENT=66.667\n", "PRIMER_LEFT_3_SELF_ANY_TH=12.10\n", "PRIMER_RIGHT_3_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_3_SELF_END_TH=0.00\n", "PRIMER_RIGHT_3_SELF_END_TH=0.00\n", "PRIMER_LEFT_3_HAIRPIN_TH=0.00\n", "PRIMER_RIGHT_3_HAIRPIN_TH=40.25\n", "PRIMER_LEFT_3_END_STABILITY=3.4600\n", "PRIMER_RIGHT_3_END_STABILITY=4.9400\n", "PRIMER_PAIR_3_COMPL_ANY_TH=11.23\n", "PRIMER_PAIR_3_COMPL_END_TH=14.04\n", "PRIMER_PAIR_3_PRODUCT_SIZE=132\n", "PRIMER_PAIR_4_PENALTY=0.299294\n", "PRIMER_LEFT_4_PENALTY=0.173382\n", "PRIMER_RIGHT_4_PENALTY=0.125912\n", "PRIMER_LEFT_4_SEQUENCE=CCGGCGCGAAAGTTGAAG\n", "PRIMER_RIGHT_4_SEQUENCE=CCTTCAGCTCGACCACCG\n", "PRIMER_LEFT_4=1921,18\n", "PRIMER_RIGHT_4=2053,18\n", "PRIMER_LEFT_4_TM=59.827\n", "PRIMER_RIGHT_4_TM=60.126\n", "PRIMER_LEFT_4_GC_PERCENT=61.111\n", "PRIMER_RIGHT_4_GC_PERCENT=66.667\n", "PRIMER_LEFT_4_SELF_ANY_TH=12.10\n", "PRIMER_RIGHT_4_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_4_SELF_END_TH=0.00\n", "PRIMER_RIGHT_4_SELF_END_TH=0.00\n", "PRIMER_LEFT_4_HAIRPIN_TH=0.00\n", "PRIMER_RIGHT_4_HAIRPIN_TH=40.25\n", "PRIMER_LEFT_4_END_STABILITY=3.0200\n", "PRIMER_RIGHT_4_END_STABILITY=4.9400\n", "PRIMER_PAIR_4_COMPL_ANY_TH=5.41\n", 
"PRIMER_PAIR_4_COMPL_END_TH=9.34\n", "PRIMER_PAIR_4_PRODUCT_SIZE=133\n", "=\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers\n", "\n", "# Store sequence only from desired FastA.\n", "# Print all lines after the first line and then delete newlines\n", "sequence=$(tail -n +2 /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds | tr -d '\\n')\n", "\n", "# Store sequence ID: header (first) line of the FastA with '>' removed and '.cds' appended\n", "seq_id=$(echo \"$(head -n 1 /home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds | tr -d '>').cds\")\n", "\n", "# Use heredoc to create Primer3 parameters file\n", "cat << EOF > /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/\n", "=\n", "EOF\n", "\n", "# Run Primer3\n", "/home/sam/software/primer3-2.4.0/src/primer3_core \\\n", "--output=/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers_default_format.txt \\\n", "/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt\n", "\n", "cat /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers_default_format.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Test EMBOSS PrimerSearch on first set of primers picked by Primer3 on source FastA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### The code below does the following:\n", "\n", "- Parses out sequence id, left, and right primers and creates the proper tab-delimited primer sequences file needed by ```primersearch```\n", "\n", "- Runs 
```primersearch``` using the newly created primer sequences file and the target FastA file that was used to generate our primers in ```Primer3```" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "cat: /home/sam/analyses/20181129_geoduck_vtg_primers/TRINITY_DN51983_c0_g1_i8.primersearch: No such file or directory\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers/\n", "\n", "seq_id=$(grep \"SEQUENCE_ID=\" /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers_default_format.txt | sed 's/SEQUENCE_ID=//')\n", "left_primer=$(grep \"PRIMER_LEFT_0_SEQUENCE=\" /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers_default_format.txt | sed 's/PRIMER_LEFT_0_SEQUENCE=//')\n", "right_primer=$(grep \"PRIMER_RIGHT_0_SEQUENCE=\" /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_primers_default_format.txt | sed 's/PRIMER_RIGHT_0_SEQUENCE=//')\n", "\n", "\n", "printf \"%s\\t\" \"${seq_id}\" \"${left_primer}\" \"${right_primer}\" > /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_emboss_primers.txt\n", "\n", "# Add newline to end of file\n", "printf \"\\n\" >> /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_emboss_primers.txt\n", "\n", "/home/sam/software/EMBOSS-6.6.0/emboss/primersearch \\\n", "-auto \\\n", "/home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/TRINITY_DN51983_c0_g1_i8.p1.cds \\\n", "/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_emboss_primers.txt\n", "\n", "cat /home/sam/analyses/20181129_geoduck_vtg_primers/TRINITY_DN51983_c0_g1_i8.primersearch" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20181129_emboss_primers.txt\n", "20181129_primer3_params.txt\n", "20181129_primer3_primers_default_format.txt\n", "20181129_primer3_primers.txt\n", 
"TRINITY_DN51983_c0_g1_i8.p1.cds.for\n", "TRINITY_DN51983_c0_g1_i8.p1.cds.rev\n", "trinity_dn51983_c0_g1_i8.primersearch\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers/\n", "\n", "ls" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Primer name TRINITY_DN51983_c0_g1_i8.p1.cds\n", "Amplimer 1\n", "\tSequence: TRINITY_DN51983_c0_g1_i8.p1 \n", "\t\n", "\tTTACGCCACGGCAACTGT hits forward strand at 1348 with 0 mismatches\n", "\tCGCAGTGCCAACAAGCTG hits reverse strand at [13013] with 0 mismatches\n", "\tAmplimer length: 125 bp\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181129_geoduck_vtg_primers/trinity_dn51983_c0_g1_i8.primersearch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Primers match up to their source sequence, as expected. Now, to test the primers on the rest of the transcriptome to ensure specificity." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Use EMBOSS PrimerSearch tool to test primers across transcriptome" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### The code below does the following:\n", "\n", "- Sets variables for file/folder paths\n", "\n", "- Runs for loop over all individual CDS FastA files:\n", " - Uses parameter substitution to strip paths from filenames\n", " - Uses parameter substitution to strip extensions from filenames\n", " - Uses ```tr``` to convert filenames to lowercase\n", " - Runs ```primersearch``` on each CDS FastA file\n", " - Uses ```grep``` to evaluate if the word \"Amplimer\" is found in the resulting output file; if it is _not_, the file is deleted." 
] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Process is interrupted.\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers/\n", "\n", "fasta_loc=\"/home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/\"\n", "primersearch=\"/home/sam/software/EMBOSS-6.6.0/emboss/primersearch\"\n", "primers=\"/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_emboss_primers.txt\"\n", "\n", "\n", "time \\\n", "for fasta in ${fasta_loc}*.cds\n", " do\n", " fasta_no_path=$(echo ${fasta##*/})\n", " fasta_no_ext=$(echo ${no_path%%.*})\n", " fasta_no_ext_lower=$(echo ${fasta_no_ext} | tr '[:upper:]' '[:lower:]')\n", " ${primersearch} -auto ${fasta} ${primers}\n", " if ! grep --quiet \"Amplimer\" \"${fasta_no_ext_lower}.primersearch\"\n", " then rm ${fasta_no_ext_lower}.primersearch\n", " fi\n", "done" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Killed it to change code to allow primer mismatches.\n", "\n", "Also fix extension removal." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t208m42.396s\n", "user\t76m45.956s\n", "sys\t79m27.744s\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers/\n", "\n", "fasta_loc=\"/home/sam/data/geoduck/transcriptomes/transdecoder_fasta_splits/\"\n", "primersearch=\"/home/sam/software/EMBOSS-6.6.0/emboss/primersearch\"\n", "primers=\"/home/sam/analyses/20181129_geoduck_vtg_primers/20181129_emboss_primers.txt\"\n", "\n", "\n", "time \\\n", "for fasta in ${fasta_loc}*.cds\n", " do\n", " fasta_no_path=$(echo ${fasta##*/})\n", " fasta_no_ext=$(echo ${fasta_no_path%%.*})\n", " fasta_no_ext_lower=$(echo ${fasta_no_ext} | tr '[:upper:]' '[:lower:]')\n", " ${primersearch} -auto ${fasta} ${primers} 20\n", " if ! 
grep --quiet \"Amplimer\" \"${fasta_no_ext_lower}.primersearch\"\n", " then rm ${fasta_no_ext_lower}.primersearch\n", " fi\n", "done" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FILE: trinity_dn51983_c0_g1_i4.primersearch\n", "\n", "\n", "Primer name TRINITY_DN51983_c0_g1_i8.p1.cds\n", "Amplimer 1\n", "\tSequence: TRINITY_DN51983_c0_g1_i4.p1 \n", "\t\n", "\tTTACGCCACGGCAACTGT hits forward strand at 1348 with 0 mismatches\n", "\tCGCAGTGCCAACAAGCTG hits reverse strand at [12569] with 0 mismatches\n", "\tAmplimer length: 125 bp\n", "----------------------------------\n", "\n", "FILE: trinity_dn51983_c0_g1_i8.primersearch\n", "\n", "\n", "Primer name TRINITY_DN51983_c0_g1_i8.p1.cds\n", "Amplimer 1\n", "\tSequence: TRINITY_DN51983_c0_g1_i8.p1 \n", "\t\n", "\tTTACGCCACGGCAACTGT hits forward strand at 1348 with 0 mismatches\n", "\tCGCAGTGCCAACAAGCTG hits reverse strand at [13013] with 0 mismatches\n", "\tAmplimer length: 125 bp\n", "----------------------------------\n", "\n" ] } ], "source": [ "%%bash\n", "\n", "cd /home/sam/analyses/20181129_geoduck_vtg_primers/\n", "\n", "# Check contents of files with matches\n", "for file in *.primersearch\n", " do\n", " echo \"FILE: ${file}\"\n", " echo \"\"\n", " cat ${file}\n", " echo \"----------------------------------\"\n", " echo \"\"\n", "done\n", "\n", "# Copy data to Gannet\n", "cd /home/sam/analyses/\n", "rsync \\\n", "--archive \\\n", "--relative \\\n", "./20181129_geoduck_vtg_primers/ gannet:/volume1/web/Atumefaciens" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### OK, only two matches found, both are the same gene, different isoforms. So, I say we're good to go! Will order that primer pair." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }