{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TODAY'S DATE:\n", "Tue Dec 11 06:29:41 PST 2018\n", "------------\n", "\n", "Distributor ID:\tUbuntu\n", "Description:\tUbuntu 16.04.5 LTS\n", "Release:\t16.04\n", "Codename:\txenial\n", "\n", "------------\n", "HOSTNAME: \n", "swoose\n", "\n", "------------\n", "Computer Specs:\n", "\n", "Architecture: x86_64\n", "CPU op-mode(s): 32-bit, 64-bit\n", "Byte Order: Little Endian\n", "CPU(s): 24\n", "On-line CPU(s) list: 0-23\n", "Thread(s) per core: 2\n", "Core(s) per socket: 6\n", "Socket(s): 2\n", "NUMA node(s): 1\n", "Vendor ID: GenuineIntel\n", "CPU family: 6\n", "Model: 44\n", "Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz\n", "Stepping: 2\n", "CPU MHz: 2925.866\n", "BogoMIPS: 5851.93\n", "Virtualization: VT-x\n", "L1d cache: 32K\n", "L1i cache: 32K\n", "L2 cache: 256K\n", "L3 cache: 12288K\n", "NUMA node0 CPU(s): 0-23\n", "Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat\n", "\n", "------------\n", "\n", "Memory Specs\n", "\n", " total used free shared buff/cache available\n", "Mem: 70G 6.1G 2.9G 824M 61G 63G\n", "Swap: 4.7G 47M 4.6G\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No LSB modules are available.\n" ] } ], "source": [ "%%bash\n", "echo \"TODAY'S DATE:\"\n", "date\n", "echo \"------------\"\n", "echo \"\"\n", "#Display operating system info\n", "lsb_release -a\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"HOSTNAME: \"; hostname \n", "echo \"\"\n", "echo \"------------\"\n", "echo \"Computer Specs:\"\n", "echo \"\"\n", 
"lscpu\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"\"\n", "echo \"Memory Specs\"\n", "echo \"\"\n", "free -mh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Make directories" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "mkdir /home/sam/data/gigas\n", "mkdir /home/sam/data/gigas/genomes\n", "mkdir /home/sam/data/gigas/genes\n", "mkdir /home/sam/analyses/20181211_gigas_cox1_primeres" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Download FastA files" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GCF_000297895.1_oyster_v9_genomic.fna\n", "NC_001276.1.fa\n", "NP_037555.1.fa\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/gigas/genomes\n", "\n", "# C.gigas mitochondrial genome\n", "curl --silent \"https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?tool=portal&save=file&log$=seqview&db=nuccore&report=fasta&id=7212445&extrafeat=null&conwithfeat=on\" > NC_001276.1.fa\n", "\n", "# C.gigas genome\n", "curl --silent \"ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/297/895/GCF_000297895.1_oyster_v9/GCF_000297895.1_oyster_v9_genomic.fna.gz\" > GCF_000297895.1_oyster_v9_genomic.fna.gz\n", "gunzip --quiet GCF_000297895.1_oyster_v9_genomic.fna.gz\n", "\n", "cd /home/sam/data/gigas/genes\n", "\n", "# C.gigas COX1\n", "curl --silent \"https://www.ncbi.nlm.nih.gov/projects/sviewer/sequence.cgi?id=gi|7212457&format=fasta&filename=NP_037555.1.fa&ranges=0-504\" > NP_037555.1.fa\n", "\n", "ls /home/sam/data/gigas/genomes\n", "ls /home/sam/data/gigas/genes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Split multi-FastA file into individual FastA files with PyFaidx" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Gigas genome" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream",
"text": [ "-------------------\n", "NUMBER OF SEQUENCES IN ORIGINAL FASTA\n", "7659\n", "-------------------\n", "\n", "\n", "-------------------\n", "NUMBER OF INDIVIDUAL FASTA FILES\n", "7659\n", "-------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t0m39.604s\n", "user\t0m38.412s\n", "sys\t0m1.140s\n" ] } ], "source": [ "%%bash\n", "mkdir /home/sam/data/gigas/genomes/GCF_000297895_fasta_splits\n", "cd /home/sam/data/gigas/genomes/GCF_000297895_fasta_splits\n", "\n", "# Count sequences in FastA\n", "echo \"-------------------\"\n", "echo \"NUMBER OF SEQUENCES IN ORIGINAL FASTA\"\n", "grep -c \">\" ../GCF_000297895.1_oyster_v9_genomic.fna\n", "echo \"-------------------\"\n", "echo \"\"\n", "echo \"\"\n", "\n", "# Split FastA\n", "time \\\n", "/home/sam/software/bin/pyfaidx-0.5.5.2 \\\n", "--split-files \\\n", "../GCF_000297895.1_oyster_v9_genomic.fna\n", "\n", "# Count number of individual FastA files\n", "echo \"-------------------\"\n", "echo \"NUMBER OF INDIVIDUAL FASTA FILES\"\n", "ls -1 | wc -l\n", "echo \"-------------------\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Gigas mt genome" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-------------------\n", "NUMBER OF SEQUENCES IN ORIGINAL FASTA\n", "1\n", "-------------------\n", "\n", "\n", "-------------------\n", "NUMBER OF INDIVIDUAL FASTA FILES\n", "1\n", "-------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t0m0.130s\n", "user\t0m0.112s\n", "sys\t0m0.016s\n" ] } ], "source": [ "%%bash\n", "mkdir /home/sam/data/gigas/genomes/NC_001276_fasta_splits\n", "cd /home/sam/data/gigas/genomes/NC_001276_fasta_splits\n", "\n", "# Count sequences in FastA\n", "echo \"-------------------\"\n", "echo \"NUMBER OF SEQUENCES IN ORIGINAL FASTA\"\n", "grep -c \">\" ../NC_001276.1.fa\n", "echo \"-------------------\"\n", "echo 
\"\"\n", "echo \"\"\n", "\n", "# Split FastA\n", "time \\\n", "/home/sam/software/bin/pyfaidx-0.5.5.2 \\\n", "--split-files \\\n", "../NC_001276.1.fa\n", "\n", "# Count number of individual FastA files\n", "echo \"-------------------\"\n", "echo \"NUMBER OF INDIVIDUAL FASTA FILES\"\n", "ls -1 | wc -l\n", "echo \"-------------------\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Snagged wrong COX1 sequence - it was the protein sequence. Here's nucleotide CDS:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-------------------\n", "NUMBER OF SEQUENCES IN ORIGINAL FASTA\n", "12\n", "-------------------\n", "\n", "\n", "-------------------\n", "NUMBER OF INDIVIDUAL FASTA FILES\n", "12\n", "-------------------\n", "lclAF177226.1_cds_AAF20042.1_1.fa\n", "lclAF177226.1_cds_AAF20043.1_2.fa\n", "lclAF177226.1_cds_AAF20044.1_3.fa\n", "lclAF177226.1_cds_AAF20045.1_4.fa\n", "lclAF177226.1_cds_AAF20046.1_5.fa\n", "lclAF177226.1_cds_AAF20047.1_6.fa\n", "lclAF177226.1_cds_AAF20048.1_7.fa\n", "lclAF177226.1_cds_AAF20049.1_8.fa\n", "lclAF177226.1_cds_AAF20050.1_9.fa\n", "lclAF177226.1_cds_AAF20051.1_10.fa\n", "lclAF177226.1_cds_AAF20052.1_11.fa\n", "lclAF177226.1_cds_AAF20053.1_12.fa\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t0m0.137s\n", "user\t0m0.128s\n", "sys\t0m0.008s\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/data/gigas/genomes\n", "\n", "# Download mt genome coding sequences\n", "curl --silent \"https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?tool=portal&save=file&log$=seqview&db=nuccore&report=fasta_cds_na&id=6636083&conwithfeat=on&withparts=on\" > AF177226.cds.fa\n", "\n", "### Split in to individual FastA files\n", "mkdir /home/sam/data/gigas/genomes/AF177226.cds_splits\n", "cd /home/sam/data/gigas/genomes/AF177226.cds_splits\n", "\n", "# Count sequences in FastA\n", "echo \"-------------------\"\n", "echo \"NUMBER OF 
SEQUENCES IN ORIGINAL FASTA\"\n", "grep -c \">\" ../AF177226.cds.fa\n", "echo \"-------------------\"\n", "echo \"\"\n", "echo \"\"\n", "\n", "# Split FastA\n", "time \\\n", "/home/sam/software/bin/pyfaidx-0.5.5.2 \\\n", "--split-files \\\n", "../AF177226.cds.fa\n", "\n", "# Count number of individual FastA files\n", "echo \"-------------------\"\n", "echo \"NUMBER OF INDIVIDUAL FASTA FILES\"\n", "ls -1 | wc -l\n", "echo \"-------------------\"\n", "\n", "ls /home/sam/data/gigas/genomes/AF177226.cds_splits" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "cat: lclAF177226.1_cds_AAF20053.1_12.fa: No such file or directory\n" ] } ], "source": [ "%%bash\n", "cat lclAF177226.1_cds_AAF20053.1_12.fa" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">lcl|AF177226.1_cds_AAF20053.1_12\n", "ATGTCAACAAATCATTTAGACATTGGAAGGTTTTATATAGTATTTGGATTTTGAGCTGTTCTTGCGGGAA\n", "CTAGGTTTAGGTCTCTTATTCGTTGGAGACTTTATAACCCTGGAGCTAAGTTTTTAGACCCCGTGACTTA\n", "TAATGCAGTTGTAACTAGGCATGCGTTGGTTATGATTTTTTTCTTTGTTATACCTGTAATAATTGGGGGG\n", "TTTGGTAACTGGCTTATCCCTTTGATGCTTCTAGTAGCAGACATGCAATTTCCTCGATTAAATGCATTTA\n", "GATTTTGAGTTTTGCCAGGGTCTCTTTATCTTATGCTTATGTCTAACATTGTAGAAAACGGAGTTGGGGC\n", "AGGGTGAACAATTTACCCTCCTTTATCAACTTACTCTTATCATGGAGTTTGTATAGACCTTGCAATTCTA\n", "AGCCTTCACCTTGCTGGTATTAGCTCTATTTTCAGGTCAATTAATTTCATAGTAACGATTAGAAATATGC\n", "GATCTGTTGGGGGCCATTTACTAGCACTATTCCCTTGATCTATTAAGGTTACTTCATTCTTGCTTTTGAC\n", "TACTCTCCCAGTGTTAGCTGGAGGTCTTACTATACTTTTGACTGATCGTCATTTTAATACCTCTTTTTTT\n", "GACCCTGTCGGAGGGGGGGACCCTGTCTTATTTCAGCATTTGTTTTGATTTTTTGGTCACCCTGAGGTGT\n", "ATGTCCTTATTCTTCCAGGTTTTGGAATAATTTCTCATGTCTTATGTTTTTGGTCAAGTAAAAAGACTGC\n", "ATATGGAAATATGGGAATGTTTTATGCAATACTTAATATTGGGTTCTTAGGGTTTATTGTCTGGGGGCAT\n", "CACATGTTTGTGGCTGGAATGGATATTGATACGCGTGCTTATTTTAGTGCTGCCACCGTTATTATTGCAG\n", 
"TGCCAACTGGTATTAAGGTGTTTGCATGAATTAGCACAATGCTAGGCTCTAAAGTTTCAACTCAAGCACC\n", "TATGTTGTGGTCTACTGGTTTTATTATTCTTTTTACAACAGGGGGTCTTACAGGACTTATTCTATCAAGA\n", "GCTTCAGTAGATGTTACGCTTCACGACACTTATTTTGTAACTGGTCATTTTCACTACGTCTTATCAATGG\n", "GTGCGGTGTTTACAATTTTAGCTGGGTTTACTCACTGATTTCCTCTTGTTGCTAAGGTTATAATGCATCG\n", "GCAAAAAATGAAAAGTCATTTTTTAGCAATGTTTTTAGGTGTTAATGCAGCATTTTTGCCACATCATTTT\n", "TTGGGTTTGGCTGGTATACCACGTCGAGTAGTTGATTATCCAGATCATTTTTGATTTTGAAATAAAGTAT\n", "CCACATTTGGCTCTCATTTGAGTACTGGCTCATTGTTATTTTTTGTGTTTTTGTTATGAGAGTCATTTAT\n", "TGCTCAACGGCCAGTTATTTCAGTGCGAAACACTTCTAGGTCCCCCGAATGGGCTGTTGTGTCTAGCCTC\n", "CCTAAGCATGCAGGGGATGAATTAGCAAAAATGGCTAAGCTTTGTTAG\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/data/gigas/genomes/AF177226.cds_splits/lclAF177226.1_cds_AAF20053.1_12.fa" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">lcl|AF177226.1_cds_AAF20053.1_12 [protein=cytochrome oxidase subunit 1] [protein_id=AAF20053.1] [location=15598..17115] [gbkey=CDS]\n" ] } ], "source": [ "%%bash\n", "grep \"cytochrome oxidase subunit 1\" /home/sam/data/gigas/genomes/AF177226.cds.fa" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lclAF177226.1_cds_AAF20053.1_12.fa\n", "NP_037555.1.fa\n" ] } ], "source": [ "%%bash\n", "cp /home/sam/data/gigas/genomes/AF177226.cds_splits/lclAF177226.1_cds_AAF20053.1_12.fa \\\n", "/home/sam/data/gigas/genes/\n", "\n", "ls /home/sam/data/gigas/genes/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Run Primer3 to design primers\n", "\n", "##### Quick explanation:\n", "\n", "- Primer3 requires a specially formatted input file. 
The file must be formatted like this:\n", "\n", "```\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "=\n", "```\n", "\n", "Values after the \"=\" on each line can be changed to whatever values the user decides. The ```${sequence}``` must be a nucleotide sequence on a single line, with no line breaks.\n", "\n", "The code below uses a ```heredoc``` to write this information to a file. Everything between the following two lines gets printed (via ```cat```) as shown and then redirected to the indicated file (```20181211_primer3_params.txt```):\n", "\n", "```\n", "cat << EOF > /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt\n", "EOF\n", "```\n", "\n", "- Primer3 is run with the ```--format_output``` option to make a nice, human-readable output format." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "Copyright (c) 1996-2017\n", "Whitehead Institute for Biomedical Research, Steve Rozen\n", "(http://purl.com/STEVEROZEN/), Andreas Untergasser and Helen Skaletsky\n", "All rights reserved.\n", "\n", " This file is part of the primer3 suite and libraries.\n", "\n", " The primer3 suite and libraries are free software;\n", " you can redistribute them and/or modify them under the terms\n", " of the GNU General Public License as published by the Free\n", " Software Foundation; either version 2 of the License, or (at\n", " your option) any later version.\n", "\n", " This software is distributed in the hope that it will be useful,\n", " but WITHOUT ANY WARRANTY; without even the implied warranty of\n", " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the\n", " GNU General Public License for more details.\n", "\n", " You should have received a copy of the GNU General Public License\n", " along with this software (file gpl-2.0.txt in the source\n", " distribution); if not, write to the Free Software\n", " Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\n", "\n", "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n", "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n", "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n", "A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n", "OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n", "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n", "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n", "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n", "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n", "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n", "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n", "\n", "\n", "\n", "USAGE: /home/sam/software/primer3-2.4.0/src/primer3_core [--format_output] [--default_version=1|--default_version=2] [--io_version=4] [--p3_settings_file=] [--echo_settings_file] [--strict_tags] [--output=] [--error=] [input_file]\n", "This is primer3 (libprimer3 release 2.4.0)\n", "Input can also be provided on standard input.\n", "For example:\n", "$ primer3_core < my_input_file\n" ] } ], "source": [ "%%bash\n", "\n", "# Rename this folder\n", "mv /home/sam/analyses/20181211_gigas_cox1_primeres /home/sam/analyses/20181211_gigas_cox1_primers\n", "\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers\n", "\n", "# Store sequence only from desired FastA.\n", "# Print all lines after the first line and then delete newlines\n", "sequence=$(tail -n +2 /home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa | 
tr -d '\\n')\n", "\n", "# Store file name of targeted FastA file.\n", "## Use sed to strip leading text from FastA header\n", "seq_id=$(head -n 1 /home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa | sed 's/>lcl|//')\n", "\n", "# Use heredoc to create Primer3 parameters file\n", "cat << EOF > /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/\n", "=\n", "EOF\n", "\n", "# Run Primer3\n", "/home/sam/software/primer3-2.4.0/src/primer3_core \\\n", "--format_output \\\n", "--output=/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt \\\n", "/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "mv: cannot stat '/home/sam/analyses/20181211_gigas_cox1_primeres': No such file or directory\n" ] } ], "source": [ "%%bash\n", "\n", "# Rename this folder\n", "mv /home/sam/analyses/20181211_gigas_cox1_primeres /home/sam/analyses/20181211_gigas_cox1_primers\n", "\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers\n", "\n", "# Store sequence only from desired FastA.\n", "# Print all lines after the first line and then delete newlines\n", "sequence=$(tail -n +2 /home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa | tr -d '\\n')\n", "\n", "# Store file name of targeted FastA file.\n", "## Use sed to strip leading text from FastA header\n", "seq_id=$(head -n 1 /home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa | sed 
's/>lcl|//')\n", "\n", "# Use heredoc to create Primer3 parameters file\n", "cat << EOF > /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/\n", "=\n", "EOF\n", "\n", "# Run Primer3\n", "/home/sam/software/primer3-2.4.0/src/primer3_core \\\n", "--format_output \\\n", "--output=/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers.txt \\\n", "/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PRIMER PICKING RESULTS FOR AF177226.1_cds_AAF20053.1_12\n", "\n", "No mispriming library specified\n", "Using 0-based sequence positions\n", "OLIGO start len tm gc% any_th 3'_th hairpin seq\n", "LEFT PRIMER 205 19 59.54 57.89 0.00 0.00 34.59 GGGGGTTTGGTAACTGGCT\n", "RIGHT PRIMER 352 18 59.88 61.11 0.00 0.00 0.00 CCTGCCCCAACTCCGTTT\n", "SEQUENCE SIZE: 1518\n", "INCLUDED REGION SIZE: 1518\n", "\n", "PRODUCT SIZE: 148, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00\n", "\n", " 0 ATGTCAACAAATCATTTAGACATTGGAAGGTTTTATATAGTATTTGGATTTTGAGCTGTT\n", " \n", "\n", " 60 CTTGCGGGAACTAGGTTTAGGTCTCTTATTCGTTGGAGACTTTATAACCCTGGAGCTAAG\n", " \n", "\n", " 120 TTTTTAGACCCCGTGACTTATAATGCAGTTGTAACTAGGCATGCGTTGGTTATGATTTTT\n", " \n", "\n", " 180 TTCTTTGTTATACCTGTAATAATTGGGGGGTTTGGTAACTGGCTTATCCCTTTGATGCTT\n", " >>>>>>>>>>>>>>>>>>> \n", "\n", " 240 CTAGTAGCAGACATGCAATTTCCTCGATTAAATGCATTTAGATTTTGAGTTTTGCCAGGG\n", " \n", "\n", " 300 
TCTCTTTATCTTATGCTTATGTCTAACATTGTAGAAAACGGAGTTGGGGCAGGGTGAACA\n", " <<<<<<<<<<<<<<<<<< \n", "\n", " 360 ATTTACCCTCCTTTATCAACTTACTCTTATCATGGAGTTTGTATAGACCTTGCAATTCTA\n", " \n", "\n", " 420 AGCCTTCACCTTGCTGGTATTAGCTCTATTTTCAGGTCAATTAATTTCATAGTAACGATT\n", " \n", "\n", " 480 AGAAATATGCGATCTGTTGGGGGCCATTTACTAGCACTATTCCCTTGATCTATTAAGGTT\n", " \n", "\n", " 540 ACTTCATTCTTGCTTTTGACTACTCTCCCAGTGTTAGCTGGAGGTCTTACTATACTTTTG\n", " \n", "\n", " 600 ACTGATCGTCATTTTAATACCTCTTTTTTTGACCCTGTCGGAGGGGGGGACCCTGTCTTA\n", " \n", "\n", " 660 TTTCAGCATTTGTTTTGATTTTTTGGTCACCCTGAGGTGTATGTCCTTATTCTTCCAGGT\n", " \n", "\n", " 720 TTTGGAATAATTTCTCATGTCTTATGTTTTTGGTCAAGTAAAAAGACTGCATATGGAAAT\n", " \n", "\n", " 780 ATGGGAATGTTTTATGCAATACTTAATATTGGGTTCTTAGGGTTTATTGTCTGGGGGCAT\n", " \n", "\n", " 840 CACATGTTTGTGGCTGGAATGGATATTGATACGCGTGCTTATTTTAGTGCTGCCACCGTT\n", " \n", "\n", " 900 ATTATTGCAGTGCCAACTGGTATTAAGGTGTTTGCATGAATTAGCACAATGCTAGGCTCT\n", " \n", "\n", " 960 AAAGTTTCAACTCAAGCACCTATGTTGTGGTCTACTGGTTTTATTATTCTTTTTACAACA\n", " \n", "\n", " 1020 GGGGGTCTTACAGGACTTATTCTATCAAGAGCTTCAGTAGATGTTACGCTTCACGACACT\n", " \n", "\n", " 1080 TATTTTGTAACTGGTCATTTTCACTACGTCTTATCAATGGGTGCGGTGTTTACAATTTTA\n", " \n", "\n", " 1140 GCTGGGTTTACTCACTGATTTCCTCTTGTTGCTAAGGTTATAATGCATCGGCAAAAAATG\n", " \n", "\n", " 1200 AAAAGTCATTTTTTAGCAATGTTTTTAGGTGTTAATGCAGCATTTTTGCCACATCATTTT\n", " \n", "\n", " 1260 TTGGGTTTGGCTGGTATACCACGTCGAGTAGTTGATTATCCAGATCATTTTTGATTTTGA\n", " \n", "\n", " 1320 AATAAAGTATCCACATTTGGCTCTCATTTGAGTACTGGCTCATTGTTATTTTTTGTGTTT\n", " \n", "\n", " 1380 TTGTTATGAGAGTCATTTATTGCTCAACGGCCAGTTATTTCAGTGCGAAACACTTCTAGG\n", " \n", "\n", " 1440 TCCCCCGAATGGGCTGTTGTGTCTAGCCTCCCTAAGCATGCAGGGGATGAATTAGCAAAA\n", " \n", "\n", " 1500 ATGGCTAAGCTTTGTTAG\n", " \n", "\n", "KEYS (in order of precedence):\n", ">>>>>> left primer\n", "<<<<<< right primer\n", "\n", "ADDITIONAL OLIGOS\n", " start len tm gc% any_th 3'_th hairpin seq\n", "\n", " 1 LEFT PRIMER 205 18 57.89 61.11 0.00 0.00 34.59 GGGGGTTTGGTAACTGGC\n", " RIGHT 
PRIMER 352 18 59.88 61.11 0.00 0.00 0.00 CCTGCCCCAACTCCGTTT\n", " PRODUCT SIZE: 148, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00\n", "\n", " 2 LEFT PRIMER 205 20 60.18 55.00 0.00 0.00 34.59 GGGGGTTTGGTAACTGGCTT\n", " RIGHT PRIMER 352 18 59.88 61.11 0.00 0.00 0.00 CCTGCCCCAACTCCGTTT\n", " PRODUCT SIZE: 148, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00\n", "\n", " 3 LEFT PRIMER 419 18 57.49 55.56 0.00 0.00 42.93 AAGCCTTCACCTTGCTGG\n", " RIGHT PRIMER 503 18 60.12 61.11 0.00 0.00 0.00 GCCCCCAACAGATCGCAT\n", " PRODUCT SIZE: 85, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00\n", "\n", " 4 LEFT PRIMER 342 18 59.80 61.11 0.00 0.00 0.00 GTTGGGGCAGGGTGAACA\n", " RIGHT PRIMER 436 18 57.49 55.56 0.00 0.00 35.27 CCAGCAAGGTGAAGGCTT\n", " PRODUCT SIZE: 95, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00\n", "\n", "Statistics\n", " con too in in not no tm tm high high high high \n", " sid many tar excl ok bad GC too too any_th 3'_th hair- poly end \n", " ered Ns get reg reg GC% clamp low high compl compl pin X stab ok\n", "libprimer3 release 2.4.0\n", "\n", "\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Run again to generate default output format, just for curiosity" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SEQUENCE_ID=AF177226.1_cds_AAF20053.1_12\n", 
"SEQUENCE_TEMPLATE=ATGTCAACAAATCATTTAGACATTGGAAGGTTTTATATAGTATTTGGATTTTGAGCTGTTCTTGCGGGAACTAGGTTTAGGTCTCTTATTCGTTGGAGACTTTATAACCCTGGAGCTAAGTTTTTAGACCCCGTGACTTATAATGCAGTTGTAACTAGGCATGCGTTGGTTATGATTTTTTTCTTTGTTATACCTGTAATAATTGGGGGGTTTGGTAACTGGCTTATCCCTTTGATGCTTCTAGTAGCAGACATGCAATTTCCTCGATTAAATGCATTTAGATTTTGAGTTTTGCCAGGGTCTCTTTATCTTATGCTTATGTCTAACATTGTAGAAAACGGAGTTGGGGCAGGGTGAACAATTTACCCTCCTTTATCAACTTACTCTTATCATGGAGTTTGTATAGACCTTGCAATTCTAAGCCTTCACCTTGCTGGTATTAGCTCTATTTTCAGGTCAATTAATTTCATAGTAACGATTAGAAATATGCGATCTGTTGGGGGCCATTTACTAGCACTATTCCCTTGATCTATTAAGGTTACTTCATTCTTGCTTTTGACTACTCTCCCAGTGTTAGCTGGAGGTCTTACTATACTTTTGACTGATCGTCATTTTAATACCTCTTTTTTTGACCCTGTCGGAGGGGGGGACCCTGTCTTATTTCAGCATTTGTTTTGATTTTTTGGTCACCCTGAGGTGTATGTCCTTATTCTTCCAGGTTTTGGAATAATTTCTCATGTCTTATGTTTTTGGTCAAGTAAAAAGACTGCATATGGAAATATGGGAATGTTTTATGCAATACTTAATATTGGGTTCTTAGGGTTTATTGTCTGGGGGCATCACATGTTTGTGGCTGGAATGGATATTGATACGCGTGCTTATTTTAGTGCTGCCACCGTTATTATTGCAGTGCCAACTGGTATTAAGGTGTTTGCATGAATTAGCACAATGCTAGGCTCTAAAGTTTCAACTCAAGCACCTATGTTGTGGTCTACTGGTTTTATTATTCTTTTTACAACAGGGGGTCTTACAGGACTTATTCTATCAAGAGCTTCAGTAGATGTTACGCTTCACGACACTTATTTTGTAACTGGTCATTTTCACTACGTCTTATCAATGGGTGCGGTGTTTACAATTTTAGCTGGGTTTACTCACTGATTTCCTCTTGTTGCTAAGGTTATAATGCATCGGCAAAAAATGAAAAGTCATTTTTTAGCAATGTTTTTAGGTGTTAATGCAGCATTTTTGCCACATCATTTTTTGGGTTTGGCTGGTATACCACGTCGAGTAGTTGATTATCCAGATCATTTTTGATTTTGAAATAAAGTATCCACATTTGGCTCTCATTTGAGTACTGGCTCATTGTTATTTTTTGTGTTTTTGTTATGAGAGTCATTTATTGCTCAACGGCCAGTTATTTCAGTGCGAAACACTTCTAGGTCCCCCGAATGGGCTGTTGTGTCTAGCCTCCCTAAGCATGCAGGGGATGAATTAGCAAAAATGGCTAAGCTTTGTTAG\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/\n", "PRIMER_LEFT_NUM_RETURNED=5\n", "PRIMER_RIGHT_NUM_RETURNED=5\n", 
"PRIMER_INTERNAL_NUM_RETURNED=0\n", "PRIMER_PAIR_NUM_RETURNED=5\n", "PRIMER_PAIR_0_PENALTY=1.578641\n", "PRIMER_LEFT_0_PENALTY=1.463375\n", "PRIMER_RIGHT_0_PENALTY=0.115266\n", "PRIMER_LEFT_0_SEQUENCE=GGGGGTTTGGTAACTGGCT\n", "PRIMER_RIGHT_0_SEQUENCE=CCTGCCCCAACTCCGTTT\n", "PRIMER_LEFT_0=205,19\n", "PRIMER_RIGHT_0=352,18\n", "PRIMER_LEFT_0_TM=59.537\n", "PRIMER_RIGHT_0_TM=59.885\n", "PRIMER_LEFT_0_GC_PERCENT=57.895\n", "PRIMER_RIGHT_0_GC_PERCENT=61.111\n", "PRIMER_LEFT_0_SELF_ANY_TH=0.00\n", "PRIMER_RIGHT_0_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_0_SELF_END_TH=0.00\n", "PRIMER_RIGHT_0_SELF_END_TH=0.00\n", "PRIMER_LEFT_0_HAIRPIN_TH=34.59\n", "PRIMER_RIGHT_0_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_0_END_STABILITY=4.7500\n", "PRIMER_RIGHT_0_END_STABILITY=3.6000\n", "PRIMER_PAIR_0_COMPL_ANY_TH=0.00\n", "PRIMER_PAIR_0_COMPL_END_TH=0.00\n", "PRIMER_PAIR_0_PRODUCT_SIZE=148\n", "PRIMER_PAIR_1_PENALTY=2.222295\n", "PRIMER_LEFT_1_PENALTY=2.107029\n", "PRIMER_RIGHT_1_PENALTY=0.115266\n", "PRIMER_LEFT_1_SEQUENCE=GGGGGTTTGGTAACTGGC\n", "PRIMER_RIGHT_1_SEQUENCE=CCTGCCCCAACTCCGTTT\n", "PRIMER_LEFT_1=205,18\n", "PRIMER_RIGHT_1=352,18\n", "PRIMER_LEFT_1_TM=57.893\n", "PRIMER_RIGHT_1_TM=59.885\n", "PRIMER_LEFT_1_GC_PERCENT=61.111\n", "PRIMER_RIGHT_1_GC_PERCENT=61.111\n", "PRIMER_LEFT_1_SELF_ANY_TH=0.00\n", "PRIMER_RIGHT_1_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_1_SELF_END_TH=0.00\n", "PRIMER_RIGHT_1_SELF_END_TH=0.00\n", "PRIMER_LEFT_1_HAIRPIN_TH=34.59\n", "PRIMER_RIGHT_1_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_1_END_STABILITY=4.8500\n", "PRIMER_RIGHT_1_END_STABILITY=3.6000\n", "PRIMER_PAIR_1_COMPL_ANY_TH=0.00\n", "PRIMER_PAIR_1_COMPL_END_TH=0.00\n", "PRIMER_PAIR_1_PRODUCT_SIZE=148\n", "PRIMER_PAIR_2_PENALTY=2.294210\n", "PRIMER_LEFT_2_PENALTY=2.178944\n", "PRIMER_RIGHT_2_PENALTY=0.115266\n", "PRIMER_LEFT_2_SEQUENCE=GGGGGTTTGGTAACTGGCTT\n", "PRIMER_RIGHT_2_SEQUENCE=CCTGCCCCAACTCCGTTT\n", "PRIMER_LEFT_2=205,20\n", "PRIMER_RIGHT_2=352,18\n", "PRIMER_LEFT_2_TM=60.179\n", "PRIMER_RIGHT_2_TM=59.885\n", 
"PRIMER_LEFT_2_GC_PERCENT=55.000\n", "PRIMER_RIGHT_2_GC_PERCENT=61.111\n", "PRIMER_LEFT_2_SELF_ANY_TH=0.00\n", "PRIMER_RIGHT_2_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_2_SELF_END_TH=0.00\n", "PRIMER_RIGHT_2_SELF_END_TH=0.00\n", "PRIMER_LEFT_2_HAIRPIN_TH=34.59\n", "PRIMER_RIGHT_2_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_2_END_STABILITY=4.3500\n", "PRIMER_RIGHT_2_END_STABILITY=3.6000\n", "PRIMER_PAIR_2_COMPL_ANY_TH=0.00\n", "PRIMER_PAIR_2_COMPL_END_TH=0.00\n", "PRIMER_PAIR_2_PRODUCT_SIZE=148\n", "PRIMER_PAIR_3_PENALTY=2.632121\n", "PRIMER_LEFT_3_PENALTY=2.507969\n", "PRIMER_RIGHT_3_PENALTY=0.124151\n", "PRIMER_LEFT_3_SEQUENCE=AAGCCTTCACCTTGCTGG\n", "PRIMER_RIGHT_3_SEQUENCE=GCCCCCAACAGATCGCAT\n", "PRIMER_LEFT_3=419,18\n", "PRIMER_RIGHT_3=503,18\n", "PRIMER_LEFT_3_TM=57.492\n", "PRIMER_RIGHT_3_TM=60.124\n", "PRIMER_LEFT_3_GC_PERCENT=55.556\n", "PRIMER_RIGHT_3_GC_PERCENT=61.111\n", "PRIMER_LEFT_3_SELF_ANY_TH=0.00\n", "PRIMER_RIGHT_3_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_3_SELF_END_TH=0.00\n", "PRIMER_RIGHT_3_SELF_END_TH=0.00\n", "PRIMER_LEFT_3_HAIRPIN_TH=42.93\n", "PRIMER_RIGHT_3_HAIRPIN_TH=0.00\n", "PRIMER_LEFT_3_END_STABILITY=4.8500\n", "PRIMER_RIGHT_3_END_STABILITY=4.7300\n", "PRIMER_PAIR_3_COMPL_ANY_TH=0.00\n", "PRIMER_PAIR_3_COMPL_END_TH=0.00\n", "PRIMER_PAIR_3_PRODUCT_SIZE=85\n", "PRIMER_PAIR_4_PENALTY=2.705294\n", "PRIMER_LEFT_4_PENALTY=0.197324\n", "PRIMER_RIGHT_4_PENALTY=2.507969\n", "PRIMER_LEFT_4_SEQUENCE=GTTGGGGCAGGGTGAACA\n", "PRIMER_RIGHT_4_SEQUENCE=CCAGCAAGGTGAAGGCTT\n", "PRIMER_LEFT_4=342,18\n", "PRIMER_RIGHT_4=436,18\n", "PRIMER_LEFT_4_TM=59.803\n", "PRIMER_RIGHT_4_TM=57.492\n", "PRIMER_LEFT_4_GC_PERCENT=61.111\n", "PRIMER_RIGHT_4_GC_PERCENT=55.556\n", "PRIMER_LEFT_4_SELF_ANY_TH=0.00\n", "PRIMER_RIGHT_4_SELF_ANY_TH=0.00\n", "PRIMER_LEFT_4_SELF_END_TH=0.00\n", "PRIMER_RIGHT_4_SELF_END_TH=0.00\n", "PRIMER_LEFT_4_HAIRPIN_TH=0.00\n", "PRIMER_RIGHT_4_HAIRPIN_TH=35.27\n", "PRIMER_LEFT_4_END_STABILITY=3.1800\n", "PRIMER_RIGHT_4_END_STABILITY=4.3500\n", 
"PRIMER_PAIR_4_COMPL_ANY_TH=0.00\n", "PRIMER_PAIR_4_COMPL_END_TH=0.00\n", "PRIMER_PAIR_4_PRODUCT_SIZE=95\n", "=\n" ] } ], "source": [ "%%bash\n", "\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers\n", "\n", "\n", "# Run Primer3\n", "/home/sam/software/primer3-2.4.0/src/primer3_core \\\n", "--output=/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers_default_format.txt \\\n", "/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt\n", "\n", "cat /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers_default_format.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Test EMBOSS PrimerSearch on first set of primers picked by Primer3 on source FastA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### The code below does the following:\n", "\n", "- Parses out sequence id, left, and right primers and creates the proper tab-delimited primer sequences file needed by ```primersearch```\n", "\n", "- Runs ```primersearch``` using the newly created primer sequences file and the target FastA file that was used to generate our primers in ```Primer3```" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\n" ] } ], "source": [ "%%bash\n", "\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers\n", "\n", "seq_id=$(grep \"SEQUENCE_ID=\" /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers_default_format.txt | sed 's/SEQUENCE_ID=//')\n", "left_primer=$(grep \"PRIMER_LEFT_0_SEQUENCE=\" /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers_default_format.txt | sed 's/PRIMER_LEFT_0_SEQUENCE=//')\n", "right_primer=$(grep \"PRIMER_RIGHT_0_SEQUENCE=\" /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers_default_format.txt | sed 's/PRIMER_RIGHT_0_SEQUENCE=//')\n", "\n", "\n", 
"printf \"%s\\t\" \"${seq_id}\" \"${left_primer}\" \"${right_primer}\" > /home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\n", "\n", "# Add newline to end of file\n", "printf \"\\n\" >> /home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\n", "\n", "/home/sam/software/EMBOSS-6.6.0/emboss/primersearch \\\n", "-auto \\\n", "/home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa \\\n", "/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\n", "\n", "ls /home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1_cds_aaf20053.primersearch\n", "20181211_emboss_primers.txt\n", "20181211_primer3_params.txt\n", "20181211_primer3_primers_default_format.txt\n", "20181211_primer3_primers.txt\n", "AF177226.1_cds_AAF20053.1_12.for\n", "AF177226.1_cds_AAF20053.1_12.rev\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers/\n", "\n", "ls" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Primer name AF177226.1_cds_AAF20053.1_12\n", "Amplimer 1\n", "\tSequence: AF177226.1_cds_AAF20053.1_12 \n", "\t\n", "\tGGGGGTTTGGTAACTGGCT hits forward strand at 206 with 0 mismatches\n", "\tCCTGCCCCAACTCCGTTT hits reverse strand at [1166] with 0 mismatches\n", "\tAmplimer length: 148 bp\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers/\n", "\n", "mv 1_cds_aaf20053.primersearch AF177226.1_cds_AAF20053.1_12.primersearch\n", "\n", "cat AF177226.1_cds_AAF20053.1_12.primersearch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Primers match up to their source sequence, as expected. Now, to test the primers on the rest of the genome and mt genome to ensure specificity." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Use EMBOSS PrimerSearch tool to test primers across mt genome" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### The code below does the following:\n", "\n", "- Sets variables for file/folder paths\n", "\n", "- Runs for loop over all individual CDS FastA files:\n", " - Uses parameter substitution to strip paths from filenames\n", " - Uses parameter substitution to strip extensions from filenames\n", " - Uses ```tr``` to convert filenames to lowercase\n", " - Runs ```primersearch``` on each CDS FastA file\n", " - Uses ```grep``` to evaluate if the word \"Amplimer\" is found in the resulting output file; if it is _not_, the file is deleted." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 112\n", "-rw-rw-r-- 1 sam sam 1895 Dec 11 07:46 20181211_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 5400 Dec 11 07:46 20181211_primer3_primers.txt\n", "-rw-rw-r-- 1 sam sam 43889 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.rev\n", "-rw-rw-r-- 1 sam sam 40878 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.for\n", "-rw-rw-r-- 1 sam sam 5459 Dec 11 07:50 20181211_primer3_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Dec 11 07:56 20181211_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 256 Dec 11 07:56 AF177226.1_cds_AAF20053.1_12.primersearch\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Error: Failed to open filename '/home/sam/data/gigas/genomes/NC_001276_fasta_splits*.fa'\n", "Error: Unable to read sequence '/home/sam/data/gigas/genomes/NC_001276_fasta_splits*.fa'\n", "Died: primersearch terminated: Bad value for '-seqall' with -auto defined\n", "grep: nc_001276_fasta_splits*.primersearch: No such file or directory\n", "rm: cannot remove 'nc_001276_fasta_splits*.primersearch': No such file or directory\n", "\n", "real\t0m0.025s\n", "user\t0m0.008s\n", "sys\t0m0.004s\n" ] } ], "source": [
"%%bash\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers/\n", "\n", "fasta_loc=\"/home/sam/data/gigas/genomes/NC_001276_fasta_splits\"\n", "primersearch=\"/home/sam/software/EMBOSS-6.6.0/emboss/primersearch\"\n", "primers=\"/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\"\n", "\n", "\n", "time \\\n", "for fasta in ${fasta_loc}*.fa\n", " do\n", " fasta_no_path=$(echo ${fasta##*/})\n", " fasta_no_ext=$(echo ${fasta_no_path%%.*})\n", " fasta_no_ext_lower=$(echo ${fasta_no_ext} | tr '[:upper:]' '[:lower:]')\n", " ${primersearch} -auto ${fasta} ${primers} 20\n", " if ! grep --quiet \"Amplimer\" \"${fasta_no_ext_lower}.primersearch\"\n", " then rm ${fasta_no_ext_lower}.primersearch\n", " fi\n", "done\n", "\n", "ls -ltr" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 116\n", "-rw-rw-r-- 1 sam sam 1895 Dec 11 07:46 20181211_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 5400 Dec 11 07:46 20181211_primer3_primers.txt\n", "-rw-rw-r-- 1 sam sam 43889 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.rev\n", "-rw-rw-r-- 1 sam sam 40878 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.for\n", "-rw-rw-r-- 1 sam sam 5459 Dec 11 07:50 20181211_primer3_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Dec 11 07:56 20181211_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 256 Dec 11 07:56 AF177226.1_cds_AAF20053.1_12.primersearch\n", "-rw-rw-r-- 1 sam sam 241 Dec 11 08:05 nc_001276.primersearch\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t0m0.026s\n", "user\t0m0.016s\n", "sys\t0m0.000s\n" ] } ], "source": [ "%%bash\n", "# Stop if the analysis directory is missing so reports are never written to, or removed from, another directory\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers/ || exit 1\n", "\n", "fasta_loc=\"/home/sam/data/gigas/genomes/NC_001276_fasta_splits/\"\n", "primersearch=\"/home/sam/software/EMBOSS-6.6.0/emboss/primersearch\"\n", "primers=\"/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\"\n", "\n", "\n", "time \\\n",
"for fasta in \"${fasta_loc}\"*.fa\n", "  do\n", "    # An unmatched glob stays a literal pattern; report it instead of passing it to primersearch\n", "    if [ ! -e \"${fasta}\" ]\n", "      then\n", "        echo \"No FastA files match ${fasta}\" >&2\n", "        continue\n", "    fi\n", "    # Strip the directory, then everything from the first dot, then lowercase the result\n", "    fasta_no_path=\"${fasta##*/}\"\n", "    fasta_no_ext=\"${fasta_no_path%%.*}\"\n", "    fasta_no_ext_lower=$(printf '%s' \"${fasta_no_ext}\" | tr '[:upper:]' '[:lower:]')\n", "    outfile=\"${fasta_no_ext_lower}.primersearch\"\n", "    # Name the report explicitly rather than relying on the name -auto would pick\n", "    \"${primersearch}\" -auto -seqall \"${fasta}\" -infile \"${primers}\" -mismatchpercent 20 -outfile \"${outfile}\"\n", "    # Keep only reports that contain at least one amplimer\n", "    if ! grep --quiet \"Amplimer\" \"${outfile}\"\n", "      then rm -f -- \"${outfile}\"\n", "    fi\n", "done\n", "\n", "ls -ltr" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Primer name AF177226.1_cds_AAF20053.1_12\n", "Amplimer 1\n", "\tSequence: NC_001276.1 \n", "\t\n", "\tGGGGGTTTGGTAACTGGCT hits forward strand at 15803 with 0 mismatches\n", "\tCCTGCCCCAACTCCGTTT hits reverse strand at [2275] with 0 mismatches\n", "\tAmplimer length: 148 bp\n" ] } ], "source": [ "%%bash\n", "cat /home/sam/analyses/20181211_gigas_cox1_primers/nc_001276.primersearch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Use EMBOSS PrimerSearch tool to test primers across genome" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 120\n", "-rw-rw-r-- 1 sam sam 1895 Dec 11 07:46 20181211_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 5400 Dec 11 07:46 20181211_primer3_primers.txt\n", "-rw-rw-r-- 1 sam sam 43889 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.rev\n", "-rw-rw-r-- 1 sam sam 40878 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.for\n", "-rw-rw-r-- 1 sam sam 5459 Dec 11 07:50 20181211_primer3_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Dec 11 07:56 20181211_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 256 Dec 11 07:56 AF177226.1_cds_AAF20053.1_12.primersearch\n", "-rw-rw-r-- 1 sam sam 241 Dec 11 08:08 nc_001276.primersearch\n", "-rw-rw-r-- 1 sam sam 246 Dec 11 08:08 nw_011935054.primersearch\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "real\t4m10.485s\n",
"user\t2m1.152s\n", "sys\t0m26.576s\n" ] } ], "source": [ "%%bash\n", "# Stop if the analysis directory is missing so reports are never written to, or removed from, another directory\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers/ || exit 1\n", "\n", "fasta_loc=\"/home/sam/data/gigas/genomes/GCF_000297895_fasta_splits/\"\n", "primersearch=\"/home/sam/software/EMBOSS-6.6.0/emboss/primersearch\"\n", "primers=\"/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt\"\n", "\n", "\n", "time \\\n", "for fasta in \"${fasta_loc}\"*.fna\n", "  do\n", "    # An unmatched glob stays a literal pattern; report it instead of passing it to primersearch\n", "    if [ ! -e \"${fasta}\" ]\n", "      then\n", "        echo \"No FastA files match ${fasta}\" >&2\n", "        continue\n", "    fi\n", "    # Strip the directory, then everything from the first dot, then lowercase the result\n", "    fasta_no_path=\"${fasta##*/}\"\n", "    fasta_no_ext=\"${fasta_no_path%%.*}\"\n", "    fasta_no_ext_lower=$(printf '%s' \"${fasta_no_ext}\" | tr '[:upper:]' '[:lower:]')\n", "    outfile=\"${fasta_no_ext_lower}.primersearch\"\n", "    # Name the report explicitly rather than relying on the name -auto would pick\n", "    \"${primersearch}\" -auto -seqall \"${fasta}\" -infile \"${primers}\" -mismatchpercent 20 -outfile \"${outfile}\"\n", "    # Keep only reports that contain at least one amplimer\n", "    if ! grep --quiet \"Amplimer\" \"${outfile}\"\n", "      then rm -f -- \"${outfile}\"\n", "    fi\n", "done\n", "\n", "ls -ltr" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Primer name AF177226.1_cds_AAF20053.1_12\n", "Amplimer 1\n", "\tSequence: NW_011935054.1 \n", "\t\n", "\tCCTGCCCCAACTCCGTTT hits forward strand at 119485 with 0 mismatches\n", "\tGGGGGTTTGGTAACTGGCT hits reverse strand at [93393] with 0 mismatches\n", "\tAmplimer length: 148 bp\n" ] } ], "source": [ "%%bash\n", "cd /home/sam/analyses/20181211_gigas_cox1_primers/\n", "\n", "cat nw_011935054.primersearch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I believe what I'm seeing is that the entire _C.gigas_ genome _includes_ mitochondrial sequences. Thus, I see a single match in the mitochondrial genome and a single match in the full genome. Will order this primer set."
] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Copy data to Gannet\n", "# The source path below is relative, so stop if the analyses directory cannot be entered\n", "cd /home/sam/analyses/ || exit 1\n", "\n", "# --relative sends the path after ./ so the folder name is kept under the destination\n", "# NOTE(review): destination folder is named Atumefaciens although this is C. gigas data - confirm it is intended\n", "rsync \\\n", "--archive \\\n", "--relative \\\n", "./20181211_gigas_cox1_primers/ gannet:/volume1/web/Atumefaciens" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }