{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TODAY'S DATE:\n", "Thu Jul 30 11:36:00 PDT 2020\n", "------------\n", "\n", "Distributor ID:\tUbuntu\n", "Description:\tUbuntu 16.04.6 LTS\n", "Release:\t16.04\n", "Codename:\txenial\n", "\n", "------------\n", "HOSTNAME: \n", "swoose\n", "\n", "------------\n", "Computer Specs:\n", "\n", "Architecture: x86_64\n", "CPU op-mode(s): 32-bit, 64-bit\n", "Byte Order: Little Endian\n", "CPU(s): 24\n", "On-line CPU(s) list: 0-23\n", "Thread(s) per core: 2\n", "Core(s) per socket: 6\n", "Socket(s): 2\n", "NUMA node(s): 1\n", "Vendor ID: GenuineIntel\n", "CPU family: 6\n", "Model: 44\n", "Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz\n", "Stepping: 2\n", "CPU MHz: 2925.990\n", "BogoMIPS: 5851.88\n", "Virtualization: VT-x\n", "L1d cache: 32K\n", "L1i cache: 32K\n", "L2 cache: 256K\n", "L3 cache: 12288K\n", "NUMA node0 CPU(s): 0-23\n", "Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb ssbd ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat flush_l1d\n", "\n", "------------\n", "\n", "Memory Specs\n", "\n", " total used free shared buff/cache available\n", "Mem: 70G 2.3G 1.4G 564M 67G 67G\n", "Swap: 4.7G 4.6M 4.7G\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No LSB modules are available.\n" ] } ], "source": [ "%%bash\n", "echo \"TODAY'S DATE:\"\n", "date\n", "echo \"------------\"\n", "echo \"\"\n", "#Display operating system info\n", "lsb_release -a\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"HOSTNAME: \"; hostname \n", "echo \"\"\n", "echo \"------------\"\n", "echo 
\"Computer Specs:\"\n", "echo \"\"\n", "lscpu\n", "echo \"\"\n", "echo \"------------\"\n", "echo \"\"\n", "echo \"Memory Specs\"\n", "echo \"\"\n", "free -mh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set variables" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "env: data_dir=/home/sam/data/P_generosa\n", "env: fasta=/home/sam/data/P_generosa/Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna\n", "env: out_dir=/home/sam/analyses/20200730_pgen_primer_design\n", "env: thermo_params_dir=/home/sam/programs/primer3-2.4.0/src/primer3_config/\n", "env: primer3=/home/sam/programs/primer3-2.4.0/src/primer3_core\n", "env: primersearch=/home/sam/programs/EMBOSS-6.6.0/emboss/primersearch\n" ] } ], "source": [ "# Set data directories\n", "%env data_dir=/home/sam/data/P_generosa\n", "%env fasta=/home/sam/data/P_generosa/Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna\n", "%env out_dir=/home/sam/analyses/20200730_pgen_primer_design\n", "\n", "# Needed for primer3-2.4.0\n", "%env thermo_params_dir=/home/sam/programs/primer3-2.4.0/src/primer3_config/\n", "\n", "# Programs\n", "%env primer3=/home/sam/programs/primer3-2.4.0/src/primer3_core\n", "%env primersearch=/home/sam/programs/EMBOSS-6.6.0/emboss/primersearch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Make directories" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "mkdir --parents \"${out_dir}\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Download P.generosa genes FastA file from OSF repo (https://osf.io/ct623/)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 376M\n", "-rw-rw-r-- 1 sam sam 375M Jul 30 11:53 Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna\n", "-rw-rw-r-- 1 sam sam 1.6M Jul 29 
11:48 Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna.fai\n" ] } ], "source": [
"%%bash\n",
"\n",
"# Download the P. generosa genes FastA to ${fasta} and list ${data_dir}.\n",
"\n",
"# Create the data directory on a fresh machine and stop if it cannot be\n",
"# entered, so the listing below never reports on some other directory.\n",
"mkdir --parents \"${data_dir}\"\n",
"cd \"${data_dir}\" || exit 1\n",
"\n",
"# --quiet also silences wget's error messages, so test the exit status\n",
"# explicitly. On failure remove the empty/partial file so that later cells\n",
"# cannot index a truncated FastA.\n",
"if ! wget --quiet \"https://files.osf.io/v1/resources/yem8n/providers/osfstorage/5db35d9abc32f4000e0b70c2?action=download&direct&version=1\" \\\n",
"--output-document \"${fasta}\"\n",
"then\n",
"    echo \"ERROR: download of ${fasta} failed.\" >&2\n",
"    rm -f \"${fasta}\"\n",
"    exit 1\n",
"fi\n",
"\n",
"ls -lh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Extract target sequences from FastA" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PGEN_.00g194630-vv0.74.a_ECHD3.fna:1\n", "PGEN_.00g194630-vv0.74.a_ECHD3.fna: >PGEN_.00g194630-vv0.74.a\n", "\n", "PGEN_.00g288180-vv0.74.a_TIF3s4a.fna:1\n", "PGEN_.00g288180-vv0.74.a_TIF3s4a.fna: >PGEN_.00g288180-vv0.74.a\n", "\n", "PGEN_.00g132040-vv0.74.a_TIF3s8-2.fna:1\n", "PGEN_.00g132040-vv0.74.a_TIF3s8-2.fna: >PGEN_.00g132040-vv0.74.a\n", "\n", "PGEN_.00g132030-vv0.74.a_TIF3s8-1.fna:1\n", "PGEN_.00g132030-vv0.74.a_TIF3s8-1.fna: >PGEN_.00g132030-vv0.74.a\n", "\n", "PGEN_.00g338640-vv0.74.a_NSF.fna:1\n", "PGEN_.00g338640-vv0.74.a_NSF.fna: >PGEN_.00g338640-vv0.74.a\n", "\n", "PGEN_.00g245080-vv0.74.a_TIF3s10.fna:1\n", "PGEN_.00g245080-vv0.74.a_TIF3s10.fna: >PGEN_.00g245080-vv0.74.a\n", "\n", "PGEN_.00g025890-vv0.74.a_TIF3s12.fna:1\n", "PGEN_.00g025890-vv0.74.a_TIF3s12.fna: >PGEN_.00g025890-vv0.74.a\n", "\n", "PGEN_.00g114060-vv0.74.a_GSK3B.fna:1\n", "PGEN_.00g114060-vv0.74.a_GSK3B.fna: >PGEN_.00g114060-vv0.74.a\n", "\n", "PGEN_.00g287540-vv0.74.a_RPL5.fna:1\n", "PGEN_.00g287540-vv0.74.a_RPL5.fna: >PGEN_.00g287540-vv0.74.a\n", "\n", "PGEN_.00g188130-vv0.74.a_FEN1.fna:1\n", "PGEN_.00g188130-vv0.74.a_FEN1.fna: >PGEN_.00g188130-vv0.74.a\n", "\n", "PGEN_.00g224740-vv0.74.a_GLYG.fna:1\n", "PGEN_.00g224740-vv0.74.a_GLYG.fna: >PGEN_.00g224740-vv0.74.a\n", "\n", "PGEN_.00g088260-vv0.74.a_NFIP1.fna:1\n", "PGEN_.00g088260-vv0.74.a_NFIP1.fna: >PGEN_.00g088260-vv0.74.a\n", "\n", "PGEN_.00g082590-vv0.74.a_TIF3s5.fna:1\n", 
"PGEN_.00g082590-vv0.74.a_TIF3s5.fna: >PGEN_.00g082590-vv0.74.a\n", "\n", "PGEN_.00g079690-vv0.74.a_TIF3s7.fna:1\n", "PGEN_.00g079690-vv0.74.a_TIF3s7.fna: >PGEN_.00g079690-vv0.74.a\n", "\n", "PGEN_.00g000750-vv0.74.a_TIF3s6b.fna:1\n", "PGEN_.00g000750-vv0.74.a_TIF3s6b.fna: >PGEN_.00g000750-vv0.74.a\n", "\n", "PGEN_.00g280110-vv0.74.a_SPTN1.fna:1\n", "PGEN_.00g280110-vv0.74.a_SPTN1.fna: >PGEN_.00g280110-vv0.74.a\n", "\n", "PGEN_.00g070040-vv0.74.a_APLP.fna:1\n", "PGEN_.00g070040-vv0.74.a_APLP.fna: >PGEN_.00g070040-vv0.74.a\n", "\n" ] } ], "source": [ "%%bash\n", "\n", "timestamp=$(date +\"%Y%m%d\")\n", "\n", "cd \"${out_dir}\"\n", "\n", "# Associative array to associate gene names with sequence ids\n", "# Requires >= Bash 4.0\n", "declare -A seqid_array\n", "\n", "# Populate associative array [gene_abbreviation]=seqid\n", "seqid_array=(\n", "[TIF3s6b]=PGEN_.00g000750-vv0.74.a \\\n", "[TIF3s12]=PGEN_.00g025890-vv0.74.a \\\n", "[APLP]=PGEN_.00g070040-vv0.74.a \\\n", "[TIF3s7]=PGEN_.00g079690-vv0.74.a \\\n", "[TIF3s5]=PGEN_.00g082590-vv0.74.a \\\n", "[NFIP1]=PGEN_.00g088260-vv0.74.a \\\n", "[GSK3B]=PGEN_.00g114060-vv0.74.a \\\n", "[TIF3s8-1]=PGEN_.00g132030-vv0.74.a \\\n", "[TIF3s8-2]=PGEN_.00g132040-vv0.74.a \\\n", "[FEN1]=PGEN_.00g188130-vv0.74.a \\\n", "[ECHD3]=PGEN_.00g194630-vv0.74.a \\\n", "[GLYG]=PGEN_.00g224740-vv0.74.a \\\n", "[TIF3s10]=PGEN_.00g245080-vv0.74.a \\\n", "[SPTN1]=PGEN_.00g280110-vv0.74.a \\\n", "[RPL5]=PGEN_.00g287540-vv0.74.a \\\n", "[TIF3s4a]=PGEN_.00g288180-vv0.74.a \\\n", "[NSF]=PGEN_.00g338640-vv0.74.a\n", ")\n", "\n", "# Individual FastAs array\n", "fasta_array=()\n", "\n", "# Extract sequences to individual FastA files\n", "for gene_name in \"${!seqid_array[@]}\"\n", "do\n", " # Set output file names\n", " out_file=\"${seqid_array[$gene_name]}\"_\"${gene_name}\".fna\n", "\n", " # Run faidx\n", " faidx \"${fasta}\" \"${seqid_array[$gene_name]}\" \\\n", " --out \"${out_file}\"\n", " \n", " # Add FastA to array\n", " 
fasta_array+=(${out_file})\n", " \n", " ## Check output\n", " # Count number of entries in output FastA (should be = 1)\n", " # -H displays filename - is compatible with OSX\n", " grep --count -H \">\" \"${out_file}\"\n", " \n", " # Check each FastA header\n", " echo \"${out_file}: $(head -n1 \"${out_file}\")\"\n", " echo \"\"\n", " \n", "done\n", "\n", "# Run Primer3\n", "for fna in \"${fasta_array[@]}\"\n", "do\n", "\n", " # Store sequence only from desired FastA.\n", " # Print all lines after the first line and then delete newlines\n", " # because sequence needs to be on single line for Primer3 params file\n", " sequence=$(tail -n +2 \"${fna}\" | tr -d '\\n')\n", " \n", " # Remove suffix from FastA file to use as sequence ID\n", " seq_id=${fna%.*}\n", " \n", " # Variables for output filenaming\n", " params_out=\"${timestamp}_${seq_id}_primer3_params.txt\"\n", " primer3_def_out=\"${timestamp}_${seq_id}_primers_default_format.txt\"\n", " emboss_primers=\"${timestamp}_${seq_id}_emboss_primers.txt\"\n", " primersearch_out=\"${timestamp}_${seq_id}_primersearch.txt\"\n", " \n", " # Use heredoc to create Primer3 parameters file\n", " \n", " ##Values after the \"=\" on each line can be changed to whatever values the user decides. 
\n", " ##The ${sequence} must be a nucletoide sequence on a single line, with no line breaks.\n", "\n", " ## The code below uses a ```heredoc``` to write this information to a file.\n", " ## Everything between the following two lines gets printed (via ```cat```) as shown and then\n", " ## redirected to the indicated file \"${params_out}\".\n", " \n", " ## BTW, heredoc cannot be indented (well, it can, but requires some formatting that I couldn't get to work)\n", "cat << EOF > \"${params_out}\"\n", "SEQUENCE_ID=${seq_id}\n", "SEQUENCE_TEMPLATE=${sequence}\n", "PRIMER_TASK=generic\n", "PRIMER_PICK_LEFT_PRIMER=3\n", "PRIMER_PICK_RIGHT_PRIMER=3\n", "PRIMER_OPT_SIZE=18\n", "PRIMER_MIN_SIZE=15\n", "PRIMER_MAX_SIZE=21\n", "PRIMER_MAX_NS_ACCEPTED=1\n", "PRIMER_PRODUCT_SIZE_RANGE=75-150\n", "P3_FILE_FLAG=1\n", "PRIMER_EXPLAIN_FLAG=1\n", "PRIMER_THERMODYNAMIC_PARAMETERS_PATH=${thermo_params_dir}\n", "=\n", "EOF\n", " \n", " # Run Primer3 with default output format\n", " ${primer3} \\\n", " --output=\"${out_dir}/${primer3_def_out}\" \\\n", " \"${params_out}\"\n", " \n", " # Create tab-delimited primer file for primersearch\n", " sequence_id=$(grep \"SEQUENCE_ID=\" \"${primer3_def_out}\" | sed 's/SEQUENCE_ID=//')\n", " left_primer=$(grep \"PRIMER_LEFT_0_SEQUENCE=\" \"${primer3_def_out}\" | sed 's/PRIMER_LEFT_0_SEQUENCE=//')\n", " right_primer=$(grep \"PRIMER_RIGHT_0_SEQUENCE=\" \"${primer3_def_out}\" | sed 's/PRIMER_RIGHT_0_SEQUENCE=//')\n", " \n", " printf \"%s\\t\" \"${sequence_id}\" \"${left_primer}\" \"${right_primer}\" > \"${emboss_primers}\"\n", " \n", " # Add required newline to end of file\n", " printf \"\\n\" >> \"${emboss_primers}\"\n", " \n", " # Run EMBOSS primersearch\n", " ${primersearch} \\\n", " -seqall \"${fasta}\" \\\n", " -infile \"${emboss_primers}\" \\\n", " -mismatchpercent 20 \\\n", " -outfile \"${primersearch_out}\" \\\n", " -auto\n", " \n", "\n", "done" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List output files" ] }, { 
"cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 28M\n", "-rw-rw-r-- 1 sam sam 15K Jul 30 21:13 PGEN_.00g194630-vv0.74.a_ECHD3.fna\n", "-rw-rw-r-- 1 sam sam 12K Jul 30 21:13 PGEN_.00g288180-vv0.74.a_TIF3s4a.fna\n", "-rw-rw-r-- 1 sam sam 4.9K Jul 30 21:13 PGEN_.00g132040-vv0.74.a_TIF3s8-2.fna\n", "-rw-rw-r-- 1 sam sam 17K Jul 30 21:13 PGEN_.00g132030-vv0.74.a_TIF3s8-1.fna\n", "-rw-rw-r-- 1 sam sam 21K Jul 30 21:13 PGEN_.00g338640-vv0.74.a_NSF.fna\n", "-rw-rw-r-- 1 sam sam 27K Jul 30 21:13 PGEN_.00g245080-vv0.74.a_TIF3s10.fna\n", "-rw-rw-r-- 1 sam sam 9.8K Jul 30 21:13 PGEN_.00g025890-vv0.74.a_TIF3s12.fna\n", "-rw-rw-r-- 1 sam sam 27K Jul 30 21:13 PGEN_.00g114060-vv0.74.a_GSK3B.fna\n", "-rw-rw-r-- 1 sam sam 14K Jul 30 21:13 PGEN_.00g287540-vv0.74.a_RPL5.fna\n", "-rw-rw-r-- 1 sam sam 17K Jul 30 21:13 PGEN_.00g188130-vv0.74.a_FEN1.fna\n", "-rw-rw-r-- 1 sam sam 48K Jul 30 21:13 PGEN_.00g224740-vv0.74.a_GLYG.fna\n", "-rw-rw-r-- 1 sam sam 8.4K Jul 30 21:13 PGEN_.00g088260-vv0.74.a_NFIP1.fna\n", "-rw-rw-r-- 1 sam sam 27K Jul 30 21:13 PGEN_.00g082590-vv0.74.a_TIF3s5.fna\n", "-rw-rw-r-- 1 sam sam 23K Jul 30 21:13 PGEN_.00g079690-vv0.74.a_TIF3s7.fna\n", "-rw-rw-r-- 1 sam sam 13K Jul 30 21:13 PGEN_.00g000750-vv0.74.a_TIF3s6b.fna\n", "-rw-rw-r-- 1 sam sam 69K Jul 30 21:13 PGEN_.00g280110-vv0.74.a_SPTN1.fna\n", "-rw-rw-r-- 1 sam sam 32K Jul 30 21:13 PGEN_.00g070040-vv0.74.a_APLP.fna\n", "-rw-rw-r-- 1 sam sam 15K Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 278K Jul 30 21:13 PGEN_.00g194630-vv0.74.a_ECHD3.for\n", "-rw-rw-r-- 1 sam sam 264K Jul 30 21:13 PGEN_.00g194630-vv0.74.a_ECHD3.rev\n", "-rw-rw-r-- 1 sam sam 18K Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 70 Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 423 Jul 30 21:13 
20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 12K Jul 30 21:13 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 259K Jul 30 21:13 PGEN_.00g288180-vv0.74.a_TIF3s4a.for\n", "-rw-rw-r-- 1 sam sam 266K Jul 30 21:13 PGEN_.00g288180-vv0.74.a_TIF3s4a.rev\n", "-rw-rw-r-- 1 sam sam 15K Jul 30 21:13 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 72 Jul 30 21:13 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 810 Jul 30 21:14 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 5.2K Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 95K Jul 30 21:14 PGEN_.00g132040-vv0.74.a_TIF3s8-2.for\n", "-rw-rw-r-- 1 sam sam 107K Jul 30 21:14 PGEN_.00g132040-vv0.74.a_TIF3s8-2.rev\n", "-rw-rw-r-- 1 sam sam 8.7K Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 73 Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 1.5M Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 17K Jul 30 21:14 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 510K Jul 30 21:14 PGEN_.00g132030-vv0.74.a_TIF3s8-1.for\n", "-rw-rw-r-- 1 sam sam 511K Jul 30 21:14 PGEN_.00g132030-vv0.74.a_TIF3s8-1.rev\n", "-rw-rw-r-- 1 sam sam 20K Jul 30 21:14 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 73 Jul 30 21:14 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 2.0K Jul 30 21:15 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 21K Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 647K Jul 30 21:15 
PGEN_.00g338640-vv0.74.a_NSF.for\n", "-rw-rw-r-- 1 sam sam 640K Jul 30 21:15 PGEN_.00g338640-vv0.74.a_NSF.rev\n", "-rw-rw-r-- 1 sam sam 25K Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 68 Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 423 Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 27K Jul 30 21:15 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 831K Jul 30 21:15 PGEN_.00g245080-vv0.74.a_TIF3s10.for\n", "-rw-rw-r-- 1 sam sam 829K Jul 30 21:15 PGEN_.00g245080-vv0.74.a_TIF3s10.rev\n", "-rw-rw-r-- 1 sam sam 30K Jul 30 21:15 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 72 Jul 30 21:15 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 1.6K Jul 30 21:16 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 9.9K Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 169K Jul 30 21:16 PGEN_.00g025890-vv0.74.a_TIF3s12.for\n", "-rw-rw-r-- 1 sam sam 169K Jul 30 21:16 PGEN_.00g025890-vv0.74.a_TIF3s12.rev\n", "-rw-rw-r-- 1 sam sam 14K Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 72 Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 424 Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 27K Jul 30 21:16 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 693K Jul 30 21:16 PGEN_.00g114060-vv0.74.a_GSK3B.for\n", "-rw-rw-r-- 1 sam sam 707K Jul 30 21:16 PGEN_.00g114060-vv0.74.a_GSK3B.rev\n", "-rw-rw-r-- 1 sam sam 31K Jul 30 21:16 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 70 Jul 30 21:16 
20200730_PGEN_.00g114060-vv0.74.a_GSK3B_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 1.6M Jul 30 21:17 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 14K Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 362K Jul 30 21:17 PGEN_.00g287540-vv0.74.a_RPL5.for\n", "-rw-rw-r-- 1 sam sam 355K Jul 30 21:17 PGEN_.00g287540-vv0.74.a_RPL5.rev\n", "-rw-rw-r-- 1 sam sam 17K Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 480K Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 17K Jul 30 21:17 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 377K Jul 30 21:17 PGEN_.00g188130-vv0.74.a_FEN1.for\n", "-rw-rw-r-- 1 sam sam 395K Jul 30 21:17 PGEN_.00g188130-vv0.74.a_FEN1.rev\n", "-rw-rw-r-- 1 sam sam 20K Jul 30 21:17 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Jul 30 21:17 20200730_PGEN_.00g188130-vv0.74.a_FEN1_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 423 Jul 30 21:18 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 47K Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 1.2M Jul 30 21:18 PGEN_.00g224740-vv0.74.a_GLYG.for\n", "-rw-rw-r-- 1 sam sam 1.1M Jul 30 21:18 PGEN_.00g224740-vv0.74.a_GLYG.rev\n", "-rw-rw-r-- 1 sam sam 50K Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 8.7K Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 8.6K Jul 30 21:18 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 276K Jul 30 
21:18 PGEN_.00g088260-vv0.74.a_NFIP1.for\n", "-rw-rw-r-- 1 sam sam 290K Jul 30 21:18 PGEN_.00g088260-vv0.74.a_NFIP1.rev\n", "-rw-rw-r-- 1 sam sam 13K Jul 30 21:18 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 70 Jul 30 21:18 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 6.8K Jul 30 21:19 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 27K Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 493K Jul 30 21:19 PGEN_.00g082590-vv0.74.a_TIF3s5.for\n", "-rw-rw-r-- 1 sam sam 471K Jul 30 21:19 PGEN_.00g082590-vv0.74.a_TIF3s5.rev\n", "-rw-rw-r-- 1 sam sam 30K Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 71 Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 139K Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 23K Jul 30 21:19 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 634K Jul 30 21:20 PGEN_.00g079690-vv0.74.a_TIF3s7.for\n", "-rw-rw-r-- 1 sam sam 594K Jul 30 21:20 PGEN_.00g079690-vv0.74.a_TIF3s7.rev\n", "-rw-rw-r-- 1 sam sam 26K Jul 30 21:20 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 71 Jul 30 21:20 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 2.7K Jul 30 21:20 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 13K Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 279K Jul 30 21:20 PGEN_.00g000750-vv0.74.a_TIF3s6b.for\n", "-rw-rw-r-- 1 sam sam 282K Jul 30 21:20 PGEN_.00g000750-vv0.74.a_TIF3s6b.rev\n", "-rw-rw-r-- 1 sam sam 16K Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 72 Jul 
30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 2.9M Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 68K Jul 30 21:20 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 2.2M Jul 30 21:21 PGEN_.00g280110-vv0.74.a_SPTN1.for\n", "-rw-rw-r-- 1 sam sam 2.3M Jul 30 21:21 PGEN_.00g280110-vv0.74.a_SPTN1.rev\n", "-rw-rw-r-- 1 sam sam 71K Jul 30 21:21 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 70 Jul 30 21:21 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 93K Jul 30 21:21 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primersearch.txt\n", "-rw-rw-r-- 1 sam sam 32K Jul 30 21:21 20200730_PGEN_.00g070040-vv0.74.a_APLP_primer3_params.txt\n", "-rw-rw-r-- 1 sam sam 944K Jul 30 21:22 PGEN_.00g070040-vv0.74.a_APLP.for\n", "-rw-rw-r-- 1 sam sam 966K Jul 30 21:22 PGEN_.00g070040-vv0.74.a_APLP.rev\n", "-rw-rw-r-- 1 sam sam 36K Jul 30 21:22 20200730_PGEN_.00g070040-vv0.74.a_APLP_primers_default_format.txt\n", "-rw-rw-r-- 1 sam sam 69 Jul 30 21:22 20200730_PGEN_.00g070040-vv0.74.a_APLP_emboss_primers.txt\n", "-rw-rw-r-- 1 sam sam 424 Jul 30 21:22 20200730_PGEN_.00g070040-vv0.74.a_APLP_primersearch.txt\n" ] } ], "source": [ "%%bash\n", "\n", "cd \"${out_dir}\"\n", "\n", "ls -ltrh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Count number of primer matches identified for each primer set\n", "\n", "Reminder: Each set should match at least once - to its own gene sequence from which it was derived.\n", "\n", "Reminder: These primers were designed against gene sequences, _not_ coding sequences.\n", "\n", "##### NOTE: Counts should be divided by 2, due to the presence of the word \"Amplimer\" occurring _twice_ per match in the primersearch output files! That means primer sets with a count of 2 have a single match." 
] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primersearch.txt:15512\n", "20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primersearch.txt:2\n", "20200730_PGEN_.00g070040-vv0.74.a_APLP_primersearch.txt:2\n", "20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primersearch.txt:14\n", "20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primersearch.txt:742\n", "20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primersearch.txt:36\n", "20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primersearch.txt:8596\n", "20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primersearch.txt:10\n", "20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primersearch.txt:7800\n", "20200730_PGEN_.00g188130-vv0.74.a_FEN1_primersearch.txt:2\n", "20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primersearch.txt:2\n", "20200730_PGEN_.00g224740-vv0.74.a_GLYG_primersearch.txt:46\n", "20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primersearch.txt:8\n", "20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primersearch.txt:496\n", "20200730_PGEN_.00g287540-vv0.74.a_RPL5_primersearch.txt:2570\n", "20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primersearch.txt:4\n", "20200730_PGEN_.00g338640-vv0.74.a_NSF_primersearch.txt:2\n" ] } ], "source": [
"%%bash\n",
"\n",
"# Stop if the analysis directory is missing so the glob below is never\n",
"# expanded in an unrelated directory.\n",
"cd \"${out_dir}\" || exit 1\n",
"\n",
"# Count the lines containing \"Amplimer\" in each primersearch report.\n",
"# The word is reported twice per match, so each printed count is twice the\n",
"# number of matches (a count of 2 means a single match).\n",
"# The pattern is the plain word: in a grep regex a trailing \"*\" is not a\n",
"# wildcard, it only makes the final \"r\" optional.\n",
"grep -c \"Amplimer\" *primersearch.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Print program options, for reference" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Pyfaidx command line program" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "usage: faidx [-h] [-b BED] [-o OUT]\n", " [-i {bed,chromsizes,nucleotide,transposed}] [-c] [-r] [-y]\n", " [-a SIZE_RANGE] [-n | -f] [-t] [-x] [-l] [-s DEFAULT_SEQ]\n", " [-d DELIMITER] [-e HEADER_FUNCTION]\n", " [-u {stop,first,last,longest,shortest}] [-g REGEX] [-v] [-m | -M]\n", " [--no-output] 
[--no-rebuild] [--version]\n", " fasta [regions [regions ...]]\n", "\n", "Fetch sequences from FASTA. If no regions are specified, all entries in the\n", "input file are returned. Input FASTA file must be consistently line-wrapped,\n", "and line wrapping of output is based on input line lengths.\n", "\n", "positional arguments:\n", " fasta FASTA file\n", " regions space separated regions of sequence to fetch e.g.\n", " chr1:1-1000\n", "\n", "optional arguments:\n", " -h, --help show this help message and exit\n", " --no-rebuild do not rebuild the .fai index even if it is out of\n", " date. default: False\n", " --version print pyfaidx version number\n", "\n", "input options:\n", " -b BED, --bed BED bed file of regions\n", "\n", "output options:\n", " -o OUT, --out OUT output file name (default: stdout)\n", " -i {bed,chromsizes,nucleotide,transposed}, --transform {bed,chromsizes,nucleotide,transposed}\n", " transform the requested regions into another format.\n", " default: None\n", " -c, --complement complement the sequence. default: False\n", " -r, --reverse reverse the sequence. default: False\n", " -y, --auto-strand reverse complement the sequence when start > end\n", " coordinate. default: False\n", " -a SIZE_RANGE, --size-range SIZE_RANGE\n", " selected sequences are in the size range [low, high].\n", " example: 1,1000 default: None\n", " -x, --split-files write each region to a separate file (names are\n", " derived from regions)\n", " -l, --lazy fill in --default-seq for missing ranges. default:\n", " False\n", " -s DEFAULT_SEQ, --default-seq DEFAULT_SEQ\n", " default base for missing positions and masking.\n", " default: None\n", " -m, --mask-with-default-seq\n", " mask the FASTA file using --default-seq default: False\n", " -M, --mask-by-case mask the FASTA file by changing to lowercase. default:\n", " False\n", " --no-output do not output any sequence. default: False\n", "\n", "header options:\n", " -n, --no-names omit sequence names from output. 
default: False\n", " -f, --long-names output full (long) names from the input fasta headers.\n", " default: headers are truncated after the first\n", " whitespace\n", " -t, --no-coords omit coordinates (e.g. chr:start-end) from output\n", " headers. default: False\n", " -d DELIMITER, --delimiter DELIMITER\n", " delimiter for splitting names to multiple values\n", " (duplicate names will be discarded). default: None\n", " -e HEADER_FUNCTION, --header-function HEADER_FUNCTION\n", " python function to modify header lines e.g: \"lambda x:\n", " x.split(\"|\")[0]\". default: lambda x: x.split()[0]\n", " -u {stop,first,last,longest,shortest}, --duplicates-action {stop,first,last,longest,shortest}\n", " entry to take when duplicate sequence names are\n", " encountered. default: stop\n", "\n", "matching arguments:\n", " -g REGEX, --regex REGEX\n", " selected sequences are those matching regular\n", " expression. default: .*\n", " -v, --invert-match selected sequences are those not matching 'regions'\n", " argument. default: False\n", "\n", "Please cite: Shirley MD, Ma Z, Pedersen BS, Wheelan SJ. (2015) Efficient\n", "\"pythonic\" access to FASTA files using pyfaidx. 
PeerJ PrePrints 3:e1196\n", "https://dx.doi.org/10.7287/peerj.preprints.970v1\n" ] } ], "source": [ "%%bash\n", "faidx -h" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Primer3" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "Copyright (c) 1996-2017\n", "Whitehead Institute for Biomedical Research, Steve Rozen\n", "(http://purl.com/STEVEROZEN/), Andreas Untergasser and Helen Skaletsky\n", "All rights reserved.\n", "\n", " This file is part of the primer3 suite and libraries.\n", "\n", " The primer3 suite and libraries are free software;\n", " you can redistribute them and/or modify them under the terms\n", " of the GNU General Public License as published by the Free\n", " Software Foundation; either version 2 of the License, or (at\n", " your option) any later version.\n", "\n", " This software is distributed in the hope that it will be useful,\n", " but WITHOUT ANY WARRANTY; without even the implied warranty of\n", " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n", " GNU General Public License for more details.\n", "\n", " You should have received a copy of the GNU General Public License\n", " along with this software (file gpl-2.0.txt in the source\n", " distribution); if not, write to the Free Software\n", " Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\n", "\n", "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n", "\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n", "LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n", "A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n", "OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n", "SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n", "LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n", "DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n", "THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n", "(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n", "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n", "\n", "\n", "\n", "USAGE: /home/sam/programs/primer3-2.4.0/src/primer3_core [--format_output] [--default_version=1|--default_version=2] [--io_version=4] [--p3_settings_file=] [--echo_settings_file] [--strict_tags] [--output=] [--error=] [input_file]\n", "This is primer3 (libprimer3 release 2.4.0)\n", "Input can also be provided on standard input.\n", "For example:\n", "$ primer3_core < my_input_file\n" ] }, { "ename": "CalledProcessError", "evalue": "Command 'b'${primer3} -help\\n'' returned non-zero exit status 255.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'bash'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'${primer3} -help\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m 2360\u001b[0m 
\u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2361\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2362\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2363\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magics/script.py\u001b[0m in \u001b[0;36mnamed_script_magic\u001b[0;34m(line, cell)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscript\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshebang\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# write a basic docstring:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m\u001b[0m in \u001b[0;36mshebang\u001b[0;34m(self, line, cell)\u001b[0m\n", 
"\u001b[0;32m~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magic.py\u001b[0m in \u001b[0;36m\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magics/script.py\u001b[0m in \u001b[0;36mshebang\u001b[0;34m(self, line, cell)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflush\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_error\u001b[0m \u001b[0;32mand\u001b[0m 
\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreturncode\u001b[0m\u001b[0;34m!=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mCalledProcessError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreturncode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstderr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_run_script\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mto_close\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mCalledProcessError\u001b[0m: Command 'b'${primer3} -help\\n'' returned non-zero exit status 255." 
] } ],
   "source": [
    "%%bash\n",
    "# The usage text printed by primer3_core lists no help option, and the recorded\n",
    "# run with -help printed the license/usage text and then exited with status 255.\n",
    "# Accept that non-zero status so this cell does not raise CalledProcessError\n",
    "# and stop a Restart & Run All.\n",
    "\"${primer3}\" -help || true"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### EMBOSS primersearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Search DNA sequences for matches with primer pairs\n",
      "Version: EMBOSS:6.6.0.0\n",
      "\n",
      " Standard (Mandatory) qualifiers:\n",
      " [-seqall] seqall Nucleotide sequence(s) filename and optional\n",
      " format, or reference (input USA)\n",
      " [-infile] infile Primer pairs file\n",
      " [-mismatchpercent] integer [0] Allowed percent mismatch (Any integer\n",
      " value)\n",
      " [-outfile] outfile [*.primersearch] Whitehead primer3_core\n",
      " program output file\n",
      "\n",
      " Additional (Optional) qualifiers: (none)\n",
      " Advanced (Unprompted) qualifiers: (none)\n",
      " Associated qualifiers:\n",
      "\n",
      " \"-seqall\" associated qualifiers\n",
      " -sbegin1 integer Start of each sequence to be used\n",
      " -send1 integer End of each sequence to be used\n",
      " -sreverse1 boolean Reverse (if DNA)\n",
      " -sask1 boolean Ask for begin/end/reverse\n",
      " -snucleotide1 boolean Sequence is nucleotide\n",
      " -sprotein1 boolean Sequence is protein\n",
      " -slower1 boolean Make lower case\n",
      " -supper1 boolean Make upper case\n",
      " -scircular1 boolean Sequence is circular\n",
      " -squick1 boolean Read id and sequence only\n",
      " -sformat1 string Input sequence format\n",
      " -iquery1 string Input query fields or ID list\n",
      " -ioffset1 integer Input start position offset\n",
      " -sdbname1 string Database name\n",
      " -sid1 string Entryname\n",
      " -ufo1 string UFO features\n",
      " -fformat1 string Features format\n",
      " -fopenfile1 string Features file name\n",
      "\n",
      " \"-outfile\" associated qualifiers\n",
      " -odirectory4 string Output directory\n",
      "\n",
      " General qualifiers:\n",
      " -auto boolean Turn off prompts\n",
      " -stdout boolean Write first file to standard output\n",
      " -filter boolean Read first file from standard input, write\n",
      " first file to standard output\n",
      " -options boolean Prompt for standard and additional values\n",
      " -debug boolean Write debug output to program.dbg\n",
      " -verbose boolean Report some/full command line options\n",
      " -help boolean Report command line options and exit. More\n",
      " information on associated and general\n",
      " qualifiers can be found with -help -verbose\n",
      " -warning boolean Report warnings\n",
      " -error boolean Report errors\n",
      " -fatal boolean Report fatal errors\n",
      " -die boolean Report dying program messages\n",
      " -version boolean Report version number and exit\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Print the full primersearch option listing, including associated and general\n",
    "# qualifiers (the recorded run wrote this text to stderr).\n",
    "\"${primersearch}\" -help -verbose"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}