{
 "metadata": {
  "name": "PhageNGS_ID"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /Users/Sam"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/Users/Sam\n"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd ../../Applications"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/Applications\n"
       ]
      }
     ],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /Applications/ncbi-blast-2.2.29+/bin"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/Applications/ncbi-blast-2.2.29+/bin\n"
       ]
      }
     ],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#blastn search of the unmapped-read contigs against the phage nucleotide database\n",
      "#paths are named once here so the command below stays readable and is easy to re-point\n",
      "query_fasta = '/Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa'\n",
      "phage_db = '/Volumes/homes/srlab/blastdbs/PhageGBnuc20130529filter'\n",
      "blast_out = '/Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBnt.txt'\n",
      "#output is tabular (outfmt 6); column order is stitle followed by std, i.e.\n",
      "#stitle qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore\n",
      "#-max_target_seqs 3 keeps at most 3 aligned subject sequences\n",
      "!./blastn -task blastn -query {query_fasta} -db {phage_db} -outfmt \"6 stitle std\" -max_target_seqs 3 -num_threads 16 -out {blast_out}"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 21
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head -10 /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBnt.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "gi|396576808|emb|FR775895.2| Enterobacteria phage phi92, complete genome\tContig5DeNovoAssembly\tgi|396576808|emb|FR775895.2|\t87.88\t33\t2\t2\t3\t34\t76644\t76613\t0.32\t35.6\r\n",
        "gi|15281680|gb|AF396866.1| Bacteriophage Mx8, complete genome\tContig5DeNovoAssembly\tgi|15281680|gb|AF396866.1|\t95.00\t20\t1\t0\t17\t36\t24781\t24762\t3.9\t31.9\r\n",
        "gi|15320570|ref|NC_003085.1| Myxococcus phage Mx8, complete genome\tContig5DeNovoAssembly\tgi|15320570|ref|NC_003085.1|\t95.00\t20\t1\t0\t17\t36\t24781\t24762\t3.9\t31.9\r\n",
        "gi|312262424|gb|GU396103.1| Aeromonas phage PX29, complete genome\tContig6DeNovoAssembly\tgi|312262424|gb|GU396103.1|\t100.00\t19\t0\t0\t42\t60\t200091\t200109\t0.39\t35.6\r\n",
        "gi|254211614|gb|GQ334450.1| Cyanophage PSS2, complete genome\tContig6DeNovoAssembly\tgi|254211614|gb|GQ334450.1|\t100.00\t17\t0\t0\t18\t34\t56942\t56926\t4.7\t31.9\r\n",
        "gi|254729462|ref|NC_013021.1| Cyanophage PSS2, complete genome\tContig6DeNovoAssembly\tgi|254729462|ref|NC_013021.1|\t100.00\t17\t0\t0\t18\t34\t56942\t56926\t4.7\t31.9\r\n",
        "gi|310005390|gb|GU075905.1| Prochlorococcus phage P-HM2, complete genome\tContig9DeNovoAssembly\tgi|310005390|gb|GU075905.1|\t85.71\t28\t4\t0\t11\t38\t180126\t180099\t1.3\t33.7\r\n",
        "gi|326782972|ref|NC_015284.1| Prochlorococcus phage P-HM2, complete genome\tContig9DeNovoAssembly\tgi|326782972|ref|NC_015284.1|\t85.71\t28\t4\t0\t11\t38\t180126\t180099\t1.3\t33.7\r\n",
        "gi|311788808|gb|HQ336222.2| Acanthamoeba polyphaga mimivirus, complete genome\tContig10DeNovoAssembly\tgi|311788808|gb|HQ336222.2|\t95.45\t22\t1\t0\t23\t44\t342629\t342608\t0.26\t35.6\r\n",
        "gi|311788808|gb|HQ336222.2| Acanthamoeba polyphaga mimivirus, complete genome\tContig10DeNovoAssembly\tgi|311788808|gb|HQ336222.2|\t80.00\t40\t5\t1\t9\t48\t993827\t993791\t0.26\t35.6\r\n"
       ]
      }
     ],
     "prompt_number": 22
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#shell commands need the leading ! in IPython; without it the line is parsed as Python and raises a SyntaxError\n",
      "!./blastn -h"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 27
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#can't remember column order of output file\n",
      "!./blastn -h"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "USAGE\r\n",
        "  blastn [-h] [-help] [-import_search_strategy filename]\r\n",
        "    [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n",
        "    [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n",
        "    [-negative_gilist filename] [-entrez_query entrez_query]\r\n",
        "    [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n",
        "    [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n",
        "    [-out output_file] [-evalue evalue] [-word_size int_value]\r\n",
        "    [-gapopen open_penalty] [-gapextend extend_penalty]\r\n",
        "    [-perc_identity float_value] [-xdrop_ungap float_value]\r\n",
        "    [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n",
        "    [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n",
        "    [-penalty penalty] [-reward reward] [-no_greedy]\r\n",
        "    [-min_raw_gapped_score int_value] [-template_type type]\r\n",
        "    [-template_length int_value] [-dust DUST_options]\r\n",
        "    [-filtering_db filtering_database]\r\n",
        "    [-window_masker_taxid window_masker_taxid]\r\n",
        "    [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n",
        "    [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n",
        "    [-best_hit_score_edge float_value] [-window_size int_value]\r\n",
        "    [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n",
        "    [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n",
        "    [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n",
        "    [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n",
        "    [-num_threads int_value] [-remote] [-version]\r\n",
        "\r\n",
        "DESCRIPTION\r\n",
        "   Nucleotide-Nucleotide BLAST 2.2.29+\r\n",
        "\r\n",
        "Use '-help' to print detailed descriptions of command line arguments\r\n"
       ]
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#can't remember column order of output file\n",
      "!./blastn -help"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "USAGE\r\n",
        "  blastn [-h] [-help] [-import_search_strategy filename]\r\n",
        "    [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n",
        "    [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n",
        "    [-negative_gilist filename] [-entrez_query entrez_query]\r\n",
        "    [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n",
        "    [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n",
        "    [-out output_file] [-evalue evalue] [-word_size int_value]\r\n",
        "    [-gapopen open_penalty] [-gapextend extend_penalty]\r\n",
        "    [-perc_identity float_value] [-xdrop_ungap float_value]\r\n",
        "    [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n",
        "    [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n",
        "    [-penalty penalty] [-reward reward] [-no_greedy]\r\n",
        "    [-min_raw_gapped_score int_value] [-template_type type]\r\n",
        "    [-template_length int_value] [-dust DUST_options]\r\n",
        "    [-filtering_db filtering_database]\r\n",
        "    [-window_masker_taxid window_masker_taxid]\r\n",
        "    [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n",
        "    [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n",
        "    [-best_hit_score_edge float_value] [-window_size int_value]\r\n",
        "    [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n",
        "    [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n",
        "    [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n",
        "    [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n",
        "    [-num_threads int_value] [-remote] [-version]\r\n",
        "\r\n",
        "DESCRIPTION\r\n",
        "   Nucleotide-Nucleotide BLAST 2.2.29+\r\n",
        "\r\n",
        "OPTIONAL ARGUMENTS\r\n",
        " -h\r\n",
        "   Print USAGE and DESCRIPTION;  ignore all other parameters\r\n",
        " -help\r\n",
        "   Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters\r\n",
        " -version\r\n",
        "   Print version number;  ignore other arguments\r\n",
        "\r\n",
        " *** Input query options\r\n",
        " -query <File_In>\r\n",
        "   Input file name\r\n",
        "   Default = `-'\r\n",
        " -query_loc <String>\r\n",
        "   Location on the query sequence in 1-based offsets (Format: start-stop)\r\n",
        " -strand <String, `both', `minus', `plus'>\r\n",
        "   Query strand(s) to search against database/subject\r\n",
        "   Default = `both'\r\n",
        "\r\n",
        " *** General search options\r\n",
        " -task <String, Permissible values: 'blastn' 'blastn-short' 'dc-megablast'\r\n",
        "                'megablast' 'rmblastn' >\r\n",
        "   Task to execute\r\n",
        "   Default = `megablast'\r\n",
        " -db <String>\r\n",
        "   BLAST database name\r\n",
        "    * Incompatible with:  subject, subject_loc\r\n",
        " -out <File_Out>\r\n",
        "   Output file name\r\n",
        "   Default = `-'\r\n",
        " -evalue <Real>\r\n",
        "   Expectation value (E) threshold for saving hits \r\n",
        "   Default = `10'\r\n",
        " -word_size <Integer, >=4>\r\n",
        "   Word size for wordfinder algorithm (length of best perfect match)\r\n",
        " -gapopen <Integer>\r\n",
        "   Cost to open a gap\r\n",
        " -gapextend <Integer>\r\n",
        "   Cost to extend a gap\r\n",
        " -penalty <Integer, <=0>\r\n",
        "   Penalty for a nucleotide mismatch\r\n",
        " -reward <Integer, >=0>\r\n",
        "   Reward for a nucleotide match\r\n",
        " -use_index <Boolean>\r\n",
        "   Use MegaBLAST database index\r\n",
        "   Default = `false'\r\n",
        " -index_name <String>\r\n",
        "   MegaBLAST database index name\r\n",
        "\r\n",
        " *** BLAST-2-Sequences options\r\n",
        " -subject <File_In>\r\n",
        "   Subject sequence(s) to search\r\n",
        "    * Incompatible with:  db, gilist, seqidlist, negative_gilist,\r\n",
        "   db_soft_mask, db_hard_mask\r\n",
        " -subject_loc <String>\r\n",
        "   Location on the subject sequence in 1-based offsets (Format: start-stop)\r\n",
        "    * Incompatible with:  db, gilist, seqidlist, negative_gilist,\r\n",
        "   db_soft_mask, db_hard_mask, remote\r\n",
        "\r\n",
        " *** Formatting options\r\n",
        " -outfmt <String>\r\n",
        "   alignment view options:\r\n",
        "     0 = pairwise,\r\n",
        "     1 = query-anchored showing identities,\r\n",
        "     2 = query-anchored no identities,\r\n",
        "     3 = flat query-anchored, show identities,\r\n",
        "     4 = flat query-anchored, no identities,\r\n",
        "     5 = XML Blast output,\r\n",
        "     6 = tabular,\r\n",
        "     7 = tabular with comment lines,\r\n",
        "     8 = Text ASN.1,\r\n",
        "     9 = Binary ASN.1,\r\n",
        "    10 = Comma-separated values,\r\n",
        "    11 = BLAST archive format (ASN.1) \r\n",
        "   \r\n",
        "   Options 6, 7, and 10 can be additionally configured to produce\r\n",
        "   a custom format specified by space delimited format specifiers.\r\n",
        "   The supported format specifiers are:\r\n",
        "   \t    qseqid means Query Seq-id\r\n",
        "   \t       qgi means Query GI\r\n",
        "   \t      qacc means Query accesion\r\n",
        "   \t   qaccver means Query accesion.version\r\n",
        "   \t      qlen means Query sequence length\r\n",
        "   \t    sseqid means Subject Seq-id\r\n",
        "   \t sallseqid means All subject Seq-id(s), separated by a ';'\r\n",
        "   \t       sgi means Subject GI\r\n",
        "   \t    sallgi means All subject GIs\r\n",
        "   \t      sacc means Subject accession\r\n",
        "   \t   saccver means Subject accession.version\r\n",
        "   \t   sallacc means All subject accessions\r\n",
        "   \t      slen means Subject sequence length\r\n",
        "   \t    qstart means Start of alignment in query\r\n",
        "   \t      qend means End of alignment in query\r\n",
        "   \t    sstart means Start of alignment in subject\r\n",
        "   \t      send means End of alignment in subject\r\n",
        "   \t      qseq means Aligned part of query sequence\r\n",
        "   \t      sseq means Aligned part of subject sequence\r\n",
        "   \t    evalue means Expect value\r\n",
        "   \t  bitscore means Bit score\r\n",
        "   \t     score means Raw score\r\n",
        "   \t    length means Alignment length\r\n",
        "   \t    pident means Percentage of identical matches\r\n",
        "   \t    nident means Number of identical matches\r\n",
        "   \t  mismatch means Number of mismatches\r\n",
        "   \t  positive means Number of positive-scoring matches\r\n",
        "   \t   gapopen means Number of gap openings\r\n",
        "   \t      gaps means Total number of gaps\r\n",
        "   \t      ppos means Percentage of positive-scoring matches\r\n",
        "   \t    frames means Query and subject frames separated by a '/'\r\n",
        "   \t    qframe means Query frame\r\n",
        "   \t    sframe means Subject frame\r\n",
        "   \t      btop means Blast traceback operations (BTOP)\r\n",
        "   \t   staxids means unique Subject Taxonomy ID(s), separated by a ';'\r\n",
        "   \t\t\t (in numerical order)\r\n",
        "   \t sscinames means unique Subject Scientific Name(s), separated by a ';'\r\n",
        "   \t scomnames means unique Subject Common Name(s), separated by a ';'\r\n",
        "   \tsblastnames means unique Subject Blast Name(s), separated by a ';'\r\n",
        "   \t\t\t (in alphabetical order)\r\n",
        "   \tsskingdoms means unique Subject Super Kingdom(s), separated by a ';'\r\n",
        "   \t\t\t (in alphabetical order) \r\n",
        "   \t    stitle means Subject Title\r\n",
        "   \tsalltitles means All Subject Title(s), separated by a '<>'\r\n",
        "   \t   sstrand means Subject Strand\r\n",
        "   \t     qcovs means Query Coverage Per Subject\r\n",
        "   \t   qcovhsp means Query Coverage Per HSP\r\n",
        "   When not provided, the default value is:\r\n",
        "   'qseqid sseqid pident length mismatch gapopen qstart qend sstart send\r\n",
        "   evalue bitscore', which is equivalent to the keyword 'std'\r\n",
        "   Default = `0'\r\n",
        " -show_gis\r\n",
        "   Show NCBI GIs in deflines?\r\n",
        " -num_descriptions <Integer, >=0>\r\n",
        "   Number of database sequences to show one-line descriptions for\r\n",
        "   Not applicable for outfmt > 4\r\n",
        "   Default = `500'\r\n",
        "    * Incompatible with:  max_target_seqs\r\n",
        " -num_alignments <Integer, >=0>\r\n",
        "   Number of database sequences to show alignments for\r\n",
        "   Default = `250'\r\n",
        "    * Incompatible with:  max_target_seqs\r\n",
        " -html\r\n",
        "   Produce HTML output?\r\n",
        "\r\n",
        " *** Query filtering options\r\n",
        " -dust <String>\r\n",
        "   Filter query sequence with DUST (Format: 'yes', 'level window linker', or\r\n",
        "   'no' to disable)\r\n",
        "   Default = `20 64 1'\r\n",
        " -filtering_db <String>\r\n",
        "   BLAST database containing filtering elements (i.e.: repeats)\r\n",
        " -window_masker_taxid <Integer>\r\n",
        "   Enable WindowMasker filtering using a Taxonomic ID\r\n",
        " -window_masker_db <String>\r\n",
        "   Enable WindowMasker filtering using this repeats database.\r\n",
        " -soft_masking <Boolean>\r\n",
        "   Apply filtering locations as soft masks\r\n",
        "   Default = `true'\r\n",
        " -lcase_masking\r\n",
        "   Use lower case filtering in query and subject sequence(s)?\r\n",
        "\r\n",
        " *** Restrict search or results\r\n",
        " -gilist <String>\r\n",
        "   Restrict search of database to list of GI's\r\n",
        "    * Incompatible with:  negative_gilist, seqidlist, remote, subject,\r\n",
        "   subject_loc\r\n",
        " -seqidlist <String>\r\n",
        "   Restrict search of database to list of SeqId's\r\n",
        "    * Incompatible with:  gilist, negative_gilist, remote, subject,\r\n",
        "   subject_loc\r\n",
        " -negative_gilist <String>\r\n",
        "   Restrict search of database to everything except the listed GIs\r\n",
        "    * Incompatible with:  gilist, seqidlist, remote, subject, subject_loc\r\n",
        " -entrez_query <String>\r\n",
        "   Restrict search with the given Entrez query\r\n",
        "    * Requires:  remote\r\n",
        " -db_soft_mask <String>\r\n",
        "   Filtering algorithm ID to apply to the BLAST database as soft masking\r\n",
        "    * Incompatible with:  db_hard_mask, subject, subject_loc\r\n",
        " -db_hard_mask <String>\r\n",
        "   Filtering algorithm ID to apply to the BLAST database as hard masking\r\n",
        "    * Incompatible with:  db_soft_mask, subject, subject_loc\r\n",
        " -perc_identity <Real, 0..100>\r\n",
        "   Percent identity\r\n",
        " -culling_limit <Integer, >=0>\r\n",
        "   If the query range of a hit is enveloped by that of at least this many\r\n",
        "   higher-scoring hits, delete the hit\r\n",
        "    * Incompatible with:  best_hit_overhang, best_hit_score_edge\r\n",
        " -best_hit_overhang <Real, (>=0 and =<0.5)>\r\n",
        "   Best Hit algorithm overhang value (recommended value: 0.1)\r\n",
        "    * Incompatible with:  culling_limit\r\n",
        " -best_hit_score_edge <Real, (>=0 and =<0.5)>\r\n",
        "   Best Hit algorithm score edge value (recommended value: 0.1)\r\n",
        "    * Incompatible with:  culling_limit\r\n",
        " -max_target_seqs <Integer, >=1>\r\n",
        "   Maximum number of aligned sequences to keep \r\n",
        "   Not applicable for outfmt <= 4\r\n",
        "   Default = `500'\r\n",
        "    * Incompatible with:  num_descriptions, num_alignments\r\n",
        "\r\n",
        " *** Discontiguous MegaBLAST options\r\n",
        " -template_type <String, `coding', `coding_and_optimal', `optimal'>\r\n",
        "   Discontiguous MegaBLAST template type\r\n",
        "    * Requires:  template_length\r\n",
        " -template_length <Integer, Permissible values: '16' '18' '21' >\r\n",
        "   Discontiguous MegaBLAST template length\r\n",
        "    * Requires:  template_type\r\n",
        "\r\n",
        " *** Statistical options\r\n",
        " -dbsize <Int8>\r\n",
        "   Effective length of the database \r\n",
        " -searchsp <Int8, >=0>\r\n",
        "   Effective length of the search space\r\n",
        " -max_hsps <Integer, >=0>\r\n",
        "   Set maximum number of HSPs per subject sequence to save (0 means no limit)\r\n",
        "   Default = `0'\r\n",
        " -sum_statistics\r\n",
        "   Use sum statistics\r\n",
        "\r\n",
        " *** Search strategy options\r\n",
        " -import_search_strategy <File_In>\r\n",
        "   Search strategy to use\r\n",
        "    * Incompatible with:  export_search_strategy\r\n",
        " -export_search_strategy <File_Out>\r\n",
        "   File name to record the search strategy used\r\n",
        "    * Incompatible with:  import_search_strategy\r\n",
        "\r\n",
        " *** Extension options\r\n",
        " -xdrop_ungap <Real>\r\n",
        "   X-dropoff value (in bits) for ungapped extensions\r\n",
        " -xdrop_gap <Real>\r\n",
        "   X-dropoff value (in bits) for preliminary gapped extensions\r\n",
        " -xdrop_gap_final <Real>\r\n",
        "   X-dropoff value (in bits) for final gapped alignment\r\n",
        " -no_greedy\r\n",
        "   Use non-greedy dynamic programming extension\r\n",
        " -min_raw_gapped_score <Integer>\r\n",
        "   Minimum raw gapped score to keep an alignment in the preliminary gapped and\r\n",
        "   traceback stages\r\n",
        " -ungapped\r\n",
        "   Perform ungapped alignment only?\r\n",
        " -window_size <Integer, >=0>\r\n",
        "   Multiple hits window size, use 0 to specify 1-hit algorithm\r\n",
        " -off_diagonal_range <Integer, >=0>\r\n",
        "   Number of off-diagonals to search for the 2nd hit, use 0 to turn off\r\n",
        "   Default = `0'\r\n",
        "\r\n",
        " *** Miscellaneous options\r\n",
        " -parse_deflines\r\n",
        "   Should the query and subject defline(s) be parsed?\r\n",
        " -num_threads <Integer, >=1>\r\n",
        "   Number of threads (CPUs) to use in the BLAST search\r\n",
        "   Default = `1'\r\n",
        "    * Incompatible with:  remote\r\n",
        " -remote\r\n",
        "   Execute search remotely?\r\n",
        "    * Incompatible with:  gilist, seqidlist, negative_gilist, subject_loc,\r\n",
        "   num_threads\r\n",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 29
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#removed \"pipe\" delimiters, added column headings in LibreOffice\n",
      "#renamed file: /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBntTAB.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 30
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#Now using Ubuntu.  Had to copy this notebook to iPython local directory, as iPython would not open it from\n",
      "#its location on Eagle.  :(\n",
      "#same with data file\n",
      "#use sed to remove spaces in contig names for subsequent joining using SQLShare\n",
      "!sed 's/ //g' /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369references.csv > /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369referencesNoSpaces.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head -10 /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369referencesNoSpaces.csv"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Name,Lengthofconsensussequence,Numberofreads,Averagecoverage,Referencesequences\r",
        "\r\n",
        "Contig5DeNovoAssemblymapping,106,90,24.67,Contig5DeNovoAssembly\r",
        "\r\n",
        "Contig6DeNovoAssemblymapping,125,40,12.744,Contig6DeNovoAssembly\r",
        "\r\n",
        "Contig9DeNovoAssemblymapping,121,36,13.198,Contig9DeNovoAssembly\r",
        "\r\n",
        "Contig10DeNovoAssemblymapping,92,31,13.848,Contig10DeNovoAssembly\r",
        "\r\n",
        "Contig11DeNovoAssemblymapping,116,30,11.026,Contig11DeNovoAssembly\r",
        "\r\n",
        "Contig13DeNovoAssemblymapping,50,5,4.9,Contig13DeNovoAssembly\r",
        "\r\n",
        "Contig15DeNovoAssemblymapping,64,11,7.141,Contig15DeNovoAssembly\r",
        "\r\n",
        "Contig16DeNovoAssemblymapping,50,6,4.76,Contig16DeNovoAssembly\r",
        "\r\n",
        "Contig17DeNovoAssemblymapping,70,41,16.829,Contig17DeNovoAssembly\r",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "The sed command worked. No more spaces in the contig names.  Quick code explanation for 's/ //g': s - substitute; the text between the first two slashes (a single space) is the pattern to match; the text between the second and third slashes (nothing) is the replacement; g - apply globally, i.e. make the substitution at every match on the line, not just at the first instance of the match."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#Installed BLAST2.2.29+ on Linux\n",
      "#Goal: append install location to the PATH; this cell only moves up two directories as a first step\n",
      "#use the %cd magic here: \"!cd\" runs in a throwaway subshell and leaves the notebook's working directory unchanged\n",
      "%cd ../.."
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 35
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pwd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 36,
       "text": [
        "u'/home/samb'"
       ]
      }
     ],
     "prompt_number": 36
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd .."
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/home"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 37
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/\n"
       ]
      }
     ],
     "prompt_number": 38
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\u001b[0m\u001b[01;34mbin\u001b[0m/    \u001b[01;34metc\u001b[0m/             \u001b[01;34mlib\u001b[0m/         \u001b[01;34mmnt\u001b[0m/   \u001b[01;34mrun\u001b[0m/   \u001b[30;42mtmp\u001b[0m/      \u001b[01;36mvmlinuz.old\u001b[0m@\r\n",
        "\u001b[01;34mboot\u001b[0m/   \u001b[01;34mhome\u001b[0m/            \u001b[01;34mlib64\u001b[0m/       \u001b[01;34mopt\u001b[0m/   \u001b[01;34msbin\u001b[0m/  \u001b[01;34musr\u001b[0m/\r\n",
        "\u001b[01;34mcdrom\u001b[0m/  \u001b[01;36minitrd.img\u001b[0m@      \u001b[01;34mlost+found\u001b[0m/  \u001b[01;34mproc\u001b[0m/  \u001b[01;34msrv\u001b[0m/   \u001b[01;34mvar\u001b[0m/\r\n",
        "\u001b[01;34mdev\u001b[0m/    \u001b[01;36minitrd.img.old\u001b[0m@  \u001b[01;34mmedia\u001b[0m/       \u001b[01;34mroot\u001b[0m/  \u001b[01;34msys\u001b[0m/   \u001b[01;36mvmlinuz\u001b[0m@\r\n"
       ]
      }
     ],
     "prompt_number": 39
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd etc/profile"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[Errno 20] Not a directory: 'etc/profile'\n",
        "/\n"
       ]
      }
     ],
     "prompt_number": 40
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /etc/profile"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[Errno 20] Not a directory: '/etc/profile'\n",
        "/\n"
       ]
      }
     ],
     "prompt_number": 41
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd etc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/etc\n"
       ]
      }
     ],
     "prompt_number": 42
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\u001b[0m\u001b[01;34macpi\u001b[0m/                   host.conf                \u001b[01;36mprintcap\u001b[0m@\r\n",
        "adduser.conf            hostname                 profile\r\n",
        "\u001b[01;34malternatives\u001b[0m/           hostname~                \u001b[01;34mprofile.d\u001b[0m/\r\n",
        "anacrontab              hostname.old             protocols\r\n",
        "apg.conf                hosts                    \u001b[01;34mpulse\u001b[0m/\r\n",
        "\u001b[01;34mapm\u001b[0m/                    hosts~                   \u001b[01;34mpython\u001b[0m/\r\n",
        "\u001b[01;34mapparmor\u001b[0m/               hosts.allow              \u001b[01;34mpython2.7\u001b[0m/\r\n",
        "\u001b[01;34mapparmor.d\u001b[0m/             hosts.deny               \u001b[01;34mpython3\u001b[0m/\r\n",
        "\u001b[01;34mapport\u001b[0m/                 hosts.old                \u001b[01;34mpython3.3\u001b[0m/\r\n",
        "\u001b[01;34mapt\u001b[0m/                    \u001b[01;34mhp\u001b[0m/                      \u001b[01;34mrc0.d\u001b[0m/\r\n",
        "\u001b[01;34maptdaemon\u001b[0m/              \u001b[01;34mifplugd\u001b[0m/                 \u001b[01;34mrc1.d\u001b[0m/\r\n",
        "\u001b[01;34mat-spi2\u001b[0m/                \u001b[01;34mImageMagick\u001b[0m/             \u001b[01;34mrc2.d\u001b[0m/\r\n",
        "\u001b[01;34mavahi\u001b[0m/                  \u001b[01;34minit\u001b[0m/                    \u001b[01;34mrc3.d\u001b[0m/\r\n",
        "bash.bashrc             \u001b[01;34minit.d\u001b[0m/                  \u001b[01;34mrc4.d\u001b[0m/\r\n",
        "bash_completion         \u001b[01;34minitramfs-tools\u001b[0m/         \u001b[01;34mrc5.d\u001b[0m/\r\n",
        "\u001b[01;34mbash_completion.d\u001b[0m/      inputrc                  \u001b[01;34mrc6.d\u001b[0m/\r\n",
        "bindresvport.blacklist  \u001b[01;34minsserv\u001b[0m/                 \u001b[01;32mrc.local\u001b[0m*\r\n",
        "blkid.conf              insserv.conf             \u001b[01;34mrcS.d\u001b[0m/\r\n",
        "\u001b[40;31;01mblkid.tab\u001b[0m@              \u001b[01;34minsserv.conf.d\u001b[0m/          remote-login-service.conf\r\n",
        "\u001b[01;34mbluetooth\u001b[0m/              \u001b[01;34miproute2\u001b[0m/                \u001b[01;34mresolvconf\u001b[0m/\r\n",
        "\u001b[01;34mbonobo-activation\u001b[0m/      issue                    \u001b[01;36mresolv.conf\u001b[0m@\r\n",
        "brlapi.key              issue.net                \u001b[01;32mrmt\u001b[0m*\r\n",
        "\u001b[01;34mbrltty\u001b[0m/                 \u001b[01;34mkbd\u001b[0m/                     rpc\r\n",
        "brltty.conf             \u001b[01;34mkernel\u001b[0m/                  rsyslog.conf\r\n",
        "\u001b[01;34mca-certificates\u001b[0m/        kernel-img.conf          \u001b[01;34mrsyslog.d\u001b[0m/\r\n",
        "ca-certificates.conf    kerneloops.conf          \u001b[01;34msamba\u001b[0m/\r\n",
        "\u001b[01;34mcalendar\u001b[0m/               \u001b[01;34mldap\u001b[0m/                    \u001b[01;34msane.d\u001b[0m/\r\n",
        "\u001b[01;34mchatscripts\u001b[0m/            ld.so.cache              securetty\r\n",
        "\u001b[01;34mcheckbox.d\u001b[0m/             ld.so.conf               \u001b[01;34msecurity\u001b[0m/\r\n",
        "\u001b[01;34mchromium-browser\u001b[0m/       \u001b[01;34mld.so.conf.d\u001b[0m/            \u001b[01;34mselinux\u001b[0m/\r\n",
        "colord.conf             legal                    sensors3.conf\r\n",
        "\u001b[01;34mcompizconfig\u001b[0m/           libaudit.conf            \u001b[01;34msensors.d\u001b[0m/\r\n",
        "\u001b[01;34mconsole-setup\u001b[0m/          \u001b[01;34mlibnl-3\u001b[0m/                 services\r\n",
        "\u001b[01;34mcracklib\u001b[0m/               \u001b[01;34mlibpaper.d\u001b[0m/              \u001b[01;34msgml\u001b[0m/\r\n",
        "\u001b[01;34mcron.d\u001b[0m/                 \u001b[01;34mlibreoffice\u001b[0m/             shadow\r\n",
        "\u001b[01;34mcron.daily\u001b[0m/             \u001b[01;34mlightdm\u001b[0m/                 shadow-\r\n",
        "\u001b[01;34mcron.hourly\u001b[0m/            lintianrc                shells\r\n",
        "\u001b[01;34mcron.monthly\u001b[0m/           locale.alias             signond.conf\r\n",
        "crontab                 localtime                \u001b[01;34msignon-ui\u001b[0m/\r\n",
        "\u001b[01;34mcron.weekly\u001b[0m/            \u001b[01;34mlogcheck\u001b[0m/                \u001b[01;34mskel\u001b[0m/\r\n",
        "\u001b[01;34mcups\u001b[0m/                   login.defs               \u001b[01;34msound\u001b[0m/\r\n",
        "\u001b[01;34mcupshelpers\u001b[0m/            logrotate.conf           \u001b[01;34mspeech-dispatcher\u001b[0m/\r\n",
        "\u001b[01;34mdbus-1\u001b[0m/                 \u001b[01;34mlogrotate.d\u001b[0m/             \u001b[01;34mssh\u001b[0m/\r\n",
        "\u001b[01;34mdconf\u001b[0m/                  lsb-release              \u001b[01;34mssl\u001b[0m/\r\n",
        "debconf.conf            ltrace.conf              subgid\r\n",
        "debian_version          magic                    subgid-\r\n",
        "\u001b[01;34mdefault\u001b[0m/                magic.mime               subuid\r\n",
        "deluser.conf            mailcap                  subuid-\r\n",
        "\u001b[01;34mdepmod.d\u001b[0m/               mailcap.order            sudoers\r\n",
        "\u001b[01;34mdhcp\u001b[0m/                   manpath.config           \u001b[01;34msudoers.d\u001b[0m/\r\n",
        "\u001b[01;34mdhcp3\u001b[0m/                  mime.types               sysctl.conf\r\n",
        "\u001b[01;34mdictionaries-common\u001b[0m/    mke2fs.conf              \u001b[01;34msysctl.d\u001b[0m/\r\n",
        "\u001b[01;34mdnsmasq.d\u001b[0m/              \u001b[01;34mmodprobe.d\u001b[0m/              \u001b[01;34msystemd\u001b[0m/\r\n",
        "\u001b[01;34mdoc-base\u001b[0m/               modules                  \u001b[01;34mterminfo\u001b[0m/\r\n",
        "\u001b[01;34mdpkg\u001b[0m/                   mtab                     \u001b[01;34mthunderbird\u001b[0m/\r\n",
        "drirc                   mtab.fuselock            timezone\r\n",
        "\u001b[01;34memacs\u001b[0m/                  mtools.conf              \u001b[01;34mtimidity\u001b[0m/\r\n",
        "environment             \u001b[01;34mmysql\u001b[0m/                   ts.conf\r\n",
        "\u001b[01;34mfirefox\u001b[0m/                nanorc                   ucf.conf\r\n",
        "\u001b[01;34mfonts\u001b[0m/                  netscsid.conf            \u001b[01;34mudev\u001b[0m/\r\n",
        "\u001b[01;34mfoomatic\u001b[0m/               \u001b[01;34mnetwork\u001b[0m/                 \u001b[01;34mudisks2\u001b[0m/\r\n",
        "fstab                   \u001b[01;34mNetworkManager\u001b[0m/          \u001b[01;34mufw\u001b[0m/\r\n",
        "\u001b[01;34mfstab.d\u001b[0m/                networks                 updatedb.conf\r\n",
        "fuse.conf               \u001b[01;34mnewt\u001b[0m/                    \u001b[01;34mupdate-manager\u001b[0m/\r\n",
        "gai.conf                nsswitch.conf            \u001b[01;34mupdate-motd.d\u001b[0m/\r\n",
        "\u001b[01;34mgconf\u001b[0m/                  \u001b[01;34mobex-data-server\u001b[0m/        \u001b[01;34mupdate-notifier\u001b[0m/\r\n",
        "\u001b[01;34mgdb\u001b[0m/                    \u001b[01;34mopenal\u001b[0m/                  \u001b[01;34mUPower\u001b[0m/\r\n",
        "\u001b[01;34mghostscript\u001b[0m/            \u001b[01;34mopt\u001b[0m/                     upstart-xsessions\r\n",
        "\u001b[01;34mgnome\u001b[0m/                  os-release               usb_modeswitch.conf\r\n",
        "\u001b[01;34mgnome-app-install\u001b[0m/      pam.conf                 \u001b[01;34musb_modeswitch.d\u001b[0m/\r\n",
        "\u001b[01;34mgnome-settings-daemon\u001b[0m/  \u001b[01;34mpam.d\u001b[0m/                   \u001b[01;34mvim\u001b[0m/\r\n",
        "\u001b[01;34mgnome-vfs-2.0\u001b[0m/          papersize                \u001b[01;36mvtrgb\u001b[0m@\r\n",
        "\u001b[01;34mgroff\u001b[0m/                  passwd                   wgetrc\r\n",
        "group                   passwd-                  \u001b[01;34mwildmidi\u001b[0m/\r\n",
        "group-                  \u001b[01;34mpcmcia\u001b[0m/                  wodim.conf\r\n",
        "\u001b[01;34mgrub.d\u001b[0m/                 \u001b[01;34mperl\u001b[0m/                    \u001b[01;34mwpa_supplicant\u001b[0m/\r\n",
        "gshadow                 \u001b[01;34mpm\u001b[0m/                      \u001b[01;34mX11\u001b[0m/\r\n",
        "gshadow-                pnm2ppa.conf             \u001b[01;34mxdg\u001b[0m/\r\n",
        "\u001b[01;34mgtk-2.0\u001b[0m/                \u001b[01;34mpolkit-1\u001b[0m/                \u001b[01;34mxml\u001b[0m/\r\n",
        "\u001b[01;34mgtk-3.0\u001b[0m/                popularity-contest.conf  \u001b[01;34mxul-ext\u001b[0m/\r\n",
        "hdparm.conf             \u001b[01;34mppp\u001b[0m/                     zsh_command_not_found\r\n"
       ]
      }
     ],
     "prompt_number": 43
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd profile.d"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/etc/profile.d"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 46
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "bash_completion.sh  vte.sh\r\n"
       ]
      }
     ],
     "prompt_number": 47
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: cannot create myenvvars.sh: Permission denied\r\n"
       ]
      }
     ],
     "prompt_number": 49
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!sudo export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: cannot create myenvvars.sh: Permission denied\r\n"
       ]
      }
     ],
     "prompt_number": 50
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!gedit bash_completion.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "** (gedit:4854): WARNING **: Could not load Gedit repository: Typelib file for namespace 'GtkSource', version '3.0' not found\r\n"
       ]
      }
     ],
     "prompt_number": 51
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!gedit vte.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "** (gedit:4874): WARNING **: Could not load Gedit repository: Typelib file for namespace 'GtkSource', version '3.0' not found\r\n"
       ]
      }
     ],
     "prompt_number": 52
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pwd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 53,
       "text": [
        "u'/etc/profile.d'"
       ]
      }
     ],
     "prompt_number": 53
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: cannot create myenvvars.sh: Permission denied\r\n"
       ]
      }
     ],
     "prompt_number": 54
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls -ld"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "drwxr-xr-x 2 root root 4096 Oct 16 12:02 \u001b[0m\u001b[01;34m.\u001b[0m/\r\n"
       ]
      }
     ],
     "prompt_number": 55
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls -l vte.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "-rw-r--r-- 1 root root 1945 Jun 20  2013 vte.sh\r\n"
       ]
      }
     ],
     "prompt_number": 56
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#need to change permissions on this directory (/etc/profile.d) in order to write to it\n",
      "#can't change permissions via IPython; will do this in the terminal\n",
      "#sudo chmod 757 /etc/profile.d\n",
      "#was 755, with root as owner and group"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 57
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!echo $PATH"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games\r\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Need to restart the Terminal in order for the changes to PATH to be loaded. UPDATE: I think myenvvars.sh got written to the wrong directory (the notebook's current directory instead of /etc/profile.d)!"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!echo $PATH"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games\r\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /etc/profile.d"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/etc/profile.d\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "bash_completion.sh  vte.sh\r\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh\n",
      "#oddly, this didn't write any text to the file: export prints nothing to stdout,\n",
      "#so the redirect only creates an empty myenvvars.sh\n",
      "#I also think I need the \"#!/bin/bash\" at the beginning of the script\n",
      "#Added both (the shebang and the export line) to the script manually using gedit"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "bash_completion.sh  myenvvars.sh  vte.sh\r\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!echo $PATH"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin\r\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pwd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "u'/home/samb'"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#let's see if this worked\n",
      "!./blastn -h"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: ./blastn: not found\r\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "blastn -help"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "NameError",
       "evalue": "name 'blastn' is not defined",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-6-f74ffb396a19>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mblastn\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mhelp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[0;31mNameError\u001b[0m: name 'blastn' is not defined"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -help"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "USAGE\r\n",
        "  blastn [-h] [-help] [-import_search_strategy filename]\r\n",
        "    [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n",
        "    [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n",
        "    [-negative_gilist filename] [-entrez_query entrez_query]\r\n",
        "    [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n",
        "    [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n",
        "    [-out output_file] [-evalue evalue] [-word_size int_value]\r\n",
        "    [-gapopen open_penalty] [-gapextend extend_penalty]\r\n",
        "    [-perc_identity float_value] [-xdrop_ungap float_value]\r\n",
        "    [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n",
        "    [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n",
        "    [-penalty penalty] [-reward reward] [-no_greedy]\r\n",
        "    [-min_raw_gapped_score int_value] [-template_type type]\r\n",
        "    [-template_length int_value] [-dust DUST_options]\r\n",
        "    [-filtering_db filtering_database]\r\n",
        "    [-window_masker_taxid window_masker_taxid]\r\n",
        "    [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n",
        "    [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n",
        "    [-best_hit_score_edge float_value] [-window_size int_value]\r\n",
        "    [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n",
        "    [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n",
        "    [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n",
        "    [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n",
        "    [-num_threads int_value] [-remote] [-version]\r\n",
        "\r\n",
        "DESCRIPTION\r\n",
        "   Nucleotide-Nucleotide BLAST 2.2.29+\r\n",
        "\r\n",
        "OPTIONAL ARGUMENTS\r\n",
        " -h\r\n",
        "   Print USAGE and DESCRIPTION;  ignore all other parameters\r\n",
        " -help\r\n",
        "   Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters\r\n",
        " -version\r\n",
        "   Print version number;  ignore other arguments\r\n",
        "\r\n",
        " *** Input query options\r\n",
        " -query <File_In>\r\n",
        "   Input file name\r\n",
        "   Default = `-'\r\n",
        " -query_loc <String>\r\n",
        "   Location on the query sequence in 1-based offsets (Format: start-stop)\r\n",
        " -strand <String, `both', `minus', `plus'>\r\n",
        "   Query strand(s) to search against database/subject\r\n",
        "   Default = `both'\r\n",
        "\r\n",
        " *** General search options\r\n",
        " -task <String, Permissible values: 'blastn' 'blastn-short' 'dc-megablast'\r\n",
        "                'megablast' 'rmblastn' >\r\n",
        "   Task to execute\r\n",
        "   Default = `megablast'\r\n",
        " -db <String>\r\n",
        "   BLAST database name\r\n",
        "    * Incompatible with:  subject, subject_loc\r\n",
        " -out <File_Out>\r\n",
        "   Output file name\r\n",
        "   Default = `-'\r\n",
        " -evalue <Real>\r\n",
        "   Expectation value (E) threshold for saving hits \r\n",
        "   Default = `10'\r\n",
        " -word_size <Integer, >=4>\r\n",
        "   Word size for wordfinder algorithm (length of best perfect match)\r\n",
        " -gapopen <Integer>\r\n",
        "   Cost to open a gap\r\n",
        " -gapextend <Integer>\r\n",
        "   Cost to extend a gap\r\n",
        " -penalty <Integer, <=0>\r\n",
        "   Penalty for a nucleotide mismatch\r\n",
        " -reward <Integer, >=0>\r\n",
        "   Reward for a nucleotide match\r\n",
        " -use_index <Boolean>\r\n",
        "   Use MegaBLAST database index\r\n",
        "   Default = `false'\r\n",
        " -index_name <String>\r\n",
        "   MegaBLAST database index name\r\n",
        "\r\n",
        " *** BLAST-2-Sequences options\r\n",
        " -subject <File_In>\r\n",
        "   Subject sequence(s) to search\r\n",
        "    * Incompatible with:  db, gilist, seqidlist, negative_gilist,\r\n",
        "   db_soft_mask, db_hard_mask\r\n",
        " -subject_loc <String>\r\n",
        "   Location on the subject sequence in 1-based offsets (Format: start-stop)\r\n",
        "    * Incompatible with:  db, gilist, seqidlist, negative_gilist,\r\n",
        "   db_soft_mask, db_hard_mask, remote\r\n",
        "\r\n",
        " *** Formatting options\r\n",
        " -outfmt <String>\r\n",
        "   alignment view options:\r\n",
        "     0 = pairwise,\r\n",
        "     1 = query-anchored showing identities,\r\n",
        "     2 = query-anchored no identities,\r\n",
        "     3 = flat query-anchored, show identities,\r\n",
        "     4 = flat query-anchored, no identities,\r\n",
        "     5 = XML Blast output,\r\n",
        "     6 = tabular,\r\n",
        "     7 = tabular with comment lines,\r\n",
        "     8 = Text ASN.1,\r\n",
        "     9 = Binary ASN.1,\r\n",
        "    10 = Comma-separated values,\r\n",
        "    11 = BLAST archive format (ASN.1) \r\n",
        "   \r\n",
        "   Options 6, 7, and 10 can be additionally configured to produce\r\n",
        "   a custom format specified by space delimited format specifiers.\r\n",
        "   The supported format specifiers are:\r\n",
        "   \t    qseqid means Query Seq-id\r\n",
        "   \t       qgi means Query GI\r\n",
        "   \t      qacc means Query accesion\r\n",
        "   \t   qaccver means Query accesion.version\r\n",
        "   \t      qlen means Query sequence length\r\n",
        "   \t    sseqid means Subject Seq-id\r\n",
        "   \t sallseqid means All subject Seq-id(s), separated by a ';'\r\n",
        "   \t       sgi means Subject GI\r\n",
        "   \t    sallgi means All subject GIs\r\n",
        "   \t      sacc means Subject accession\r\n",
        "   \t   saccver means Subject accession.version\r\n",
        "   \t   sallacc means All subject accessions\r\n",
        "   \t      slen means Subject sequence length\r\n",
        "   \t    qstart means Start of alignment in query\r\n",
        "   \t      qend means End of alignment in query\r\n",
        "   \t    sstart means Start of alignment in subject\r\n",
        "   \t      send means End of alignment in subject\r\n",
        "   \t      qseq means Aligned part of query sequence\r\n",
        "   \t      sseq means Aligned part of subject sequence\r\n",
        "   \t    evalue means Expect value\r\n",
        "   \t  bitscore means Bit score\r\n",
        "   \t     score means Raw score\r\n",
        "   \t    length means Alignment length\r\n",
        "   \t    pident means Percentage of identical matches\r\n",
        "   \t    nident means Number of identical matches\r\n",
        "   \t  mismatch means Number of mismatches\r\n",
        "   \t  positive means Number of positive-scoring matches\r\n",
        "   \t   gapopen means Number of gap openings\r\n",
        "   \t      gaps means Total number of gaps\r\n",
        "   \t      ppos means Percentage of positive-scoring matches\r\n",
        "   \t    frames means Query and subject frames separated by a '/'\r\n",
        "   \t    qframe means Query frame\r\n",
        "   \t    sframe means Subject frame\r\n",
        "   \t      btop means Blast traceback operations (BTOP)\r\n",
        "   \t   staxids means unique Subject Taxonomy ID(s), separated by a ';'\r\n",
        "   \t\t\t (in numerical order)\r\n",
        "   \t sscinames means unique Subject Scientific Name(s), separated by a ';'\r\n",
        "   \t scomnames means unique Subject Common Name(s), separated by a ';'\r\n",
        "   \tsblastnames means unique Subject Blast Name(s), separated by a ';'\r\n",
        "   \t\t\t (in alphabetical order)\r\n",
        "   \tsskingdoms means unique Subject Super Kingdom(s), separated by a ';'\r\n",
        "   \t\t\t (in alphabetical order) \r\n",
        "   \t    stitle means Subject Title\r\n",
        "   \tsalltitles means All Subject Title(s), separated by a '<>'\r\n",
        "   \t   sstrand means Subject Strand\r\n",
        "   \t     qcovs means Query Coverage Per Subject\r\n",
        "   \t   qcovhsp means Query Coverage Per HSP\r\n",
        "   When not provided, the default value is:\r\n",
        "   'qseqid sseqid pident length mismatch gapopen qstart qend sstart send\r\n",
        "   evalue bitscore', which is equivalent to the keyword 'std'\r\n",
        "   Default = `0'\r\n",
        " -show_gis\r\n",
        "   Show NCBI GIs in deflines?\r\n",
        " -num_descriptions <Integer, >=0>\r\n",
        "   Number of database sequences to show one-line descriptions for\r\n",
        "   Not applicable for outfmt > 4\r\n",
        "   Default = `500'\r\n",
        "    * Incompatible with:  max_target_seqs\r\n",
        " -num_alignments <Integer, >=0>\r\n",
        "   Number of database sequences to show alignments for\r\n",
        "   Default = `250'\r\n",
        "    * Incompatible with:  max_target_seqs\r\n",
        " -html\r\n",
        "   Produce HTML output?\r\n",
        "\r\n",
        " *** Query filtering options\r\n",
        " -dust <String>\r\n",
        "   Filter query sequence with DUST (Format: 'yes', 'level window linker', or\r\n",
        "   'no' to disable)\r\n",
        "   Default = `20 64 1'\r\n",
        " -filtering_db <String>\r\n",
        "   BLAST database containing filtering elements (i.e.: repeats)\r\n",
        " -window_masker_taxid <Integer>\r\n",
        "   Enable WindowMasker filtering using a Taxonomic ID\r\n",
        " -window_masker_db <String>\r\n",
        "   Enable WindowMasker filtering using this repeats database.\r\n",
        " -soft_masking <Boolean>\r\n",
        "   Apply filtering locations as soft masks\r\n",
        "   Default = `true'\r\n",
        " -lcase_masking\r\n",
        "   Use lower case filtering in query and subject sequence(s)?\r\n",
        "\r\n",
        " *** Restrict search or results\r\n",
        " -gilist <String>\r\n",
        "   Restrict search of database to list of GI's\r\n",
        "    * Incompatible with:  negative_gilist, seqidlist, remote, subject,\r\n",
        "   subject_loc\r\n",
        " -seqidlist <String>\r\n",
        "   Restrict search of database to list of SeqId's\r\n",
        "    * Incompatible with:  gilist, negative_gilist, remote, subject,\r\n",
        "   subject_loc\r\n",
        " -negative_gilist <String>\r\n",
        "   Restrict search of database to everything except the listed GIs\r\n",
        "    * Incompatible with:  gilist, seqidlist, remote, subject, subject_loc\r\n",
        " -entrez_query <String>\r\n",
        "   Restrict search with the given Entrez query\r\n",
        "    * Requires:  remote\r\n",
        " -db_soft_mask <String>\r\n",
        "   Filtering algorithm ID to apply to the BLAST database as soft masking\r\n",
        "    * Incompatible with:  db_hard_mask, subject, subject_loc\r\n",
        " -db_hard_mask <String>\r\n",
        "   Filtering algorithm ID to apply to the BLAST database as hard masking\r\n",
        "    * Incompatible with:  db_soft_mask, subject, subject_loc\r\n",
        " -perc_identity <Real, 0..100>\r\n",
        "   Percent identity\r\n",
        " -culling_limit <Integer, >=0>\r\n",
        "   If the query range of a hit is enveloped by that of at least this many\r\n",
        "   higher-scoring hits, delete the hit\r\n",
        "    * Incompatible with:  best_hit_overhang, best_hit_score_edge\r\n",
        " -best_hit_overhang <Real, (>=0 and =<0.5)>\r\n",
        "   Best Hit algorithm overhang value (recommended value: 0.1)\r\n",
        "    * Incompatible with:  culling_limit\r\n",
        " -best_hit_score_edge <Real, (>=0 and =<0.5)>\r\n",
        "   Best Hit algorithm score edge value (recommended value: 0.1)\r\n",
        "    * Incompatible with:  culling_limit\r\n",
        " -max_target_seqs <Integer, >=1>\r\n",
        "   Maximum number of aligned sequences to keep \r\n",
        "   Not applicable for outfmt <= 4\r\n",
        "   Default = `500'\r\n",
        "    * Incompatible with:  num_descriptions, num_alignments\r\n",
        "\r\n",
        " *** Discontiguous MegaBLAST options\r\n",
        " -template_type <String, `coding', `coding_and_optimal', `optimal'>\r\n",
        "   Discontiguous MegaBLAST template type\r\n",
        "    * Requires:  template_length\r\n",
        " -template_length <Integer, Permissible values: '16' '18' '21' >\r\n",
        "   Discontiguous MegaBLAST template length\r\n",
        "    * Requires:  template_type\r\n",
        "\r\n",
        " *** Statistical options\r\n",
        " -dbsize <Int8>\r\n",
        "   Effective length of the database \r\n",
        " -searchsp <Int8, >=0>\r\n",
        "   Effective length of the search space\r\n",
        " -max_hsps <Integer, >=0>\r\n",
        "   Set maximum number of HSPs per subject sequence to save (0 means no limit)\r\n",
        "   Default = `0'\r\n",
        " -sum_statistics\r\n",
        "   Use sum statistics\r\n",
        "\r\n",
        " *** Search strategy options\r\n",
        " -import_search_strategy <File_In>\r\n",
        "   Search strategy to use\r\n",
        "    * Incompatible with:  export_search_strategy\r\n",
        " -export_search_strategy <File_Out>\r\n",
        "   File name to record the search strategy used\r\n",
        "    * Incompatible with:  import_search_strategy\r\n",
        "\r\n",
        " *** Extension options\r\n",
        " -xdrop_ungap <Real>\r\n",
        "   X-dropoff value (in bits) for ungapped extensions\r\n",
        " -xdrop_gap <Real>\r\n",
        "   X-dropoff value (in bits) for preliminary gapped extensions\r\n",
        " -xdrop_gap_final <Real>\r\n",
        "   X-dropoff value (in bits) for final gapped alignment\r\n",
        " -no_greedy\r\n",
        "   Use non-greedy dynamic programming extension\r\n",
        " -min_raw_gapped_score <Integer>\r\n",
        "   Minimum raw gapped score to keep an alignment in the preliminary gapped and\r\n",
        "   traceback stages\r\n",
        " -ungapped\r\n",
        "   Perform ungapped alignment only?\r\n",
        " -window_size <Integer, >=0>\r\n",
        "   Multiple hits window size, use 0 to specify 1-hit algorithm\r\n",
        " -off_diagonal_range <Integer, >=0>\r\n",
        "   Number of off-diagonals to search for the 2nd hit, use 0 to turn off\r\n",
        "   Default = `0'\r\n",
        "\r\n",
        " *** Miscellaneous options\r\n",
        " -parse_deflines\r\n",
        "   Should the query and subject defline(s) be parsed?\r\n",
        " -num_threads <Integer, >=1>\r\n",
        "   Number of threads (CPUs) to use in the BLAST search\r\n",
        "   Default = `1'\r\n",
        "    * Incompatible with:  remote\r\n",
        " -remote\r\n",
        "   Execute search remotely?\r\n",
        "    * Incompatible with:  gilist, seqidlist, negative_gilist, subject_loc,\r\n",
        "   num_threads\r\n",
        "\r\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Yes!  It works!  Can now just run BLAST without changing directories in Linux!"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#let's try creating a config file that tells BLAST to look in the default BLAST databases (dbs)\n",
      "#directory on this computer (/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs)\n",
      "cd home"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "SyntaxError",
       "evalue": "invalid syntax (<ipython-input-8-42ba6ba23109>, line 3)",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-8-42ba6ba23109>\"\u001b[0;36m, line \u001b[0;32m3\u001b[0m\n\u001b[0;31m    cd home\u001b[0m\n\u001b[0m          ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /home"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/home"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makefile blastdbs.ncbirc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: makefile: not found\r\n"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!echo \"BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\" > blastdbs.ncbirc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: cannot create blastdbs.ncbirc: Permission denied\r\n"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Looks like I need to change permissions again.  Can't do this in IPython.  Be right back..."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!echo \"BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\" > blastdbs.ncbirc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Moved that file to /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makeblastdb -h"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "USAGE\r\n",
        "  makeblastdb [-h] [-help] [-in input_file] [-input_type type]\r\n",
        "    -dbtype molecule_type [-title database_title] [-parse_seqids]\r\n",
        "    [-hash_index] [-mask_data mask_data_files] [-mask_id mask_algo_ids]\r\n",
        "    [-mask_desc mask_algo_descriptions] [-gi_mask]\r\n",
        "    [-gi_mask_name gi_based_mask_names] [-out database_name]\r\n",
        "    [-max_file_sz number_of_bytes] [-taxid TaxID] [-taxid_map TaxIDMapFile]\r\n",
        "    [-logfile File_Name] [-version]\r\n",
        "\r\n",
        "DESCRIPTION\r\n",
        "   Application to create BLAST databases, version 2.2.29+\r\n",
        "\r\n",
        "Use '-help' to print detailed descriptions of command line arguments\r\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makeblastdb -in 20140225_RickettsiaGBnt.fasta -dbtype nucl"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "\r\n",
        "Building a new DB, current time: 02/25/2014 12:39:55\r\n",
        "New DB name:   20140225_RickettsiaGBnt.fasta\r\n",
        "New DB title:  20140225_RickettsiaGBnt.fasta\r\n",
        "Sequence type: Nucleotide\r\n",
        "Keep Linkouts: T\r\n",
        "Keep MBits: T\r\n",
        "Maximum file size: 1000000000B\r\n",
        "BLAST options error: File 20140225_RickettsiaGBnt.fasta does not exist\r\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta -dbtype nucl -out 20140225_RickettsiaGBnt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "\r\n",
        "Building a new DB, current time: 02/25/2014 12:41:16\r\n",
        "New DB name:   20140225_RickettsiaGBnt\r\n",
        "New DB title:  /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta\r\n",
        "Sequence type: Nucleotide\r\n",
        "Keep Linkouts: T\r\n",
        "Keep MBits: T\r\n",
        "Maximum file size: 1000000000B\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|167' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|168' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|296' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|297' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|460' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|479' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|480' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|481' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|482' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|483' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|519' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|520' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|521' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|522' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|541' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1762' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1763' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1764' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1792' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1795' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1805' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1997' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1998' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|2068' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|2354' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5394' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5471' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5565' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5578' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Adding sequences from FASTA; added 8786 sequences in 9.54842 seconds.\r\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#check number of entries (i.e. count the lines containing '>', which marks the description\n",
      "#line of each entry) in the source fasta file\n",
      "!awk '/>/ { count++ } END { print count }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "8815\r\n"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -h"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "USAGE\r\n",
        "  blastn [-h] [-help] [-import_search_strategy filename]\r\n",
        "    [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n",
        "    [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n",
        "    [-negative_gilist filename] [-entrez_query entrez_query]\r\n",
        "    [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n",
        "    [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n",
        "    [-out output_file] [-evalue evalue] [-word_size int_value]\r\n",
        "    [-gapopen open_penalty] [-gapextend extend_penalty]\r\n",
        "    [-perc_identity float_value] [-xdrop_ungap float_value]\r\n",
        "    [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n",
        "    [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n",
        "    [-penalty penalty] [-reward reward] [-no_greedy]\r\n",
        "    [-min_raw_gapped_score int_value] [-template_type type]\r\n",
        "    [-template_length int_value] [-dust DUST_options]\r\n",
        "    [-filtering_db filtering_database]\r\n",
        "    [-window_masker_taxid window_masker_taxid]\r\n",
        "    [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n",
        "    [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n",
        "    [-best_hit_score_edge float_value] [-window_size int_value]\r\n",
        "    [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n",
        "    [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n",
        "    [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n",
        "    [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n",
        "    [-num_threads int_value] [-remote] [-version]\r\n",
        "\r\n",
        "DESCRIPTION\r\n",
        "   Nucleotide-Nucleotide BLAST 2.2.29+\r\n",
        "\r\n",
        "Use '-help' to print detailed descriptions of command line arguments\r\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#try this again, but moved the blastdbs.ncbirc file to /home/samb, as suggested by the error message in In[9]\n",
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#added [BLAST] as first line in the blastdbs.ncbirc file\n",
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#think I might actually have to specify the specific database in the .ncbirc file? Added full path to Rickettsia db in that file"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#restarted Terminal and IPython\n",
      "#also restored database path to just the \"dbs\" directory; not to an actual db file\n",
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pwd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "u'/home/samb'"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\u001b[0m\u001b[01;34mBioinformaticsTools\u001b[0m/  \u001b[01;34mDocuments\u001b[0m/        PhageNGS_ID.ipynb           \u001b[01;34mTemplates\u001b[0m/\r\n",
        "blastdbs.ncbirc       \u001b[01;34mDownloads\u001b[0m/        \u001b[01;34mPictures\u001b[0m/                   \u001b[01;34mUbuntu One\u001b[0m/\r\n",
        "blastdbs.ncbirc~      examples.desktop  \u001b[01;34mPublic\u001b[0m/                     \u001b[01;34mVideos\u001b[0m/\r\n",
        "\u001b[01;34mDesktop\u001b[0m/              \u001b[01;34mMusic\u001b[0m/            sed_and_awk_practice.ipynb\r\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head -10 blastdbs.ncbirc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[BLAST]\r\n",
        "BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\r\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Hmmmm...  Everything looks correct, as far as I can tell from looking at the BLAST configuration documentation and other internet resources.  Not sure why this isn't working."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#copied that .ncbirc file to the root directory (/) of the computer\n",
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Well, I've added \"BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\" to the file in /etc/profile.d that I used earlier to append to the PATH.  We'll see if that works.  Restarting the computer."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Sweet!  That worked!  No more specifying full directories to databases or BLAST executables!!!"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head -10 /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Contig366DeNovoAssembly\tgi|311103224|ref|NC_014640.1|\t79.95\t369\t66\t5\t1\t366\t820544\t820907\t2e-69\t  265\r\n",
        "Contig366DeNovoAssembly\tgi|311103224|ref|NC_014640.1|\t79.95\t369\t66\t5\t1\t366\t2208847\t2209210\t2e-69\t  265\r\n",
        "Contig366DeNovoAssembly\tgi|311103224|ref|NC_014640.1|\t79.95\t369\t66\t5\t1\t366\t5159121\t5158758\t2e-69\t  265\r\n",
        "Contig2706DeNovoAssembly\tgi|390137196|gb|AJWD01000108.1|\t92.59\t54\t4\t0\t1\t54\t1637\t1584\t3e-14\t78.7\r\n",
        "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t3205273\t3205225\t7e-11\t67.6\r\n",
        "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t3854412\t3854364\t7e-11\t67.6\r\n",
        "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t4413608\t4413560\t7e-11\t67.6\r\n",
        "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t4600084\t4600036\t7e-11\t67.6\r\n",
        "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t4658009\t4657961\t7e-11\t67.6\r\n",
        "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t5128163\t5128115\t7e-11\t67.6\r\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "OK, starting this again because the fasta file (20140225_RickettsiaGBnt.fasta) should've contained 11,000+ entries but the awk count (see In[7] above) indicated\n",
      "only 8000+ entries.  Have re-downloaded all Rickettsia nucleotide (nt) entries from NCBI as a fasta file.  There should be 11414 entries."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#count entries in new Rickettsia GenBank fasta file\n",
      "!awk '/>/ { count++ } END { print count }' /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "11414\r\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Perfect.  Don't know what was wrong with the last one.  Will now make BLAST database."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makeblastdb -in /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta -dbtype nucl -out /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/bin/sh: 1: makeblastdb: Permission denied\r\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Well, that's weird.  Time to check permissions on the input/output locations. Ugh."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls -l"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "total 2428056\r\n",
        "-rw------- 1 samb samb  376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n",
        "-rw------- 1 samb samb    1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n",
        "-rw------- 1 samb samb     105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n",
        "-rw------- 1 samb samb   92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n",
        "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "OK, there are no read permissions for anyone else.  However, since I'm (samb) running the commands, it seems weird that it won't work.\n",
      "Going to change permissions to 744.  BRB..."
     ]
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Changed permissions to the \"dbs\" folder using: \n",
      "sudo chmod -R 744 /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\n",
      "\n",
      "Let's see if that worked."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls -l"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "total 2428056\r\n",
        "-rw------- 1 samb samb  376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n",
        "-rw------- 1 samb samb    1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n",
        "-rw------- 1 samb samb     105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n",
        "-rw------- 1 samb samb   92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n",
        "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "It didn't work!  Well, that's weird.  Tried this instead:\n",
      "sudo chmod 744 -R /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls -l"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "total 2428056\r\n",
        "-rw------- 1 samb samb  376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n",
        "-rw------- 1 samb samb    1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n",
        "-rw------- 1 samb samb     105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n",
        "-rw------- 1 samb samb   92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n",
        "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Seems like the problem is possibly related to having moved the \"BioinformaticsTools\" folder to my larger partition (which is actually a Windows partition).  Might have to modify how it is mounted in Linux in order to enable changes to the read/write permissions."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls -l"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "total 2428056\r\n",
        "-rw------- 1 samb samb  376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n",
        "-rw------- 1 samb samb    1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n",
        "-rw------- 1 samb samb     105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n",
        "-rw------- 1 samb samb   92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n",
        "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "OK, I'll deal with this later.  Moved \"BioinformaticsTools\" folder back to original location (/home/samb) and\n",
      "updated myenvvars.sh (in /etc/profile.d).  Thinking about it some more, the problem might simply be due to my\n",
      "moving the BLAST folder to a different directory, instead of re-installing it in the new, desired location.  Will test this out later."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta -dbtype nucl -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "\r\n",
        "Building a new DB, current time: 02/28/2014 12:05:12\r\n",
        "New DB name:   /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227\r\n",
        "New DB title:  /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta\r\n",
        "Sequence type: Nucleotide\r\n",
        "Keep Linkouts: T\r\n",
        "Keep MBits: T\r\n",
        "Maximum file size: 1000000000B\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|167' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|168' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|296' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|297' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|460' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|479' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|480' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|481' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|482' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|483' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|519' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|520' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|521' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|522' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|541' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1762' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1763' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1764' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1792' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1795' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1805' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1997' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1998' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|2068' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|2354' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5394' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5471' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5565' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5578' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Adding sequences from FASTA; added 11385 sequences in 51.2601 seconds.\r\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -db RickettsiaGBnt20140227 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt \"6 stitle std\" -max_target_seqs 1 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!head -10 /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "gi|288926859|gb|CP000613.2| Rhodospirillum centenum SW, complete genome\tContig5DeNovoAssembly\tgi|288926859|gb|CP000613.2|\t90.00\t30\t3\t0\t67\t96\t3744736\t3744765\t0.071\t41.0\r\n",
        "gi|327396847|dbj|AB104413.1| Red sea bream iridovirus genomic DNA, circular physical map, complete sequence\tContig6DeNovoAssembly\tgi|327396847|dbj|AB104413.1|\t76.09\t46\t5\t1\t52\t91\t102234\t102279\t3.7\t35.6\r\n",
        "gi|255529916|ref|NC_013061.1| Pedobacter heparinus DSM 2366 chromosome, complete genome\tContig9DeNovoAssembly\tgi|255529916|ref|NC_013061.1|\t86.21\t29\t4\t0\t64\t92\t1205031\t1205003\t3.6\t35.6\r\n",
        "gi|18308982|ref|NC_003366.1| Clostridium perfringens str. 13 chromosome, complete genome\tContig10DeNovoAssembly\tgi|18308982|ref|NC_003366.1|\t83.33\t36\t6\t0\t53\t88\t1178864\t1178899\t0.20\t39.2\r\n",
        "gi|42494965|emb|AJ270058.1| Arabidopsis thaliana DNA chromosome 4, short arm\tContig11DeNovoAssembly\tgi|42494965|emb|AJ270058.1|\t89.29\t28\t3\t0\t75\t102\t387185\t387212\t0.98\t37.4\r\n",
        "gi|386818599|ref|NZ_JH651379.1| Joostella marina DSM 19592 genomic scaffold Joomascaffold_1, whole genome shotgun sequence\tContig13DeNovoAssembly\tgi|386818599|ref|NZ_JH651379.1|\t87.10\t31\t4\t0\t3\t33\t598184\t598154\t0.071\t39.2\r\n",
        "gi|552562410|gb|CM000780.3| Zea mays cultivar B73 chromosome 4\tContig15DeNovoAssembly\tgi|552562410|gb|CM000780.3|\t82.05\t39\t5\t1\t26\t64\t229168348\t229168384\t0.41\t37.4\r\n",
        "gi|55417891|dbj|BA000014.8| Arabidopsis thaliana DNA, chromosome 3, complete sequence\tContig16DeNovoAssembly\tgi|55417891|dbj|BA000014.8|\t89.66\t29\t1\t1\t17\t43\t22640442\t22640470\t0.25\t37.4\r\n",
        "gi|32444162|emb|BX294142.1| Rhodopirellula baltica SH 1 complete genome; segment 10/24\tContig17DeNovoAssembly\tgi|32444162|emb|BX294142.1|\t100.00\t19\t0\t0\t50\t68\t112716\t112698\t1.6\t35.6\r\n",
        "gi|584450787|emb|HG916852.1| Rhizobium sp. LPU83 main chrosome complete genome\tContig19DeNovoAssembly\tgi|584450787|emb|HG916852.1|\t100.00\t19\t0\t0\t6\t24\t1422493\t1422475\t0.95\t35.6\r\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "It's odd that there are entries for Arabidopsis and Zea mays...  I was going to re-download the Rickettsia nucleotides from GenBank, but never mind -- the mistake was mine:\n",
      "I didn't filter the initial NCBI search by taxonomy!\n",
      "\n",
      "With the search restricted to bacteria only, there should be 10788 sequences."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!awk '/>/ { count++ } END { print count }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "10788\r\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta -dbtype nucl -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\r\n",
        "\r\n",
        "Building a new DB, current time: 02/28/2014 15:24:39\r\n",
        "New DB name:   /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228\r\n",
        "New DB title:  /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta\r\n",
        "Sequence type: Nucleotide\r\n",
        "Keep Linkouts: T\r\n",
        "Keep MBits: T\r\n",
        "Maximum file size: 1000000000B\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|167' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|168' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|296' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|297' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|460' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|479' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|480' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|481' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|482' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|483' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|519' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|520' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|521' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|522' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|541' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1762' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1763' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1764' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1792' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1795' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1805' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1997' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|1998' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|2068' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|2354' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5398' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5475' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5569' as it has no sequence data\r\n",
        "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n",
        "Ignoring sequence 'lcl|5582' as it has no sequence data\r\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Adding sequences from FASTA; added 10759 sequences in 40.3107 seconds.\r\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!blastn -db RickettsiaGBnt20140228 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt \"6 stitle std\" -max_target_seqs 1 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}