{ "metadata": { "name": "PhageNGS_ID" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "cd /Users/Sam" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Users/Sam\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "cd ../../Applications" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Applications\n" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /Applications/ncbi-blast-2.2.29+/bin" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/Applications/ncbi-blast-2.2.29+/bin\n" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "!./blastn -task blastn -query /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -db /Volumes/homes/srlab/blastdbs/PhageGBnuc20130529filter -outfmt \"6 stitle std\" -max_target_seqs 3 -num_threads 16 -out /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBnt.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -10 /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBnt.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "gi|396576808|emb|FR775895.2| Enterobacteria phage phi92, complete genome\tContig5DeNovoAssembly\tgi|396576808|emb|FR775895.2|\t87.88\t33\t2\t2\t3\t34\t76644\t76613\t0.32\t35.6\r\n", "gi|15281680|gb|AF396866.1| Bacteriophage Mx8, complete genome\tContig5DeNovoAssembly\tgi|15281680|gb|AF396866.1|\t95.00\t20\t1\t0\t17\t36\t24781\t24762\t3.9\t31.9\r\n", 
"gi|15320570|ref|NC_003085.1| Myxococcus phage Mx8, complete genome\tContig5DeNovoAssembly\tgi|15320570|ref|NC_003085.1|\t95.00\t20\t1\t0\t17\t36\t24781\t24762\t3.9\t31.9\r\n", "gi|312262424|gb|GU396103.1| Aeromonas phage PX29, complete genome\tContig6DeNovoAssembly\tgi|312262424|gb|GU396103.1|\t100.00\t19\t0\t0\t42\t60\t200091\t200109\t0.39\t35.6\r\n", "gi|254211614|gb|GQ334450.1| Cyanophage PSS2, complete genome\tContig6DeNovoAssembly\tgi|254211614|gb|GQ334450.1|\t100.00\t17\t0\t0\t18\t34\t56942\t56926\t4.7\t31.9\r\n", "gi|254729462|ref|NC_013021.1| Cyanophage PSS2, complete genome\tContig6DeNovoAssembly\tgi|254729462|ref|NC_013021.1|\t100.00\t17\t0\t0\t18\t34\t56942\t56926\t4.7\t31.9\r\n", "gi|310005390|gb|GU075905.1| Prochlorococcus phage P-HM2, complete genome\tContig9DeNovoAssembly\tgi|310005390|gb|GU075905.1|\t85.71\t28\t4\t0\t11\t38\t180126\t180099\t1.3\t33.7\r\n", "gi|326782972|ref|NC_015284.1| Prochlorococcus phage P-HM2, complete genome\tContig9DeNovoAssembly\tgi|326782972|ref|NC_015284.1|\t85.71\t28\t4\t0\t11\t38\t180126\t180099\t1.3\t33.7\r\n", "gi|311788808|gb|HQ336222.2| Acanthamoeba polyphaga mimivirus, complete genome\tContig10DeNovoAssembly\tgi|311788808|gb|HQ336222.2|\t95.45\t22\t1\t0\t23\t44\t342629\t342608\t0.26\t35.6\r\n", "gi|311788808|gb|HQ336222.2| Acanthamoeba polyphaga mimivirus, complete genome\tContig10DeNovoAssembly\tgi|311788808|gb|HQ336222.2|\t80.00\t40\t5\t1\t9\t48\t993827\t993791\t0.26\t35.6\r\n" ] } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "./blastn -h" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (<ipython-input-27-1213ea4d2e0b>, line 1)", "output_type": "pyerr", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"<ipython-input-27-1213ea4d2e0b>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m ./blastn -h\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], 
"prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "#can't remember column order of output file\n", "!./blastn -h" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "USAGE\r\n", " blastn [-h] [-help] [-import_search_strategy filename]\r\n", " [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n", " [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n", " [-negative_gilist filename] [-entrez_query entrez_query]\r\n", " [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n", " [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n", " [-out output_file] [-evalue evalue] [-word_size int_value]\r\n", " [-gapopen open_penalty] [-gapextend extend_penalty]\r\n", " [-perc_identity float_value] [-xdrop_ungap float_value]\r\n", " [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n", " [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n", " [-penalty penalty] [-reward reward] [-no_greedy]\r\n", " [-min_raw_gapped_score int_value] [-template_type type]\r\n", " [-template_length int_value] [-dust DUST_options]\r\n", " [-filtering_db filtering_database]\r\n", " [-window_masker_taxid window_masker_taxid]\r\n", " [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n", " [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n", " [-best_hit_score_edge float_value] [-window_size int_value]\r\n", " [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n", " [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n", " [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n", " [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n", " [-num_threads int_value] [-remote] [-version]\r\n", "\r\n", "DESCRIPTION\r\n", " Nucleotide-Nucleotide BLAST 2.2.29+\r\n", "\r\n", "Use '-help' to print detailed 
descriptions of command line arguments\r\n" ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "#can't remember column order of output file\n", "!./blastn -help" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "USAGE\r\n", " blastn [-h] [-help] [-import_search_strategy filename]\r\n", " [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n", " [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n", " [-negative_gilist filename] [-entrez_query entrez_query]\r\n", " [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n", " [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n", " [-out output_file] [-evalue evalue] [-word_size int_value]\r\n", " [-gapopen open_penalty] [-gapextend extend_penalty]\r\n", " [-perc_identity float_value] [-xdrop_ungap float_value]\r\n", " [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n", " [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n", " [-penalty penalty] [-reward reward] [-no_greedy]\r\n", " [-min_raw_gapped_score int_value] [-template_type type]\r\n", " [-template_length int_value] [-dust DUST_options]\r\n", " [-filtering_db filtering_database]\r\n", " [-window_masker_taxid window_masker_taxid]\r\n", " [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n", " [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n", " [-best_hit_score_edge float_value] [-window_size int_value]\r\n", " [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n", " [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n", " [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n", " [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n", " [-num_threads int_value] [-remote] [-version]\r\n", "\r\n", "DESCRIPTION\r\n", " Nucleotide-Nucleotide BLAST 
2.2.29+\r\n", "\r\n", "OPTIONAL ARGUMENTS\r\n", " -h\r\n", " Print USAGE and DESCRIPTION; ignore all other parameters\r\n", " -help\r\n", " Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters\r\n", " -version\r\n", " Print version number; ignore other arguments\r\n", "\r\n", " *** Input query options\r\n", " -query <File_In>\r\n", " Input file name\r\n", " Default = `-'\r\n", " -query_loc <String>\r\n", " Location on the query sequence in 1-based offsets (Format: start-stop)\r\n", " -strand <String, `both', `minus', `plus'>\r\n", " Query strand(s) to search against database/subject\r\n", " Default = `both'\r\n", "\r\n", " *** General search options\r\n", " -task <String, Permissible values: 'blastn' 'blastn-short' 'dc-megablast'\r\n", " 'megablast' 'rmblastn' >\r\n", " Task to execute\r\n", " Default = `megablast'\r\n", " -db <String>\r\n", " BLAST database name\r\n", " * Incompatible with: subject, subject_loc\r\n", " -out <File_Out>\r\n", " Output file name\r\n", " Default = `-'\r\n", " -evalue <Real>\r\n", " Expectation value (E) threshold for saving hits \r\n", " Default = `10'\r\n", " -word_size <Integer, >=4>\r\n", " Word size for wordfinder algorithm (length of best perfect match)\r\n", " -gapopen <Integer>\r\n", " Cost to open a gap\r\n", " -gapextend <Integer>\r\n", " Cost to extend a gap\r\n", " -penalty <Integer, <=0>\r\n", " Penalty for a nucleotide mismatch\r\n", " -reward <Integer, >=0>\r\n", " Reward for a nucleotide match\r\n", " -use_index <Boolean>\r\n", " Use MegaBLAST database index\r\n", " Default = `false'\r\n", " -index_name <String>\r\n", " MegaBLAST database index name\r\n", "\r\n", " *** BLAST-2-Sequences options\r\n", " -subject <File_In>\r\n", " Subject sequence(s) to search\r\n", " * Incompatible with: db, gilist, seqidlist, negative_gilist,\r\n", " db_soft_mask, db_hard_mask\r\n", " -subject_loc <String>\r\n", " Location on the subject sequence in 1-based offsets (Format: start-stop)\r\n", " * Incompatible with: db, 
gilist, seqidlist, negative_gilist,\r\n", " db_soft_mask, db_hard_mask, remote\r\n", "\r\n", " *** Formatting options\r\n", " -outfmt <String>\r\n", " alignment view options:\r\n", " 0 = pairwise,\r\n", " 1 = query-anchored showing identities,\r\n", " 2 = query-anchored no identities,\r\n", " 3 = flat query-anchored, show identities,\r\n", " 4 = flat query-anchored, no identities,\r\n", " 5 = XML Blast output,\r\n", " 6 = tabular,\r\n", " 7 = tabular with comment lines,\r\n", " 8 = Text ASN.1,\r\n", " 9 = Binary ASN.1,\r\n", " 10 = Comma-separated values,\r\n", " 11 = BLAST archive format (ASN.1) \r\n", " \r\n", " Options 6, 7, and 10 can be additionally configured to produce\r\n", " a custom format specified by space delimited format specifiers.\r\n", " The supported format specifiers are:\r\n", " \t qseqid means Query Seq-id\r\n", " \t qgi means Query GI\r\n", " \t qacc means Query accesion\r\n", " \t qaccver means Query accesion.version\r\n", " \t qlen means Query sequence length\r\n", " \t sseqid means Subject Seq-id\r\n", " \t sallseqid means All subject Seq-id(s), separated by a ';'\r\n", " \t sgi means Subject GI\r\n", " \t sallgi means All subject GIs\r\n", " \t sacc means Subject accession\r\n", " \t saccver means Subject accession.version\r\n", " \t sallacc means All subject accessions\r\n", " \t slen means Subject sequence length\r\n", " \t qstart means Start of alignment in query\r\n", " \t qend means End of alignment in query\r\n", " \t sstart means Start of alignment in subject\r\n", " \t send means End of alignment in subject\r\n", " \t qseq means Aligned part of query sequence\r\n", " \t sseq means Aligned part of subject sequence\r\n", " \t evalue means Expect value\r\n", " \t bitscore means Bit score\r\n", " \t score means Raw score\r\n", " \t length means Alignment length\r\n", " \t pident means Percentage of identical matches\r\n", " \t nident means Number of identical matches\r\n", " \t mismatch means Number of mismatches\r\n", " \t positive 
means Number of positive-scoring matches\r\n", " \t gapopen means Number of gap openings\r\n", " \t gaps means Total number of gaps\r\n", " \t ppos means Percentage of positive-scoring matches\r\n", " \t frames means Query and subject frames separated by a '/'\r\n", " \t qframe means Query frame\r\n", " \t sframe means Subject frame\r\n", " \t btop means Blast traceback operations (BTOP)\r\n", " \t staxids means unique Subject Taxonomy ID(s), separated by a ';'\r\n", " \t\t\t (in numerical order)\r\n", " \t sscinames means unique Subject Scientific Name(s), separated by a ';'\r\n", " \t scomnames means unique Subject Common Name(s), separated by a ';'\r\n", " \tsblastnames means unique Subject Blast Name(s), separated by a ';'\r\n", " \t\t\t (in alphabetical order)\r\n", " \tsskingdoms means unique Subject Super Kingdom(s), separated by a ';'\r\n", " \t\t\t (in alphabetical order) \r\n", " \t stitle means Subject Title\r\n", " \tsalltitles means All Subject Title(s), separated by a '<>'\r\n", " \t sstrand means Subject Strand\r\n", " \t qcovs means Query Coverage Per Subject\r\n", " \t qcovhsp means Query Coverage Per HSP\r\n", " When not provided, the default value is:\r\n", " 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send\r\n", " evalue bitscore', which is equivalent to the keyword 'std'\r\n", " Default = `0'\r\n", " -show_gis\r\n", " Show NCBI GIs in deflines?\r\n", " -num_descriptions <Integer, >=0>\r\n", " Number of database sequences to show one-line descriptions for\r\n", " Not applicable for outfmt > 4\r\n", " Default = `500'\r\n", " * Incompatible with: max_target_seqs\r\n", " -num_alignments <Integer, >=0>\r\n", " Number of database sequences to show alignments for\r\n", " Default = `250'\r\n", " * Incompatible with: max_target_seqs\r\n", " -html\r\n", " Produce HTML output?\r\n", "\r\n", " *** Query filtering options\r\n", " -dust <String>\r\n", " Filter query sequence with DUST (Format: 'yes', 'level window linker', or\r\n", " 
'no' to disable)\r\n", " Default = `20 64 1'\r\n", " -filtering_db <String>\r\n", " BLAST database containing filtering elements (i.e.: repeats)\r\n", " -window_masker_taxid <Integer>\r\n", " Enable WindowMasker filtering using a Taxonomic ID\r\n", " -window_masker_db <String>\r\n", " Enable WindowMasker filtering using this repeats database.\r\n", " -soft_masking <Boolean>\r\n", " Apply filtering locations as soft masks\r\n", " Default = `true'\r\n", " -lcase_masking\r\n", " Use lower case filtering in query and subject sequence(s)?\r\n", "\r\n", " *** Restrict search or results\r\n", " -gilist <String>\r\n", " Restrict search of database to list of GI's\r\n", " * Incompatible with: negative_gilist, seqidlist, remote, subject,\r\n", " subject_loc\r\n", " -seqidlist <String>\r\n", " Restrict search of database to list of SeqId's\r\n", " * Incompatible with: gilist, negative_gilist, remote, subject,\r\n", " subject_loc\r\n", " -negative_gilist <String>\r\n", " Restrict search of database to everything except the listed GIs\r\n", " * Incompatible with: gilist, seqidlist, remote, subject, subject_loc\r\n", " -entrez_query <String>\r\n", " Restrict search with the given Entrez query\r\n", " * Requires: remote\r\n", " -db_soft_mask <String>\r\n", " Filtering algorithm ID to apply to the BLAST database as soft masking\r\n", " * Incompatible with: db_hard_mask, subject, subject_loc\r\n", " -db_hard_mask <String>\r\n", " Filtering algorithm ID to apply to the BLAST database as hard masking\r\n", " * Incompatible with: db_soft_mask, subject, subject_loc\r\n", " -perc_identity <Real, 0..100>\r\n", " Percent identity\r\n", " -culling_limit <Integer, >=0>\r\n", " If the query range of a hit is enveloped by that of at least this many\r\n", " higher-scoring hits, delete the hit\r\n", " * Incompatible with: best_hit_overhang, best_hit_score_edge\r\n", " -best_hit_overhang <Real, (>=0 and =<0.5)>\r\n", " Best Hit algorithm overhang value (recommended value: 0.1)\r\n", " * 
Incompatible with: culling_limit\r\n", " -best_hit_score_edge <Real, (>=0 and =<0.5)>\r\n", " Best Hit algorithm score edge value (recommended value: 0.1)\r\n", " * Incompatible with: culling_limit\r\n", " -max_target_seqs <Integer, >=1>\r\n", " Maximum number of aligned sequences to keep \r\n", " Not applicable for outfmt <= 4\r\n", " Default = `500'\r\n", " * Incompatible with: num_descriptions, num_alignments\r\n", "\r\n", " *** Discontiguous MegaBLAST options\r\n", " -template_type <String, `coding', `coding_and_optimal', `optimal'>\r\n", " Discontiguous MegaBLAST template type\r\n", " * Requires: template_length\r\n", " -template_length <Integer, Permissible values: '16' '18' '21' >\r\n", " Discontiguous MegaBLAST template length\r\n", " * Requires: template_type\r\n", "\r\n", " *** Statistical options\r\n", " -dbsize <Int8>\r\n", " Effective length of the database \r\n", " -searchsp <Int8, >=0>\r\n", " Effective length of the search space\r\n", " -max_hsps <Integer, >=0>\r\n", " Set maximum number of HSPs per subject sequence to save (0 means no limit)\r\n", " Default = `0'\r\n", " -sum_statistics\r\n", " Use sum statistics\r\n", "\r\n", " *** Search strategy options\r\n", " -import_search_strategy <File_In>\r\n", " Search strategy to use\r\n", " * Incompatible with: export_search_strategy\r\n", " -export_search_strategy <File_Out>\r\n", " File name to record the search strategy used\r\n", " * Incompatible with: import_search_strategy\r\n", "\r\n", " *** Extension options\r\n", " -xdrop_ungap <Real>\r\n", " X-dropoff value (in bits) for ungapped extensions\r\n", " -xdrop_gap <Real>\r\n", " X-dropoff value (in bits) for preliminary gapped extensions\r\n", " -xdrop_gap_final <Real>\r\n", " X-dropoff value (in bits) for final gapped alignment\r\n", " -no_greedy\r\n", " Use non-greedy dynamic programming extension\r\n", " -min_raw_gapped_score <Integer>\r\n", " Minimum raw gapped score to keep an alignment in the preliminary gapped and\r\n", " traceback 
stages\r\n", " -ungapped\r\n", " Perform ungapped alignment only?\r\n", " -window_size <Integer, >=0>\r\n", " Multiple hits window size, use 0 to specify 1-hit algorithm\r\n", " -off_diagonal_range <Integer, >=0>\r\n", " Number of off-diagonals to search for the 2nd hit, use 0 to turn off\r\n", " Default = `0'\r\n", "\r\n", " *** Miscellaneous options\r\n", " -parse_deflines\r\n", " Should the query and subject defline(s) be parsed?\r\n", " -num_threads <Integer, >=1>\r\n", " Number of threads (CPUs) to use in the BLAST search\r\n", " Default = `1'\r\n", " * Incompatible with: remote\r\n", " -remote\r\n", " Execute search remotely?\r\n", " * Incompatible with: gilist, seqidlist, negative_gilist, subject_loc,\r\n", " num_threads\r\n", "\r\n" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "#removed \"pipe\" delimiters, added column headings in LibreOffice\n", "#renamed file: /Volumes/web/Arabidopsis/AbaloneNGS_PhageID/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnPhageGBntTAB.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "#Now using Ubuntu. Had to copy this notebook to iPython local directory, as iPython would not open it from\n", "#its location on Eagle. 
:(\n", "#same with data file\n", "#use sed to remove spaces in contig names for subsequent joining using SQLShare\n", "!sed 's/ //g' /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369references.csv > /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369referencesNoSpaces.csv" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -10 /home/samb/Desktop/AbWithPhageToAbMasterRefNoPhageUnmappedReadsMapping78369referencesNoSpaces.csv" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Name,Lengthofconsensussequence,Numberofreads,Averagecoverage,Referencesequences\r", "\r\n", "Contig5DeNovoAssemblymapping,106,90,24.67,Contig5DeNovoAssembly\r", "\r\n", "Contig6DeNovoAssemblymapping,125,40,12.744,Contig6DeNovoAssembly\r", "\r\n", "Contig9DeNovoAssemblymapping,121,36,13.198,Contig9DeNovoAssembly\r", "\r\n", "Contig10DeNovoAssemblymapping,92,31,13.848,Contig10DeNovoAssembly\r", "\r\n", "Contig11DeNovoAssemblymapping,116,30,11.026,Contig11DeNovoAssembly\r", "\r\n", "Contig13DeNovoAssemblymapping,50,5,4.9,Contig13DeNovoAssembly\r", "\r\n", "Contig15DeNovoAssemblymapping,64,11,7.141,Contig15DeNovoAssembly\r", "\r\n", "Contig16DeNovoAssemblymapping,50,6,4.76,Contig16DeNovoAssembly\r", "\r\n", "Contig17DeNovoAssemblymapping,70,41,16.829,Contig17DeNovoAssembly\r", "\r\n" ] } ], "prompt_number": 6 }, { "cell_type": "raw", "metadata": {}, "source": [ "The sed command worked: the contig names no longer contain spaces. (Because the pattern matches every space in the file, the column headings lost their spaces too.) Quick explanation of 's/ //g': s - substitute; the text between the first and second slashes (a single space) is the pattern to match; the text between the second and third slashes (empty) is the replacement, so each match is deleted; g - apply globally, i.e. make the substitution at every match on each line, not just at the first instance of the match." ] }, { "cell_type": "code", "collapsed": false, "input": [ "#Installed BLAST2.2.29+ on Linux\n", "#Appending install location to the PATH\n", "!cd ../.." 
], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, "input": [ "pwd" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 36, "text": [ "u'/home/samb'" ] } ], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "cd .." ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/home" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/\n" ] } ], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[0m\u001b[01;34mbin\u001b[0m/ \u001b[01;34metc\u001b[0m/ \u001b[01;34mlib\u001b[0m/ \u001b[01;34mmnt\u001b[0m/ \u001b[01;34mrun\u001b[0m/ \u001b[30;42mtmp\u001b[0m/ \u001b[01;36mvmlinuz.old\u001b[0m@\r\n", "\u001b[01;34mboot\u001b[0m/ \u001b[01;34mhome\u001b[0m/ \u001b[01;34mlib64\u001b[0m/ \u001b[01;34mopt\u001b[0m/ \u001b[01;34msbin\u001b[0m/ \u001b[01;34musr\u001b[0m/\r\n", "\u001b[01;34mcdrom\u001b[0m/ \u001b[01;36minitrd.img\u001b[0m@ \u001b[01;34mlost+found\u001b[0m/ \u001b[01;34mproc\u001b[0m/ \u001b[01;34msrv\u001b[0m/ \u001b[01;34mvar\u001b[0m/\r\n", "\u001b[01;34mdev\u001b[0m/ \u001b[01;36minitrd.img.old\u001b[0m@ \u001b[01;34mmedia\u001b[0m/ \u001b[01;34mroot\u001b[0m/ \u001b[01;34msys\u001b[0m/ \u001b[01;36mvmlinuz\u001b[0m@\r\n" ] } ], "prompt_number": 39 }, { "cell_type": "code", "collapsed": false, "input": [ "cd etc/profile" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[Errno 20] Not a directory: 'etc/profile'\n", "/\n" ] } ], "prompt_number": 
40 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /etc/profile" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[Errno 20] Not a directory: '/etc/profile'\n", "/\n" ] } ], "prompt_number": 41 }, { "cell_type": "code", "collapsed": false, "input": [ "cd etc" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/etc\n" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[0m\u001b[01;34macpi\u001b[0m/ host.conf \u001b[01;36mprintcap\u001b[0m@\r\n", "adduser.conf hostname profile\r\n", "\u001b[01;34malternatives\u001b[0m/ hostname~ \u001b[01;34mprofile.d\u001b[0m/\r\n", "anacrontab hostname.old protocols\r\n", "apg.conf hosts \u001b[01;34mpulse\u001b[0m/\r\n", "\u001b[01;34mapm\u001b[0m/ hosts~ \u001b[01;34mpython\u001b[0m/\r\n", "\u001b[01;34mapparmor\u001b[0m/ hosts.allow \u001b[01;34mpython2.7\u001b[0m/\r\n", "\u001b[01;34mapparmor.d\u001b[0m/ hosts.deny \u001b[01;34mpython3\u001b[0m/\r\n", "\u001b[01;34mapport\u001b[0m/ hosts.old \u001b[01;34mpython3.3\u001b[0m/\r\n", "\u001b[01;34mapt\u001b[0m/ \u001b[01;34mhp\u001b[0m/ \u001b[01;34mrc0.d\u001b[0m/\r\n", "\u001b[01;34maptdaemon\u001b[0m/ \u001b[01;34mifplugd\u001b[0m/ \u001b[01;34mrc1.d\u001b[0m/\r\n", "\u001b[01;34mat-spi2\u001b[0m/ \u001b[01;34mImageMagick\u001b[0m/ \u001b[01;34mrc2.d\u001b[0m/\r\n", "\u001b[01;34mavahi\u001b[0m/ \u001b[01;34minit\u001b[0m/ \u001b[01;34mrc3.d\u001b[0m/\r\n", "bash.bashrc \u001b[01;34minit.d\u001b[0m/ \u001b[01;34mrc4.d\u001b[0m/\r\n", "bash_completion \u001b[01;34minitramfs-tools\u001b[0m/ \u001b[01;34mrc5.d\u001b[0m/\r\n", "\u001b[01;34mbash_completion.d\u001b[0m/ inputrc \u001b[01;34mrc6.d\u001b[0m/\r\n", "bindresvport.blacklist \u001b[01;34minsserv\u001b[0m/ 
\u001b[01;32mrc.local\u001b[0m*\r\n", "blkid.conf insserv.conf \u001b[01;34mrcS.d\u001b[0m/\r\n", "\u001b[40;31;01mblkid.tab\u001b[0m@ \u001b[01;34minsserv.conf.d\u001b[0m/ remote-login-service.conf\r\n", "\u001b[01;34mbluetooth\u001b[0m/ \u001b[01;34miproute2\u001b[0m/ \u001b[01;34mresolvconf\u001b[0m/\r\n", "\u001b[01;34mbonobo-activation\u001b[0m/ issue \u001b[01;36mresolv.conf\u001b[0m@\r\n", "brlapi.key issue.net \u001b[01;32mrmt\u001b[0m*\r\n", "\u001b[01;34mbrltty\u001b[0m/ \u001b[01;34mkbd\u001b[0m/ rpc\r\n", "brltty.conf \u001b[01;34mkernel\u001b[0m/ rsyslog.conf\r\n", "\u001b[01;34mca-certificates\u001b[0m/ kernel-img.conf \u001b[01;34mrsyslog.d\u001b[0m/\r\n", "ca-certificates.conf kerneloops.conf \u001b[01;34msamba\u001b[0m/\r\n", "\u001b[01;34mcalendar\u001b[0m/ \u001b[01;34mldap\u001b[0m/ \u001b[01;34msane.d\u001b[0m/\r\n", "\u001b[01;34mchatscripts\u001b[0m/ ld.so.cache securetty\r\n", "\u001b[01;34mcheckbox.d\u001b[0m/ ld.so.conf \u001b[01;34msecurity\u001b[0m/\r\n", "\u001b[01;34mchromium-browser\u001b[0m/ \u001b[01;34mld.so.conf.d\u001b[0m/ \u001b[01;34mselinux\u001b[0m/\r\n", "colord.conf legal sensors3.conf\r\n", "\u001b[01;34mcompizconfig\u001b[0m/ libaudit.conf \u001b[01;34msensors.d\u001b[0m/\r\n", "\u001b[01;34mconsole-setup\u001b[0m/ \u001b[01;34mlibnl-3\u001b[0m/ services\r\n", "\u001b[01;34mcracklib\u001b[0m/ \u001b[01;34mlibpaper.d\u001b[0m/ \u001b[01;34msgml\u001b[0m/\r\n", "\u001b[01;34mcron.d\u001b[0m/ \u001b[01;34mlibreoffice\u001b[0m/ shadow\r\n", "\u001b[01;34mcron.daily\u001b[0m/ \u001b[01;34mlightdm\u001b[0m/ shadow-\r\n", "\u001b[01;34mcron.hourly\u001b[0m/ lintianrc shells\r\n", "\u001b[01;34mcron.monthly\u001b[0m/ locale.alias signond.conf\r\n", "crontab localtime \u001b[01;34msignon-ui\u001b[0m/\r\n", "\u001b[01;34mcron.weekly\u001b[0m/ \u001b[01;34mlogcheck\u001b[0m/ \u001b[01;34mskel\u001b[0m/\r\n", "\u001b[01;34mcups\u001b[0m/ login.defs \u001b[01;34msound\u001b[0m/\r\n", "\u001b[01;34mcupshelpers\u001b[0m/ logrotate.conf 
\u001b[01;34mspeech-dispatcher\u001b[0m/\r\n", "\u001b[01;34mdbus-1\u001b[0m/ \u001b[01;34mlogrotate.d\u001b[0m/ \u001b[01;34mssh\u001b[0m/\r\n", "\u001b[01;34mdconf\u001b[0m/ lsb-release \u001b[01;34mssl\u001b[0m/\r\n", "debconf.conf ltrace.conf subgid\r\n", "debian_version magic subgid-\r\n", "\u001b[01;34mdefault\u001b[0m/ magic.mime subuid\r\n", "deluser.conf mailcap subuid-\r\n", "\u001b[01;34mdepmod.d\u001b[0m/ mailcap.order sudoers\r\n", "\u001b[01;34mdhcp\u001b[0m/ manpath.config \u001b[01;34msudoers.d\u001b[0m/\r\n", "\u001b[01;34mdhcp3\u001b[0m/ mime.types sysctl.conf\r\n", "\u001b[01;34mdictionaries-common\u001b[0m/ mke2fs.conf \u001b[01;34msysctl.d\u001b[0m/\r\n", "\u001b[01;34mdnsmasq.d\u001b[0m/ \u001b[01;34mmodprobe.d\u001b[0m/ \u001b[01;34msystemd\u001b[0m/\r\n", "\u001b[01;34mdoc-base\u001b[0m/ modules \u001b[01;34mterminfo\u001b[0m/\r\n", "\u001b[01;34mdpkg\u001b[0m/ mtab \u001b[01;34mthunderbird\u001b[0m/\r\n", "drirc mtab.fuselock timezone\r\n", "\u001b[01;34memacs\u001b[0m/ mtools.conf \u001b[01;34mtimidity\u001b[0m/\r\n", "environment \u001b[01;34mmysql\u001b[0m/ ts.conf\r\n", "\u001b[01;34mfirefox\u001b[0m/ nanorc ucf.conf\r\n", "\u001b[01;34mfonts\u001b[0m/ netscsid.conf \u001b[01;34mudev\u001b[0m/\r\n", "\u001b[01;34mfoomatic\u001b[0m/ \u001b[01;34mnetwork\u001b[0m/ \u001b[01;34mudisks2\u001b[0m/\r\n", "fstab \u001b[01;34mNetworkManager\u001b[0m/ \u001b[01;34mufw\u001b[0m/\r\n", "\u001b[01;34mfstab.d\u001b[0m/ networks updatedb.conf\r\n", "fuse.conf \u001b[01;34mnewt\u001b[0m/ \u001b[01;34mupdate-manager\u001b[0m/\r\n", "gai.conf nsswitch.conf \u001b[01;34mupdate-motd.d\u001b[0m/\r\n", "\u001b[01;34mgconf\u001b[0m/ \u001b[01;34mobex-data-server\u001b[0m/ \u001b[01;34mupdate-notifier\u001b[0m/\r\n", "\u001b[01;34mgdb\u001b[0m/ \u001b[01;34mopenal\u001b[0m/ \u001b[01;34mUPower\u001b[0m/\r\n", "\u001b[01;34mghostscript\u001b[0m/ \u001b[01;34mopt\u001b[0m/ upstart-xsessions\r\n", "\u001b[01;34mgnome\u001b[0m/ os-release 
usb_modeswitch.conf\r\n", "\u001b[01;34mgnome-app-install\u001b[0m/ pam.conf \u001b[01;34musb_modeswitch.d\u001b[0m/\r\n", "\u001b[01;34mgnome-settings-daemon\u001b[0m/ \u001b[01;34mpam.d\u001b[0m/ \u001b[01;34mvim\u001b[0m/\r\n", "\u001b[01;34mgnome-vfs-2.0\u001b[0m/ papersize \u001b[01;36mvtrgb\u001b[0m@\r\n", "\u001b[01;34mgroff\u001b[0m/ passwd wgetrc\r\n", "group passwd- \u001b[01;34mwildmidi\u001b[0m/\r\n", "group- \u001b[01;34mpcmcia\u001b[0m/ wodim.conf\r\n", "\u001b[01;34mgrub.d\u001b[0m/ \u001b[01;34mperl\u001b[0m/ \u001b[01;34mwpa_supplicant\u001b[0m/\r\n", "gshadow \u001b[01;34mpm\u001b[0m/ \u001b[01;34mX11\u001b[0m/\r\n", "gshadow- pnm2ppa.conf \u001b[01;34mxdg\u001b[0m/\r\n", "\u001b[01;34mgtk-2.0\u001b[0m/ \u001b[01;34mpolkit-1\u001b[0m/ \u001b[01;34mxml\u001b[0m/\r\n", "\u001b[01;34mgtk-3.0\u001b[0m/ popularity-contest.conf \u001b[01;34mxul-ext\u001b[0m/\r\n", "hdparm.conf \u001b[01;34mppp\u001b[0m/ zsh_command_not_found\r\n" ] } ], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "cd profile.d" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/etc/profile.d" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 46 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "bash_completion.sh vte.sh\r\n" ] } ], "prompt_number": 47 }, { "cell_type": "code", "collapsed": false, "input": [ "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: cannot create myenvvars.sh: Permission denied\r\n" ] } ], "prompt_number": 49 }, { "cell_type": "code", "collapsed": false, "input": [ "!sudo export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > 
myenvvars.sh" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: cannot create myenvvars.sh: Permission denied\r\n" ] } ], "prompt_number": 50 }, { "cell_type": "code", "collapsed": false, "input": [ "!gedit bash_completion.sh" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "** (gedit:4854): WARNING **: Could not load Gedit repository: Typelib file for namespace 'GtkSource', version '3.0' not found\r\n" ] } ], "prompt_number": 51 }, { "cell_type": "code", "collapsed": false, "input": [ "!gedit vte.sh" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "** (gedit:4874): WARNING **: Could not load Gedit repository: Typelib file for namespace 'GtkSource', version '3.0' not found\r\n" ] } ], "prompt_number": 52 }, { "cell_type": "code", "collapsed": false, "input": [ "pwd" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 53, "text": [ "u'/etc/profile.d'" ] } ], "prompt_number": 53 }, { "cell_type": "code", "collapsed": false, "input": [ "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: cannot create myenvvars.sh: Permission denied\r\n" ] } ], "prompt_number": 54 }, { "cell_type": "code", "collapsed": false, "input": [ "ls -ld" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "drwxr-xr-x 2 root root 4096 Oct 16 12:02 \u001b[0m\u001b[01;34m.\u001b[0m/\r\n" ] } ], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": [ "ls -l vte.sh" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "-rw-r--r-- 1 root root 1945 
Jun 20 2013 vte.sh\r\n" ] } ], "prompt_number": 56 }, { "cell_type": "code", "collapsed": false, "input": [ "#need to change permissions on this directory in order to write to it\n", "#can't change permissions via IPython; will do this in terminal\n", "#sudo chmod 757 /etc/profile.d\n", "#was 755, with root as owner and group" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n" ] } ], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [ "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!echo $PATH" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "raw", "metadata": {}, "source": [ "Need to restart the Terminal in order for the changes to PATH to be loaded. UPDATE: I think this got written to the wrong directory!" 
] }, { "cell_type": "code", "collapsed": false, "input": [ "!echo $PATH" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /etc/profile.d" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/etc/profile.d\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "bash_completion.sh vte.sh\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!export PATH=$PATH:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin > myenvvars.sh\n", "#this didn't write any text to the file: export prints nothing to stdout, so the redirect just creates an empty file (echo 'export PATH=...' > myenvvars.sh would have written the line)\n", "#I also think I need the \"#!/bin/bash\" at the beginning of the script\n", "#Added both (the shebang and the export PATH line) to the script manually using gedit" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "bash_completion.sh myenvvars.sh vte.sh\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!echo $PATH" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ 
"/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/bin\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "pwd" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 2, "text": [ "u'/home/samb'" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "#let's see if this worked\n", "!./blastn -h" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: ./blastn: not found\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "blastn -help" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'blastn' is not defined", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m<ipython-input-6-f74ffb396a19>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mblastn\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mhelp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'blastn' is not defined" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -help" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "USAGE\r\n", " blastn [-h] [-help] [-import_search_strategy filename]\r\n", " [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n", " [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n", " [-negative_gilist filename] [-entrez_query entrez_query]\r\n", " [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n", " [-subject 
subject_input_file] [-subject_loc range] [-query input_file]\r\n", " [-out output_file] [-evalue evalue] [-word_size int_value]\r\n", " [-gapopen open_penalty] [-gapextend extend_penalty]\r\n", " [-perc_identity float_value] [-xdrop_ungap float_value]\r\n", " [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n", " [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n", " [-penalty penalty] [-reward reward] [-no_greedy]\r\n", " [-min_raw_gapped_score int_value] [-template_type type]\r\n", " [-template_length int_value] [-dust DUST_options]\r\n", " [-filtering_db filtering_database]\r\n", " [-window_masker_taxid window_masker_taxid]\r\n", " [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n", " [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n", " [-best_hit_score_edge float_value] [-window_size int_value]\r\n", " [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n", " [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n", " [-outfmt format] [-show_gis] [-num_descriptions int_value]\r\n", " [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n", " [-num_threads int_value] [-remote] [-version]\r\n", "\r\n", "DESCRIPTION\r\n", " Nucleotide-Nucleotide BLAST 2.2.29+\r\n", "\r\n", "OPTIONAL ARGUMENTS\r\n", " -h\r\n", " Print USAGE and DESCRIPTION; ignore all other parameters\r\n", " -help\r\n", " Print USAGE, DESCRIPTION and ARGUMENTS; ignore all other parameters\r\n", " -version\r\n", " Print version number; ignore other arguments\r\n", "\r\n", " *** Input query options\r\n", " -query <File_In>\r\n", " Input file name\r\n", " Default = `-'\r\n", " -query_loc <String>\r\n", " Location on the query sequence in 1-based offsets (Format: start-stop)\r\n", " -strand <String, `both', `minus', `plus'>\r\n", " Query strand(s) to search against database/subject\r\n", " Default = `both'\r\n", "\r\n", " *** General search options\r\n", " -task <String, 
Permissible values: 'blastn' 'blastn-short' 'dc-megablast'\r\n", " 'megablast' 'rmblastn' >\r\n", " Task to execute\r\n", " Default = `megablast'\r\n", " -db <String>\r\n", " BLAST database name\r\n", " * Incompatible with: subject, subject_loc\r\n", " -out <File_Out>\r\n", " Output file name\r\n", " Default = `-'\r\n", " -evalue <Real>\r\n", " Expectation value (E) threshold for saving hits \r\n", " Default = `10'\r\n", " -word_size <Integer, >=4>\r\n", " Word size for wordfinder algorithm (length of best perfect match)\r\n", " -gapopen <Integer>\r\n", " Cost to open a gap\r\n", " -gapextend <Integer>\r\n", " Cost to extend a gap\r\n", " -penalty <Integer, <=0>\r\n", " Penalty for a nucleotide mismatch\r\n", " -reward <Integer, >=0>\r\n", " Reward for a nucleotide match\r\n", " -use_index <Boolean>\r\n", " Use MegaBLAST database index\r\n", " Default = `false'\r\n", " -index_name <String>\r\n", " MegaBLAST database index name\r\n", "\r\n", " *** BLAST-2-Sequences options\r\n", " -subject <File_In>\r\n", " Subject sequence(s) to search\r\n", " * Incompatible with: db, gilist, seqidlist, negative_gilist,\r\n", " db_soft_mask, db_hard_mask\r\n", " -subject_loc <String>\r\n", " Location on the subject sequence in 1-based offsets (Format: start-stop)\r\n", " * Incompatible with: db, gilist, seqidlist, negative_gilist,\r\n", " db_soft_mask, db_hard_mask, remote\r\n", "\r\n", " *** Formatting options\r\n", " -outfmt <String>\r\n", " alignment view options:\r\n", " 0 = pairwise,\r\n", " 1 = query-anchored showing identities,\r\n", " 2 = query-anchored no identities,\r\n", " 3 = flat query-anchored, show identities,\r\n", " 4 = flat query-anchored, no identities,\r\n", " 5 = XML Blast output,\r\n", " 6 = tabular,\r\n", " 7 = tabular with comment lines,\r\n", " 8 = Text ASN.1,\r\n", " 9 = Binary ASN.1,\r\n", " 10 = Comma-separated values,\r\n", " 11 = BLAST archive format (ASN.1) \r\n", " \r\n", " Options 6, 7, and 10 can be additionally configured to produce\r\n", " a 
custom format specified by space delimited format specifiers.\r\n", " The supported format specifiers are:\r\n", " \t qseqid means Query Seq-id\r\n", " \t qgi means Query GI\r\n", " \t qacc means Query accesion\r\n", " \t qaccver means Query accesion.version\r\n", " \t qlen means Query sequence length\r\n", " \t sseqid means Subject Seq-id\r\n", " \t sallseqid means All subject Seq-id(s), separated by a ';'\r\n", " \t sgi means Subject GI\r\n", " \t sallgi means All subject GIs\r\n", " \t sacc means Subject accession\r\n", " \t saccver means Subject accession.version\r\n", " \t sallacc means All subject accessions\r\n", " \t slen means Subject sequence length\r\n", " \t qstart means Start of alignment in query\r\n", " \t qend means End of alignment in query\r\n", " \t sstart means Start of alignment in subject\r\n", " \t send means End of alignment in subject\r\n", " \t qseq means Aligned part of query sequence\r\n", " \t sseq means Aligned part of subject sequence\r\n", " \t evalue means Expect value\r\n", " \t bitscore means Bit score\r\n", " \t score means Raw score\r\n", " \t length means Alignment length\r\n", " \t pident means Percentage of identical matches\r\n", " \t nident means Number of identical matches\r\n", " \t mismatch means Number of mismatches\r\n", " \t positive means Number of positive-scoring matches\r\n", " \t gapopen means Number of gap openings\r\n", " \t gaps means Total number of gaps\r\n", " \t ppos means Percentage of positive-scoring matches\r\n", " \t frames means Query and subject frames separated by a '/'\r\n", " \t qframe means Query frame\r\n", " \t sframe means Subject frame\r\n", " \t btop means Blast traceback operations (BTOP)\r\n", " \t staxids means unique Subject Taxonomy ID(s), separated by a ';'\r\n", " \t\t\t (in numerical order)\r\n", " \t sscinames means unique Subject Scientific Name(s), separated by a ';'\r\n", " \t scomnames means unique Subject Common Name(s), separated by a ';'\r\n", " \tsblastnames means unique 
Subject Blast Name(s), separated by a ';'\r\n", " \t\t\t (in alphabetical order)\r\n", " \tsskingdoms means unique Subject Super Kingdom(s), separated by a ';'\r\n", " \t\t\t (in alphabetical order) \r\n", " \t stitle means Subject Title\r\n", " \tsalltitles means All Subject Title(s), separated by a '<>'\r\n", " \t sstrand means Subject Strand\r\n", " \t qcovs means Query Coverage Per Subject\r\n", " \t qcovhsp means Query Coverage Per HSP\r\n", " When not provided, the default value is:\r\n", " 'qseqid sseqid pident length mismatch gapopen qstart qend sstart send\r\n", " evalue bitscore', which is equivalent to the keyword 'std'\r\n", " Default = `0'\r\n", " -show_gis\r\n", " Show NCBI GIs in deflines?\r\n", " -num_descriptions <Integer, >=0>\r\n", " Number of database sequences to show one-line descriptions for\r\n", " Not applicable for outfmt > 4\r\n", " Default = `500'\r\n", " * Incompatible with: max_target_seqs\r\n", " -num_alignments <Integer, >=0>\r\n", " Number of database sequences to show alignments for\r\n", " Default = `250'\r\n", " * Incompatible with: max_target_seqs\r\n", " -html\r\n", " Produce HTML output?\r\n", "\r\n", " *** Query filtering options\r\n", " -dust <String>\r\n", " Filter query sequence with DUST (Format: 'yes', 'level window linker', or\r\n", " 'no' to disable)\r\n", " Default = `20 64 1'\r\n", " -filtering_db <String>\r\n", " BLAST database containing filtering elements (i.e.: repeats)\r\n", " -window_masker_taxid <Integer>\r\n", " Enable WindowMasker filtering using a Taxonomic ID\r\n", " -window_masker_db <String>\r\n", " Enable WindowMasker filtering using this repeats database.\r\n", " -soft_masking <Boolean>\r\n", " Apply filtering locations as soft masks\r\n", " Default = `true'\r\n", " -lcase_masking\r\n", " Use lower case filtering in query and subject sequence(s)?\r\n", "\r\n", " *** Restrict search or results\r\n", " -gilist <String>\r\n", " Restrict search of database to list of GI's\r\n", " * Incompatible with: 
negative_gilist, seqidlist, remote, subject,\r\n", " subject_loc\r\n", " -seqidlist <String>\r\n", " Restrict search of database to list of SeqId's\r\n", " * Incompatible with: gilist, negative_gilist, remote, subject,\r\n", " subject_loc\r\n", " -negative_gilist <String>\r\n", " Restrict search of database to everything except the listed GIs\r\n", " * Incompatible with: gilist, seqidlist, remote, subject, subject_loc\r\n", " -entrez_query <String>\r\n", " Restrict search with the given Entrez query\r\n", " * Requires: remote\r\n", " -db_soft_mask <String>\r\n", " Filtering algorithm ID to apply to the BLAST database as soft masking\r\n", " * Incompatible with: db_hard_mask, subject, subject_loc\r\n", " -db_hard_mask <String>\r\n", " Filtering algorithm ID to apply to the BLAST database as hard masking\r\n", " * Incompatible with: db_soft_mask, subject, subject_loc\r\n", " -perc_identity <Real, 0..100>\r\n", " Percent identity\r\n", " -culling_limit <Integer, >=0>\r\n", " If the query range of a hit is enveloped by that of at least this many\r\n", " higher-scoring hits, delete the hit\r\n", " * Incompatible with: best_hit_overhang, best_hit_score_edge\r\n", " -best_hit_overhang <Real, (>=0 and =<0.5)>\r\n", " Best Hit algorithm overhang value (recommended value: 0.1)\r\n", " * Incompatible with: culling_limit\r\n", " -best_hit_score_edge <Real, (>=0 and =<0.5)>\r\n", " Best Hit algorithm score edge value (recommended value: 0.1)\r\n", " * Incompatible with: culling_limit\r\n", " -max_target_seqs <Integer, >=1>\r\n", " Maximum number of aligned sequences to keep \r\n", " Not applicable for outfmt <= 4\r\n", " Default = `500'\r\n", " * Incompatible with: num_descriptions, num_alignments\r\n", "\r\n", " *** Discontiguous MegaBLAST options\r\n", " -template_type <String, `coding', `coding_and_optimal', `optimal'>\r\n", " Discontiguous MegaBLAST template type\r\n", " * Requires: template_length\r\n", " -template_length <Integer, Permissible values: '16' '18' '21' 
>\r\n", " Discontiguous MegaBLAST template length\r\n", " * Requires: template_type\r\n", "\r\n", " *** Statistical options\r\n", " -dbsize <Int8>\r\n", " Effective length of the database \r\n", " -searchsp <Int8, >=0>\r\n", " Effective length of the search space\r\n", " -max_hsps <Integer, >=0>\r\n", " Set maximum number of HSPs per subject sequence to save (0 means no limit)\r\n", " Default = `0'\r\n", " -sum_statistics\r\n", " Use sum statistics\r\n", "\r\n", " *** Search strategy options\r\n", " -import_search_strategy <File_In>\r\n", " Search strategy to use\r\n", " * Incompatible with: export_search_strategy\r\n", " -export_search_strategy <File_Out>\r\n", " File name to record the search strategy used\r\n", " * Incompatible with: import_search_strategy\r\n", "\r\n", " *** Extension options\r\n", " -xdrop_ungap <Real>\r\n", " X-dropoff value (in bits) for ungapped extensions\r\n", " -xdrop_gap <Real>\r\n", " X-dropoff value (in bits) for preliminary gapped extensions\r\n", " -xdrop_gap_final <Real>\r\n", " X-dropoff value (in bits) for final gapped alignment\r\n", " -no_greedy\r\n", " Use non-greedy dynamic programming extension\r\n", " -min_raw_gapped_score <Integer>\r\n", " Minimum raw gapped score to keep an alignment in the preliminary gapped and\r\n", " traceback stages\r\n", " -ungapped\r\n", " Perform ungapped alignment only?\r\n", " -window_size <Integer, >=0>\r\n", " Multiple hits window size, use 0 to specify 1-hit algorithm\r\n", " -off_diagonal_range <Integer, >=0>\r\n", " Number of off-diagonals to search for the 2nd hit, use 0 to turn off\r\n", " Default = `0'\r\n", "\r\n", " *** Miscellaneous options\r\n", " -parse_deflines\r\n", " Should the query and subject defline(s) be parsed?\r\n", " -num_threads <Integer, >=1>\r\n", " Number of threads (CPUs) to use in the BLAST search\r\n", " Default = `1'\r\n", " * Incompatible with: remote\r\n", " -remote\r\n", " Execute search remotely?\r\n", " * Incompatible with: gilist, seqidlist, negative_gilist, 
subject_loc,\r\n", " num_threads\r\n", "\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "raw", "metadata": {}, "source": [ "Yes! It works! Can now just run BLAST without changing directories in Linux!" ] }, { "cell_type": "code", "collapsed": false, "input": [ "#let's try creating a file for the BLAST to look in the default BLAST databases (dbs)\n", "#directory on this computer (/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs)\n", "cd home" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "SyntaxError", "evalue": "invalid syntax (<ipython-input-8-42ba6ba23109>, line 3)", "output_type": "pyerr", "traceback": [ "\u001b[0;36m File \u001b[0;32m\"<ipython-input-8-42ba6ba23109>\"\u001b[0;36m, line \u001b[0;32m3\u001b[0m\n\u001b[0;31m cd home\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "cd /home" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/home" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "!makefile blastdbs.ncbirc" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: makefile: not found\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "!echo \"BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\" > blastdbs.ncbirc" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: cannot create blastdbs.ncbirc: Permission denied\r\n" ] } ], "prompt_number": 12 }, { "cell_type": "raw", "metadata": {}, "source": [ "Looks like I need to change permissions again. Can't do this in IPython. Be right back..." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "!echo \"BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\" > blastdbs.ncbirc" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "raw", "metadata": {}, "source": [ "Moved that file to /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -h" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "USAGE\r\n", " makeblastdb [-h] [-help] [-in input_file] [-input_type type]\r\n", " -dbtype molecule_type [-title database_title] [-parse_seqids]\r\n", " [-hash_index] [-mask_data mask_data_files] [-mask_id mask_algo_ids]\r\n", " [-mask_desc mask_algo_descriptions] [-gi_mask]\r\n", " [-gi_mask_name gi_based_mask_names] [-out database_name]\r\n", " [-max_file_sz number_of_bytes] [-taxid TaxID] [-taxid_map TaxIDMapFile]\r\n", " [-logfile File_Name] [-version]\r\n", "\r\n", "DESCRIPTION\r\n", " Application to create BLAST databases, version 2.2.29+\r\n", "\r\n", "Use '-help' to print detailed descriptions of command line arguments\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in 20140225_RickettsiaGBnt.fasta -dbtype nucl" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", "Building a new DB, current time: 02/25/2014 12:39:55\r\n", "New DB name: 20140225_RickettsiaGBnt.fasta\r\n", "New DB title: 20140225_RickettsiaGBnt.fasta\r\n", "Sequence type: Nucleotide\r\n", "Keep Linkouts: T\r\n", "Keep MBits: T\r\n", "Maximum file size: 1000000000B\r\n", "BLAST options error: File 20140225_RickettsiaGBnt.fasta does not exist\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta -dbtype 
nucl -out 20140225_RickettsiaGBnt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", "Building a new DB, current time: 02/25/2014 12:41:16\r\n", "New DB name: 20140225_RickettsiaGBnt\r\n", "New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta\r\n", "Sequence type: Nucleotide\r\n", "Keep Linkouts: T\r\n", "Keep MBits: T\r\n", "Maximum file size: 1000000000B\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|167' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|168' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|296' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|297' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|460' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|479' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|480' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|481' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|482' as it has no sequence data\r\n", "Error: 
(1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|483' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|519' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|520' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|521' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|522' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|541' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1762' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1763' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1764' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1792' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1795' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1805' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1997' as it has no sequence 
data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1998' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|2068' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|2354' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5394' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5471' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5565' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5578' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Adding sequences from FASTA; added 8786 sequences in 9.54842 seconds.\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "#check number of entries (i.e. 
the '>' used to provide the description\n", "#for each entry in source fasta file\n", "!awk '/>/ { count++ } END { print count }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/20140225_RickettsiaGBnt.fasta" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "8815\r\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -h" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "USAGE\r\n", " blastn [-h] [-help] [-import_search_strategy filename]\r\n", " [-export_search_strategy filename] [-task task_name] [-db database_name]\r\n", " [-dbsize num_letters] [-gilist filename] [-seqidlist filename]\r\n", " [-negative_gilist filename] [-entrez_query entrez_query]\r\n", " [-db_soft_mask filtering_algorithm] [-db_hard_mask filtering_algorithm]\r\n", " [-subject subject_input_file] [-subject_loc range] [-query input_file]\r\n", " [-out output_file] [-evalue evalue] [-word_size int_value]\r\n", " [-gapopen open_penalty] [-gapextend extend_penalty]\r\n", " [-perc_identity float_value] [-xdrop_ungap float_value]\r\n", " [-xdrop_gap float_value] [-xdrop_gap_final float_value]\r\n", " [-searchsp int_value] [-max_hsps int_value] [-sum_statistics]\r\n", " [-penalty penalty] [-reward reward] [-no_greedy]\r\n", " [-min_raw_gapped_score int_value] [-template_type type]\r\n", " [-template_length int_value] [-dust DUST_options]\r\n", " [-filtering_db filtering_database]\r\n", " [-window_masker_taxid window_masker_taxid]\r\n", " [-window_masker_db window_masker_db] [-soft_masking soft_masking]\r\n", " [-ungapped] [-culling_limit int_value] [-best_hit_overhang float_value]\r\n", " [-best_hit_score_edge float_value] [-window_size int_value]\r\n", " [-off_diagonal_range int_value] [-use_index boolean] [-index_name string]\r\n", " [-lcase_masking] [-query_loc range] [-strand strand] [-parse_deflines]\r\n", " [-outfmt format] 
[-show_gis] [-num_descriptions int_value]\r\n", " [-num_alignments int_value] [-html] [-max_target_seqs num_sequences]\r\n", " [-num_threads int_value] [-remote] [-version]\r\n", "\r\n", "DESCRIPTION\r\n", " Nucleotide-Nucleotide BLAST 2.2.29+\r\n", "\r\n", "Use '-help' to print detailed descriptions of command line arguments\r\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "#try this again, but moved the blastdbs.ncbirc file to /home/samb, as suggested by the error message in In[9]\n", "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "#added [BLAST] as first line in the blastdbs.ncbirc file\n", "!blastn -db 20140225_RickettsiaGBnt -query 
/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "#think I might actually have to specify the specific database in the .ncbirc file? Added full path to Rickettsia db in that file" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "#restarted Terminal and IPython\n", "#also restored database path to just the \"dbs\" directory; not to an actual db file\n", "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], 
"language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "pwd" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 2, "text": [ "u'/home/samb'" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "ls" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u001b[0m\u001b[01;34mBioinformaticsTools\u001b[0m/ \u001b[01;34mDocuments\u001b[0m/ PhageNGS_ID.ipynb \u001b[01;34mTemplates\u001b[0m/\r\n", "blastdbs.ncbirc \u001b[01;34mDownloads\u001b[0m/ \u001b[01;34mPictures\u001b[0m/ \u001b[01;34mUbuntu One\u001b[0m/\r\n", "blastdbs.ncbirc~ examples.desktop \u001b[01;34mPublic\u001b[0m/ \u001b[01;34mVideos\u001b[0m/\r\n", "\u001b[01;34mDesktop\u001b[0m/ \u001b[01;34mMusic\u001b[0m/ sed_and_awk_practice.ipynb\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -10 blastdbs.ncbirc" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[BLAST]\r\n", "BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "raw", "metadata": {}, "source": [ "Hmmmm... Everything looks correct, as far as I can tell from looking at the BLAST configuration documentation and other internet resources. Not sure why this isn't working." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "#copied that .ncbirc file to the root directory (/) of the computer\n", "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "BLAST Database error: No alias or index file found for nucleotide database [20140225_RickettsiaGBnt] in search path [/home/samb::]\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "raw", "metadata": {}, "source": [ "Well, I've added \"BLASTDB=/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\" to the /etc/profile.d file that I used earlier to append to the PATH. We'll see if that works. Restarting the computer." ] }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -db 20140225_RickettsiaGBnt -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt 6 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt -max_target_seqs 1 -num_threads 8" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "raw", "metadata": {}, "source": [ "Sweet! That worked! No more specifying full directories to databases or BLAST executables!!!"
] }, { "cell_type": "code", "collapsed": false, "input": [ "!head -10 /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Contig366DeNovoAssembly\tgi|311103224|ref|NC_014640.1|\t79.95\t369\t66\t5\t1\t366\t820544\t820907\t2e-69\t 265\r\n", "Contig366DeNovoAssembly\tgi|311103224|ref|NC_014640.1|\t79.95\t369\t66\t5\t1\t366\t2208847\t2209210\t2e-69\t 265\r\n", "Contig366DeNovoAssembly\tgi|311103224|ref|NC_014640.1|\t79.95\t369\t66\t5\t1\t366\t5159121\t5158758\t2e-69\t 265\r\n", "Contig2706DeNovoAssembly\tgi|390137196|gb|AJWD01000108.1|\t92.59\t54\t4\t0\t1\t54\t1637\t1584\t3e-14\t78.7\r\n", "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t3205273\t3205225\t7e-11\t67.6\r\n", "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t3854412\t3854364\t7e-11\t67.6\r\n", "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t4413608\t4413560\t7e-11\t67.6\r\n", "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t4600084\t4600036\t7e-11\t67.6\r\n", "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t4658009\t4657961\t7e-11\t67.6\r\n", "Contig4266DeNovoAssembly\tgi|60679597|ref|NC_003228.3|\t91.84\t49\t2\t2\t1\t47\t5128163\t5128115\t7e-11\t67.6\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "raw", "metadata": {}, "source": [ "OK, starting this again because the fasta file (2014_RickettsiaGBnt) count should've indicated 11,000+ entries but the awk count (see In[7] above) indicated \n", "only 8000+ entries. Have re-downloaded all Rickettsia nucleotide (nt) entries from NCBI as a fasta file. There should be 11414 entries." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "#count entries in new Rickettsia GenBank fasta file\n", "!awk '/>/ { count++ } END { print count }' /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "11414\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "raw", "metadata": {}, "source": [ "Perfect. Don't know what went wrong with the last one. Will now make BLAST database." ] }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta -dbtype nucl -out /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/bin/sh: 1: makeblastdb: Permission denied\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "raw", "metadata": {}, "source": [ "Well, that's weird. Time to check permissions on the input/output locations. Ugh."
] }, { "cell_type": "code", "collapsed": false, "input": [ "cd /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "ls -l" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "total 2428056\r\n", "-rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n", "-rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n", "-rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n", "-rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n", "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "raw", "metadata": {}, "source": [ "OK, there are no read permissions for anyone else. However, since I'm (samb) running the commands, it seems weird that it won't work.\n", "Going to change permissions to 744. BRB..." ] }, { "cell_type": "raw", "metadata": {}, "source": [ "Changed permissions to the \"dbs\" folder using: \n", "sudo chmod -R 744 /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\n", "\n", "Let's see if that worked."
] }, { "cell_type": "code", "collapsed": false, "input": [ "cd /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "ls -l" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "total 2428056\r\n", "-rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n", "-rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n", "-rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n", "-rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n", "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n" ] } ], "prompt_number": 2 }, { "cell_type": "raw", "metadata": {}, "source": [ "It didn't work! Well, that's weird. 
Tried this instead:\n", "sudo chmod 744 -R /media/B0FE4B1FFE4ADD6A/BioinformaticsTools/ncbi-blast-2.2.29+/dbs\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ls -l" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "total 2428056\r\n", "-rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n", "-rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n", "-rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n", "-rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n", "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "raw", "metadata": {}, "source": [ "Seems like the problem is possibly related to having moved the \"BioinformaticsTools\" folder to my larger partition (which is actually a Windows partition). Might have to modify how it is mounted in Linux in order to enable changes to the read/write permissions." ] }, { "cell_type": "code", "collapsed": false, "input": [ "ls -l" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "total 2428056\r\n", "-rw------- 1 samb samb 376939569 Feb 25 12:30 20140225_RickettsiaGBnt.fasta\r\n", "-rw------- 1 samb samb 1480807 Feb 25 12:41 20140225_RickettsiaGBnt.nhr\r\n", "-rw------- 1 samb samb 105588 Feb 25 12:41 20140225_RickettsiaGBnt.nin\r\n", "-rw------- 1 samb samb 92692474 Feb 25 12:41 20140225_RickettsiaGBnt.nsq\r\n", "-rw------- 1 samb samb 2015104878 Feb 27 17:14 RickettsiaGBnt20140227.fasta\r\n" ] } ], "prompt_number": 4 }, { "cell_type": "raw", "metadata": {}, "source": [ "OK, I'll deal with this later. Moved \"BioinformaticsTools\" folder back to original location (/home/samb) and\n", "updated the myenvvars.sh (in etc/profile.d). 
Thinking about it some more, the problem might be related simply to me\n", "moving the BLAST folder to a different directory, instead of re-installing it in the new, desired location. Will test this out later." ] }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta -dbtype nucl -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", "Building a new DB, current time: 02/28/2014 12:05:12\r\n", "New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227\r\n", "New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140227.fasta\r\n", "Sequence type: Nucleotide\r\n", "Keep Linkouts: T\r\n", "Keep MBits: T\r\n", "Maximum file size: 1000000000B\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|167' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|168' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|296' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|297' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|460' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", 
"Ignoring sequence 'lcl|479' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|480' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|481' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|482' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|483' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|519' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|520' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|521' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|522' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|541' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1762' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1763' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1764' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1792' as it has no sequence 
data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1795' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1805' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1997' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1998' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|2068' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|2354' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5394' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5471' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5565' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5578' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Adding sequences from FASTA; added 11385 sequences in 51.2601 seconds.\r\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -db RickettsiaGBnt20140227 -task blastn -query 
/home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt \"6 stitle std\" -max_target_seqs 1 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "!head -10 /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "gi|288926859|gb|CP000613.2| Rhodospirillum centenum SW, complete genome\tContig5DeNovoAssembly\tgi|288926859|gb|CP000613.2|\t90.00\t30\t3\t0\t67\t96\t3744736\t3744765\t0.071\t41.0\r\n", "gi|327396847|dbj|AB104413.1| Red sea bream iridovirus genomic DNA, circular physical map, complete sequence\tContig6DeNovoAssembly\tgi|327396847|dbj|AB104413.1|\t76.09\t46\t5\t1\t52\t91\t102234\t102279\t3.7\t35.6\r\n", "gi|255529916|ref|NC_013061.1| Pedobacter heparinus DSM 2366 chromosome, complete genome\tContig9DeNovoAssembly\tgi|255529916|ref|NC_013061.1|\t86.21\t29\t4\t0\t64\t92\t1205031\t1205003\t3.6\t35.6\r\n", "gi|18308982|ref|NC_003366.1| Clostridium perfringens str. 
13 chromosome, complete genome\tContig10DeNovoAssembly\tgi|18308982|ref|NC_003366.1|\t83.33\t36\t6\t0\t53\t88\t1178864\t1178899\t0.20\t39.2\r\n", "gi|42494965|emb|AJ270058.1| Arabidopsis thaliana DNA chromosome 4, short arm\tContig11DeNovoAssembly\tgi|42494965|emb|AJ270058.1|\t89.29\t28\t3\t0\t75\t102\t387185\t387212\t0.98\t37.4\r\n", "gi|386818599|ref|NZ_JH651379.1| Joostella marina DSM 19592 genomic scaffold Joomascaffold_1, whole genome shotgun sequence\tContig13DeNovoAssembly\tgi|386818599|ref|NZ_JH651379.1|\t87.10\t31\t4\t0\t3\t33\t598184\t598154\t0.071\t39.2\r\n", "gi|552562410|gb|CM000780.3| Zea mays cultivar B73 chromosome 4\tContig15DeNovoAssembly\tgi|552562410|gb|CM000780.3|\t82.05\t39\t5\t1\t26\t64\t229168348\t229168384\t0.41\t37.4\r\n", "gi|55417891|dbj|BA000014.8| Arabidopsis thaliana DNA, chromosome 3, complete sequence\tContig16DeNovoAssembly\tgi|55417891|dbj|BA000014.8|\t89.66\t29\t1\t1\t17\t43\t22640442\t22640470\t0.25\t37.4\r\n", "gi|32444162|emb|BX294142.1| Rhodopirellula baltica SH 1 complete genome; segment 10/24\tContig17DeNovoAssembly\tgi|32444162|emb|BX294142.1|\t100.00\t19\t0\t0\t50\t68\t112716\t112698\t1.6\t35.6\r\n", "gi|584450787|emb|HG916852.1| Rhizobium sp. LPU83 main chrosome complete genome\tContig19DeNovoAssembly\tgi|584450787|emb|HG916852.1|\t100.00\t19\t0\t0\t6\t24\t1422493\t1422475\t0.95\t35.6\r\n" ] } ], "prompt_number": 3 }, { "cell_type": "raw", "metadata": {}, "source": [ "Um, weird that there're entries for Arabidopsis and Zea mays... Will re-download Rickettsia nucleotides from GenBank. Ugh! Never mind! I'm an idiot!\n", "Didn't filter the initial NCBI search by Taxonomy! Doh!\n", "\n", "Only bacteria should have 10788 sequences." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "!awk '/>/ { count++ } END { print count }' /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "10788\r\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "!makeblastdb -in /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta -dbtype nucl -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "\r\n", "Building a new DB, current time: 02/28/2014 15:24:39\r\n", "New DB name: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228\r\n", "New DB title: /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/dbs/RickettsiaGBnt20140228.fasta\r\n", "Sequence type: Nucleotide\r\n", "Keep Linkouts: T\r\n", "Keep MBits: T\r\n", "Maximum file size: 1000000000B\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|167' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|168' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|296' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|297' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|460' as it has no sequence data\r\n" ] }, { "output_type": 
"stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|479' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|480' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|481' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|482' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|483' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|519' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|520' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|521' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|522' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|541' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1762' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1763' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1764' as it has no sequence data\r\n", "Error: 
(1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1792' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1795' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1805' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1997' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|1998' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|2068' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|2354' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5398' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5475' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5569' as it has no sequence data\r\n", "Error: (1431.1) FASTA-Reader: Warning: FASTA-Reader: No residues given\r\n", "Ignoring sequence 'lcl|5582' as it has no sequence data\r\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Adding sequences from FASTA; added 10759 sequences in 40.3107 seconds.\r\n" ] } ], 
"prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "!blastn -db RickettsiaGBnt20140228 -task blastn -query /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/queries/AbWithPhageToAbMasterRefNoPhageUnmappedReads.fa -outfmt \"6 stitle std\" -max_target_seqs 1 -num_threads 16 -out /home/samb/BioinformaticsTools/ncbi-blast-2.2.29+/outputs/AbWithPhageToAbMasterRefNoPhageUnmappedReadsBlastnRickettsiaGBnt.txt" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }