#!/usr/bin/env ruby
# coding: utf-8
#
# biofetch.rb : BioFetch server (interface to TogoWS)
#
#   Copyright (C) 2002-2004 KATAYAMA Toshiaki
#                 2013      GOTO Naohisa
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#

require 'cgi'
require 'erb'
require 'open-uri'
require 'fileutils'
require 'tempfile'

# maximum number of IDs accepted in a single request
# (requests with more IDs are answered with "ERROR 5")
MAX_ID_NUM = 50

# script name
SCRIPT_NAME = File.basename(__FILE__)

# full URL for this CGI
BASE_URL = "http://bioruby.org/cgi-bin/#{SCRIPT_NAME}"

# cache directory for metadata
# Note: The cache is only for metadata (database list and format list).
#       Data entries are NOT cached.
CACHE_DIR = '/tmp/biofetch_rb.cache'

# cache lifetime
CACHE_LIFETIME = 60 * 60 # 1 hour

# Minimal client for the TogoWS REST service.
#
# Metadata requests (database list, format list) go through a small
# file cache under CACHE_DIR; entry requests use togows_get directly
# and are never cached.
module TogoWS

  TOGOWS_URL = 'http://togows.dbcls.jp/'

  # Fetches "/entry/" (through the cache) and parses the response.
  #
  # Returns an Array of Arrays of Strings: one inner array per response
  # line, split on TAB.  Returns [] when nothing could be fetched.
  # (Presumably each line is a database name followed by its synonyms;
  # verify against the TogoWS documentation.)
  def togows_database_complete_list
    result = togows_get_cached('/entry/')
    result.to_s.split(/\n/).collect { |x| x.split(/\t/) }
  end

  # Fetches the format listing for +db+ (through the cache).
  #
  # Returns the raw response body as a String, or nil when the request
  # failed.
  def togows_database_formats(db)
    togows_get_cached("/entry/#{CGI.escape(db)}/?formats")
  end

  # Performs a GET request for +path+ relative to TOGOWS_URL.
  #
  # Returns the response body as a String, or nil when the server
  # answered with an HTTP error status.  Other failures (for example
  # network errors) are not rescued here and propagate to the caller.
  def togows_get(path)
    uristr = TOGOWS_URL + path
    begin
      # block form so that the underlying IO is always closed
      OpenURI.open_uri(uristr) { |io| io.read }
    rescue OpenURI::HTTPError
      nil
    end
  end

  private

  # Same as togows_get, but answers from a file under CACHE_DIR when a
  # copy younger than CACHE_LIFETIME seconds exists, and stores freshly
  # fetched responses there.
  #
  # Returns the response body as a String, or nil when no valid cache
  # exists and the request failed.
  # Raises SecurityError when CACHE_DIR is writable by group or others.
  def togows_get_cached(path)
    # Flatten the request path into a single file name:
    # strip leading/trailing "/", then "/" => " " and the first "?" => "_".
    filepath = path.sub(/\A\//, '').sub(/\/\z/, '')
    filepath = filepath.gsub(/\//, " ")
    filepath = filepath.sub(/\?/, '_')
    filepath = File.join(CACHE_DIR, filepath)

    result = nil
    begin
      if Time.now - File.mtime(filepath) > CACHE_LIFETIME
        # delete expired cache file; it must not be used
        File.delete(filepath)
      else
        result = File.read(filepath)
      end
    rescue IOError, SystemCallError
      # missing or unreadable cache file: treat as "not cached"
      result = nil
    end
    return result if result

    # valid cache is not found
    result = togows_get(path)
    return result unless result

    # create cache directory if not found
    FileUtils.mkdir_p(CACHE_DIR, :mode => 0700)
    # simple security check for the cache dir
    if File.stat(CACHE_DIR).mode & 0022 != 0
      raise SecurityError, "CACHE_DIR #{CACHE_DIR} is writeable by others"
    end

    # Write to a temporary file first and then hard-link it to the final
    # name, so that a cache file only ever appears with complete content.
    tmp = Tempfile.open('temp', CACHE_DIR)
    begin
      tmp.print result
      tmp.close
      begin
        File.link(tmp.path, filepath)
      rescue IOError, SystemCallError
        # Best effort: the link fails e.g. when the cache file already
        # exists.  The fetched result is still returned.
      end
    ensure
      # remove the temporary name now instead of waiting for process
      # exit; the hard link (if created) keeps the data
      tmp.close!
    end

    result
  end

end #module TogoWS

# Output helpers.  Every method in this module prints a complete CGI
# response to standard output and then terminates the process with
# +exit+, so none of them returns to its caller.
module BioFetchError

  # Prints +str+ as a text/plain response and exits.
  def print_text_page(str)
    print "Content-type: text/plain; charset=UTF-8\n\n"
    puts str
    exit
  end

  # Prints +str+ (HTML-escaped) as a text/html response and exits.
  #
  # NOTE(review): the <pre>...</pre> wrapper is a reconstruction.  The
  # markup inside these string literals had been stripped, leaving only
  # line breaks; <pre> keeps the line layout of the text in a browser.
  # Confirm against the upstream script.
  def print_html_page(str)
    print "Content-type: text/html; charset=UTF-8\n\n"
    print "<pre>", CGI.escapeHTML(str), "</pre>\n"
    exit
  end

  # ERROR 1: unknown database.
  def error1(db)
    db = CGI.escapeHTML(db.to_s) # to avoid potential XSS with old IE
    str = "ERROR 1 Unknown database [#{db}]."
    print_text_page(str)
  end

  # ERROR 2: unknown style.
  def error2(style)
    style = CGI.escapeHTML(style.to_s) # to avoid potential XSS with old IE
    str = "ERROR 2 Unknown style [#{style}]."
    print_text_page(str)
  end

  # ERROR 3: format not available for the database.
  def error3(format, db)
    # to avoid potential XSS with old IE which ignores Content-Type
    db = CGI.escapeHTML(db.to_s)
    format = CGI.escapeHTML(format.to_s)
    str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
    print_text_page(str)
  end

  # ERROR 4: entry ID not found in the database.
  def error4(entry_id, db)
    # to avoid potential XSS with old IE which ignores Content-Type
    entry_id = CGI.escapeHTML(entry_id.to_s)
    db = CGI.escapeHTML(db.to_s)
    str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
    print_text_page(str)
  end

  # ERROR 5: more than MAX_ID_NUM IDs were requested.
  def error5(count)
    # to avoid potential XSS with old IE which ignores Content-Type
    count = CGI.escapeHTML(count.to_s)
    str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
    print_text_page(str)
  end

  # ERROR 6: unknown "info" request.
  def error6(info)
    # to avoid potential XSS with old IE which ignores Content-Type
    # (the escaped value must be the one that is interpolated below)
    info = CGI.escapeHTML(info.to_s)
    str = "ERROR 6 Illegal information request [#{info}]."
    print_text_page(str)
  end

end #module BioFetchError

# Maps BioFetch operations onto TogoWS requests.
module ApiBridge

  include BioFetchError
  include TogoWS

  # Returns the parsed database list (Array of Arrays of Strings) as
  # produced by togows_database_complete_list.
  def list_databases_with_synonyms
    togows_database_complete_list
  end

  # Returns every name from list_databases_with_synonyms as one flat
  # Array of Strings.
  def list_databases
    list_databases_with_synonyms.flatten
  end

  # Fetches all entries in +id_list+ from database +db+ and returns them
  # concatenated into one String.
  #
  # +format+:: 'fasta' requests the ".fasta" representation; any other
  #            value requests the default representation.
  #
  # If an entry cannot be fetched (nil or empty response, or a body that
  # starts with "Error: "), ERROR 4 is printed and the process exits.
  def bget(db, id_list, format)
    suffix = (format == 'fasta') ? '.fasta' : ''
    escaped_db = CGI.escape(db)

    results = ''
    id_list.each do |query_id|
      path = "/entry/#{escaped_db}/#{CGI.escape(query_id)}#{suffix}"
      result = togows_get(path)
      if !result || result.empty? || /\AError\: / =~ result
        # report the ID and database as given by the client,
        # not their URL-encoded forms
        error4(query_id, db)
      else
        results << result
      end
    end
    results
  end

  # Returns a true value when the format listing of +db+ contains a line
  # that is exactly "fasta", nil otherwise.
  def check_fasta_ok?(db)
    result = togows_database_formats(db)
    # ^ and $ are intentionally per-line: the listing is matched line by line
    /^fasta$/ =~ result.to_s
  end

end #module ApiBridge

# Request parameter validation.  Each check prints the corresponding
# BioFetch error page and exits when validation fails.
module BioFetchCheck

  include ApiBridge

  private

  # Returns the down-cased style ('html' or 'raw'); ERROR 2 otherwise.
  def check_style(style)
    style = style.to_s.downcase
    error2(style) unless /\A(html|raw)\z/.match(style)
    style
  end

  # Returns the normalized format name ('default' or 'fasta').
  # nil is treated as 'default'.  'fasta' is accepted only when +db+ is
  # a known database that lists the fasta format.  ERROR 3 otherwise.
  def check_format(format, db)
    fmt = format ? format.to_s.downcase : nil
    case fmt
    when 'fasta'
      db = check_dbname(db)
      fmt = nil unless check_fasta_ok?(db)
    when 'default'
      # do nothing
    when nil
      fmt = 'default'
    else
      fmt = nil
    end
    error3(format, db) unless fmt
    fmt
  end

  # ERROR 5 when +num+ exceeds MAX_ID_NUM.
  def check_number_of_id(num)
    error5(num) if num > MAX_ID_NUM
  end

  # Returns the down-cased database name; ERROR 1 when it is not
  # contained in list_databases.
  def check_dbname(db)
    db = db.to_s.downcase
    error1(db) unless list_databases.include?(db)
    db
  end

end #module BioFetchCheck

# Handles an entry retrieval request.  All work happens in the
# constructor, which prints the response and exits the process.
class BioFetch

  include BioFetchCheck
  include BioFetchError
  include ApiBridge

  # Validates the parameters, fetches the entries and prints them as an
  # HTML page (style 'html') or as plain text (style 'raw').
  def initialize(db, id_list, style, format)
    style = check_style(style)
    format = check_format(format, db)
    check_number_of_id(id_list.length)
    db = check_dbname(db)

    entries = bget(db, id_list, format)

    if style == 'html'
      print_html_page(entries)
    else
      print_text_page(entries)
    end
  end

end #class BioFetch

# Handles an "info" request (dbs, formats or maxids).  All work happens
# in the constructor, which prints the response and exits the process.
class BioFetchInfo

  include BioFetchCheck
  include BioFetchError
  include ApiBridge

  # Dispatches to the private method named by +info+ when it is one of
  # the allowed requests; prints ERROR 6 otherwise.
  def initialize(info, db)
    @db = db
    # Dispatch only after whitelisting.  No blanket rescue here, so that
    # failures inside a handler are not misreported as ERROR 6.
    if check_info(info)
      __send__(info)
    else
      error6(info)
    end
  end

  private

  # Returns a true value when +meth_name+ is an allowed info request.
  def check_info(meth_name)
    /\A(dbs|formats|maxids)\z/ =~ meth_name
  end

  # Prints the sorted database names separated by spaces.
  def dbs
    str = list_databases.sort.join(' ')
    print_text_page(str)
  end

  # Prints the formats available for @db: "default", followed by
  # " fasta" when the database lists it.
  def formats
    db = check_dbname(@db)
    fasta = " fasta" if check_fasta_ok?(db)
    str = "default#{fasta}"
    print_text_page(str)
  end

  # Prints MAX_ID_NUM.
  def maxids
    str = MAX_ID_NUM.to_s
    print_text_page(str)
  end

end #class BioFetchInfo

# CGI front end: inspects the request parameters and shows the query
# form, an info page, or the requested entries.
class BioFetchCGI

  include ApiBridge

  # +cgi+ is the CGI object of the current request.  The response is
  # produced immediately.
  def initialize(cgi)
    @cgi = cgi
    show_page
  end

  private

  # An "info" parameter takes precedence; without IDs the query form is
  # shown.
  def show_page
    if info.empty?
      if id_list.empty?
        show_query_page
      else
        show_result_page(db, id_list, style, format)
      end
    else
      show_info_page(info, db)
    end
  end

  # Renders the ERB template stored after __END__.
  def show_query_page
    html = ERB.new(DATA.read)
    # The locals below are not used directly here; they are made
    # available to the template through +binding+.
    max_id_num = MAX_ID_NUM
    databases_with_synonyms = list_databases_with_synonyms
    databases = list_databases
    script_name = SCRIPT_NAME
    base_url = BASE_URL
    @cgi.out({ "type" => "text/html", "charset" => "utf-8" }) do
      html.result(binding)
    end
  end

  def show_result_page(db, id_list, style, format)
    BioFetch.new(db, id_list, style, format)
  end

  def show_info_page(info, db)
    BioFetchInfo.new(info, db)
  end

  # Down-cased "info" parameter.
  def info
    @cgi['info'].downcase
  end

  # Down-cased "db" parameter.
  def db
    @cgi['db'].downcase
  end

  # "id" parameter split on commas and/or whitespace.
  def id_list
    @cgi['id'].strip.split(/[\,\s]+/)
  end

  # Down-cased "style" parameter; "html" when not given.
  def style
    s = @cgi['style'].downcase
    s.empty? ? "html" : s
  end

  # Down-cased "format" parameter; "default" when not given.
  def format
    f = @cgi['format'].downcase
    f.empty? ? "default" : f
  end

end

BioFetchCGI.new(CGI.new)

=begin

This program was created during BioHackathon 2002, Tucson and updated
in Cape Town :)

Rewritten in 2013 to use TogoWS API as the bioruby.org server left from
The University of Tokyo and the old SOAP-based KEGG API is discontinued.

=end

__END__
BioFetch interface to TogoWS

BioFetch interface to TogoWS

This page allows you to retrieve up to <%= max_id_num %> entries at a time from various up-to-date biological databases.

Direct access

(NOTE: the option separator ';' can be '&')

format (optional): default|fasta|...
style (required): html|raw
db (required): <%= databases.join('|') %>
id (required): comma separated list of IDs

See the BioFetch specification for more details.

Server information

What databases are available?: <%= base_url %>?info=dbs
What formats does the database X have?: <%= base_url %>?info=formats;db=embl
How many entries can be retrieved simultaneously?: <%= base_url %>?info=maxids

Examples

nuccore/AJ617376 (default/raw): <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376
nuccore/AJ617376 (fasta/raw): <%= base_url %>?format=fasta;style=raw;db=nuccore;id=AJ617376
nuccore/AJ617376 (default/html): <%= base_url %>?format=default;style=html;db=nuccore;id=AJ617376
nuccore/AJ617376,AJ617377 (default/raw, multiple): <%= base_url %>?format=default;style=raw;db=nuccore;id=AJ617376,AJ617377
embl/J00231 (default/raw): <%= base_url %>?format=default;style=raw;db=embl;id=J00231
uniprot/CYC_BOVIN (default/raw): <%= base_url %>?format=default;style=raw;db=uniprot;id=CYC_BOVIN
uniprot/CYC_BOVIN (fasta/raw): <%= base_url %>?format=fasta;style=raw;db=uniprot;id=CYC_BOVIN
genes/eco:b0015 (default/raw): <%= base_url %>?format=default;style=raw;db=genes;id=eco%3Ab0015; <%= base_url %>?format=default;style=raw;db=genes;id=eco:b0015

Errors

Error1 sample : DB not found: <%= base_url %>?format=default;style=raw;db=nonexistent;id=AJ617376
Error2 sample : unknown style: <%= base_url %>?format=default;style=nonexistent;db=nuccore;id=AJ617376
Error3 sample : unknown format: <%= base_url %>?format=nonexistent;style=raw;db=nuccore;id=AJ617376
Error4 sample : ID not found: <%= base_url %>?format=default;style=raw;db=nuccore;id=nonexistent
Error5 sample : too many IDs: <%= base_url %>?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
Error6 sample : unknown info: <%= base_url %>?info=nonexistent

Other BioFetch implementations

dbfetch at EBI

staff@BioRuby.org