# -*- coding: utf-8 -*-
#VERSION: 1.2
#AUTHORS: Joost Bremmer (toost.b@gmail.com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

try:
    from HTMLParser import HTMLParser
except ImportError:
    from html.parser import HTMLParser

# import qBT modules
try:
    from novaprinter import prettyPrinter
    from helpers import retrieve_url
except ImportError:
    pass


class nyaasi(object):
    """Class used by qBittorrent to search for torrents."""

    url = 'https://nyaa.si'
    name = 'Nyaa.si'

    # Whether to use magnet links or download torrent files ###################
    #
    # Set to 'True' to use magnet links, or 'False' to use torrent files
    use_magnet_links = True
    #
    ###########################################################################

    # defines which search categories are supported by this search engine
    # and their corresponding id. Possible categories are:
    # 'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures',
    # 'books'
    supported_categories = {
        'all': '0_0',
        'anime': '1_0',
        'books': '3_0',
        'music': '2_0',
        'pictures': '5_0',
        'software': '6_0',
        'tv': '4_0',
        'movies': '4_0'}

    class NyaasiParser(HTMLParser):
        """Parses Nyaa.si browse page for search results and stores them."""

        def __init__(self, res, url, use_magnet=True):
            """Construct a nyaasi html parser.

            Parameters:
            :param list res: a list to store the results in
            :param str url: the base url of the search engine
            :param bool use_magnet: whether to link to magnet links or
                torrent files
            """
            try:
                super().__init__()
            except TypeError:
                # See: http://stackoverflow.com/questions/9698614/
                HTMLParser.__init__(self)
            self.engine_url = url
            self.use_magnet_links = use_magnet
            self.results = res
            self.curr = None
            self.td_counter = -1

        def handle_starttag(self, tag, attr):
            """Tell the parser what to do with which tags."""
            if tag == 'a':
                self.start_a(attr)

        def handle_endtag(self, tag):
            """Handle the closing of table cells."""
            if tag == 'td':
                self.start_td()

        def start_a(self, attr):
            """Handle the opening of anchor tags."""
            params = dict(attr)
            # get torrent name
            if 'title' in params and 'class' not in params \
                    and params['href'].startswith('/view/'):
                hit = {
                    'name': params['title'],
                    'desc_link': self.engine_url + params['href']}
                if not self.curr:
                    hit['engine_url'] = self.engine_url
                    self.curr = hit
            elif 'href' in params and self.curr:
                # skip unrelated links
                if not params['href'].startswith("magnet:?") and \
                        not params['href'].endswith(".torrent"):
                    return
                # check whether to use torrent files or magnet links,
                # then search for a matching download link, and move on
                if not self.use_magnet_links and \
                        params['href'].endswith(".torrent"):
                    self.curr['link'] = self.engine_url + params['href']
                    self.td_counter += 1
                elif params['href'].startswith("magnet:?") \
                        and self.use_magnet_links:
                    self.curr['link'] = params['href']
                    self.td_counter += 1

        def start_td(self):
            """Handle the opening of a table cell tag."""
            # Keep track of the cell counter
            if self.td_counter >= 0:
                self.td_counter += 1
            # Add the hit to the results,
            # then reset the counters for the next result
            if self.td_counter >= 5:
                self.results.append(self.curr)
                self.curr = None
                self.td_counter = -1

        def handle_data(self, data):
            """Extract data about the torrent."""
            # These fields matter
            if self.td_counter > 0 and self.td_counter <= 5:
                # Catch the size
                if self.td_counter == 1:
                    self.curr['size'] = data.strip()
                # Catch the seeds
                elif self.td_counter == 3:
                    try:
                        self.curr['seeds'] = int(data.strip())
                    except ValueError:
                        self.curr['seeds'] = -1
                # Catch the leechers
                elif self.td_counter == 4:
                    try:
                        self.curr['leech'] = int(data.strip())
                    except ValueError:
                        self.curr['leech'] = -1
                # The rest is not supported by prettyPrinter
                else:
                    pass

    # DO NOT CHANGE the name and parameters of this function
    # This function will be the one called by nova2.py
    def search(self, what, cat='all'):
        """Retrieve and parse engine search results by category and query.

        Parameters:
        :param what: a string with the search tokens, already escaped
            (e.g. "Ubuntu+Linux")
        :param cat: the name of a search category, see supported_categories.
        """
        url = str("{0}/?f=0&s=seeders&o=desc&c={1}&q={2}"
                  .format(self.url, self.supported_categories.get(cat), what))

        hits = []
        page = 1
        parser = self.NyaasiParser(hits, self.url, self.use_magnet_links)
        while True:
            res = retrieve_url(url + "&p={}".format(page))
            parser.feed(res)
            for each in hits:
                prettyPrinter(each)
            # Nyaa.si shows 75 results per page; fewer means the last page
            if len(hits) < 75:
                break
            del hits[:]
            page += 1
        parser.close()
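

# A minimal sketch for exercising the plugin outside qBittorrent. It stubs
# retrieve_url and prettyPrinter (normally provided by qBT's helpers and
# novaprinter modules) so search() can run standalone. The urllib fetch and
# the User-Agent header below are assumptions for local testing only and are
# not part of the qBittorrent plugin API.
if __name__ == "__main__":
    import sys

    try:
        retrieve_url
    except NameError:
        from urllib.request import Request, urlopen

        def retrieve_url(url):
            """Stub: fetch a page with a browser-like User-Agent."""
            req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
            with urlopen(req) as resp:
                return resp.read().decode('utf-8', errors='replace')

        def prettyPrinter(hit):
            """Stub: print the result dict instead of qBT's pipe format."""
            print(hit)

    # Example: python nyaasi.py Ubuntu+Linux
    engine = nyaasi()
    engine.search(sys.argv[1] if len(sys.argv) > 1 else "ubuntu")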