# VERSION: 1.0
# AUTHORS: BurningMop (burning.mop@yandex.com)
# LICENSING INFORMATION
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
from html.parser import HTMLParser

from helpers import download_file, retrieve_url
from novaprinter import prettyPrinter, anySizeToBytes


class traht(object):
    """Search plugin that scrapes result listings from ``browse.php`` on traht.org."""

    url = 'https://traht.org'
    name = 'Traht'
    supported_categories = {
        'all': 'all'
    }

    # NOTE(review): as found, this pattern starts with a bare line break and
    # the text stripped in _last_page() is a bare line break too; presumably
    # some markup was lost from these literals -- verify against the live page.
    pagination_regex = r'\n.*?<\/div>'

    class MyHtmlParser(HTMLParser):
        """Streaming parser that prints one result per row of the results table.

        Rows are read from the ``<tbody id="highlighted">`` that follows the
        element with id ``releases-table``.  Cells are counted per row:
        column 2 supplies the name and description link, column 3 the
        download link, column 5 the size and column 6 the seed/leech counts.
        """

        def error(self, message):
            """Ignore parser errors."""
            pass

        DIV, TABLE, TBODY, TR, TD, A, SPAN, I, B = ('div', 'table', 'tbody', 'tr', 'td', 'a', 'span', 'i', 'b')

        def __init__(self, url):
            """Create a parser whose links are built relative to *url*."""
            HTMLParser.__init__(self)
            self.magnet_regex = r'href=["\']magnet:.+?["\']'

            self.url = url
            self.row = {}
            self.column = 0

            # Position within the document.
            self.releaseTableFound = False
            self.insideResultTbody = False
            self.insideRow = False
            self.insideCell = False

            # "Next text node is ..." flags consumed by handle_data().
            self.shouldGetName = False
            self.shouldParseLink = True
            self.shouldGetSize = False
            self.shouldGetPeers = False
            self.shouldGetSeeds = False
            self.shouldGetLeechs = False

        @staticmethod
        def _to_count(text):
            """Return *text* without surrounding whitespace if numeric, else -1."""
            text = text.strip()
            return text if text.isnumeric() else -1

        def handle_starttag(self, tag, attrs):
            """Track table position and pick up link targets from anchors."""
            params = dict(attrs)
            elementId = params.get('id', '')

            if elementId == 'releases-table':
                self.releaseTableFound = True
                return

            if self.releaseTableFound and elementId == 'highlighted' and tag == self.TBODY:
                self.insideResultTbody = True
                return

            if self.insideResultTbody and tag == self.TR:
                self.insideRow = True
                self.column = 0
                return

            if self.insideRow and tag == self.TD:
                self.column += 1
                self.insideCell = True
                if self.column == 5:
                    self.shouldGetSize = True
                if self.column == 6:
                    self.shouldGetPeers = True
                if self.column == 7:
                    # Past the peers cell: stop waiting for any count.
                    self.shouldGetPeers = False
                    self.shouldGetSeeds = False
                    self.shouldGetLeechs = False
                return

            if not self.insideCell:
                return

            if self.column == 2 and tag == self.B:
                self.shouldGetName = True
                return

            if self.column == 2 and tag == self.A:
                href = params.get('href')
                if href:  # an anchor without href has nothing to link to
                    self.row['desc_link'] = f'{self.url}/{href}'
                return

            if self.column == 3 and tag == self.A and self.shouldParseLink:
                href = params.get('href')
                if href:
                    # Only the first anchor carrying an href is the download link.
                    self.shouldParseLink = False
                    self.row['link'] = f'{self.url}/{href}&ok='
                return

        def handle_data(self, data):
            """Store the text node in whichever row field is currently awaited."""
            if self.shouldGetName:
                self.row['name'] = data
                self.shouldGetName = False

            if self.shouldGetSize:
                self.row['size'] = data.replace(',', '.')
                self.shouldGetSize = False

            if self.shouldGetPeers:
                if "|" in data:
                    peers = data.strip().split("|")
                    self.row['seeds'] = self._to_count(peers[0])
                    if len(peers[1]) > 0:
                        self.row['leech'] = self._to_count(peers[1])
                    else:
                        # Leech count arrives in a later text node.
                        self.shouldGetLeechs = True
                else:
                    self.row['seeds'] = self._to_count(data)
                    self.row['leech'] = -1
                    self.shouldGetLeechs = True
                self.shouldGetPeers = False
                return

            if self.shouldGetSeeds:
                self.row['seeds'] = self._to_count(data)
                self.shouldGetSeeds = False

            if self.shouldGetLeechs:
                # A lone separator is not the count; keep waiting for the number.
                if data.strip() != '|':
                    self.row['leech'] = self._to_count(data)
                    self.shouldGetLeechs = False

        def handle_endtag(self, tag):
            """Close cells, rows and the results body; print each finished row."""
            if self.insideCell and tag == self.TD:
                self.insideCell = False

            if self.insideRow and tag == self.TR:
                self._finish_row()

            if self.insideResultTbody and tag == self.TBODY:
                self.insideResultTbody = False

        def _finish_row(self):
            """Print the collected row if it is a result, then reset row state."""
            row = self.row
            # Rows that yielded no link, name or size are not results; skip
            # them rather than handing prettyPrinter a partial dictionary.
            if all(key in row for key in ('link', 'name', 'size')):
                row.setdefault('seeds', -1)
                row.setdefault('leech', -1)
                row['engine_url'] = self.url
                prettyPrinter(row)

            self.column = 0
            self.row = {}
            self.insideRow = False
            self.shouldParseLink = True
            # Short rows never reach the column-7 reset, so clear pending
            # flags here to keep them from leaking into the next row.
            self.shouldGetName = False
            self.shouldGetSize = False
            self.shouldGetPeers = False
            self.shouldGetSeeds = False
            self.shouldGetLeechs = False

    def download_torrent(self, info):
        """Download *info* via ``download_file`` and print what it returns."""
        print(download_file(info))

    def _page_url(self, what, page):
        """Return the browse URL for search terms *what* and page number *page*."""
        return f'{self.url}/browse.php?search={what}&page={page}'

    def _last_page(self, html):
        """Return the last page number announced in *html*.

        Returns 0 when no pagination block matches, and 1 when a block
        matches but its fourth space-separated field is not an integer.
        """
        match = re.search(self.pagination_regex, html, re.MULTILINE)
        if match is None:
            return 0
        fields = match.group().replace('\n', '').split(' ')
        try:
            return int(fields[3])
        except (IndexError, ValueError):
            # The block exists, so at least the page in hand is valid.
            return 1

    def search(self, what, cat='all'):
        """Fetch every result page for *what* and print the parsed rows.

        *cat* is accepted for interface compatibility and is not used.
        Nothing is parsed when the first page has no pagination block.
        """
        parser = self.MyHtmlParser(self.url)
        what = what.replace('%20', '+').replace(' ', '+')

        first_page = retrieve_url(self._page_url(what, 1))
        last_page = self._last_page(first_page)

        if last_page > 0:
            parser.feed(first_page)

        for page in range(2, last_page + 1):
            parser.feed(retrieve_url(self._page_url(what, page)))

        parser.close()