#VERSION: 1.0 # AUTHORS: Larsluph (remarso59@sfr.fr) import re from html.parser import HTMLParser from helpers import retrieve_url from novaprinter import prettyPrinter class pornrips(object): url = 'https://pornrips.to' name = 'Pornrips.to (PRT)' supported_categories = { 'all': '0' } class MyHtmlParser(HTMLParser): is_in_article: bool is_in_content: bool is_in_title: bool is_in_size: bool article_data: dict size_pattern = re.compile('(\d+ ?\w+)') def __init__(self) -> None: HTMLParser.__init__(self) self.is_in_article = False self.is_in_content = False self.is_in_title = False self.is_in_size = False def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: tag_attrs = dict(attrs) if not self.is_in_article and tag == 'article' and 'post' in tag_attrs.get('class'): self.is_in_article = True self.article_data = { 'seeds': -1, 'leech': -1, 'desk_link': -1, 'engine_url': pornrips.url } return elif not self.is_in_article: return if tag == 'div' and tag_attrs.get('class') == 'wrapper-excerpt-content': self.is_in_content = True return elif self.is_in_content and tag == 'h2' and tag_attrs.get('class') == 'entry-title': self.is_in_title = True return elif self.is_in_content and tag == 'p': self.is_in_size = True return def handle_data(self, data: str) -> None: if self.is_in_title: self.is_in_title = False self.article_data['name'] = data self.article_data['link'] = f"{pornrips.url}/torrents/{self.article_data['name']}.torrent" elif self.is_in_size and re.search(self.size_pattern, data): self.is_in_size = False self.article_data['size'] = data def handle_endtag(self, tag: str) -> None: if self.is_in_article and tag == 'article': self.is_in_article = False prettyPrinter(self.article_data) def search(self, what, cat='all'): data = retrieve_url(f'https://pornrips.to/?s={what}') prt_parser = self.MyHtmlParser() prt_parser.feed(data) prt_parser.close()