# VERSION: 1.02
# AUTHORS: BurningMop (burning.mop@yandex.com)

# LICENSING INFORMATION
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import re
import json
import time
import threading
import urllib.request
from html.parser import HTMLParser

from helpers import download_file, retrieve_url
from novaprinter import prettyPrinter


class mypornclub(object):
    url = "https://myporn.club"
    name = "MyPorn Club"
    supported_categories = {"all": "all"}

    # Matches the listing's pagination marker, e.g. "<b>Page 1 of 12</b>".
    # NOTE: the wrapper tag ("<b>") is an assumption; whatever tag the site
    # actually uses must also be stripped in search() below.
    pagination_regex = r"<b>Page\s\d\sof\s\d+</b>"
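
    # Each completed result row is handed to prettyPrinter() from
    # qBittorrent's novaprinter module, which expects the keys: link, name,
    # size, seeds, leech, engine_url and desc_link. The parser below fills
    # self.row with exactly those keys.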
" class MyHtmlParser(HTMLParser): def error(self, message): pass DIV, A, SPAN, I, B = ("div", "a", "span", "i", "b") def __init__(self, url): HTMLParser.__init__(self) self.url = url self.row = {} self.rows = [] self.foundResults = False self.insideRow = False self.insideTorrentData = False self.insideTorrentName = False self.insideMetaData = False self.insideLabelCell = False self.insideSizeCell = False self.insideSeedCell = False self.insideLeechCell = False self.shouldAddBrackets = False self.shouldAddName = False self.web_seed = False self.magnet_regex = r'href=["\']magnet:.+?["\']' self.has_web_regex = ( r"(//sxyprn.com/post/[\da-f]*\.html)[^>]*[>]\[[lL][iI][Nn][Kk][Ss]\s*\+" ) def check_for_web_seed(self, web_page_url): id = web_page_url.split("/")[-1].split(".")[0] page = retrieve_url(web_page_url) match = re.search(r'data-vnfo=(["\'])(?P{.+?})\1', page) if match: num = 0 data1 = json.loads(match.group("data")) parts = data1[id].split("/") for c in parts[6] + parts[7]: if c.isnumeric(): num += int(c) parts[5] = str(int(parts[5]) - num) parts[1] += "8" first_url = "https://sxyprn.com" + "/".join(parts) final_url = first_url user_agent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64)" headers = {"User-Agent": user_agent} req = urllib.request.Request(url=first_url, headers=headers) with urllib.request.urlopen(req) as rf: final_url = rf.url return "&ws=" + final_url else: return None def handle_starttag(self, tag, attrs): params = dict(attrs) cssClasses = params.get("class", "") if "torrents_list" in cssClasses: self.foundResults = True return if ( self.foundResults and "torrent_element" in cssClasses and tag == self.DIV ): self.insideRow = True if ( self.insideRow and "torrent_element_text_div" in cssClasses and tag == self.DIV ): self.insideTorrentData = True if ( self.insideRow and "torrent_element_info" in cssClasses and tag == self.DIV ): self.insideMetaData = True return if ( self.insideTorrentData and "torrent_element_text_span" in cssClasses and tag == self.SPAN ): self.row["name"] = "" self.insideTorrentName = True self.shouldAddName = True if self.insideTorrentName and tag == self.B: self.shouldAddBrackets = True if self.insideTorrentName and tag == self.I: self.shouldAddBrackets = False self.shouldAddName = False if ( self.insideTorrentData and tag == self.A and "uploader_tel" not in cssClasses ): href = params.get("href") link = f"{self.url}{href}" self.row["desc_link"] = link torrent_page = retrieve_url(link) matches = re.finditer(self.magnet_regex, torrent_page, re.MULTILINE) magnet_urls = [x.group() for x in matches] self.row["link"] = magnet_urls[0].replace("'", '"').split('"')[1] _has_page = re.finditer(self.has_web_regex, torrent_page, re.MULTILINE) has_page = ["https:" + x.group(1) for x in _has_page] if has_page: self.web_seed = self.check_for_web_seed(has_page[0]) if self.web_seed: self.row["link"] = self.row["link"] + self.web_seed return if self.insideMetaData and "teis" in cssClasses: self.insideLabelCell = True def handle_data(self, data): if self.insideRow: if self.insideTorrentData and self.insideTorrentName: if self.shouldAddBrackets: self.row["name"] += f"[{data}]".strip() self.shouldAddBrackets = False return if self.shouldAddName: self.row["name"] += f" {data}".strip() return if self.insideMetaData: if self.insideSizeCell: size = data.replace(",", ".") self.row["size"] = size self.insideSizeCell = False self.insideLabelCell = False if self.insideSeedCell: self.row["seeds"] = data self.insideSeedCell = False self.insideLabelCell = False if self.insideLeechCell: 
self.row["leech"] = data self.insideLeechCell = False self.insideLabelCell = False if self.insideLabelCell: if data == "[size]:": self.insideSizeCell = True if data == "[seeders]:": self.insideSeedCell = True if data == "[leechers]:": self.insideLeechCell = True def handle_endtag(self, tag): if self.insideRow and tag == self.DIV: if self.insideTorrentData and tag == self.DIV: self.insideTorrentData = False self.insideTorrentName = False return if self.insideMetaData and tag == self.DIV: self.insideMetaData = False return self.row["engine_url"] = self.url if self.web_seed: self.row["name"] = "💥 " + self.row["name"] prettyPrinter(self.row) self.row = {} self.insideRow = False def download_torrent(self, info): print(download_file(info)) def do_search(self, page, what): parser = self.MyHtmlParser(self.url) page_url = f"{self.url}/s/{what}/seeders/{page}" retrievedHtml = retrieve_url(page_url) parser.feed(retrievedHtml) parser.close() def search(self, what, cat="all"): parser = self.MyHtmlParser(self.url) what = what.replace("%20", "-") what = what.replace(" ", "-") page = 1 page_url = f"{self.url}/s/{what}/seeders/{page}" retrievedHtml = retrieve_url(page_url) pagination_matches = re.finditer( self.pagination_regex, retrievedHtml, re.MULTILINE ) pagination_pages = [x.group() for x in pagination_matches] lastPage = int( pagination_pages[0] .replace("
", "") .replace("
", "") .split(" ")[-1] ) page += 1 parser.feed(retrievedHtml) parser.close() threads = [] while page <= lastPage: t = threading.Thread(args=(page, what), target=self.do_search) t.start() time.sleep(0.5) threads.append(t) page += 1 for t in threads: t.join()