#VERSION: 1.2
#AUTHORS: Vikas Yadav (https://github.com/v1k45 | http://v1k45.com)

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#    * Redistributions of source code must retain the above copyright notice,
#      this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#    * Neither the name of the author nor the names of its contributors may be
#      used to endorse or promote products derived from this software without
#      specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function

import re

# python3 support (or python2 fallback, if you will)
try:
    from html.parser import HTMLParser
except ImportError:
    from HTMLParser import HTMLParser

from helpers import retrieve_url, download_file
from novaprinter import prettyPrinter

LEETX_DOMAIN = "https://1337x.to"


class LeetxParser(HTMLParser):
    """Extract torrent rows from the results table of a 1337x search page.

    The parser only looks at markup inside ``<tbody>``.  While a ``<tr>`` is
    open it accumulates the torrent link, name, seeds, leeches and size into
    ``current_result``; when the ``<tr>`` closes the collected dictionary is
    handed to ``prettyPrinter`` and the per-row state is reset.
    """

    # Key of ``current_result`` that incoming text is appended to, or None
    # when text should be ignored.
    current_item = None
    inside_tbody = False
    inside_row = False

    A, TBODY, TR, TD, SPAN = ('a', 'tbody', 'tr', 'td', 'span')

    def __init__(self):
        # HTMLParser is an old-style class on Python 2, so call the base
        # initialiser explicitly instead of going through super().
        HTMLParser.__init__(self)
        # Per-instance dict: a class-level ``{}`` would be shared (and
        # mutated in place) by every parser instance.
        self.current_result = {}

    def handle_starttag(self, tag, attrs):
        """Track table position and pick the field the next text belongs to."""
        # are we inside the results table body or not.
        self.inside_tbody = self.inside_tbody or tag == self.TBODY
        # if not, no need to process this tag any further.
        if not self.inside_tbody:
            return

        # convert the list of (name, value) pairs to a dictionary
        attrs = dict(attrs)

        # Valueless attributes (``<a href>``) are reported with a value of
        # None, so normalise to an empty string before using str methods.
        link = attrs.get('href') or ''

        # torrent name and link
        if tag == self.A and link.startswith('/torrent'):
            self.current_result['link'] = LEETX_DOMAIN + link
            self.current_result['desc_link'] = LEETX_DOMAIN + link
            self.current_result['engine_url'] = LEETX_DOMAIN
            self.current_item = 'name'

        # ignore the uploader name attached to the torrent size in a span tag
        if tag == self.SPAN:
            self.current_item = None

        # a td can hold seeds, leeches or size.
        if tag == self.TD:
            self.inside_row = True
            # find the appropriate data key using the class name of the td
            css_class = attrs.get('class') or ''
            for item in ('seeds', 'leeches', 'size'):
                if item in css_class:
                    self.current_item = item
                    break

    def handle_data(self, data):
        """Append text to the currently selected field, if there is one."""
        # do not process data if we are not inside the table body
        if self.inside_tbody and self.current_item:
            prev_value = self.current_result.get(self.current_item, '')
            self.current_result[self.current_item] = prev_value + data

    def handle_endtag(self, tag):
        """Close table/cell/row state and emit the row when a ``<tr>`` ends."""
        # we are exiting the table body, no data will be processed after this.
        if tag == self.TBODY:
            self.inside_tbody = False

        # exiting the table data cell; the next td or tr starts clean
        elif self.inside_tbody and self.inside_row and tag == self.TD:
            self.inside_row = False
            self.current_item = None

        # exiting the tr element, which means all necessary data for a
        # torrent has been extracted: save it and clean the parser state.
        elif self.inside_tbody and tag == self.TR:
            result = self.current_result
            # Rows without a torrent link or a leech count are incomplete;
            # skip them instead of raising KeyError.
            if 'link' in result and 'leeches' in result:
                result['leech'] = result['leeches']
                prettyPrinter(result)
            self.current_result = {}
            self.current_item = None


# NOTE(review): the HTML markup inside the two patterns below had been
# stripped from this file, leaving literals with no capture groups.  They are
# reconstructed to match how the groups are consumed further down (page number
# at groups()[1], torrent URL at groups()[2]) -- verify both against the live
# 1337x markup before relying on them.
PAGINATION_PATTERN = re.compile(r'<li class="last"><a href="/(.*?)/([0-9]+)/">Last</a></li>')  # noqa
DOWNLOAD_PATTERN = re.compile(r'<a class="(.*) btn-(.*)" target="_blank" href="(.*)"><span class="icon"><i class="flaticon-(.*)"></i></span>ITORRENTS MIRROR</a>')  # noqa


def _force_https(url):
    """Return *url* with a leading ``http://`` upgraded to ``https://``."""
    # A blanket ``replace("http", "https")`` would turn an existing
    # ``https://`` into ``httpss://`` and rewrite "http" anywhere in the URL.
    if url.startswith("http://"):
        return "https://" + url[len("http://"):]
    return url


class leetx(object):
    """Search plugin entry point for 1337x.

    Exposes the site ``url``, a display ``name``, the mapping from category
    keys to the path segment used in 1337x category URLs, and the
    ``search`` / ``download_torrent`` methods.
    """

    url = LEETX_DOMAIN
    name = "1337x"
    supported_categories = {
        'all': 'All',
        'movies': 'Movies',
        'tv': 'TV',
        'music': 'Music',
        'games': 'Games',
        'anime': 'Anime',
        'software': 'Apps'
    }

    def download_torrent(self, info):
        """Fetch the torrent page at *info* and download its .torrent file.

        Prints whatever ``download_file`` returns for the extracted link, or
        an empty line when no link is found on the page.
        """
        # since 1337x does not provide torrent links in the search results,
        # we have to fetch the page, extract the torrent link and then call
        # the download_file function on it.
        torrent_page = retrieve_url(info)
        torrent_link_match = DOWNLOAD_PATTERN.search(torrent_page)
        groups = torrent_link_match.groups() if torrent_link_match else ()
        if len(groups) > 2:
            print(download_file(_force_https(groups[2])))
        else:
            print('')

    def search(self, what, cat='all'):
        """Search 1337x for *what* and print every result found.

        *what* is inserted into the URL as-is.  *cat* is matched
        case-insensitively against ``supported_categories``; an unknown
        category raises ``KeyError``.
        """
        cat = cat.lower()

        # decide which type of search to perform based on category
        search_page = "search" if cat == 'all' else 'category-search'
        search_url = "{url}/{search_page}/{search_query}/".format(
            url=self.url, search_page=search_page, search_query=what)

        # apply search category to url, if any.
        if cat != 'all':
            search_url += self.supported_categories[cat] + "/"

        # download the first page
        data = retrieve_url(search_url + "1/")

        # the "Last" pagination link carries the number of the final page;
        # without it there is only the page we already have.
        last_page = 1
        pagination_match = PAGINATION_PATTERN.search(data)
        if pagination_match:
            groups = pagination_match.groups()
            if len(groups) > 1 and groups[1].isdigit():
                last_page = int(groups[1])

        parser = LeetxParser()
        parser.feed(data)
        parser.close()

        # Page 1 is done, so continue with 2..last_page inclusive.  range()
        # excludes its end value, hence ``last_page + 1`` (the previous
        # ``+ 2`` requested one page beyond the last one).
        for current_page in range(2, last_page + 1):
            data = retrieve_url(search_url + str(current_page) + "/")
            parser.feed(data)
            parser.close()