#! python3
"""Batch-download the files listed in a ctdisk (545c.com) shared folder.

The script drives Chrome through Selenium: it first renders the folder
listing page to collect every file link, asks which items to fetch, then
opens each file page in its own browser session and clicks the download
button, waiting for Chrome's temporary ``.crdownload`` files to disappear.

``selenium`` and ``bs4`` are imported inside the functions that need them so
the pure file-system helpers stay usable without those packages installed.
"""
import os
import pprint
import re
import time

# Host that the relative "/file/..." links of the listing belong to.
_SITE_ROOT = r"https://545c.com"
# Fixed pauses (seconds) that give the site time to render / connect.
_PAGE_LOAD_WAIT = 4
_CONNECT_WAIT = 5
# Substring present in the rendered page once the "next" pager is disabled.
_LAST_PAGE_MARKER = "paginate_button page-item next disabled"
# Applied to the string form of an <a> tag: its text and its /file/ link.
_ANCHOR_TEXT_RE = re.compile(r"""(?<=>)(.*)(?=<)""")
_ANCHOR_LINK_RE = re.compile(r"""\/file\/[\s\S]*?(?=")""")


def selChrome_set_DownloadDirectory(directory):
    """Return ChromeOptions that download silently into *directory*.

    The directory is created when it does not exist yet, including any
    missing parent folders.
    """
    from selenium import webdriver

    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", {
        "download.default_directory": directory,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": False,
    })
    # os.mkdir would fail on a missing parent; exist_ok covers the race where
    # the folder appears between the check and the creation.
    if not os.path.exists(directory):
        os.makedirs(directory, exist_ok=True)
    return options


def selChrome_remove_IncompleteDownload(directory):
    """Delete every ``.crdownload`` file directly inside *directory*."""
    for entry in os.listdir(directory):
        if entry.endswith(".crdownload"):
            os.remove(os.path.join(directory, entry))


def selChrome_wait_DownloadtoFinish(downloadDirectory, timeout, nfiles=None,
                                    poll_interval=1):
    """Block until no ``.crdownload`` file is left or *timeout* is reached.

    Based on https://stackoverflow.com/a/51949811

    Args:
        downloadDirectory: folder Chrome is downloading into.
        timeout: how many seconds to wait until timing out.
        nfiles: defaults to None. If provided, also wait until the folder
            holds exactly this number of files.
        poll_interval: seconds to sleep between directory checks; must be
            positive. The default of 1 keeps the original behaviour.

    Returns:
        The time waited, counted as the sum of the poll intervals slept.

    Raises:
        ValueError: if *poll_interval* is not positive (the loop could
            otherwise never reach the timeout).
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")

    seconds = 0
    wait = True
    while wait and seconds < timeout:
        time.sleep(poll_interval)
        files = os.listdir(downloadDirectory)
        wrong_count = bool(nfiles) and len(files) != nfiles
        still_downloading = any(
            fname.endswith('.crdownload') for fname in files
        )
        wait = wrong_count or still_downloading
        seconds += poll_interval
    return seconds


def _fetch_listing_html(ctdiskurl):
    """Return the rendered HTML of every page of the folder listing."""
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import Select

    browser = webdriver.Chrome()
    try:
        browser.get(ctdiskurl)
        time.sleep(_PAGE_LOAD_WAIT)  # important! - wait for css to load

        # display 200 links per page
        length_select = Select(browser.find_element(
            By.CSS_SELECTOR, '#table_files_length > label > select'))
        length_select.select_by_value("200")
        time.sleep(_PAGE_LOAD_WAIT)  # important! - wait for new file list

        # store rendered html for parsing, then walk through all pages;
        # the marker can only show up on the most recently loaded page.
        pages = [browser.page_source]
        while _LAST_PAGE_MARKER not in pages[-1]:
            browser.find_element(
                By.CSS_SELECTOR, '#table_files_next > a').click()
            time.sleep(_PAGE_LOAD_WAIT)  # important! - wait for next page
            pages.append(browser.page_source)
    finally:
        # always release the Chrome process, even if a lookup failed
        browser.quit()
    return "".join(pages)


def _prompt_item_range(item_count):
    """Ask for a 1-based inclusive (first, last) range within *item_count*."""
    while True:
        try:
            first = int(input("Start Download FROM Item Number #:"))
            last = int(input("Download UPTO AND INCLUDE Item Number #:"))
        except ValueError:
            # non-numeric input is handled like any other invalid range
            first = last = 0
        if 1 <= first <= last <= item_count:
            return first, last
        print("oops, something is not right. try again")


def _parse_anchor(anchor_html):
    """Return (file name, absolute download-page URL) for one <a> tag."""
    dl_name = _ANCHOR_TEXT_RE.findall(anchor_html)[0]
    dl_link = _SITE_ROOT + _ANCHOR_LINK_RE.findall(anchor_html)[0]
    return dl_name, dl_link


def ctdiskdl(ctdiskurl, folder, timeout):
    """Download a user-selected range of files from a ctdisk folder.

    Args:
        ctdiskurl: URL of the shared folder listing page.
        folder: local directory the files are downloaded into.
        timeout: per-file wait limit in seconds (int or numeric string).

    Prints progress and, at the end, a list of the items whose file was not
    found in *folder* after the wait.
    """
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    import bs4

    # GET RENDERED HTML (the links only exist after the page scripts ran)
    source_html = _fetch_listing_html(ctdiskurl)

    # PARSE HTML
    soup = bs4.BeautifulSoup(source_html, "html.parser")
    dlitem_list = soup.find_all('a', href=re.compile(r"^/file/"))
    pprint.pprint(dlitem_list)
    dlitemls_count = len(dlitem_list)
    print("\n" + str(dlitemls_count) + " items in download list.")

    # ITEM SELECTION
    if dlitemls_count > 1:
        fi, li = _prompt_item_range(dlitemls_count)
        dlmission_list = dlitem_list[fi - 1:li]
    else:
        # no prompt is shown; numbering starts at item 1 so the failure log
        # below can still compute an item number
        fi = 1
        dlmission_list = dlitem_list
    dlmissionls_count = len(dlmission_list)
    print("download", dlmissionls_count, "item for this mission.")

    # DOWNLOAD ITEMS
    wait_limit = int(timeout)
    if not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)
    selChrome_remove_IncompleteDownload(folder)
    faileddownload_list = []
    for num, anchor in enumerate(dlmission_list):
        # filter essential info
        dl_name, dl_link = _parse_anchor(str(anchor))
        dlmission_num = num + 1
        print()
        print("initiating item #%d/%d" % (dlmission_num, dlmissionls_count))

        # download operation
        browser = webdriver.Chrome(
            options=selChrome_set_DownloadDirectory(folder))
        try:
            browser.get(dl_link)
            print("file name: %s" % (dl_name))
            time.sleep(_PAGE_LOAD_WAIT)  # important! - wait for css to load
            browser.find_element(
                By.CSS_SELECTOR,
                '#main-content > div > div > div:nth-child(5) > '
                'div:nth-child(1) > div.card-body.position-relative > button'
            ).click()
            time.sleep(_CONNECT_WAIT)  # important! wait for the connection
            print("starting to download item #%d/%d"
                  % (dlmission_num, dlmissionls_count))
            # A download that never starts, fails or freezes leaves no
            # finished file behind, so it is counted as failed below once
            # the wait returns. Manually pausing and resuming is fine but
            # the timeout still applies; a manual cancel counts as failed.
            selChrome_wait_DownloadtoFinish(folder, wait_limit)
        finally:
            browser.quit()

        # download result verification
        filepath = os.path.join(folder, dl_name)
        if os.path.exists(filepath):
            print("item #%d/%d: download completed."
                  % (dlmission_num, dlmissionls_count))
        else:
            faileddownload_itemnum = str(fi + num)
            faileddownload_list.append(
                faileddownload_itemnum + " " + dl_name + " " + dl_link)
            print("item #%d/%d: this download went wrong. file marked."
                  % (dlmission_num, dlmissionls_count))
            selChrome_remove_IncompleteDownload(folder)

    # REPORT RESULT
    print()
    if faileddownload_list:
        pprint.pprint(faileddownload_list)
        print("%d/%d download mission failed"
              % (len(faileddownload_list), dlmissionls_count))
        print("rerun the program to try download again.")
    else:
        print("download mission has been successfully completed.")


if __name__ == "__main__":
    # Command-line variant, currently disabled:
    # import sys
    # ctdiskurl = sys.argv[1]
    # folder = sys.argv[2]
    # timeout = sys.argv[3]
    # ctdiskdl(ctdiskurl, folder, timeout)
    ctdiskdl("https://545c.com/dir/11449240-25912337-ae0576", r"C:\545dl\gb", "400")