#! python3

def selChrome_set_DownloadDirectory(directory):
    """Build Chrome options that save downloads into *directory* silently.

    The directory, including any missing parent directories, is created if
    it does not exist yet.

    Args:
        directory: Path of the folder Chrome should download into.

    Returns:
        A ``webdriver.ChromeOptions`` object carrying the download
        preferences, ready to be passed to ``webdriver.Chrome(options=...)``.
    """
    import os
    # Prepare the folder first; this step needs nothing from selenium.
    # makedirs(exist_ok=True) creates missing parents and tolerates the folder
    # appearing concurrently, unlike a "while not exists: mkdir" loop, which
    # raises FileNotFoundError as soon as a parent folder is missing.
    os.makedirs(directory, exist_ok=True)

    from selenium import webdriver
    options = webdriver.ChromeOptions()
    options.add_experimental_option("prefs", {
        "download.default_directory": directory,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": False,
    })
    return options
        
def selChrome_remove_IncompleteDownload(directory):
    """Delete every entry directly inside *directory* ending in ``.crdownload``.

    Args:
        directory: Folder to clean; only its immediate entries are examined.
    """
    import os
    leftovers = [
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith(".crdownload")
    ]
    for leftover in leftovers:
        os.remove(leftover)

# https://stackoverflow.com/a/51949811
# timeout : int. how many seconds to wait until timing out.
# nfiles : int. defaults to None. if provided, also wait for the expected number of files.
def selChrome_wait_DownloadtoFinish(downloadDirectory, timeout, nfiles=None,
                                    poll_interval=1):
    """Poll *downloadDirectory* until no download is in progress or time is up.

    A download counts as "in progress" while any entry in the directory ends
    with ``.crdownload``. The directory is first inspected only after one
    full ``poll_interval`` has elapsed.

    Args:
        downloadDirectory: Folder Chrome is downloading into.
        timeout: Maximum number of seconds to keep polling. With a timeout of
            zero or less the directory is never inspected and 0 is returned.
        nfiles: Optional expected number of entries in the directory. When
            given (anything other than ``None``), waiting also continues
            until the directory holds exactly that many entries.
        poll_interval: Seconds to sleep between directory checks.

    Returns:
        The accumulated sleep time in seconds (a multiple of
        ``poll_interval``), not a wall-clock measurement.

    Raises:
        ValueError: If ``poll_interval`` is not positive, since the loop
            could then never reach the timeout.
    """
    import time
    import os
    if poll_interval <= 0:
        raise ValueError("poll_interval must be positive")
    seconds = 0
    wait = True
    while wait and seconds < timeout:
        time.sleep(poll_interval)
        files = os.listdir(downloadDirectory)
        # "is not None" rather than truthiness, so an explicit nfiles=0 is
        # honoured instead of being silently treated as "not provided".
        wrong_count = nfiles is not None and len(files) != nfiles
        wait = wrong_count or any(
            fname.endswith('.crdownload') for fname in files
        )
        seconds += poll_interval
    return seconds

def _ctdisk_prompt_range(item_count):
    """Ask for a 1-based inclusive item range inside 1..item_count.

    Keeps prompting until both numbers are integers and
    ``1 <= first <= last <= item_count``. Returns ``(first, last)``.
    """
    while True:
        try:
            first = int(input("Start Download FROM Item Number #:"))
            last = int(input("Download UPTO AND INCLUDE Item Number #:"))
        except ValueError:
            # Non-numeric input: force the range check below to fail so the
            # user is asked again instead of the program crashing.
            first, last = 0, 0
        if 1 <= first <= last <= item_count:
            return first, last
        print("oops, something is not right. try again")


def ctdiskdl(ctdiskurl, folder, timeout):
    """Download a user-selected range of files from a ctdisk folder page.

    The folder page is rendered in Chrome, every paginated listing page is
    collected, and all links whose href starts with ``/file/`` become the
    download list. If the list has more than one item the user is asked for
    a range. Each selected file page is then opened in a fresh Chrome
    instance, its download button is clicked, and the function waits for the
    download to finish. A file counts as downloaded when a file with the
    link's text as its name exists in *folder* afterwards.

    Args:
        ctdiskurl: URL of the folder listing page.
        folder: Local download directory (created if missing).
        timeout: Maximum seconds to wait per file; anything ``int()`` accepts.

    Returns:
        None. Progress and a final report are printed.
    """
    import os
    import pprint
    import re
    import time
    import bs4
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import Select

    # GET RENDERED HTML, DOM IS BAD
    next_disabled_marker = "paginate_button page-item next disabled"
    browser = webdriver.Chrome()
    try:
        browser.get(ctdiskurl)
        time.sleep(4) # important! - wait for css to load
        ## display 200 links per page
        # find_element(By.CSS_SELECTOR, ...) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.3.
        select = Select(browser.find_element(
            By.CSS_SELECTOR, '#table_files_length > label > select'))
        select.select_by_value("200")
        time.sleep(4) # important! - wait for new file list to load
        ## store rendered html of every page for parsing
        pages = [browser.page_source]
        # Earlier pages never contain the marker (otherwise the loop would
        # have stopped), so checking the latest page is sufficient.
        while next_disabled_marker not in pages[-1]:
            browser.find_element(By.CSS_SELECTOR, '#table_files_next > a').click()
            time.sleep(4) # important! - wait for next page to load
            pages.append(browser.page_source)
    finally:
        # Always close the browser, even if a page element was not found.
        browser.quit()
    source_html = "".join(pages)

    # PARSE HTML
    soup = bs4.BeautifulSoup(source_html, "html.parser")
    dlitem_list = soup.find_all('a', href=re.compile(r"^/file/"))
    pprint.pprint(dlitem_list)
    dlitemls_count = len(dlitem_list)
    print("\n" + str(dlitemls_count) + " items in download list.")

    # ITEM SELECTION
    if dlitemls_count > 1:
        fi, li = _ctdisk_prompt_range(dlitemls_count)
        dlmission_list = dlitem_list[fi-1:li]
    else:
        # No prompt in this case; fi must still be bound because the failure
        # log below derives the item number from it.
        fi = 1
        dlmission_list = dlitem_list
    dlmissionls_count = len(dlmission_list)
    print("download", dlmissionls_count, "item for this mission.")

    # DOWNLOAD ITEMS
    wait_seconds = int(timeout)
    # Called here for its side effect of creating the folder, so the cleanup
    # below has a directory to list; the returned options are rebuilt per item.
    selChrome_set_DownloadDirectory(folder)
    selChrome_remove_IncompleteDownload(folder)
    faileddownload_list = []
    for num, dlitem in enumerate(dlmission_list):
        ## filter essential info
        # Read name and link through the parser instead of regex-matching
        # str(tag): the serialized tag re-escapes characters such as "&",
        # which would corrupt both the expected file name and the URL.
        dl_name = dlitem.get_text()
        dl_link = "https://545c.com" + dlitem["href"]
        dlmission_num = num + 1
        print()
        print("initiating item #%d/%d" %(dlmission_num,dlmissionls_count))

        ## download operation
        browser = webdriver.Chrome(options=selChrome_set_DownloadDirectory(folder))
        try:
            browser.get(dl_link)
            print("file name: %s" %(dl_name))
            time.sleep(4) # important! - wait for css to load
            browser.find_element(
                By.CSS_SELECTOR,
                '#main-content > div > div > div:nth-child(5) > div:nth-child(1) > div.card-body.position-relative > button'
            ).click()
            time.sleep(5) # important! wait for connection to be made
            print("starting to download item #%d/%d" %(dlmission_num,dlmissionls_count))
            ### author's notes on the wait below:
            ### - if the download failed or froze, the wait runs until timeout
            ###   and the item is then counted as a failed download.
            ### - manual PAUSE then RESUME wont break the program, but the
            ###   timeout is still in place.
            ### - manual CANCEL counts as a failed download. RETRY wont change it.
            selChrome_wait_DownloadtoFinish(folder, wait_seconds)
        finally:
            # Do not leave a Chrome instance behind if any step above raised.
            browser.quit()

        ## download result verification
        filepath = os.path.join(folder, dl_name)
        if os.path.exists(filepath):
            print("item #%d/%d: download completed." %(dlmission_num,dlmissionls_count))
        else:
            # Item number relative to the full listing, not to the selection.
            faileddownload_itemnum = str(fi + num)
            faileddownload_logentry = faileddownload_itemnum + "  " + dl_name + "  " + dl_link
            faileddownload_list.append(faileddownload_logentry)
            print("item #%d/%d: this download went wrong. file marked." %(dlmission_num,dlmissionls_count))
        selChrome_remove_IncompleteDownload(folder)

    # REPORT RESULT
    faileddownload_count = len(faileddownload_list)
    print()
    if faileddownload_count > 0:
        pprint.pprint(faileddownload_list)
        print("%d/%d download mission failed" %(faileddownload_count,dlmissionls_count))
        print("rerun the program to try download again.")
    else:
        print("download mission has been successfully completed.")
    
# Script entry point.
# NOTE(review): the call below sits at module top level, so it runs on import
# as well as on direct execution, with a hard-coded URL, folder and timeout.
#
# Disabled command-line variant, kept for reference:
#import sys
#ctdiskurl = sys.argv[1]
#folder = r(sys.argv[2])  # NOTE(review): r(...) is not callable; the r prefix only applies to string literals, use sys.argv[2] directly
#timeout = sys.argv[3]
#ctdiskdl(ctdiskurl, folder, timeout)
ctdiskdl("https://545c.com/dir/11449240-25912337-ae0576", r"C:\545dl\gb", "400")