# encoding: utf-8
# copyright: GeoDS Lab, University of Wisconsin-Madison
# authors: Yuhao Kang, Song Gao, Jinmeng Rao
"""Command-line downloader for the weekly COVID19USFlows CSV files.

The script takes a start date (and optionally an end date), checks that both
lie on the 7-day grid of Mondays between ``FIRST_VALID_MONDAY`` and
``LAST_VALID_MONDAY``, and then downloads one set of files per week for every
scale selected with ``--ct``, ``--county`` and/or ``--state``.

Example:
    python <this script> --start_year 2020 --start_month 3 --start_day 2 \
        --output_folder ./data --county --state
"""

import argparse
import functools
import os

import numpy as np  # noqa: F401  (not used below; kept from the original import list)
import pandas as pd

# First and last dates of the 7-day grid that start/end dates must fall on.
FIRST_VALID_MONDAY = "2018-01-01"
LAST_VALID_MONDAY = "2025-01-06"

# ct2ct files are fetched as numbered parts: ..._0.csv through ..._19.csv.
CT_PART_COUNT = 20

# ct2ct files live in one repository per year; other years are not mapped.
CT_REPOS = {
    "2019": "WeeklyFlows-Ct2019",
    "2020": "WeeklyFlows-Ct2020",
    "2021": "WeeklyFlows-Ct2021",
}

RAW_URL_ROOT = "https://raw.githubusercontent.com/GeoDS"

# Seconds to wait on the server before giving up on a single request.
REQUEST_TIMEOUT = 60


def build_parser():
    """Return the argument parser describing the script's command line."""
    parser = argparse.ArgumentParser(
        description='Start month, start day, and output_folder are necessary')
    parser.add_argument('--start_year', type=str, required=True, help='Start year')
    parser.add_argument('--start_month', type=str, required=True, help='Start month')
    parser.add_argument('--start_day', type=str, required=True, help='Start day')
    parser.add_argument('--end_year', type=str, help='End year')
    parser.add_argument('--end_month', type=str, help='End month')
    parser.add_argument('--end_day', type=str, help='End day')
    parser.add_argument('--output_folder', type=str, required=True,
                        help='Output folder: ./')
    parser.add_argument('--ct', action='store_true', help='ct2ct')
    parser.add_argument('--county', action='store_true', help='county2county')
    parser.add_argument('--state', action='store_true', help='state2state')
    return parser


def resolve_dates(args):
    """Return ``(start_date, end_date)`` as zero-padded ``YYYY-MM-DD`` strings.

    Each end component that was not given on the command line falls back to
    the corresponding start component, so omitting all three end arguments
    selects a single week.
    """
    start_year = str(args.start_year).zfill(4)
    start_month = str(args.start_month).zfill(2)
    start_day = str(args.start_day).zfill(2)

    end_year = start_year if args.end_year is None else str(args.end_year).zfill(4)
    end_month = start_month if args.end_month is None else str(args.end_month).zfill(2)
    end_day = start_day if args.end_day is None else str(args.end_day).zfill(2)

    return (f"{start_year}-{start_month}-{start_day}",
            f"{end_year}-{end_month}-{end_day}")


@functools.lru_cache(maxsize=None)
def valid_mondays():
    """Return the set of accepted dates as ``YYYY-MM-DD`` strings."""
    grid = pd.date_range(start=FIRST_VALID_MONDAY, end=LAST_VALID_MONDAY, freq="7D")
    return frozenset(grid.strftime("%Y-%m-%d"))


def is_valid_monday(date_str):
    """Return True if *date_str* (``YYYY-MM-DD``) is one of the accepted dates.

    Malformed strings and dates outside the supported range are simply not in
    the set, so they are reported as invalid rather than raising.
    """
    return date_str in valid_mondays()


def plan_downloads(scale, year, month, day, output_folder):
    """Work out what has to be fetched for one scale and one week.

    Args:
        scale: 'ct2ct', 'county2county' or 'state2state'.
        year, month, day: Zero-padded date components of the week.
        output_folder: Root folder the files are stored under.

    Returns:
        ``(target_dir, jobs)`` where *target_dir* is the folder the files go
        into and *jobs* is a list of ``(url, destination_path)`` pairs.

    Raises:
        ValueError: If *scale* is 'ct2ct' and no repository is mapped to *year*.
    """
    stamp = f"{year}_{month}_{day}"

    if scale != "ct2ct":
        target_dir = os.path.join(output_folder, scale)
        file_name = f"weekly_{scale}_{stamp}.csv"
        url = (f"{RAW_URL_ROOT}/COVID19USFlows-WeeklyFlows/master/"
               f"weekly_flows/{scale}/{file_name}")
        return target_dir, [(url, os.path.join(target_dir, file_name))]

    repo = CT_REPOS.get(year)
    if repo is None:
        supported = ", ".join(sorted(CT_REPOS))
        raise ValueError(
            f"No ct2ct repository is known for year {year} (supported: {supported}).")

    # ct2ct parts of one week are stored together in a per-week sub-folder.
    target_dir = os.path.join(output_folder, scale, stamp)
    jobs = []
    for part in range(CT_PART_COUNT):
        file_name = f"weekly_{scale}_{stamp}_{part}.csv"
        url = (f"{RAW_URL_ROOT}/COVID19USFlows-{repo}/master/"
               f"weekly_flows/{scale}/{stamp}/{file_name}")
        jobs.append((url, os.path.join(target_dir, file_name)))
    return target_dir, jobs


# Download files of one day
def download_file(scale, year, month, day, output_folder):
    """Download every file of one scale for the week starting on the given day.

    Args:
        scale: 'ct2ct', 'county2county' or 'state2state'.
        year, month, day: Zero-padded date components of the week.
        output_folder: Root folder; missing sub-folders are created.

    Returns:
        True if every file was downloaded and written, False otherwise. The
        reason for a failure is printed; nothing is raised for download or
        file-system errors.
    """
    try:
        target_dir, jobs = plan_downloads(scale, year, month, day, output_folder)
    except ValueError as e:
        print(e)
        return False

    try:
        os.makedirs(target_dir, exist_ok=True)
    except OSError as e:
        print(e)
        print("There is no output folder. Please create the output folder first!")
        return False

    # Imported here so that argument parsing, date validation and --help keep
    # working on machines where the package is not installed.
    import requests

    for url, destination in jobs:
        try:
            response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
            # Without this check an error page (e.g. a 404 body) would be
            # saved under a .csv name and look like a successful download.
            response.raise_for_status()
            with open(destination, 'wb') as file:
                file.write(response.content)
        except (requests.RequestException, OSError) as e:
            print(e)
            return False
    return True


def main(argv=None):
    """Parse the command line, validate the dates and run the downloads.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    start_date, end_date = resolve_dates(args)

    # Check if dates are valid (both messages are shown when both are wrong).
    is_valid_start = is_valid_monday(start_date)
    if not is_valid_start:
        print("The start date is not a Monday. Please re-enter.")
    is_valid_end = is_valid_monday(end_date)
    if not is_valid_end:
        print("The end date is not a Monday. Please re-enter.")
    if not (is_valid_start and is_valid_end):
        return

    # One entry per week from start to end, inclusive.
    weeks = pd.date_range(start=start_date, end=end_date, freq='7D')

    selected_scales = [
        scale
        for wanted, scale in (
            (args.ct, 'ct2ct'),
            (args.county, 'county2county'),
            (args.state, 'state2state'),
        )
        if wanted
    ]

    # Download files at each scale
    for scale in selected_scales:
        for week in weeks:
            year, month, day = week.strftime("%Y-%m-%d").split("-")
            download_file(scale, year, month, day, args.output_folder)


if __name__ == "__main__":
    main()