#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# MODULE:    r.in.usgs
#
# AUTHOR:    Zechariah Krautwurst
#
# MENTORS:   Anna Petrasova
#            Vaclav Petras
#
# PURPOSE:   Download user-requested products through the USGS TNM API.
#
# COPYRIGHT: (C) 2017 Zechariah Krautwurst and the GRASS Development Team
#
#            This program is free software under the GNU General Public
#            License (>=v2). Read the file COPYING that comes with GRASS
#            for details.

#%module
#% description: Download user-requested products through the USGS TNM API
#% keyword: import
#% keyword: raster
#% keyword: USGS
#% keyword: NED
#% keyword: NLCD
#%end

#%flag
#% key: i
#% label: Return USGS data information without downloading files
#% description: Return USGS data information without downloading files
#% guisection: USGS Data Selection
#%end

#%option
#% key: product
#% required: yes
#% options: ned,nlcd
#% label: USGS data product
#% description: Available USGS data products to query
#% guisection: USGS Data Selection
#%end

#%option
#% key: ned_dataset
#% required: no
#% options: 1 arc-second,1/3 arc-second,1/9 arc-second
#% answer: 1/3 arc-second
#% label: NED dataset
#% description: Available NED datasets to query
#% guisection: NED
#%end

#%option
#% key: nlcd_dataset
#% required: no
#% options: nlcd2001,nlcd2006,nlcd2011
#% answer: nlcd2011
#% label: NLCD dataset
#% description: Available NLCD datasets to query
#% descriptions: nlcd2001;National Land Cover Dataset - 2001;nlcd2006;National Land Cover Dataset - 2006;nlcd2011;National Land Cover Dataset - 2011
#% guisection: NLCD
#%end

#%option
#% key: nlcd_subset
#% required: no
#% options: land_cover,impervious,canopy
#% answer: land_cover
#% label: NLCD subset
#% description: Available NLCD subsets to query
#% descriptions: impervious;Percent Developed Imperviousness;canopy;Percent Tree Canopy;land_cover;Land Cover
#% guisection: NLCD
#%end

## Currently unavailable ##
##%option
##% key: naip_dataset
##% required: no
##% options: Imagery - 1 meter (NAIP)
##% answer: Imagery - 1 meter (NAIP)
##% label: NAIP dataset
##% description: Available NAIP datasets to query
##% guisection: NAIP
##%end

## Currently unavailable ##
##%option
##% key: ustopo_dataset
##% required: no
##% options: US Topo Current,US Topo Historical
##% answer: US Topo Current
##% label: US Topo Data
##% description: Available US Topo datasets to query
##% guisection: USTopo
##%end

#%option G_OPT_M_DIR
#% key: output_directory
#% required: yes
#% description: Directory for USGS data download and processing
#% guisection: Download Options
#%end

#%option G_OPT_R_OUTPUT
#% key: output_name
#% required: yes
#% guisection: Download Options
#%end

#%option
#% key: resampling_method
#% type: string
#% required: no
#% multiple: no
#% options: default,nearest,bilinear,bicubic,lanczos,bilinear_f,bicubic_f,lanczos_f
#% description: Resampling method to use
#% descriptions: default;default method based on product;nearest;nearest neighbor;bilinear;bilinear interpolation;bicubic;bicubic interpolation;lanczos;lanczos filter;bilinear_f;bilinear interpolation with fallback;bicubic_f;bicubic interpolation with fallback;lanczos_f;lanczos filter with fallback
#% answer: default
#% guisection: Download Options
#%end

#%flag
#% key: k
#% label: Keep extracted files after GRASS import and patch
#% description: Keep extracted files after GRASS import and patch
#% guisection: Download Options
#%end

#%rules
#% required: output_name, -i
#%end
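
# Example invocation (illustrative; assumes an active GRASS session, a raster
# named 'elevation' to set the region from, and a writable download directory):
#   g.region raster=elevation
#   r.in.usgs product=ned ned_dataset="1/3 arc-second" \
#       output_directory=/tmp/usgs output_name=ned_dem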
import sys
import os
import zipfile
import atexit
import json
import urllib
import urllib2

import grass.script as gscript
from grass.exceptions import CalledModuleError

cleanup_list = []


def main():
    # Hard-coded parameters needed for USGS datasets
    # NED and NLCD datasets fully functional
    # NAIP and US Topo not functional
    usgs_product_dict = {
        "ned": {
            'product': 'National Elevation Dataset (NED)',
            'dataset': {
                '1 arc-second': (1. / 3600, 30, 100),
                '1/3 arc-second': (1. / 3600 / 3, 10, 30),
                '1/9 arc-second': (1. / 3600 / 9, 3, 10)
            },
            'subset': {},
            'extent': [
                '1 x 1 degree',
                '15 x 15 minute'
            ],
            'format': 'IMG',
            'extension': 'img',
            'zip': True,
            'srs': 'wgs84',
            'srs_proj4': "+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs",
            'interpolation': 'bilinear',
            'url_split': '/'
        },
        "nlcd": {
            'product': 'National Land Cover Database (NLCD)',
            'dataset': {
                'National Land Cover Database (NLCD) - 2001': (1. / 3600, 30, 100),
                'National Land Cover Database (NLCD) - 2006': (1. / 3600, 30, 100),
                'National Land Cover Database (NLCD) - 2011': (1. / 3600, 30, 100)
            },
            'subset': {
                'Percent Developed Imperviousness',
                'Percent Tree Canopy',
                'Land Cover'
            },
            'extent': ['3 x 3 degree'],
            'format': 'GeoTIFF',
            'extension': 'tif',
            'zip': True,
            'srs': 'wgs84',
            'srs_proj4': "+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs",
            'interpolation': 'nearest',
            'url_split': '&FNAME='
        },
        "naip": {
            'product': 'USDA National Agriculture Imagery Program (NAIP)',
            'dataset': {
                'Imagery - 1 meter (NAIP)': (1. / 3600 / 27, 1, 3)
            },
            'subset': {},
            'extent': ['3.75 x 3.75 minute'],
            'format': 'JPEG2000',
            'extension': 'jp2',
            'zip': False,
            'srs': 'wgs84',
            'srs_proj4': "+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs",
            'interpolation': 'nearest',
            'url_split': '/'
        },
        "ustopo": {
            'product': 'US Topo',
            'dataset': {
                'US Topo Current': (1. / 3600 / 108, 4, 12),
                'US Topo Historical': (1. / 3600 / 108, 4, 12)
            },
            'subset': {},
            'extent': ['7.5 x 7.5 minute'],
            'format': 'GeoPDF',
            'extension': 'pdf',
            'zip': False,
            'srs': 'wgs84',
            'srs_proj4': "+proj=longlat +ellps=GRS80 +datum=NAD83 +no_defs",
            'interpolation': 'nearest',
            'url_split': '/'
        }
    }

    # Set GRASS GUI options and flags to Python variables
    gui_product = options['product']

    # Variables assigned from the USGS product dictionary
    nav_string = usgs_product_dict[gui_product]
    product = nav_string['product']
    product_format = nav_string['format']
    product_extension = nav_string['extension']
    product_is_zip = nav_string['zip']
    product_srs = nav_string['srs']
    product_proj4 = nav_string['srs_proj4']
    product_interpolation = nav_string['interpolation']
    product_url_split = nav_string['url_split']
    product_extent = nav_string['extent']
    gui_subset = None

    # Parameter assignments for each dataset
    if gui_product == 'ned':
        gui_dataset = options['ned_dataset']
        product_tag = product + " " + gui_dataset
        if options['ned_dataset'] == '1 arc-second':
            ned_data_abbrv = 'ned_1arc_'
        if options['ned_dataset'] == '1/3 arc-second':
            ned_data_abbrv = 'ned_13arc_'
        if options['ned_dataset'] == '1/9 arc-second':
            ned_data_abbrv = 'ned_19arc_'
    if gui_product == 'nlcd':
        gui_dataset = options['nlcd_dataset']
        if options['nlcd_dataset'] == 'nlcd2001':
            gui_dataset = 'National Land Cover Database (NLCD) - 2001'
        if options['nlcd_dataset'] == 'nlcd2006':
            gui_dataset = 'National Land Cover Database (NLCD) - 2006'
        if options['nlcd_dataset'] == 'nlcd2011':
            gui_dataset = 'National Land Cover Database (NLCD) - 2011'
        if options['nlcd_subset'] == 'land_cover':
            gui_subset = 'Land Cover'
        if options['nlcd_subset'] == 'impervious':
            gui_subset = 'Percent Developed Imperviousness'
        if options['nlcd_subset'] == 'canopy':
            gui_subset = 'Percent Tree Canopy'
        product_tag = gui_dataset
    if gui_product == 'naip':
        gui_dataset = options['naip_dataset']
        product_tag = nav_string['product']
    if gui_product == 'ustopo':
        gui_dataset = options['ustopo_dataset']
        product_tag = nav_string['product']

    # Assign further parameters from GUI
    gui_output_layer = options['output_name']
    gui_resampling_method = options['resampling_method']
    gui_i_flag = flags['i']
    gui_k_flag = flags['k']
    work_dir = options['output_directory']

    # Pick the product resolution that matches the current location's units
    try:
        proj = gscript.parse_command('g.proj', flags='g')
        if gscript.locn_is_latlong():
            product_resolution = nav_string['dataset'][gui_dataset][0]
        elif float(proj['meters']) == 1:
            product_resolution = nav_string['dataset'][gui_dataset][1]
        else:
            # we assume feet
            product_resolution = nav_string['dataset'][gui_dataset][2]
    except TypeError:
        product_resolution = False

    if gui_resampling_method == 'default':
        gui_resampling_method = nav_string['interpolation']
        gscript.verbose(_("The default resampling method for product "
                          "{product} is {res}").format(
                              product=gui_product,
                              res=product_interpolation))

    # Get coordinates for current GRASS computational region and convert to USGS SRS
    gregion = gscript.region()
    min_coords = gscript.read_command('m.proj',
                                      coordinates=(gregion['w'], gregion['s']),
                                      proj_out=product_proj4,
                                      separator='comma',
                                      flags='d')
    max_coords = gscript.read_command('m.proj',
                                      coordinates=(gregion['e'], gregion['n']),
                                      proj_out=product_proj4,
                                      separator='comma',
                                      flags='d')
    min_list = min_coords.split(',')[:2]
    max_list = max_coords.split(',')[:2]
    list_bbox = min_list + max_list
    str_bbox = ",".join((str(coord) for coord in list_bbox))

    # Format variables for TNM API call
    gui_prod_str = str(product_tag)
    datasets = urllib.quote_plus(gui_prod_str)
    prod_format = urllib.quote_plus(product_format)
    prod_extent = urllib.quote_plus(product_extent[0])

    # Create TNM API URL
    base_TNM = "https://viewer.nationalmap.gov/tnmaccess/api/products?"
    datasets_TNM = "datasets={0}".format(datasets)
    bbox_TNM = "&bbox={0}".format(str_bbox)
    prod_format_TNM = "&prodFormats={0}".format(prod_format)
    TNM_API_URL = base_TNM + datasets_TNM + bbox_TNM + prod_format_TNM
    if gui_product == 'nlcd':
        TNM_API_URL += "&prodExtents={0}".format(prod_extent)
    gscript.verbose("TNM API Query URL:\t{0}".format(TNM_API_URL))

    # Query TNM API
    try:
        TNM_API_GET = urllib2.urlopen(TNM_API_URL, timeout=12)
    except urllib2.URLError:
        gscript.fatal("USGS TNM API query has timed out. "
                      "Check network configuration. Please try again.")
    except:
        gscript.fatal("USGS TNM API query has timed out. "
                      "Check network configuration. Please try again.")

    # Parse return JSON object from API query
    try:
        return_JSON = json.load(TNM_API_GET)
        if return_JSON['errors']:
            TNM_API_error = return_JSON['errors']
            api_error_msg = "TNM API Error - {0}".format(str(TNM_API_error))
            gscript.fatal(api_error_msg)
    except:
        gscript.fatal("Unable to load USGS JSON object.")

    # Functions down_list() and exist_list() used to determine
    # existing files and those that need to be downloaded.
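    # Note: both helpers are closures over variables assigned in the per-file
    # loop below (TNM_file_URL, TNM_file_size, local_zip_path, and the list
    # accumulators), so they are only valid when called from inside that loop.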
    def down_list():
        dwnld_url.append(TNM_file_URL)
        dwnld_size.append(TNM_file_size)
        TNM_file_titles.append(TNM_file_title)
        if product_is_zip:
            extract_zip_list.append(local_zip_path)
        if f['datasets'][0] not in dataset_name:
            if len(dataset_name) <= 1:
                dataset_name.append(str(f['datasets'][0]))

    def exist_list():
        exist_TNM_titles.append(TNM_file_title)
        exist_dwnld_url.append(TNM_file_URL)
        if product_is_zip:
            exist_zip_list.append(local_zip_path)
            extract_zip_list.append(local_zip_path)
        else:
            exist_tile_list.append(local_tile_path)

    # Assign needed parameters from returned JSON
    tile_API_count = int(return_JSON['total'])
    tiles_needed_count = 0
    size_diff_tolerance = 5
    exist_dwnld_size = 0
    if tile_API_count > 0:
        dwnld_size = []
        dwnld_url = []
        dataset_name = []
        TNM_file_titles = []
        exist_dwnld_url = []
        exist_TNM_titles = []
        exist_zip_list = []
        exist_tile_list = []
        extract_zip_list = []
        # for each file returned, assign variables to needed parameters
        for f in return_JSON['items']:
            TNM_file_title = f['title']
            TNM_file_URL = str(f['downloadURL'])
            TNM_file_size = int(f['sizeInBytes'])
            TNM_file_name = TNM_file_URL.split(product_url_split)[-1]
            if gui_product == 'ned':
                local_file_path = os.path.join(work_dir, ned_data_abbrv + TNM_file_name)
                local_zip_path = os.path.join(work_dir, ned_data_abbrv + TNM_file_name)
                local_tile_path = os.path.join(work_dir, ned_data_abbrv + TNM_file_name)
            else:
                local_file_path = os.path.join(work_dir, TNM_file_name)
                local_zip_path = os.path.join(work_dir, TNM_file_name)
                local_tile_path = os.path.join(work_dir, TNM_file_name)
            file_exists = os.path.exists(local_file_path)
            file_complete = None
            # if file exists, but is incomplete, remove file and redownload
            if file_exists:
                existing_local_file_size = os.path.getsize(local_file_path)
                # if local file is incomplete
                if abs(existing_local_file_size - TNM_file_size) > size_diff_tolerance:
                    # add file to cleanup list
                    cleanup_list.append(local_file_path)
                    # NLCD API query returns subsets that cannot be filtered before
                    # results are returned. gui_subset is used to filter results.
                    if not gui_subset:
                        tiles_needed_count += 1
                        down_list()
                    else:
                        if gui_subset in TNM_file_title:
                            tiles_needed_count += 1
                            down_list()
                        else:
                            continue
                else:
                    if not gui_subset:
                        tiles_needed_count += 1
                        exist_list()
                        exist_dwnld_size += TNM_file_size
                    else:
                        if gui_subset in TNM_file_title:
                            tiles_needed_count += 1
                            exist_list()
                            exist_dwnld_size += TNM_file_size
                        else:
                            continue
            else:
                if not gui_subset:
                    tiles_needed_count += 1
                    down_list()
                else:
                    if gui_subset in TNM_file_title:
                        tiles_needed_count += 1
                        down_list()
                        continue
    # return fatal error if API query returns no results for GUI input
    elif tile_API_count == 0:
        gscript.fatal("TNM API ERROR or Zero tiles available for given input parameters.")

    # number of files to be downloaded
    file_download_count = len(dwnld_url)

    # remove existing files from download lists
    for t in exist_TNM_titles:
        if t in TNM_file_titles:
            TNM_file_titles.remove(t)
    for url in exist_dwnld_url:
        if url in dwnld_url:
            dwnld_url.remove(url)

    # messages to user about status of files to be kept, removed, or downloaded
    if exist_zip_list:
        exist_msg = ("\n{0} of {1} files/archive(s) exist locally and will be "
                     "used by module.").format(len(exist_zip_list),
                                               tiles_needed_count)
        gscript.message(exist_msg)
    if exist_tile_list:
        exist_msg = ("\n{0} of {1} files/archive(s) exist locally and will be "
                     "used by module.").format(len(exist_tile_list),
                                               tiles_needed_count)
        gscript.message(exist_msg)
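    # Incomplete files are only queued here; the atexit-registered cleanup()
    # deletes them after the fatal exit below, so a rerun starts fresh.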
    if cleanup_list:
        cleanup_msg = ("\n{0} existing incomplete file(s) detected and removed. "
                       "Run module again.").format(len(cleanup_list))
        gscript.fatal(cleanup_msg)

    # format JSON size from bytes into needed units for combined file size
    if dwnld_size:
        total_size = sum(dwnld_size)
        len_total_size = len(str(total_size))
        if len_total_size >= 10:
            total_size_float = total_size * 1e-9
            total_size_str = str("{0:.2f}".format(total_size_float) + " GB")
        else:
            # covers all totals under 10 digits so total_size_str is always set
            total_size_float = total_size * 1e-6
            total_size_str = str("{0:.2f}".format(total_size_float) + " MB")
    else:
        total_size_str = '0'

    # Prints 'none' if all tiles are available locally
    if TNM_file_titles:
        TNM_file_titles_info = "\n".join(TNM_file_titles)
    else:
        TNM_file_titles_info = 'none'

    # Formatted return for 'i' flag
    if file_download_count <= 0:
        data_info = "\n\nUSGS file(s) to download: NONE"
        if gui_product == 'nlcd':
            if tile_API_count != file_download_count:
                if tiles_needed_count == 0:
                    nlcd_unavailable = ("NLCD {0} data unavailable for "
                                        "input parameters").format(gui_subset)
                    gscript.fatal(nlcd_unavailable)
    else:
        data_info = (
            "USGS file(s) to download:",
            "-------------------------",
            "Total download size:\t{size}",
            "Tile count:\t{count}",
            "USGS SRS:\t{srs}",
            "USGS tile titles:\n{tile}",
            "-------------------------",
        )
        data_info = '\n'.join(data_info).format(size=total_size_str,
                                                count=file_download_count,
                                                srs=product_srs,
                                                tile=TNM_file_titles_info)
    print data_info

    if gui_i_flag:
        gscript.info("To download USGS data, remove the 'i' flag and rerun r.in.usgs.")
        sys.exit()

    # USGS data download process
    if file_download_count <= 0:
        gscript.message("Extracting existing USGS Data...")
    else:
        gscript.message("Downloading USGS Data...")

    TNM_count = len(dwnld_url)
    download_count = 0
    local_tile_path_list = []
    local_zip_path_list = []
    patch_names = []

    # Download files
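    # (each tile is streamed to work_dir in chunks; network errors abort the
    # module, other failures queue the partial file for cleanup at exit)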
    for url in dwnld_url:
        # create file name by splitting name from returned url
        # and add file name to local download directory
        if gui_product == 'ned':
            file_name = ned_data_abbrv + url.split(product_url_split)[-1]
            local_file_path = os.path.join(work_dir, file_name)
        else:
            file_name = url.split(product_url_split)[-1]
            local_file_path = os.path.join(work_dir, file_name)
        try:
            # download files in chunks rather than write complete files to memory
            dwnld_req = urllib2.urlopen(url, timeout=12)
            download_bytes = int(dwnld_req.info()['Content-Length'])
            CHUNK = 16 * 1024
            with open(local_file_path, "wb+") as local_file:
                count = 0
                steps = int(download_bytes / CHUNK) + 1
                while True:
                    chunk = dwnld_req.read(CHUNK)
                    gscript.percent(count, steps, 10)
                    count += 1
                    if not chunk:
                        break
                    local_file.write(chunk)
            download_count += 1
            # determine if file is a zip archive or another format
            if product_is_zip:
                local_zip_path_list.append(local_file_path)
            else:
                local_tile_path_list.append(local_file_path)
            file_complete = "Download {0} of {1}: COMPLETE".format(
                download_count, TNM_count)
            gscript.info(file_complete)
        except urllib2.URLError:
            gscript.fatal("USGS download request has timed out. "
                          "Network or formatting error.")
        except StandardError:
            cleanup_list.append(local_file_path)
            if download_count:
                file_failed = "Download {0} of {1}: FAILED".format(
                    download_count, TNM_count)
                gscript.fatal(file_failed)

    # add already-downloaded zip files or tiles to the extract/import lists
    if exist_zip_list:
        for z in exist_zip_list:
            local_zip_path_list.append(z)
    if exist_tile_list:
        for t in exist_tile_list:
            local_tile_path_list.append(t)
    if product_is_zip:
        if file_download_count == 0:
            pass
        else:
            gscript.message("Extracting data...")
        # for each zip archive, extract needed file
        for z in local_zip_path_list:
            # Extract tiles from ZIP archives
            extracted_tile = None
            try:
                with zipfile.ZipFile(z, "r") as read_zip:
                    for f in read_zip.namelist():
                        if f.endswith(product_extension):
                            extracted_tile = os.path.join(work_dir, str(f))
                            # overwrite any stale copy of the tile
                            if os.path.exists(extracted_tile):
                                os.remove(extracted_tile)
                            read_zip.extract(f, work_dir)
                if extracted_tile and os.path.exists(extracted_tile):
                    local_tile_path_list.append(extracted_tile)
                    cleanup_list.append(extracted_tile)
            except:
                if extracted_tile:
                    cleanup_list.append(extracted_tile)
                gscript.fatal("Unable to locate or extract IMG file from ZIP archive.")

    # operations for extracted or complete files available locally
    for t in local_tile_path_list:
        # create variables for use in GRASS GIS import process
        LT_file_name = os.path.basename(t)
        LT_layer_name = os.path.splitext(LT_file_name)[0]
        patch_names.append(LT_layer_name)
        in_info = "Importing and reprojecting {0}...".format(LT_file_name)
        gscript.info(in_info)
        # import to GRASS GIS
        try:
            gscript.run_command('r.import', input=t, output=LT_layer_name,
                                resolution='value',
                                resolution_value=product_resolution,
                                extent="region",
                                resample=product_interpolation)
            if not gui_k_flag:
                cleanup_list.append(t)
        except CalledModuleError:
            in_error = "Unable to import '{0}'".format(LT_file_name)
            gscript.fatal(in_error)

    # if control variables match and multiple files need to be patched,
    # check product resolution, run r.patch

    # Check that downloaded files match expected count
    completed_tiles_count = len(local_tile_path_list)
    if completed_tiles_count == tiles_needed_count:
        if completed_tiles_count > 1:
            try:
                gscript.use_temp_region()
                # set the resolution
                if product_resolution:
                    gscript.run_command('g.region', res=product_resolution, flags='a')
                gscript.run_command('r.patch', input=patch_names,
                                    output=gui_output_layer)
                gscript.del_temp_region()
                out_info = "Patched composite layer '{0}' added".format(gui_output_layer)
                gscript.verbose(out_info)
                # Remove the individual tile rasters unless 'k' flag is set
                if not gui_k_flag:
                    gscript.run_command('g.remove', type='raster',
                                        name=patch_names, flags='f')
            except CalledModuleError:
                gscript.fatal("Unable to patch tiles.")
        elif completed_tiles_count == 1:
            gscript.run_command('g.rename', raster=(patch_names[0], gui_output_layer))
        temp_down_count = ("\n{0} of {1} tile(s) successfully imported and "
                           "patched.").format(completed_tiles_count,
                                              tiles_needed_count)
        gscript.info(temp_down_count)
    else:
        gscript.fatal("Error downloading files. Please retry.")

    # Keep source files if 'k' flag active
    if gui_k_flag:
        src_msg = "'k' flag selected: Source tiles remain in '{0}'".format(work_dir)
        gscript.info(src_msg)

    # set appropriate color table
    if gui_product == 'ned':
        gscript.run_command('r.colors', map=gui_output_layer, color='elevation')


def cleanup():
    # Remove files in cleanup_list
    for f in cleanup_list:
        if os.path.exists(f):
            gscript.try_remove(f)


if __name__ == "__main__":
    options, flags = gscript.parser()
    atexit.register(cleanup)
    sys.exit(main())