{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import os, shutil" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Fire perimeter datasets for the US can be found via GeoMAC at \n", "# https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/current_year_fire_data/current_year_all_states/\n", "# Fire perimeters for Canada can be found at\n", "# https://catalogue.data.gov.bc.ca/dataset/fire-perimeters-current" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Define which state/province we are downloading L8 data for\n", "state = 'WY'\n", "\n", "# Define the filepaths where the shapefile data resides\n", "CA = r'D:\\data\\FirePerimeters\\2018_2019_Canada_perimeters.shp'\n", "US = r'D:\\data\\FirePerimeters\\perimeters_dd83.shp'\n", "stateboundaries = (os.path.join(r'D:\\data\\boundaries',state + '.shp'))\n", "wrsfile = r'D:\\data\\l8\\wrs2_descending.shp'\n", "\n", "# Define where the resultant l8 scenes and metadata files will go\n", "l8out = r'D:\\data\\imagery'\n", "sceneinfo = r'D:\\data\\l8'" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "if state == 'BC' or state == 'AB':\n", " country = 'Canada'\n", " fire = gpd.GeoDataFrame.from_file(CA)\n", " # Used est perimeter data from http://cwfis.cfs.nrcan.gc.ca/downloads/hotspots/ (merged 2019/2018)\n", "else: \n", " country = 'US'\n", " fire = gpd.GeoDataFrame.from_file(US)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Read the state boundary shapefile and the wrs path/row shapefile\n", "# State files need to be the same projection as the WRS file\n", "bounds = gpd.read_file(stateboundaries)\n", "wrs = gpd.GeoDataFrame.from_file(wrsfile)\n", "\n", "# Select the Landsat path/rows that intersect with the state of interest\n", "wrs_intersection = wrs[wrs.intersects(bounds.geometry[0])]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Select the fires that intersect to later determine the needed imagery date\n", "fires = gpd.sjoin(fire, wrs, how='inner', op='within')###\n", "\n", "# sort dataframe by most recent date, change date format to match AWS's landsat metadata date format\n", "if country == 'Canada':\n", " fires['enddate'] = fires['LASTDATE']+ '.000000'\n", "else:\n", " fires['enddate'] = fires['DATECRNT']+' 00:00:00.000000'\n", "\n", "# empty gdf for most recent fire perimeter date\n", "recent_fire = gpd.GeoDataFrame()\n", "\n", "# select just fires in the state, make lowercase strings for consistent matching of fire names\n", "if country == 'US':\n", " fires = fires.loc[(fires.STATE == state)]\n", " fires.FIRENAME = fires.FIRENAME.str.lower()\n", " fires = fires[['FIRENAME','PATH','ROW','enddate']]\n", "else:\n", " print 'Skipping firename'\n", " fires = fires[['PATH','ROW','enddate']]\n", "\n", "fires['PR'] = fires['PATH'].astype(str)+' '+fires['ROW'].astype(str)\n", "\n", "# for each fire, pick the latest date\n", "if country == 'US':\n", " for firename in fires['FIRENAME']:\n", " rec_fire = fires.loc[(fires.FIRENAME == firename)]\n", " rec_fire['enddate'].sort_values()\n", " rec_fire = rec_fire.tail(1)\n", " recent_fire = recent_fire.append(rec_fire)\n", "else:\n", " rec_fire = fires\n", " recent_fire = recent_fire.append(rec_fire)\n", " \n", "# then find the latest fire date for the path/row\n", 
"pr_date = gpd.GeoDataFrame()\n", "\n", "for pr in recent_fire['PR'].unique():\n", " prdate = recent_fire.loc[(recent_fire.PR == pr)]\n", " prdate['enddate'].sort_values()\n", " prdate = prdate.tail(1)\n", " pr_date = pr_date.append(prdate)\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\python27\\lib\\site-packages\\folium\\__init__.py:59: UserWarning: This version of folium is the last to support Python 2. Transition to Python 3 to be able to receive updates and fixes. Check out https://python3statement.org/ for more info.\n", " UserWarning\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# OPTIONAL: view folium map of the path/rows selected to visualize coverage\n", "import folium\n", "import numpy as np\n", "xy = np.asarray(bounds.centroid[0].xy).squeeze()\n", "center = list(xy[::-1])\n", "zoom = 6\n", "m = folium.Map(location=center, zoom_start=zoom, control_scale=True)\n", "m.add_child(folium.GeoJson(bounds.__geo_interface__, name='Path/Row Coverage', \n", " style_function=lambda x: {'color': 'red', 'alpha': 0}))\n", "for i, row in wrs_intersection.iterrows():\n", " # Create a string for the name containing the path and row of this Polygon\n", " name = 'path: %03d, row: %03d' % (row.PATH, row.ROW)\n", " # Create the folium geometry of this Polygon \n", " g = folium.GeoJson(row.geometry.__geo_interface__, name=name)\n", " # Add a folium Popup object with the name string\n", " g.add_child(folium.Popup(name))\n", " # Add the object to the map\n", " g.add_to(m)\n", "\n", "folium.LayerControl().add_to(m)\n", "m" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "paths, rows = wrs_intersection['PATH'].values, wrs_intersection['ROW'].values" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "24 scenes\n" ] } ], "source": [ "# Count how many paths and rows there are to download imagery for\n", "count_images = 0\n", "for (path, row) in enumerate(zip(paths, rows)):\n", " count_images = count_images + 1\n", "print str(count_images) + ' scenes'" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Read AWS metadata csv for l8 into a dataframe. This is the data we will use to select scenes matching our requirements.\n", "s3_scenes = pd.read_csv('http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz', compression='gzip')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "Path: 36 Row: 29\n", " FIRENAME PATH ROW enddate PR\n", "2103 valley 2 36 29 2019-08-17 00:00:00.000000 36 29\n", "Fire occured - new scene daterange 2019-08-17 to 2019-09-30\n", "Found 1 images\n", "\n", "2\n", "Path: 36 Row: 30\n", " FIRENAME PATH ROW enddate PR\n", "2328 sawmill 36 30 2019-08-13 00:00:00.000000 36 30\n", "Fire occured - new scene daterange 2019-08-13 to 2019-09-30\n", "Found 2 images\n", "\n", "3\n", "Path: 36 Row: 31\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 5 images\n", "\n", "4\n", "Path: 36 Row: 32\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 5 images\n", "\n", "5\n", "Path: 34 Row: 29\n", " FIRENAME PATH ROW enddate PR\n", "1306 prairie dog draw 34 29 2019-09-10 00:00:00.000000 34 29\n", "Fire occured - new scene daterange 2019-09-10 to 2019-09-30\n", "Found 1 images\n", "\n", "6\n", "Path: 34 Row: 30\n", " FIRENAME PATH ROW enddate PR\n", "1784 spring canyon 34 30 2019-08-26 00:00:00.000000 34 30\n", "Fire occured - new scene daterange 2019-08-26 to 2019-09-30\n", "Found 2 images\n", "\n", "7\n", "Path: 34 Row: 31\n", " FIRENAME PATH ROW enddate PR\n", "2304 ashenfelder 34 31 2019-09-06 00:00:00.000000 34 31\n", "Fire occured - new scene daterange 2019-09-06 to 2019-09-30\n", "Found 1 images\n", 
"\n", "8\n", "Path: 34 Row: 32\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 1 images\n", "\n", "9\n", "Path: 39 Row: 29\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 2 images\n", "\n", "10\n", "Path: 37 Row: 29\n", " FIRENAME PATH ROW enddate PR\n", "2871 brimstone 37 29 2019-09-17 00:00:00.000000 37 29\n", "Fire occured - new scene daterange 2019-09-17 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 1 images\n", "\n", "11\n", "Path: 37 Row: 30\n", " FIRENAME PATH ROW enddate PR\n", "2459 bomber lake 37 30 2019-08-29 00:00:00.000000 37 30\n", "Fire occured - new scene daterange 2019-08-29 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 2 images\n", "\n", "12\n", "Path: 37 Row: 31\n", " FIRENAME PATH ROW enddate PR\n", "2280 currant 37 31 2019-09-03 00:00:00.000000 37 31\n", "Fire occured - new scene daterange 2019-09-03 to 2019-09-30\n", "Found 2 images\n", "\n", "13\n", "Path: 37 Row: 32\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 5 images\n", "\n", "14\n", "Path: 35 Row: 29\n", " FIRENAME PATH ROW enddate PR\n", "1299 fortification creek 35 29 2019-08-22 00:00:00.000000 35 29\n", "Fire occured - new scene daterange 2019-08-22 to 2019-09-30\n", "Found 1 images\n", "\n", "15\n", "Path: 35 Row: 30\n", " FIRENAME PATH ROW enddate PR\n", "1849 whipsaw 35 30 2019-08-19 00:00:00.000000 35 30\n", "Fire occured - new scene daterange 2019-08-19 to 2019-09-30\n", "Found 2 images\n", "\n", "16\n", "Path: 35 Row: 31\n", " FIRENAME PATH ROW enddate PR\n", "2850 pedro mountain 35 31 2019-08-28 00:00:00.000000 35 31\n", "Fire occured - new scene daterange 2019-08-28 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 2 images\n", "\n", "17\n", "Path: 35 Row: 32\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 2 images\n", "\n", "18\n", "Path: 33 Row: 29\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 1 images\n", "\n", "19\n", "Path: 33 Row: 30\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 2 images\n", "\n", "20\n", "Path: 33 Row: 31\n", " FIRENAME PATH ROW enddate PR\n", "1428 finkbine 33 31 2019-07-24 00:00:00.000000 33 31\n", "Fire occured - new scene daterange 2019-07-24 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 1 images\n", "\n", "21\n", "Path: 33 Row: 32\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 3 images\n", "\n", "22\n", "Path: 38 Row: 29\n", " FIRENAME PATH ROW enddate PR\n", "1819 box creek 
38 29 2019-09-11 00:00:00.000000 38 29\n", "Fire occured - new scene daterange 2019-09-11 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 1 images\n", "\n", "23\n", "Path: 38 Row: 30\n", " FIRENAME PATH ROW enddate PR\n", "1587 saddle butte 38 30 2019-09-03 00:00:00.000000 38 30\n", "Fire occured - new scene daterange 2019-09-03 to 2019-09-30\n", "Found 0 images\n", "\n", "Retry with higher cloudcover threshold:\n", "Try #2: 20% cloudcover threshold\n", "Found 1 images\n", "\n", "24\n", "Path: 38 Row: 31\n", "Empty DataFrame\n", "Columns: [FIRENAME, PATH, ROW, enddate, PR]\n", "Index: []\n", "No fire - daterange unchanged 2019-06-01 to 2019-09-30\n", "Found 4 images\n", "\n" ] } ], "source": [ "# bulk download list\n", "bulk_list = []\n", "not_found = []\n", "n = 0\n", "\n", "\n", "# Find scenes for each path/row\n", "for path, row in zip(paths, rows):\n", " n = n + 1\n", " print n\n", "\n", " ## Define the thresholds for date range and cloud cover:\n", " datelowest = '2019-06-01 00:00:00.000000'\n", " datehigh = '2019-09-30 00:00:00.000000'\n", " cloudcover = 10\n", " print 'Path: ' + str(path) + ' Row: ' + str(row)\n", " \n", " #def fire_scene():\n", " # Check if the Path/Row has a recent fire, use the fire's end date for the datelow L8 scene search\n", " pr = str(path) +' '+ str(row)\n", " prloc = pr_date.loc[(pr_date.PR == pr)]\n", " print prloc.head()\n", "\n", " if prloc.shape[0] != 1:\n", " datelow = datelowest\n", " print 'No fire - daterange unchanged '+ datelow[:-15] +'to '+ datehigh[:-16]\n", " elif prloc['enddate'].values[0] > datelowest: #Ensure most recent years imagery used, if using more than 1 fire year\n", " datelow = prloc['enddate'].values[0]\n", " print 'Fire occured - new scene daterange ' + datelow[:-15] +'to '+ datehigh[:-16]\n", " else:\n", " datelow = datelowest\n", " print 'Fires present from previous year, using current year imagery '+ datelow[:-15] +'to '+ datehigh[:-16]\n", " if datelow == '':\n", " datelow = datelowest\n", " #fire_scene()\n", " \n", " # Filter the Landsat Amazon S3 table for images matching path/row and cloudcover parameters.\n", " tries = 10\n", " \n", " # Ideally, imagery will be <10% scene cloud cover. The below code loops through the imagery in increments\n", " # of 10% cover until a 100% threshhold is reached. 
Change the threshold requirements as needed.\n", " # Currently there is no way to look at cloud cover within the fire perimeter/aoi before download - but this\n", " # method (looking at total scene cover) should be adequate for most purposes.\n", " while tries >= 10 and tries <= 90:\n", " if tries > 10:\n", " ntries = tries/10\n", " cloudcover = tries\n", " print 'Try #' + str(ntries) +': '+ str(cloudcover) + '% cloudcover threshold'\n", " scenes = s3_scenes[(s3_scenes.path == path) & (s3_scenes.row == row) & \n", " (s3_scenes.cloudCover <= cloudcover) & \n", " (s3_scenes.acquisitionDate >= datelow) & \n", " (s3_scenes.acquisitionDate <= datehigh) &\n", " # We don't want any tier2/uncorrected data\n", " (~s3_scenes.productId.str.contains('_T2')) &\n", " (~s3_scenes.productId.str.contains('_RT'))]\n", " \n", " print 'Found {} images\\n'.format(len(scenes))\n", " if len(scenes) == 0:\n", " tries = tries + 10\n", " print 'Retry with higher cloudcover threshold:'\n", " else: tries = 100\n", " \n", " # Select the scenes that meet the date and cloud cover criteria\n", " if len(scenes)>0:\n", " # select a scene in the middle of the date ranges if possible - for my purposes, full leaf imagery is ideal\n", " sc = len(scenes)\n", " sd = sc / 2\n", " sl = sc - sd\n", " if sd > 2 and sl < 2:\n", " sl = -1\n", " else:\n", " sl = sl * -1\n", " \n", " # pick the middle date scene\n", " scene = scenes.sort_values('acquisitionDate').iloc[sl]\n", " \n", " # Add the selected scene to the bulk download list.\n", " bulk_list.append(scene)\n", " else:\n", " # if there are no scenes found even after altering the cloudcover threshold, create a list (find manually)\n", " print 'No scenes were selected for this path/row'\n", " nf = str(path) + ',' + str(row)\n", " not_found.append(nf)\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
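{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# ASIDE: a toy walk-through of the middle-date pick used above (uses no notebook\n", "# state). For a date-sorted list it selects the middle scene when the count is odd\n", "# and the later of the two middle scenes when it is even. Note the sd > 2 and\n", "# sl < 2 branch appears unreachable, since sl = sc - sc/2 is always >= sd.\n", "for sc in range(1, 7):\n", "    sd = sc / 2  # integer division under Python 2\n", "    sl = sc - sd\n", "    sl = -1 if (sd > 2 and sl < 2) else sl * -1\n", "    print '%d scenes -> picks sorted index %d' % (sc, range(sc)[sl])" ] },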
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
productIdentityIdacquisitionDatecloudCoverprocessingLevelpathrowmin_latmin_lonmax_latmax_londownload_url
1698771LC08_L1TP_036029_20190831_20190916_01_T1LC80360292019243LGN002019-08-31 17:54:50.6490401.11L1TP362943.4775-108.98345.6465-105.894https://s3-us-west-2.amazonaws.com/landsat-pds...
1698381LC08_L1TP_036030_20190831_20190916_01_T1LC80360302019243LGN002019-08-31 17:55:14.5358440.52L1TP363042.0485-109.45344.2195-106.418https://s3-us-west-2.amazonaws.com/landsat-pds...
1641340LC08_L1TP_036031_20190714_20190720_01_T1LC80360312019195LGN002019-07-14 17:55:23.1583341.07L1TP363140.6706-109.7842.8245-107.077https://s3-us-west-2.amazonaws.com/landsat-pds...
1641341LC08_L1TP_036032_20190714_20190720_01_T1LC80360322019195LGN002019-07-14 17:55:47.0451362.04L1TP363239.2485-110.20941.3938-107.557https://s3-us-west-2.amazonaws.com/landsat-pds...
1715327LC08_L1TP_034029_20190918_20190926_01_T1LC80340292019261LGN002019-09-18 17:42:34.8294120L1TP342943.5227-105.81145.6673-102.901https://s3-us-west-2.amazonaws.com/landsat-pds...
1715084LC08_L1TP_034030_20190918_20190926_01_T1LC80340302019261LGN002019-09-18 17:42:58.7162160.01L1TP343042.1046-106.28444.2397-103.42https://s3-us-west-2.amazonaws.com/landsat-pds...
1715409LC08_L1TP_034031_20190918_20190926_01_T1LC80340312019261LGN002019-09-18 17:43:22.6114925.09L1TP343140.6765-106.73842.8102-103.92https://s3-us-west-2.amazonaws.com/landsat-pds...
1697556LC08_L1TP_034032_20190902_20190916_01_T1LC80340322019245LGN002019-09-02 17:43:40.9773377.84L1TP343239.2426-107.16941.379-104.394https://s3-us-west-2.amazonaws.com/landsat-pds...
1701659LC08_L1TP_039029_20190905_20190917_01_T1LC80390292019248LGN002019-09-05 18:13:23.83389514.55L1TP392943.5041-113.57945.6572-110.568https://s3-us-west-2.amazonaws.com/landsat-pds...
1712605LC08_L1TP_037029_20190923_20190926_01_T1LC80370292019266LGN002019-09-23 18:01:08.86609414.89L1TP372943.5178-110.4345.674-107.58https://s3-us-west-2.amazonaws.com/landsat-pds...
\n", "
" ], "text/plain": [ " productId entityId \\\n", "1698771 LC08_L1TP_036029_20190831_20190916_01_T1 LC80360292019243LGN00 \n", "1698381 LC08_L1TP_036030_20190831_20190916_01_T1 LC80360302019243LGN00 \n", "1641340 LC08_L1TP_036031_20190714_20190720_01_T1 LC80360312019195LGN00 \n", "1641341 LC08_L1TP_036032_20190714_20190720_01_T1 LC80360322019195LGN00 \n", "1715327 LC08_L1TP_034029_20190918_20190926_01_T1 LC80340292019261LGN00 \n", "1715084 LC08_L1TP_034030_20190918_20190926_01_T1 LC80340302019261LGN00 \n", "1715409 LC08_L1TP_034031_20190918_20190926_01_T1 LC80340312019261LGN00 \n", "1697556 LC08_L1TP_034032_20190902_20190916_01_T1 LC80340322019245LGN00 \n", "1701659 LC08_L1TP_039029_20190905_20190917_01_T1 LC80390292019248LGN00 \n", "1712605 LC08_L1TP_037029_20190923_20190926_01_T1 LC80370292019266LGN00 \n", "\n", " acquisitionDate cloudCover processingLevel path row \\\n", "1698771 2019-08-31 17:54:50.649040 1.11 L1TP 36 29 \n", "1698381 2019-08-31 17:55:14.535844 0.52 L1TP 36 30 \n", "1641340 2019-07-14 17:55:23.158334 1.07 L1TP 36 31 \n", "1641341 2019-07-14 17:55:47.045136 2.04 L1TP 36 32 \n", "1715327 2019-09-18 17:42:34.829412 0 L1TP 34 29 \n", "1715084 2019-09-18 17:42:58.716216 0.01 L1TP 34 30 \n", "1715409 2019-09-18 17:43:22.611492 5.09 L1TP 34 31 \n", "1697556 2019-09-02 17:43:40.977337 7.84 L1TP 34 32 \n", "1701659 2019-09-05 18:13:23.833895 14.55 L1TP 39 29 \n", "1712605 2019-09-23 18:01:08.866094 14.89 L1TP 37 29 \n", "\n", " min_lat min_lon max_lat max_lon \\\n", "1698771 43.4775 -108.983 45.6465 -105.894 \n", "1698381 42.0485 -109.453 44.2195 -106.418 \n", "1641340 40.6706 -109.78 42.8245 -107.077 \n", "1641341 39.2485 -110.209 41.3938 -107.557 \n", "1715327 43.5227 -105.811 45.6673 -102.901 \n", "1715084 42.1046 -106.284 44.2397 -103.42 \n", "1715409 40.6765 -106.738 42.8102 -103.92 \n", "1697556 39.2426 -107.169 41.379 -104.394 \n", "1701659 43.5041 -113.579 45.6572 -110.568 \n", "1712605 43.5178 -110.43 45.674 -107.58 \n", "\n", " download_url \n", "1698771 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1698381 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1641340 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1641341 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1715327 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1715084 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1715409 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1697556 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1701659 https://s3-us-west-2.amazonaws.com/landsat-pds... \n", "1712605 https://s3-us-west-2.amazonaws.com/landsat-pds... 
" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Concatenate the scene info into two lists: scenes that have no match, and scenes we want to download.\n", "bulk_frame = pd.concat(bulk_list, 1).T\n", "nf_frame = pd.DataFrame(not_found)\n", "nf_frame.to_csv((os.path.join(sceneinfo, state + 'scenes_missing.txt')),sep='\\t', index=False, header=False)\n", "bulk_frame.head(10)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Option 1 - get the scene list to upload to earthexplorer.usgs.gov/filelist\n", "bulklist = bulk_frame[['entityId']]\n", "bulklist.to_csv((os.path.join(sceneinfo, state + 'pathrowlist.txt')),sep='\\t', index=False, header=False)\n", "bulk_frame.to_csv((os.path.join(sceneinfo, state + 'frame.txt')),sep='\\t', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Skipping D:\\data\\imagery\\WY\\l8imagery\\LC08_L1TP_036029_20190831_20190916_01_T1 as it already exists\n", "Skipping D:\\data\\imagery\\WY\\l8imagery\\LC08_L1TP_036030_20190831_20190916_01_T1 as it already exists\n", "Skipping D:\\data\\imagery\\WY\\l8imagery\\LC08_L1TP_036031_20190714_20190720_01_T1 as it already exists\n", "\n", "EntityId: LC08_L1TP_036032_20190714_20190720_01_T1 \n", "\n", " Downloading: LC08_L1TP_036032_20190714_20190720_01_T1_B6.TIF\n", " Downloading: LC08_L1TP_036032_20190714_20190720_01_T1_B8.TIF\n" ] } ], "source": [ "# Option 2 - download the data directly\n", "import requests\n", "from bs4 import BeautifulSoup\n", "\n", "LANDSAT_PATH = os.path.join(l8out, state, 'l8imagery')\n", "\n", "# For each row\n", "for i, row in bulk_frame.iterrows():\n", " \n", " entity_dir = os.path.join(LANDSAT_PATH, row.productId)\n", " \n", " # added to skip the file if it already has been downloaded - check and re-download any files that may be corrupted\n", " # if download is interrupted\n", " if os.path.isdir(entity_dir): \n", " print'Skipping ' + entity_dir + ' as it already exists'\n", " else:\n", " # Print the product ID\n", " print '\\n', 'EntityId:', row.productId, '\\n'\n", "\n", " # Request the html text of the download_url from the amazon server. \n", " response = requests.get(row.download_url)\n", "\n", " # If the response status code is fine (200)\n", " if response.status_code == 200:\n", "\n", " # Import the html to beautiful soup\n", " html = BeautifulSoup(response.content, 'html.parser')\n", "\n", " # Create the dir where we will put this image files.\n", " if not os.path.exists(entity_dir):\n", " os.makedirs(entity_dir)\n", "\n", "\n", " # Second loop: for each band of this image that we find using the html
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Skipping D:\\data\\imagery\\WY\\l8imagery\\LC08_L1TP_036029_20190831_20190916_01_T1 as it already exists\n", "Skipping D:\\data\\imagery\\WY\\l8imagery\\LC08_L1TP_036030_20190831_20190916_01_T1 as it already exists\n", "Skipping D:\\data\\imagery\\WY\\l8imagery\\LC08_L1TP_036031_20190714_20190720_01_T1 as it already exists\n", "\n", "EntityId: LC08_L1TP_036032_20190714_20190720_01_T1 \n", "\n", "    Downloading: LC08_L1TP_036032_20190714_20190720_01_T1_B6.TIF\n", "    Downloading: LC08_L1TP_036032_20190714_20190720_01_T1_B8.TIF\n" ] } ], "source": [ "# Option 2 - download the data directly\n", "import requests\n", "from bs4 import BeautifulSoup\n", "\n", "LANDSAT_PATH = os.path.join(l8out, state, 'l8imagery')\n", "\n", "# For each scene row in the bulk download frame\n", "for i, row in bulk_frame.iterrows():\n", "\n", "    entity_dir = os.path.join(LANDSAT_PATH, row.productId)\n", "\n", "    # Skip scenes that already have a folder; if a download was interrupted, check that folder\n", "    # and re-download any files that may be corrupted\n", "    if os.path.isdir(entity_dir):\n", "        print 'Skipping ' + entity_dir + ' as it already exists'\n", "    else:\n", "        # Print the product ID\n", "        print '\\n', 'EntityId:', row.productId, '\\n'\n", "\n", "        # Request the html text of the download_url from the Amazon server.\n", "        response = requests.get(row.download_url)\n", "\n", "        # If the response status code is fine (200)\n", "        if response.status_code == 200:\n", "\n", "            # Import the html to beautiful soup\n", "            html = BeautifulSoup(response.content, 'html.parser')\n", "\n", "            # Create the dir where we will put this image's files.\n", "            if not os.path.exists(entity_dir):\n", "                os.makedirs(entity_dir)\n", "\n", "            # Second loop: for each band of this image, found via its html list-item (li) tag\n", "            for li in html.find_all('li'):\n", "\n", "                # Get the href tag\n", "                file = li.find_next('a').get('href')\n", "\n", "                filestring = str(file)\n", "                filen = os.path.join(entity_dir, filestring)\n", "\n", "                # only download the .TIF and metadata files; other formats (.IMD) aren't necessary here\n", "                if filestring.endswith(('.TIF', '_MTL.txt', '_ANG.txt')):\n", "                    if not os.path.isfile(filen):  # skip anything already downloaded\n", "                        print '    Downloading: {}'.format(file)\n", "\n", "                        # Download the file\n", "                        response = requests.get(row.download_url.replace('index.html', file), stream=True)\n", "\n", "                        with open(filen, 'wb') as output:\n", "                            shutil.copyfileobj(response.raw, output)\n", "                        del response\n", "                    else:\n", "                        print filestring + ' exists'\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.15" } }, "nbformat": 4, "nbformat_minor": 2 }