{ "cells": [ { "cell_type": "markdown", "id": "8017c719-7fa9-4b81-a0df-48d5872f89b3", "metadata": {}, "source": [ "# process global" ] }, { "cell_type": "code", "execution_count": null, "id": "b04003da-340b-4c09-b39d-48e21f705849", "metadata": { "tags": [] }, "outputs": [], "source": [ "# need to upgrade odc-stac to fix keyerror issue for some tiles\n", "#!pip install -q odc-stac -U\n", "!pip install -q odc-stac==0.3.6" ] }, { "cell_type": "code", "execution_count": null, "id": "9ad638fb-219f-4c04-b8f4-aef6c1ea78d8", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import geopandas as gpd\n", "import rasterio as rio\n", "import pystac\n", "import pystac_client\n", "import stackstac\n", "import matplotlib.pyplot as plt\n", "import xarray as xr\n", "from datetime import datetime\n", "import matplotlib\n", "import ipyleaflet\n", "import sys\n", "import os\n", "import dask_gateway\n", "import planetary_computer\n", "from rechunker import rechunk\n", "sys.path.append('../sar_snowmelt_timing')\n", "import s1_rtc_bs_utils\n", "import contextily as ctx\n", "import rioxarray as rxr\n", "import skimage\n", "import pathlib\n", "import glob\n", "import re\n", "import time\n", "import fsspec\n", "import urllib.request\n", "from urllib.error import URLError\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": null, "id": "a7f5d4e3-a28f-47da-8a06-969767e3843f", "metadata": { "tags": [] }, "outputs": [], "source": [ "import getpass\n", "import azure.storage.blob\n", "import io\n", "import adlfs\n", "\n", "\n", "#sas_token = getpass.getpass() # prompts for the sas_token\n", "sas_token = \"se=2023-12-08T18%3A45Z&sp=racwdl&sv=2022-11-02&sr=c&skoid=598b2582-4d1d-4c4e-9bb2-3ad571b791b5&sktid=f6b6dd5b-f02f-441a-99a0-162ac5060bd2&skt=2023-12-01T18%3A46%3A01Z&ske=2023-12-08T18%3A45%3A00Z&sks=b&skv=2022-11-02&sig=up1iyCiyySNtrQnG%2BFb7yCIshOTLZ%2BFq9XphTbDeLxo%3D\"\n", "container_client = azure.storage.blob.ContainerClient(\n", " \"https://snowmelt.blob.core.windows.net\",\n", " container_name=\"snowmelt\",\n", " credential=sas_token,\n", ")\n", "fs = adlfs.AzureBlobFileSystem(account_name=\"snowmelt\", credential=sas_token, anon=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "8c339d12-8195-4873-a1ff-2886d6973044", "metadata": { "tags": [] }, "outputs": [], "source": [ "# cluster = dask_gateway.GatewayCluster()\n", "# client = cluster.get_client()\n", "# cluster.adapt(minimum=10, maximum=50)\n", "# print(client.dashboard_link)\n", "\n", "\n", "cluster = dask_gateway.GatewayCluster()\n", "client = cluster.get_client()\n", "cluster.scale(50)\n", "print(cluster.dashboard_link)" ] }, { "cell_type": "code", "execution_count": null, "id": "a0fe05ba-c7fd-44de-97a7-c4701c268476", "metadata": { "tags": [] }, "outputs": [], "source": [ "with fsspec.open('https://github.com/egagli/mgrs_tiles_snow_area/raw/main/mgrs_land_tiles_snow_area.parquet') as file:\n", " mgrs_gdf = gpd.read_parquet(file)" ] }, { "cell_type": "code", "execution_count": null, "id": "62465c0a-c29e-484d-9816-60725654cf12", "metadata": { "tags": [] }, "outputs": [], "source": [ "mgrs_snow_gdf = mgrs_gdf[mgrs_gdf['mountain_seasonal_and_ephemeral_snow_area_km2']>100].sort_values(by='mountain_seasonal_and_ephemeral_snow_area_km2',ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "734cbf48-874d-46b1-b0b5-78fba7b9606c", "metadata": { "tags": [] }, "outputs": [], "source": [ "mgrs_snow_gdf = mgrs_snow_gdf[mgrs_snow_gdf.epsg<32700] # northern hemisphere\n", "#mgrs_snow_gdf = mgrs_snow_gdf[mgrs_snow_gdf.intersects(gpd.read_file('../input/shapefiles/western_us_canada.geojson').geometry[0],align=True)] # western us and canada" ] }, { "cell_type": "code", "execution_count": null, "id": "3a17317b-db05-4cb5-9d02-cbb40b8288e1", "metadata": { "tags": [] }, "outputs": [], "source": [ "mgrs_snow_gdf" ] }, { "cell_type": "code", "execution_count": null, "id": "1f55d9c4-578b-454f-bf1b-b91a6e979fd1", "metadata": { "tags": [] }, "outputs": [], "source": [ "mgrs_snow_gdf.explore(column='mountain_seasonal_and_ephemeral_snow_area_km2')" ] }, { "cell_type": "code", "execution_count": null, "id": "05d56fa0-c2dc-43ff-9677-45b886ef282a", "metadata": { "tags": [] }, "outputs": [], "source": [ "# nm = mgrs_snow_gdf.iloc[3000].tile\n", "# bbox_gdf = mgrs_gdf[mgrs_gdf.tile==nm]\n", "\n", "# f,ax=plt.subplots()\n", "# bbox_gdf.geometry.boundary.plot(ax=ax,color='red')\n", "# xmin, ymin, xmax, ymax = bbox_gdf.total_bounds\n", "# ax.set_xlim([xmin-10,xmax+10])\n", "# ax.set_ylim([ymin-10,ymax+10])\n", "\n", "# ctx.add_basemap(ax=ax,source=ctx.providers.OpenTopoMap, crs=bbox_gdf.crs)" ] }, { "cell_type": "code", "execution_count": null, "id": "2821ed07-9a50-4d50-8652-ec5cc4275864", "metadata": { "tags": [] }, "outputs": [], "source": [ "# classes = [ # page 13 of https://esa-worldcover.s3.amazonaws.com/v100/2020/docs/WorldCover_PUM_V1.0.pdf\n", "# # 10, # treecover\n", "# 20, # shrubland\n", "# 30, # grassland\n", "# 40, # cropland\n", "# # 50, # built-up\n", "# 60, #bare / sparse vegetation\n", "# 70, # snow and ice\n", "# # 80, # permanent water bodies\n", "# 90, # herbaceous wetlands\n", "# 95, # mangroves\n", "# 100 # loss and lichen\n", "# ]\n", "# worldcover = s1_rtc_bs_utils.get_worldcover(ts_ds)\n", "#ts_ds = ts_ds.where(worldcover.isin(classes))" ] }, { "cell_type": "code", "execution_count": null, "id": "9c98bd17-af43-41fd-8826-32163128e88e", "metadata": { "tags": [] }, "outputs": [], "source": [ "resolution = 80\n", "min_acqusitions = 6\n", "years = ['allyears_median','allyears_std','all_years_polyfit','allyears_corr_strength','2015_median','2016_median','2017_median','2018_median','2019_median','2020_median','2021_median','2022_median','2023_median']" ] }, { "cell_type": "code", "execution_count": null, "id": "3e1e1862-daaa-4083-988e-84b72fe3fc28", "metadata": { "tags": [] }, "outputs": [], "source": [ "def get_runoff_onset_mgrs(ts_ds,return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=6):\n", "\n", " #orbits = s1_rtc_bs_utils.get_orbits_with_melt_season_coverage(ts_ds,num_acquisitions_during_melt_season=num_acquisitions_during_melt_season)\n", " #ts_ds = ts_ds[ts_ds['sat:relative_orbit'].isin(orbits).compute()]\n", " \n", " ts_ds = s1_rtc_bs_utils.remove_border_noise(ts_ds)\n", " \n", " counts = ts_ds.groupby('sat:relative_orbit').count()\n", " \n", " runoffs_int64 = ts_ds.groupby('sat:relative_orbit').map(lambda c: c.idxmin(dim='time')).astype(np.int64).where(counts>=num_acquisitions_during_melt_season)\n", " \n", " if return_seperate_orbits_and_polarizations==False: \n", " runoffs_int64 = runoffs_int64.where(runoffs_int64>0).median(dim=['sat:relative_orbit','band'],skipna=True)\n", "\n", " return runoffs_int64.astype('datetime64[ns]')" ] }, { "cell_type": "code", "execution_count": null, "id": "a6208a03-feb7-489c-96be-6993a5fc0277", "metadata": { "tags": [] }, "outputs": [], "source": [ "for tile in tqdm(mgrs_snow_gdf.tile):\n", "\n", " if any(tile in s for s in fs.glob(f'snowmelt/eric/global_4326/')):\n", " print(f'folder {tile} already exists, skipping...')\n", " continue\n", " \n", " if any(tile in s for s in fs.open('snowmelt/eric/global/exclude.txt','r').readlines()):\n", " print(f'{tile} is in exclude.txt, skipping...')\n", " continue \n", " \n", " print(f'working on tile {tile}...')\n", " \n", " try:\n", " tic = time.perf_counter()\n", "\n", " bbox_gdf = mgrs_snow_gdf[mgrs_snow_gdf.tile==tile]\n", " ts_ds = s1_rtc_bs_utils.get_s1_rtc_stac_odc_pc(bbox_gdf,start_time='2015-01-01',end_time='2023-12-31',resolution=resolution,epsg=f'EPSG:{bbox_gdf.epsg.values[0]}')\n", "\n", " allyears = ts_ds.where((ts_ds['time.month']>=2) & (ts_ds['time.month']<8), drop=True).groupby('time.year').apply(\n", " get_runoff_onset_mgrs,return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=min_acqusitions)\n", "\n", " runoffs_allyears = allyears.dt.dayofyear.where(allyears.dt.dayofyear>0).compute().dropna(dim='year',how='all').rio.write_nodata(np.iinfo(np.int16).min,encoded=True)\n", "\n", " polyfits = runoffs_allyears.polyfit('year',deg=1,full=True,cov=True)\n", "\n", "\n", " years = ['allyears_median','allyears_std','allyears_polyfit','allyears_corr_strength','2015_median','2016_median','2017_median','2018_median','2019_median','2020_median','2021_median','2022_median','2023_median']\n", "\n", " for year in years:\n", " #print(f'writing {year}...')\n", "\n", "\n", " if year == 'allyears_std':\n", " data_type = 'float32'\n", " raster = runoffs_allyears.std(dim='year').rio.write_nodata(np.finfo(np.float32).min,encoded=True)\n", "\n", " elif year == 'allyears_polyfit':\n", " data_type = 'float32'\n", " raster = polyfits.polyfit_coefficients[0].rio.write_nodata(np.finfo(np.float32).min,encoded=True).rio.set_crs(runoffs_allyears.rio.crs)\n", "\n", " elif year == 'allyears_corr_strength':\n", " data_type = 'float32'\n", " raster = xr.corr(runoffs_allyears.year,runoffs_allyears,dim='year').rio.write_nodata(np.finfo(np.float32).min,encoded=True).rio.set_crs(runoffs_allyears.rio.crs)\n", "\n", " elif year == 'allyears_median':\n", " data_type = 'int16'\n", " raster = runoffs_allyears.median(dim='year').rio.write_nodata(np.iinfo(np.int16).min,encoded=True)\n", "\n", " else:\n", " data_type = 'int16'\n", " int_year = int(year[0:4])\n", " if int_year in runoffs_allyears.year:\n", " raster = runoffs_allyears.sel(year=int_year)\n", " else:\n", " continue\n", "\n", "\n", " # actually changing the reasmpling here might be helpful\n", " with io.BytesIO() as buffer:\n", " raster.rio.to_raster(buffer, driver=\"COG\", dtype=data_type)\n", " buffer.seek(0)\n", " blob_client = container_client.get_blob_client(f\"eric/global/{tile}/runoff_onset_{tile}_{year}_{resolution}m.tif\")\n", " blob_client.upload_blob(buffer, overwrite=True)\n", "\n", " with io.BytesIO() as buffer:\n", " raster.rio.reproject('epsg:4326').rio.to_raster(buffer, driver=\"COG\", dtype=data_type)\n", " buffer.seek(0)\n", " blob_client = container_client.get_blob_client(f\"eric/global_4326/{tile}/runoff_onset_{tile}_{year}_{resolution}m_4326.tif\")\n", " blob_client.upload_blob(buffer, overwrite=True)\n", "\n", "\n", " toc = time.perf_counter()\n", " print(f'Processing time: {toc - tic:0.1f} seconds')\n", " \n", " except Exception as e:\n", " print(e)\n", " \n", " exclude = tile\n", " with io.BytesIO() as buffer:\n", " with fs.open('snowmelt/eric/global/exclude.txt','r',newline='\\n') as exclude_file:\n", " exclude_list = exclude_file.readlines()\n", "\n", " exclude_list = [x.replace('\\n','') for x in exclude_list]\n", " exclude_list.append(exclude)\n", "\n", " np.savetxt(buffer,exclude_list, fmt='%s',delimiter='\\n')\n", " buffer.seek(0)\n", " blob_client = container_client.get_blob_client('eric/global/exclude.txt')\n", " blob_client.upload_blob(buffer, overwrite=True)\n", " \n", " print(f'Failed, added {exclude} to exclude.txt')" ] }, { "cell_type": "code", "execution_count": null, "id": "b3e48855-517e-47d4-b5bc-f630c169ce53", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "885771dc-b589-419f-bd66-cd3ed7c0f5b0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "0b5580f2-a4d7-47cb-a5c0-7d1a5a8aaed4", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "b951418b-b52c-4a49-809e-93f44c918cd5", "metadata": {}, "outputs": [], "source": [ "fs.glob(f'snowmelt/eric/global_4326/{tile}/*')" ] }, { "cell_type": "code", "execution_count": null, "id": "a7d91d1c-c46c-48a5-8d94-697ae27aedca", "metadata": { "tags": [] }, "outputs": [], "source": [ "url = \"https://snowmelt.blob.core.windows.net/\"\n", "\n", "geotiff_list = fs.glob(f'snowmelt/eric/global/{tile}/*_????_median*')\n", "\n", "year_list = []\n", "\n", "for geotiff in geotiff_list:\n", " year_list.append(re.findall(\"_(\\d{4})_\", geotiff)[0])\n", "year_list = [int(year) for year in year_list]\n", "time_var = xr.Variable('time', year_list)\n", "\n", "runoff_allyears_vis = xr.concat([rxr.open_rasterio(f\"{url}{i}\") for i in geotiff_list],dim=time_var).squeeze().sortby('time')\n", "runoff_allyears_vis = runoff_allyears_vis.where(runoff_allyears_vis>-32768)" ] }, { "cell_type": "code", "execution_count": null, "id": "83fd7aab-9ede-4321-bfef-476021c4049d", "metadata": { "tags": [] }, "outputs": [], "source": [ "runoff_allyears_vis.plot(col='time',col_wrap=3, vmin=80, vmax=240)" ] }, { "cell_type": "code", "execution_count": null, "id": "384922a3-61f0-473d-8093-92eec2fdd0e7", "metadata": { "tags": [] }, "outputs": [], "source": [ "#runoff_allyears_vis.median(dim='time').where(runoff_allyears_vis.median(dim='time')>0).rio.write_nodata(-32768,encoded=True).plot(vmin=80,vmax=240)" ] }, { "cell_type": "code", "execution_count": null, "id": "16e96405-fb2a-409d-af23-23df3f6d25d0", "metadata": {}, "outputs": [], "source": [ "rxr.open_rasterio(f'{url}snowmelt/eric/global_4326/47WMQ/runoff_onset_47WMQ_allyears_polyfit_80m_4326.tif',mask_and_scale=True).plot()" ] }, { "cell_type": "code", "execution_count": null, "id": "3dbcc570-0340-46b1-bd9a-9b1b9f1fb094", "metadata": { "tags": [] }, "outputs": [], "source": [ "url = \"https://snowmelt.blob.core.windows.net/\"\n", "\n", "geotiff_list = fs.glob(f'snowmelt/eric/global_4326/{tile}/*')\n", "\n", "year_list = []\n", "\n", "for geotiff in geotiff_list:\n", " year_list.append(re.findall(\"_(\\d{4})_\", geotiff)[0])\n", "year_list = [int(year) for year in year_list]\n", "time_var = xr.Variable('time', year_list)\n", "\n", "runoff_allyears_vis = xr.concat([rxr.open_rasterio(f\"{url}{i}\") for i in geotiff_list],dim=time_var).squeeze().sortby('time')\n", "runoff_allyears_vis = runoff_allyears_vis.where(runoff_allyears_vis>-32768)" ] }, { "cell_type": "code", "execution_count": null, "id": "9abc50da-fe06-4f49-8e63-2adf0a1c00df", "metadata": { "tags": [] }, "outputs": [], "source": [ "runoff_allyears_vis.plot(col='time',col_wrap=3, vmin=80, vmax=240)" ] }, { "cell_type": "code", "execution_count": null, "id": "2c9b4865-501c-486a-90fb-a8e16fb2b5a8", "metadata": { "tags": [] }, "outputs": [], "source": [ "runoff_allyears_vis.median(dim='time').where(runoff_allyears_vis.median(dim='time')>0).rio.write_nodata(-32768,encoded=True).plot(vmin=80,vmax=240)" ] }, { "cell_type": "code", "execution_count": null, "id": "5a456ce7-6b30-4225-86c2-bc0a3c05c8d7", "metadata": { "tags": [] }, "outputs": [], "source": [ "#fn = f\"{url}{fs.glob('snowmelt/eric/global/04WFA/')[3]}\"\n", "fn = f\"{url}snowmelt/eric/global_4326/47WMQ/runoff_onset_47WMQ_allyears_std_80m_4326.tif\"\n", "fn = f\"{url}snowmelt/eric/global_4326/47WMQ/runoff_onset_47WMQ_2021_median_80m_4326.tif\"" ] }, { "cell_type": "code", "execution_count": null, "id": "5dc2bf22-9f7a-4c67-b808-6dff56ebeff6", "metadata": { "tags": [] }, "outputs": [], "source": [ "fn" ] }, { "cell_type": "code", "execution_count": null, "id": "f35512de-6660-49a1-8067-28a241a4334b", "metadata": { "tags": [] }, "outputs": [], "source": [ "!gdalinfo $fn" ] }, { "cell_type": "code", "execution_count": null, "id": "61f37009-8448-4c1f-a67d-a8c91db4e259", "metadata": {}, "outputs": [], "source": [ "# ts_ds = s1_rtc_bs_utils.get_s1_rtc_stac_odc_pc(bbox_gdf,start_time='2015-01-01',end_time='2023-12-31',resolution=resolution,epsg=f'EPSG:{bbox_gdf.epsg.values[0]}')\n", "# allyears = ts_ds.where((ts_ds['time.month']>=2) & (ts_ds['time.month']<8), drop=True).groupby('time.year').apply(\n", "# get_runoff_onset_2,return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=min_acqusitions)\n", "# %%time\n", "# runoffs_allyears = allyears.dt.dayofyear.where(allyears.dt.dayofyear>0).compute().dropna(dim='year',how='all')\n", "# runoffs_allyears.plot(col='year',col_wrap=4,vmin=80,vmax=240)\n", "# runoffs_allyears#.rio.write_nodata(-32768,encoded=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "e3877cd3-053e-4e2b-8a35-c98b866dfc56", "metadata": { "tags": [] }, "outputs": [], "source": [ "# min_acqusitions = 8\n", "# counts = ts_ds_oneyear.groupby('sat:relative_orbit').count()\n", "# counts\n", "# runoffs_median = s1_rtc_bs_utils.get_runoff_onset(ts_ds_oneyear,return_seperate_orbits_and_polarizations=True,num_acquisitions_during_melt_season=8)\n", "# runoffs_median\n", "# runoffs_median_masked = runoffs_median.where(counts>min_acqusitions)\n", "# runoffs_median_masked\n", "# runoffs_median_masked_compute = runoffs_median_masked.dt.dayofyear.where(runoffs_median_masked.dt.dayofyear>0).compute()\n", "# runoffs_median_masked_compute\n", "# runoffs_median_masked_compute.plot(col='band',row='sat:relative_orbit',vmin=80,vmax=240)\n", "# runoffs_median_masked_compute.median(dim=['sat:relative_orbit','band'],skipna=True).plot(vmin=80,vmax=240)" ] }, { "cell_type": "code", "execution_count": null, "id": "41f77f33-4bdc-4236-a6fc-ac8938e135d0", "metadata": { "tags": [] }, "outputs": [], "source": [ "# year = 2020\n", "# melt_year = slice(f'{year}-02-01',f'{year}-08-01')\n", "# ts_ds_oneyear = ts_ds.sel(time=melt_year)\n", "# ts_ds_oneyear\n", "#runoffs = get_runoff_onset_mgrs(ts_ds_oneyear, return_seperate_orbits_and_polarizations=True, num_acquisitions_during_melt_season=min_acqusitions).compute()\n", "#runoffs.dt.dayofyear.where(runoffs.dt.dayofyear>0).plot(col='band',row='sat:relative_orbit',vmin=80,vmax=240)\n", "# runoffs_med = get_runoff_onset_2(ts_ds_oneyear, return_seperate_orbits_and_polarizations=False, num_acquisitions_during_melt_season=min_acqusitions).compute()\n", "# runoffs_med.dt.dayofyear.where(runoffs_med.dt.dayofyear>0).plot(vmin=80,vmax=240)" ] }, { "cell_type": "code", "execution_count": null, "id": "b9accaa5-2bf2-432a-93d0-d4439c45e611", "metadata": {}, "outputs": [], "source": [ "#read the exclude file\n", "# with fs.open('snowmelt/eric/global/exclude.txt','r',newline='\\n') as exclude_file:\n", "# exclude_list = exclude_file.readlines()\n", "\n", "# exclude_list = [x.replace('\\n','') for x in exclude_list]\n", "# exclude_list" ] }, { "cell_type": "code", "execution_count": null, "id": "b8e99ee3-5e70-4782-a671-12b43dbc9ad4", "metadata": { "tags": [] }, "outputs": [], "source": [ "#WARNING THIS LINE RESETS THE EXCLUDE FILE\n", "##############fs.touch('snowmelt/eric/global/exclude.txt')" ] }, { "cell_type": "code", "execution_count": null, "id": "aeef8249-8e26-4441-9d24-7be26b52a6b2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }