{
"cells": [
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import geopandas as gpd\n",
"import xarray as xr\n",
"import rioxarray as rxr\n",
"import os\n",
"import xarray as xr\n",
"import geopandas as gpd\n",
"import os\n",
"from osgeo import gdal\n",
"\n",
"\n",
"os.chdir(r'C:/Users/jtrum/world_bank/data/')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
"P_1in5 clipped\n",
"P_1in10 clipped\n",
"P_1in20 clipped\n",
"P_1in50 clipped\n",
"P_1in75 clipped\n",
"P_1in100 clipped\n",
"P_1in200 clipped\n",
"P_1in250 clipped\n",
"P_1in500 clipped\n",
"P_1in1000 clipped\n",
"FU_1in5 clipped\n",
"FU_1in10 clipped\n",
"FU_1in20 clipped\n",
"FU_1in50 clipped\n",
"FU_1in75 clipped\n",
"FU_1in100 clipped\n",
"FU_1in200 clipped\n",
"FU_1in250 clipped\n",
"FU_1in500 clipped\n",
"FU_1in1000 clipped\n"
]
}
],
"source": [
"datadir = 'C:/Users/jtrum/world_bank/data/'\n",
"\n",
"pluvial = ['P_1in5', 'P_1in10', 'P_1in20', 'P_1in50', 'P_1in75', 'P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000']\n",
"fluvial_undefined = ['FU_1in5', 'FU_1in10', 'FU_1in20', 'FU_1in50', 'FU_1in75', 'FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']\n",
"\n",
"data_dict = {}\n",
"\n",
"for i in pluvial:\n",
" data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/pluvial/{i}.tif')\n",
"\n",
"for i in fluvial_undefined:\n",
" data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/fluvial_undefended/{i}.tif')\n",
"\n",
"# crop the 20 rasters to the extent of catchment basin\n",
"catchment = gpd.read_file(datadir + 'catchment.geojson')\n",
"clip = catchment.geometry\n",
"\n",
"# set crs of rasters to equal crs of catchment\n",
"for i in data_dict:\n",
" data_dict[i] = data_dict[i].rio.write_crs(catchment.crs)\n",
" # print crs of raster, then crs of catchment\n",
" print(data_dict[i].rio.crs)\n",
" print(catchment.crs)\n",
"\n",
"# clip rasters to catchment extent\n",
"for i in data_dict:\n",
" data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)\n",
" print(i, 'clipped')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"P_1in5 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in5_clipped.tif\n",
"P_1in10 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in10_clipped.tif\n",
"P_1in20 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in20_clipped.tif\n",
"P_1in50 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in50_clipped.tif\n",
"P_1in75 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in75_clipped.tif\n",
"P_1in100 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in100_clipped.tif\n",
"P_1in200 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in200_clipped.tif\n",
"P_1in250 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in250_clipped.tif\n",
"P_1in500 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in500_clipped.tif\n",
"P_1in1000 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/P_1in1000_clipped.tif\n",
"FU_1in5 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in5_clipped.tif\n",
"FU_1in10 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in10_clipped.tif\n",
"FU_1in20 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in20_clipped.tif\n",
"FU_1in50 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in50_clipped.tif\n",
"FU_1in75 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in75_clipped.tif\n",
"FU_1in100 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in100_clipped.tif\n",
"FU_1in200 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in200_clipped.tif\n",
"FU_1in250 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in250_clipped.tif\n",
"FU_1in500 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in500_clipped.tif\n",
"FU_1in1000 clipped\n",
"Binarized C:/Users/jtrum/world_bank/data/FU_1in1000_clipped.tif\n"
]
}
],
"source": [
"def binarize_band(raster_path, threshold=0.15):\n",
" raster_dataset = gdal.Open(raster_path, gdal.GA_Update)\n",
" \n",
" if raster_dataset is None:\n",
" print(f\"Unable to open {raster_path}\")\n",
" return\n",
" \n",
" band = raster_dataset.GetRasterBand(1)\n",
" band_data = band.ReadAsArray()\n",
" \n",
" binary_data = (band_data > threshold).astype(int)\n",
" \n",
" new_band = raster_dataset.GetRasterBand(1) # You can choose any band number you want\n",
" new_band.WriteArray(binary_data)\n",
" \n",
" raster_dataset.FlushCache()\n",
" raster_dataset = None\n",
"\n",
" print(f\"Binarized {raster_path}\")\n",
"\n",
"# Define the threshold value\n",
"threshold = 0.15\n",
"\n",
"# Clip rasters to catchment extent\n",
"for i in data_dict:\n",
" data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)\n",
" print(i, 'clipped')\n",
" \n",
" # Get the path to the clipped raster file\n",
" clipped_raster_path = datadir + i + '_clipped.tif'\n",
" data_dict[i].to_netcdf(clipped_raster_path) # Save the clipped raster as a NetCDF\n",
" \n",
" # Binarize the band data in the clipped raster\n",
" binarize_band(clipped_raster_path, threshold)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# turn the clipped rasters into a dictionary\n",
"clipped_dict = {}\n",
"\n",
"for i in pluvial:\n",
" clipped_dict[i] = xr.open_dataset(datadir + f'{i}_clipped.tif')\n",
"\n",
"for i in fluvial_undefined:\n",
" clipped_dict[i] = xr.open_dataset(datadir + f'{i}_clipped.tif')"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\jtrum\\AppData\\Local\\Temp\\ipykernel_17268\\1252790497.py:18: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n",
" mode_data = stats.mode(stacked_data, axis=0, nan_policy='omit')\n"
]
}
],
"source": [
"from scipy import stats\n",
"\n",
"# Define the groups based on your criteria\n",
"groups = [\n",
" ['P_1in5', 'P_1in10'],\n",
" ['P_1in20', 'P_1in50', 'P_1in75'],\n",
" ['P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000'],\n",
" ['FU_1in5', 'FU_1in10'],\n",
" ['FU_1in20', 'FU_1in50', 'FU_1in75'],\n",
" ['FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']\n",
"]\n",
"\n",
"def compute_mode_value(rasters):\n",
" # Stack the rasters along a new dimension\n",
" stacked_data = xr.concat([rasters[i]['band_data'] for i in range(len(rasters))], dim='raster')\n",
" \n",
" # Compute the mode along the 'raster' dimension\n",
" mode_data = stats.mode(stacked_data, axis=0, nan_policy='omit')\n",
"\n",
" # Extract the mode values as a DataArray\n",
" mode_values = xr.DataArray(mode_data.mode, dims=stacked_data.dims, coords=stacked_data.coords)\n",
"\n",
" return mode_values\n",
"\n",
"# Create a dictionary to store mode values for each group\n",
"mode_values_dict = {}\n",
"\n",
"# Compute mode values for each group\n",
"for group_indices, group_name in enumerate(groups):\n",
" group_rasters = [clipped_dict[i] for i in group_name]\n",
" mode_values = compute_mode_value(group_rasters)\n",
" mode_values_dict[f'Group_{group_indices + 1}'] = mode_values\n",
"\n",
"# The mode values for each group can be accessed as mode_values_dict['Group_1'], mode_values_dict['Group_2'], etc.\n"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'FU_1in500'"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# now plot the 6 groups post-binarization\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.colors as colors\n",
"import matplotlib.patches as mpatches\n",
"import matplotlib.lines as mlines\n",
"import matplotlib.gridspec as gridspec\n",
"\n",
"\n",
"# Define the groups based on your criteria\n",
"\n",
"groups = [\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'DataFrame' object has no attribute 'to_dataframe'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17268\\1667624259.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m#print as a dataframe\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dataframe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# turn each dataframe into its own variable so we don't have to call the dictionary every time\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mglobals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 5985\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5986\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5987\u001b[0m ):\n\u001b[0;32m 5988\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5989\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'to_dataframe'"
]
}
],
"source": [
"#print as a dataframe \n",
"for i in data_dict:\n",
" data_dict[i] = data_dict[i].to_dataframe()\n",
" # turn each dataframe into its own variable so we don't have to call the dictionary every time\n",
" globals()[i] = data_dict[i]\n",
" print(i, 'converted to dataframe')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"P_1in5 converted to geodataframe\n",
"P_1in10 converted to geodataframe\n",
"P_1in20 converted to geodataframe\n",
"P_1in50 converted to geodataframe\n",
"P_1in75 converted to geodataframe\n",
"P_1in100 converted to geodataframe\n",
"P_1in200 converted to geodataframe\n",
"P_1in250 converted to geodataframe\n",
"P_1in500 converted to geodataframe\n",
"P_1in1000 converted to geodataframe\n",
"FU_1in5 converted to geodataframe\n",
"FU_1in10 converted to geodataframe\n",
"FU_1in20 converted to geodataframe\n",
"FU_1in50 converted to geodataframe\n",
"FU_1in75 converted to geodataframe\n",
"FU_1in100 converted to geodataframe\n",
"FU_1in200 converted to geodataframe\n",
"FU_1in250 converted to geodataframe\n",
"FU_1in500 converted to geodataframe\n"
]
},
{
"ename": "TypeError",
"evalue": "reset_index() missing 1 required positional argument: 'dims_or_levels'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mUntitled-1.ipynb Cell 4\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39m# use x and y to make into geodataframe\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m data_dict:\n\u001b[1;32m----> 3\u001b[0m data_dict[i] \u001b[39m=\u001b[39m data_dict[i]\u001b[39m.\u001b[39;49mreset_index()\n\u001b[0;32m 4\u001b[0m data_dict[i] \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mGeoDataFrame(data_dict[i], geometry\u001b[39m=\u001b[39mgpd\u001b[39m.\u001b[39mpoints_from_xy(data_dict[i]\u001b[39m.\u001b[39mx, data_dict[i]\u001b[39m.\u001b[39my))\n\u001b[0;32m 5\u001b[0m \u001b[39mprint\u001b[39m(i, \u001b[39m'\u001b[39m\u001b[39mconverted to geodataframe\u001b[39m\u001b[39m'\u001b[39m)\n",
"\u001b[1;31mTypeError\u001b[0m: reset_index() missing 1 required positional argument: 'dims_or_levels'"
]
}
],
"source": [
"# use x and y to make into geodataframe\n",
"for i in data_dict:\n",
" data_dict[i] = data_dict[i].reset_index()\n",
" data_dict[i] = gpd.GeoDataFrame(data_dict[i], geometry=gpd.points_from_xy(data_dict[i].x, data_dict[i].y))\n",
" print(i, 'converted to geodataframe')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# make a dictionary of the dataframes\n",
"df_dict = {'P_1in5', \n",
" 'P_1in10',}"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# for each dataframe, in the 'band_data' column, if the value is greater than or equal to 0.15, in a new column called 'threshold' put a 1, all else put a 0. use their actual dataframes and not the dictionary\n",
"P_1in5['threshold'] = P_1in5['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in10['threshold'] = P_1in10['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in20['threshold'] = P_1in20['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in50['threshold'] = P_1in50['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in75['threshold'] = P_1in75['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in100['threshold'] = P_1in100['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in200['threshold'] = P_1in200['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in250['threshold'] = P_1in250['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in500['threshold'] = P_1in500['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"P_1in1000['threshold'] = P_1in1000['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in5['threshold'] = FU_1in5['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in10['threshold'] = FU_1in10['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in20['threshold'] = FU_1in20['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in50['threshold'] = FU_1in50['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in75['threshold'] = FU_1in75['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in100['threshold'] = FU_1in100['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in200['threshold'] = FU_1in200['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in250['threshold'] = FU_1in250['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in500['threshold'] = FU_1in500['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
"FU_1in1000['threshold'] = FU_1in1000['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# make 6 groupings of the dataframes, where each will be a new dataframe using the x and y columns and then the threshold values from each of the dataframes in the group, with the column being named 'threshold' + the name of the dataframe\n",
"# group 1 (P_1in5, P_1in10)\n",
"G1 = pd.DataFrame(P_1in5, columns=['band', 'x', 'y'])\n",
"G1['threshold_P_1in5'] = P_1in5['threshold']\n",
"G1['threshold_P_1in10'] = P_1in10['threshold']\n",
"\n",
"# group 2 (P_1in20, P_1in50, P_1in75)\n",
"G2 = pd.DataFrame(P_1in20, columns=['band', 'x', 'y'])\n",
"G2['threshold_P_1in20'] = P_1in20['threshold']\n",
"G2['threshold_P_1in50'] = P_1in50['threshold']\n",
"G2['threshold_P_1in75'] = P_1in75['threshold']\n",
"\n",
"# group 3 (P_1in100, P_1in200, P_1in250, P_1in500, P_1in1000)\n",
"G3 = pd.DataFrame(P_1in100, columns=['band', 'x', 'y'])\n",
"G3['threshold_P_1in100'] = P_1in100['threshold']\n",
"G3['threshold_P_1in200'] = P_1in200['threshold']\n",
"G3['threshold_P_1in250'] = P_1in250['threshold']\n",
"G3['threshold_P_1in500'] = P_1in500['threshold']\n",
"G3['threshold_P_1in1000'] = P_1in1000['threshold']\n",
"\n",
"# group 4 (FU_1in5, FU_1in10)\n",
"G4 = pd.DataFrame(FU_1in5, columns=['band', 'x', 'y'])\n",
"G4['threshold_FU_1in5'] = FU_1in5['threshold']\n",
"G4['threshold_FU_1in10'] = FU_1in10['threshold']\n",
"\n",
"# group 5 (FU_1in20, FU_1in50, FU_1in75)\n",
"G5 = pd.DataFrame(FU_1in20, columns=['band', 'x', 'y'])\n",
"G5['threshold_FU_1in20'] = FU_1in20['threshold']\n",
"G5['threshold_FU_1in50'] = FU_1in50['threshold']\n",
"G5['threshold_FU_1in75'] = FU_1in75['threshold']\n",
"\n",
"# group 6 (FU_1in100, FU_1in200, FU_1in250, FU_1in500, FU_1in1000)\n",
"G6 = pd.DataFrame(FU_1in100, columns=['band', 'x', 'y'])\n",
"G6['threshold_FU_1in100'] = FU_1in100['threshold']\n",
"G6['threshold_FU_1in200'] = FU_1in200['threshold']\n",
"G6['threshold_FU_1in250'] = FU_1in250['threshold']\n",
"G6['threshold_FU_1in500'] = FU_1in500['threshold']\n",
"G6['threshold_FU_1in1000'] = FU_1in1000['threshold']\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mUntitled-1.ipynb Cell 8\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 1\u001b[0m \u001b[39m# take the mode of each of the threshold columns and make it the new column 'threshold' in each of the groups\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m G1[\u001b[39m'\u001b[39m\u001b[39mthreshold\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m G1[[\u001b[39m'\u001b[39;49m\u001b[39mthreshold_P_1in5\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mthreshold_P_1in10\u001b[39;49m\u001b[39m'\u001b[39;49m]]\u001b[39m.\u001b[39;49mmode(axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m)[\u001b[39m0\u001b[39m]\n\u001b[0;32m 3\u001b[0m \u001b[39m# print(\"G1 threshold column created\")\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[39m# G2['threshold'] = G2[['threshold_P_1in20', 'threshold_P_1in50', 'threshold_P_1in75']].mode(axis=1)[0]\u001b[39;00m\n\u001b[0;32m 5\u001b[0m \u001b[39m# print(\"G2 threshold column created\")\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[39m# G6['threshold'] = G6[['threshold_FU_1in100', 'threshold_FU_1in200', 'threshold_FU_1in250', 'threshold_FU_1in500', 'threshold_FU_1in1000']].mode(axis=1)[0]\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[39m# print(\"G6 threshold column created\")\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\frame.py:10746\u001b[0m, in \u001b[0;36mDataFrame.mode\u001b[1;34m(self, axis, numeric_only, dropna)\u001b[0m\n\u001b[0;32m 10743\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mf\u001b[39m(s):\n\u001b[0;32m 10744\u001b[0m \u001b[39mreturn\u001b[39;00m s\u001b[39m.\u001b[39mmode(dropna\u001b[39m=\u001b[39mdropna)\n\u001b[1;32m> 10746\u001b[0m data \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39;49mapply(f, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[0;32m 10747\u001b[0m \u001b[39m# Ensure index is type stable (should always use int index)\u001b[39;00m\n\u001b[0;32m 10748\u001b[0m \u001b[39mif\u001b[39;00m data\u001b[39m.\u001b[39mempty:\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\frame.py:9423\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[1;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[0;32m 9412\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mapply\u001b[39;00m \u001b[39mimport\u001b[39;00m frame_apply\n\u001b[0;32m 9414\u001b[0m op \u001b[39m=\u001b[39m frame_apply(\n\u001b[0;32m 9415\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 9416\u001b[0m func\u001b[39m=\u001b[39mfunc,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 9421\u001b[0m kwargs\u001b[39m=\u001b[39mkwargs,\n\u001b[0;32m 9422\u001b[0m )\n\u001b[1;32m-> 9423\u001b[0m \u001b[39mreturn\u001b[39;00m op\u001b[39m.\u001b[39;49mapply()\u001b[39m.\u001b[39m__finalize__(\u001b[39mself\u001b[39m, method\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mapply\u001b[39m\u001b[39m\"\u001b[39m)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\apply.py:678\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 675\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mraw:\n\u001b[0;32m 676\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_raw()\n\u001b[1;32m--> 678\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\apply.py:798\u001b[0m, in \u001b[0;36mFrameApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 797\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply_standard\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m--> 798\u001b[0m results, res_index \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_series_generator()\n\u001b[0;32m 800\u001b[0m \u001b[39m# wrap results\u001b[39;00m\n\u001b[0;32m 801\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwrap_results(results, res_index)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\apply.py:814\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 811\u001b[0m \u001b[39mwith\u001b[39;00m option_context(\u001b[39m\"\u001b[39m\u001b[39mmode.chained_assignment\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m):\n\u001b[0;32m 812\u001b[0m \u001b[39mfor\u001b[39;00m i, v \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(series_gen):\n\u001b[0;32m 813\u001b[0m \u001b[39m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[1;32m--> 814\u001b[0m results[i] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mf(v)\n\u001b[0;32m 815\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[0;32m 816\u001b[0m \u001b[39m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[0;32m 817\u001b[0m \u001b[39m# series_generator will swap out the underlying data\u001b[39;00m\n\u001b[0;32m 818\u001b[0m results[i] \u001b[39m=\u001b[39m results[i]\u001b[39m.\u001b[39mcopy(deep\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\frame.py:10744\u001b[0m, in \u001b[0;36mDataFrame.mode..f\u001b[1;34m(s)\u001b[0m\n\u001b[0;32m 10743\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mf\u001b[39m(s):\n\u001b[1;32m> 10744\u001b[0m \u001b[39mreturn\u001b[39;00m s\u001b[39m.\u001b[39;49mmode(dropna\u001b[39m=\u001b[39;49mdropna)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\series.py:2122\u001b[0m, in \u001b[0;36mSeries.mode\u001b[1;34m(self, dropna)\u001b[0m\n\u001b[0;32m 2120\u001b[0m values \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values\n\u001b[0;32m 2121\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(values, np\u001b[39m.\u001b[39mndarray):\n\u001b[1;32m-> 2122\u001b[0m res_values \u001b[39m=\u001b[39m algorithms\u001b[39m.\u001b[39;49mmode(values, dropna\u001b[39m=\u001b[39;49mdropna)\n\u001b[0;32m 2123\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 2124\u001b[0m res_values \u001b[39m=\u001b[39m values\u001b[39m.\u001b[39m_mode(dropna\u001b[39m=\u001b[39mdropna)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\algorithms.py:998\u001b[0m, in \u001b[0;36mmode\u001b[1;34m(values, dropna, mask)\u001b[0m\n\u001b[0;32m 996\u001b[0m npresult \u001b[39m=\u001b[39m htable\u001b[39m.\u001b[39mmode(values, dropna\u001b[39m=\u001b[39mdropna, mask\u001b[39m=\u001b[39mmask)\n\u001b[0;32m 997\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 998\u001b[0m npresult \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49msort(npresult)\n\u001b[0;32m 999\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m 1000\u001b[0m warnings\u001b[39m.\u001b[39mwarn(\n\u001b[0;32m 1001\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnable to sort modes: \u001b[39m\u001b[39m{\u001b[39;00merr\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 1002\u001b[0m stacklevel\u001b[39m=\u001b[39mfind_stack_level(),\n\u001b[0;32m 1003\u001b[0m )\n",
"File \u001b[1;32m<__array_function__ internals>:200\u001b[0m, in \u001b[0;36msort\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\numpy\\core\\fromnumeric.py:1029\u001b[0m, in \u001b[0;36msort\u001b[1;34m(a, axis, kind, order)\u001b[0m\n\u001b[0;32m 1027\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39m-\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m 1028\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1029\u001b[0m a \u001b[39m=\u001b[39m asanyarray(a)\u001b[39m.\u001b[39;49mcopy(order\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mK\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m 1030\u001b[0m a\u001b[39m.\u001b[39msort(axis\u001b[39m=\u001b[39maxis, kind\u001b[39m=\u001b[39mkind, order\u001b[39m=\u001b[39morder)\n\u001b[0;32m 1031\u001b[0m \u001b[39mreturn\u001b[39;00m a\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# take the mode of each of the threshold columns and make it the new column 'threshold' in each of the groups\n",
"G1['threshold'] = G1[['threshold_P_1in5', 'threshold_P_1in10']].mode(axis=1)[0]\n",
"# print(\"G1 threshold column created\")\n",
"# G2['threshold'] = G2[['threshold_P_1in20', 'threshold_P_1in50', 'threshold_P_1in75']].mode(axis=1)[0]\n",
"# print(\"G2 threshold column created\")\n",
"# G3['threshold'] = G3[['threshold_P_1in100', 'threshold_P_1in200', 'threshold_P_1in250', 'threshold_P_1in500', 'threshold_P_1in1000']].mode(axis=1)[0]\n",
"# print(\"G3 threshold column created\")\n",
"# G4['threshold'] = G4[['threshold_FU_1in5', 'threshold_FU_1in10']].mode(axis=1)[0]\n",
"# print(\"G4 threshold column created\")\n",
"# G5['threshold'] = G5[['threshold_FU_1in20', 'threshold_FU_1in50', 'threshold_FU_1in75']].mode(axis=1)[0]\n",
"# print(\"G5 threshold column created\")\n",
"# G6['threshold'] = G6[['threshold_FU_1in100', 'threshold_FU_1in200', 'threshold_FU_1in250', 'threshold_FU_1in500', 'threshold_FU_1in1000']].mode(axis=1)[0]\n",
"# print(\"G6 threshold column created\")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"P_1in5 converted to variable\n",
"P_1in10 converted to variable\n",
"P_1in20 converted to variable\n",
"P_1in50 converted to variable\n",
"P_1in75 converted to variable\n",
"P_1in100 converted to variable\n",
"P_1in200 converted to variable\n",
"P_1in250 converted to variable\n",
"P_1in500 converted to variable\n",
"P_1in1000 converted to variable\n",
"FU_1in5 converted to variable\n",
"FU_1in10 converted to variable\n",
"FU_1in20 converted to variable\n",
"FU_1in50 converted to variable\n",
"FU_1in75 converted to variable\n",
"FU_1in100 converted to variable\n",
"FU_1in200 converted to variable\n",
"FU_1in250 converted to variable\n",
"FU_1in500 converted to variable\n",
"FU_1in1000 converted to variable\n"
]
}
],
"source": [
"# expose each dataframe in data_dict as a module-level variable named after its key\n",
"# (globals() injection is fragile, but mirrors the dict keys as plain names)\n",
"for name, frame in data_dict.items():\n",
"    globals()[name] = frame\n",
"    print(name, 'converted to variable')"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"P_1in5 dropped na values\n",
"P_1in10 dropped na values\n",
"P_1in20 dropped na values\n",
"P_1in50 dropped na values\n",
"P_1in75 dropped na values\n",
"P_1in100 dropped na values\n",
"P_1in200 dropped na values\n",
"P_1in250 dropped na values\n",
"P_1in500 dropped na values\n",
"P_1in1000 dropped na values\n",
"FU_1in5 dropped na values\n",
"FU_1in10 dropped na values\n",
"FU_1in20 dropped na values\n",
"FU_1in50 dropped na values\n",
"FU_1in75 dropped na values\n",
"FU_1in100 dropped na values\n",
"FU_1in200 dropped na values\n",
"FU_1in250 dropped na values\n",
"FU_1in500 dropped na values\n",
"FU_1in1000 dropped na values\n"
]
},
{
"data": {
"text/plain": [
"6627410"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# drop rows containing missing values from every dataframe in data_dict\n",
"for name in data_dict:\n",
"    data_dict[name] = data_dict[name].dropna()\n",
"    print(name, 'dropped na values')\n",
"\n",
"# NOTE: P_1in5 is the globals() copy extracted earlier; the dropna above\n",
"# rebinds the dict entries, so this length reflects the pre-dropna frame\n",
"len(P_1in5)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\jtrum\\AppData\\Local\\Temp\\ipykernel_12616\\3019214630.py:20: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n",
" mode_values = mode(data_array, axis=0, nan_policy='omit')\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mUntitled-1.ipynb Cell 3\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 18\u001b[0m data_array \u001b[39m=\u001b[39m data_dict[i]\u001b[39m.\u001b[39mto_array()\n\u001b[0;32m 19\u001b[0m \u001b[39m# Calculate the mode along the 'band' dimension (axis=0)\u001b[39;00m\n\u001b[1;32m---> 20\u001b[0m mode_values \u001b[39m=\u001b[39m mode(data_array, axis\u001b[39m=\u001b[39;49m\u001b[39m0\u001b[39;49m, nan_policy\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39momit\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m 21\u001b[0m \u001b[39m# Store the mode values in the dictionary\u001b[39;00m\n\u001b[0;32m 22\u001b[0m mode_dict[i] \u001b[39m=\u001b[39m mode_values\u001b[39m.\u001b[39mmode[\u001b[39m0\u001b[39m]\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_stats_py.py:604\u001b[0m, in \u001b[0;36mmode\u001b[1;34m(a, axis, nan_policy, keepdims)\u001b[0m\n\u001b[0;32m 602\u001b[0m \u001b[39mif\u001b[39;00m contains_nan \u001b[39mand\u001b[39;00m nan_policy \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39momit\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m 603\u001b[0m a \u001b[39m=\u001b[39m ma\u001b[39m.\u001b[39mmasked_invalid(a)\n\u001b[1;32m--> 604\u001b[0m \u001b[39mreturn\u001b[39;00m mstats_basic\u001b[39m.\u001b[39;49m_mode(a, axis, keepdims\u001b[39m=\u001b[39;49mkeepdims)\n\u001b[0;32m 606\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m np\u001b[39m.\u001b[39missubdtype(a\u001b[39m.\u001b[39mdtype, np\u001b[39m.\u001b[39mnumber):\n\u001b[0;32m 607\u001b[0m warnings\u001b[39m.\u001b[39mwarn(\u001b[39m\"\u001b[39m\u001b[39mSupport for non-numeric arrays has been deprecated \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 608\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mas of SciPy 1.9.0 and will be removed in \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 609\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m1.11.0. `pandas.DataFrame.mode` can be used instead, \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 610\u001b[0m \u001b[39m\"\u001b[39m\u001b[39msee https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mode.html.\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[0;32m 611\u001b[0m \u001b[39mDeprecationWarning\u001b[39;00m, stacklevel\u001b[39m=\u001b[39m\u001b[39m2\u001b[39m)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_mstats_basic.py:354\u001b[0m, in \u001b[0;36m_mode\u001b[1;34m(a, axis, keepdims)\u001b[0m\n\u001b[0;32m 352\u001b[0m output \u001b[39m=\u001b[39m (ma\u001b[39m.\u001b[39marray(output[\u001b[39m0\u001b[39m]), ma\u001b[39m.\u001b[39marray(output[\u001b[39m1\u001b[39m]))\n\u001b[0;32m 353\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 354\u001b[0m output \u001b[39m=\u001b[39m ma\u001b[39m.\u001b[39;49mapply_along_axis(_mode1D, axis, a)\n\u001b[0;32m 355\u001b[0m \u001b[39mif\u001b[39;00m keepdims \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mor\u001b[39;00m keepdims:\n\u001b[0;32m 356\u001b[0m newshape \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(a\u001b[39m.\u001b[39mshape)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\numpy\\ma\\extras.py:440\u001b[0m, in \u001b[0;36mapply_along_axis\u001b[1;34m(func1d, axis, arr, *args, **kwargs)\u001b[0m\n\u001b[0;32m 438\u001b[0m i\u001b[39m.\u001b[39mput(indlist, ind)\n\u001b[0;32m 439\u001b[0m j\u001b[39m.\u001b[39mput(indlist, ind)\n\u001b[1;32m--> 440\u001b[0m res \u001b[39m=\u001b[39m func1d(arr[\u001b[39mtuple\u001b[39;49m(i\u001b[39m.\u001b[39;49mtolist())], \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m 441\u001b[0m outarr[\u001b[39mtuple\u001b[39m(flatten_inplace(j\u001b[39m.\u001b[39mtolist()))] \u001b[39m=\u001b[39m res\n\u001b[0;32m 442\u001b[0m dtypes\u001b[39m.\u001b[39mappend(asarray(res)\u001b[39m.\u001b[39mdtype)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_mstats_basic.py:342\u001b[0m, in \u001b[0;36m_mode.._mode1D\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_mode1D\u001b[39m(a):\n\u001b[1;32m--> 342\u001b[0m (rep,cnt) \u001b[39m=\u001b[39m find_repeats(a)\n\u001b[0;32m 343\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m cnt\u001b[39m.\u001b[39mndim:\n\u001b[0;32m 344\u001b[0m \u001b[39mreturn\u001b[39;00m (\u001b[39m0\u001b[39m, \u001b[39m0\u001b[39m)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_mstats_basic.py:186\u001b[0m, in \u001b[0;36mfind_repeats\u001b[1;34m(arr)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Find repeats in arr and return a tuple (repeats, repeat_count).\u001b[39;00m\n\u001b[0;32m 155\u001b[0m \n\u001b[0;32m 156\u001b[0m \u001b[39mThe input is cast to float64. Masked values are discarded.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 182\u001b[0m \n\u001b[0;32m 183\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 184\u001b[0m \u001b[39m# Make sure we get a copy. ma.compressed promises a \"new array\", but can\u001b[39;00m\n\u001b[0;32m 185\u001b[0m \u001b[39m# actually return a reference.\u001b[39;00m\n\u001b[1;32m--> 186\u001b[0m compr \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49masarray(ma\u001b[39m.\u001b[39;49mcompressed(arr), dtype\u001b[39m=\u001b[39;49mnp\u001b[39m.\u001b[39;49mfloat64)\n\u001b[0;32m 187\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 188\u001b[0m need_copy \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmay_share_memory(compr, arr)\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# import mode function\n",
"from scipy.stats import mode\n",
"\n",
"# create grouping categories per return period per flood type\n",
"P_under1 = ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100']\n",
"P_1to10 = ['P_1in75', 'P_1in50', 'P_1in20'] \n",
"P_over10 = ['P_1in10', 'P_1in5']\n",
"FU_under1 = ['FU_1in1000', 'FU_1in500', 'FU_1in250', 'FU_1in200', 'FU_1in100']\n",
"FU_1to10 = ['FU_1in75', 'FU_1in50', 'FU_1in20']\n",
"FU_over10 = ['FU_1in10', 'FU_1in5']\n",
"\n",
"# create empty dictionary to store mode values\n",
"mode_dict = {}\n",
"\n",
"# Calculate mode for each return period per flood type\n",
"for i in P_under1 + P_1to10 + P_over10 + FU_under1 + FU_1to10 + FU_over10:\n",
"    # Get the data array from the xarray Dataset\n",
"    data_array = data_dict[i].to_array()\n",
"    # Calculate the mode along the 'band' dimension (axis=0).\n",
"    # keepdims=True pins the pre-SciPy-1.11 output shape so the\n",
"    # mode_values.mode[0] indexing below stays valid, and silences the\n",
"    # FutureWarning emitted by this cell.\n",
"    mode_values = mode(data_array, axis=0, keepdims=True, nan_policy='omit')\n",
"    # Store the mode values in the dictionary\n",
"    mode_dict[i] = mode_values.mode[0]\n",
"\n",
"    print(i, 'mode calculated:', mode_values.mode[0])\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"P_1in5 reprojected\n",
"P_1in10 reprojected\n",
"P_1in20 reprojected\n",
"P_1in50 reprojected\n",
"P_1in75 reprojected\n",
"P_1in100 reprojected\n",
"P_1in200 reprojected\n",
"P_1in250 reprojected\n",
"P_1in500 reprojected\n",
"P_1in1000 reprojected\n",
"FU_1in5 reprojected\n",
"FU_1in10 reprojected\n",
"FU_1in20 reprojected\n",
"FU_1in50 reprojected\n",
"FU_1in75 reprojected\n",
"FU_1in100 reprojected\n",
"FU_1in200 reprojected\n",
"FU_1in250 reprojected\n",
"FU_1in500 reprojected\n",
"FU_1in1000 reprojected\n"
]
}
],
"source": [
"# reproject every raster in data_dict onto the catchment's CRS\n",
"# (resampling=0 selects nearest-neighbour resampling)\n",
"for key in data_dict:\n",
"    data_dict[key] = data_dict[key].rio.reproject(catchment.crs, resampling=0)\n",
"    print(key, 'reprojected')"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"low_prob done\n",
"medium_prob done\n",
"high_prob done\n"
]
}
],
"source": [
"import geopandas as gpd\n",
"from scipy.stats import mode\n",
"\n",
"# Set the data directory\n",
"datadir = 'C:/Users/jtrum/world_bank/data/'\n",
"\n",
"# Load the catchment boundary\n",
"catchment = gpd.read_file(datadir + 'catchment.geojson')\n",
"\n",
"# Return-period rasters bucketed by annual exceedance probability\n",
"groups = {\n",
"    'low_prob': ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100'],\n",
"    'medium_prob': ['P_1in75', 'P_1in50', 'P_1in20'],\n",
"    'high_prob': ['P_1in10', 'P_1in5']\n",
"}\n",
"\n",
"# One binary (majority-vote) raster per probability group\n",
"binary_classifications = {}\n",
"\n",
"for group_name, group_rasters in groups.items():\n",
"    # Load every raster in the group and accumulate a per-cell vote count\n",
"    layers = [xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{name}.tif') for name in group_rasters]\n",
"    vote_count = layers[0]\n",
"    for layer in layers[1:]:\n",
"        vote_count = vote_count + layer\n",
"\n",
"    # A cell is flagged (1.0) when at least half of the group's rasters flag it\n",
"    binary_classifications[group_name] = (vote_count >= len(group_rasters) / 2).astype(float)\n",
"    print(group_name, 'done')\n",
"\n",
"# 'binary_classifications' now holds the three per-group binary rasters\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"low_prob reprojected\n",
"medium_prob reprojected\n",
"high_prob reprojected\n"
]
}
],
"source": [
"# reproject the grouped binary rasters onto the catchment's CRS\n",
"# (resampling=0 selects nearest-neighbour resampling)\n",
"for group_name in binary_classifications:\n",
"    binary_classifications[group_name] = binary_classifications[group_name].rio.reproject(catchment.crs, resampling=0)\n",
"    print(group_name, 'reprojected')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"low_prob clipped\n",
"medium_prob clipped\n",
"high_prob clipped\n"
]
}
],
"source": [
"# clip the grouped binary rasters to the catchment basin footprint\n",
"clip = catchment.geometry\n",
"\n",
"for group_name in binary_classifications:\n",
"    binary_classifications[group_name] = binary_classifications[group_name].rio.clip(clip, from_disk=True)\n",
"    print(group_name, 'clipped')"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" | \n",
" spatial_ref | \n",
" band_data | \n",
"
\n",
" \n",
" band | \n",
" y | \n",
" x | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [spatial_ref, band_data]\n",
"Index: []"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# view data as a dataframe\n",
"df = binary_classifications['low_prob'].to_dataframe()\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'probability'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:1348\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 1347\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1348\u001b[0m variable \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_variables[name]\n\u001b[0;32m 1349\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n",
"\u001b[1;31mKeyError\u001b[0m: 'probability'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mUntitled-1.ipynb Cell 3\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 22\u001b[0m \u001b[39m# Group the data based on probability thresholds\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[39mfor\u001b[39;00m key, (min_prob, max_prob) \u001b[39min\u001b[39;00m thresholds\u001b[39m.\u001b[39mitems():\n\u001b[1;32m---> 24\u001b[0m mask \u001b[39m=\u001b[39m (data[\u001b[39m'\u001b[39;49m\u001b[39mprobability\u001b[39;49m\u001b[39m'\u001b[39;49m] \u001b[39m>\u001b[39m\u001b[39m=\u001b[39m min_prob) \u001b[39m&\u001b[39m (data[\u001b[39m'\u001b[39m\u001b[39mprobability\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m<\u001b[39m max_prob)\n\u001b[0;32m 25\u001b[0m grouped_data[key][i] \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39mwhere(mask, drop\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 27\u001b[0m \u001b[39m# Create a dictionary to store the binary classifications\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:1439\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1437\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39misel(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkey)\n\u001b[0;32m 1438\u001b[0m \u001b[39mif\u001b[39;00m utils\u001b[39m.\u001b[39mhashable(key):\n\u001b[1;32m-> 1439\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_construct_dataarray(key)\n\u001b[0;32m 1440\u001b[0m \u001b[39mif\u001b[39;00m utils\u001b[39m.\u001b[39miterable_of_hashable(key):\n\u001b[0;32m 1441\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_copy_listed(key)\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:1350\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 1348\u001b[0m variable \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_variables[name]\n\u001b[0;32m 1349\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m-> 1350\u001b[0m _, name, variable \u001b[39m=\u001b[39m _get_virtual_variable(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_variables, name, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdims)\n\u001b[0;32m 1352\u001b[0m needed_dims \u001b[39m=\u001b[39m \u001b[39mset\u001b[39m(variable\u001b[39m.\u001b[39mdims)\n\u001b[0;32m 1354\u001b[0m coords: \u001b[39mdict\u001b[39m[Hashable, Variable] \u001b[39m=\u001b[39m {}\n",
"File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:186\u001b[0m, in \u001b[0;36m_get_virtual_variable\u001b[1;34m(variables, key, dim_sizes)\u001b[0m\n\u001b[0;32m 184\u001b[0m split_key \u001b[39m=\u001b[39m key\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m1\u001b[39m)\n\u001b[0;32m 185\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(split_key) \u001b[39m!=\u001b[39m \u001b[39m2\u001b[39m:\n\u001b[1;32m--> 186\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(key)\n\u001b[0;32m 188\u001b[0m ref_name, var_name \u001b[39m=\u001b[39m split_key\n\u001b[0;32m 189\u001b[0m ref_var \u001b[39m=\u001b[39m variables[ref_name]\n",
"\u001b[1;31mKeyError\u001b[0m: 'probability'"
]
}
],
"source": [
"import xarray as xr\n",
"import geopandas as gpd\n",
"import numpy as np\n",
"from scipy.stats import mode\n",
"\n",
"# Create groups for probability thresholds\n",
"thresholds = {\n",
"    'low_prob': (0, 0.01), # <1% probability\n",
"    'medium_prob': (0.01, 0.1), # 1-10% probability\n",
"    'high_prob': (0.1, 1) # >10% probability\n",
"}\n",
"\n",
"# Create a dictionary to store the grouped data\n",
"grouped_data = {key: {} for key in thresholds}\n",
"\n",
"# Load and crop the rasters\n",
"for i in pluvial + fluvial_undefined:\n",
"    data = xr.open_dataset(datadir + f'AngolaFathom/Angola/pluvial/{i}.tif')\n",
"    # Crop the data to the extent of the catchment\n",
"    data = data.sel(x=slice(catchment.total_bounds[0], catchment.total_bounds[2]),\n",
"                    y=slice(catchment.total_bounds[1], catchment.total_bounds[3]))\n",
"    # Group the cells by probability threshold. These GeoTIFFs expose their\n",
"    # pixel values as 'band_data' (there is no 'probability' variable -\n",
"    # indexing by 'probability' raised the KeyError shown in the traceback)\n",
"    for key, (min_prob, max_prob) in thresholds.items():\n",
"        mask = (data['band_data'] >= min_prob) & (data['band_data'] < max_prob)\n",
"        grouped_data[key][i] = data.where(mask, drop=True)\n",
"\n",
"# Create a dictionary to store the binary classifications\n",
"binary_classification = {key: {} for key in thresholds}\n",
"\n",
"# Create a binary classification based on majority values\n",
"for group, group_data in grouped_data.items():\n",
"    for i in pluvial + fluvial_undefined:\n",
"        source = group_data[i]['band_data']\n",
"        # Replace NaN with -1 so mode() treats missing cells as a distinct value\n",
"        filled = source.fillna(-1)\n",
"        # Mode (majority value) across the band dimension (axis 0);\n",
"        # keepdims=False drops the reduced axis so the result is (y, x)\n",
"        mode_result = mode(filled.values, axis=0, keepdims=False, nan_policy='omit')\n",
"        # 1.0 where the majority value is 1, else 0.0\n",
"        binary_values = np.where(mode_result.mode == 1, 1, 0).astype(float)\n",
"        # np.where returns a bare ndarray (no .coords/.dims of its own), so\n",
"        # rebuild the DataArray on the source grid explicitly\n",
"        binary_classification[group][i] = xr.DataArray(\n",
"            binary_values,\n",
"            coords={'y': source['y'], 'x': source['x']},\n",
"            dims=('y', 'x'))\n",
"\n",
"# `binary_classification` now maps group -> raster name -> binary (y, x) grid\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "wash",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}