{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import xarray as xr\n",
    "import rioxarray as rxr\n",
    "import os\n",
    "import xarray as xr\n",
    "import geopandas as gpd\n",
    "import os\n",
    "from osgeo import gdal\n",
    "\n",
    "\n",
    "os.chdir(r'C:/Users/jtrum/world_bank/data/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"World Geodetic System 1984\",SPHEROID[\"WGS 84\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST],AUTHORITY[\"EPSG\",\"4326\"]]\n",
      "P_1in5 clipped\n",
      "P_1in10 clipped\n",
      "P_1in20 clipped\n",
      "P_1in50 clipped\n",
      "P_1in75 clipped\n",
      "P_1in100 clipped\n",
      "P_1in200 clipped\n",
      "P_1in250 clipped\n",
      "P_1in500 clipped\n",
      "P_1in1000 clipped\n",
      "FU_1in5 clipped\n",
      "FU_1in10 clipped\n",
      "FU_1in20 clipped\n",
      "FU_1in50 clipped\n",
      "FU_1in75 clipped\n",
      "FU_1in100 clipped\n",
      "FU_1in200 clipped\n",
      "FU_1in250 clipped\n",
      "FU_1in500 clipped\n",
      "FU_1in1000 clipped\n"
     ]
    }
   ],
   "source": [
    "datadir = 'C:/Users/jtrum/world_bank/data/'\n",
    "\n",
    "pluvial = ['P_1in5', 'P_1in10', 'P_1in20', 'P_1in50', 'P_1in75', 'P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000']\n",
    "fluvial_undefined = ['FU_1in5', 'FU_1in10', 'FU_1in20', 'FU_1in50', 'FU_1in75', 'FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']\n",
    "\n",
    "data_dict = {}\n",
    "\n",
    "for i in pluvial:\n",
    "    data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/pluvial/{i}.tif')\n",
    "\n",
    "for i in fluvial_undefined:\n",
    "    data_dict[i] = xr.open_dataset(datadir + f'AngolaFathom/Angola/fluvial_undefended/{i}.tif')\n",
    "\n",
    "# crop the 20 rasters to the extent of catchment basin\n",
    "catchment = gpd.read_file(datadir + 'catchment.geojson')\n",
    "clip = catchment.geometry\n",
    "\n",
    "# set crs of rasters to equal crs of catchment\n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].rio.write_crs(catchment.crs)\n",
    "    # print crs of raster, then crs of catchment\n",
    "    print(data_dict[i].rio.crs)\n",
    "    print(catchment.crs)\n",
    "\n",
    "# clip rasters to catchment extent\n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)\n",
    "    print(i, 'clipped')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P_1in5 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in5_clipped.tif\n",
      "P_1in10 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in10_clipped.tif\n",
      "P_1in20 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in20_clipped.tif\n",
      "P_1in50 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in50_clipped.tif\n",
      "P_1in75 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in75_clipped.tif\n",
      "P_1in100 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in100_clipped.tif\n",
      "P_1in200 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in200_clipped.tif\n",
      "P_1in250 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in250_clipped.tif\n",
      "P_1in500 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in500_clipped.tif\n",
      "P_1in1000 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/P_1in1000_clipped.tif\n",
      "FU_1in5 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in5_clipped.tif\n",
      "FU_1in10 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in10_clipped.tif\n",
      "FU_1in20 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in20_clipped.tif\n",
      "FU_1in50 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in50_clipped.tif\n",
      "FU_1in75 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in75_clipped.tif\n",
      "FU_1in100 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in100_clipped.tif\n",
      "FU_1in200 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in200_clipped.tif\n",
      "FU_1in250 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in250_clipped.tif\n",
      "FU_1in500 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in500_clipped.tif\n",
      "FU_1in1000 clipped\n",
      "Binarized C:/Users/jtrum/world_bank/data/FU_1in1000_clipped.tif\n"
     ]
    }
   ],
   "source": [
    "def binarize_band(raster_path, threshold=0.15):\n",
    "    raster_dataset = gdal.Open(raster_path, gdal.GA_Update)\n",
    "    \n",
    "    if raster_dataset is None:\n",
    "        print(f\"Unable to open {raster_path}\")\n",
    "        return\n",
    "    \n",
    "    band = raster_dataset.GetRasterBand(1)\n",
    "    band_data = band.ReadAsArray()\n",
    "    \n",
    "    binary_data = (band_data > threshold).astype(int)\n",
    "    \n",
    "    new_band = raster_dataset.GetRasterBand(1)  # You can choose any band number you want\n",
    "    new_band.WriteArray(binary_data)\n",
    "    \n",
    "    raster_dataset.FlushCache()\n",
    "    raster_dataset = None\n",
    "\n",
    "    print(f\"Binarized {raster_path}\")\n",
    "\n",
    "# Define the threshold value\n",
    "threshold = 0.15\n",
    "\n",
    "# Clip rasters to catchment extent\n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].rio.clip(clip, from_disk=True)\n",
    "    print(i, 'clipped')\n",
    "    \n",
    "    # Get the path to the clipped raster file\n",
    "    clipped_raster_path = datadir + i + '_clipped.tif'\n",
    "    data_dict[i].to_netcdf(clipped_raster_path)  # Save the clipped raster as a NetCDF\n",
    "    \n",
    "    # Binarize the band data in the clipped raster\n",
    "    binarize_band(clipped_raster_path, threshold)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# turn the clipped rasters into a dictionary\n",
    "clipped_dict = {}\n",
    "\n",
    "for i in pluvial:\n",
    "    clipped_dict[i] = xr.open_dataset(datadir + f'{i}_clipped.tif')\n",
    "\n",
    "for i in fluvial_undefined:\n",
    "    clipped_dict[i] = xr.open_dataset(datadir + f'{i}_clipped.tif')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\jtrum\\AppData\\Local\\Temp\\ipykernel_17268\\1252790497.py:18: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n",
      "  mode_data = stats.mode(stacked_data, axis=0, nan_policy='omit')\n"
     ]
    }
   ],
   "source": [
    "from scipy import stats\n",
    "\n",
    "# Define the groups based on your criteria\n",
    "groups = [\n",
    "    ['P_1in5', 'P_1in10'],\n",
    "    ['P_1in20', 'P_1in50', 'P_1in75'],\n",
    "    ['P_1in100', 'P_1in200', 'P_1in250', 'P_1in500', 'P_1in1000'],\n",
    "    ['FU_1in5', 'FU_1in10'],\n",
    "    ['FU_1in20', 'FU_1in50', 'FU_1in75'],\n",
    "    ['FU_1in100', 'FU_1in200', 'FU_1in250', 'FU_1in500', 'FU_1in1000']\n",
    "]\n",
    "\n",
    "def compute_mode_value(rasters):\n",
    "    # Stack the rasters along a new dimension\n",
    "    stacked_data = xr.concat([rasters[i]['band_data'] for i in range(len(rasters))], dim='raster')\n",
    "    \n",
    "    # Compute the mode along the 'raster' dimension\n",
    "    mode_data = stats.mode(stacked_data, axis=0, nan_policy='omit')\n",
    "\n",
    "    # Extract the mode values as a DataArray\n",
    "    mode_values = xr.DataArray(mode_data.mode, dims=stacked_data.dims, coords=stacked_data.coords)\n",
    "\n",
    "    return mode_values\n",
    "\n",
    "# Create a dictionary to store mode values for each group\n",
    "mode_values_dict = {}\n",
    "\n",
    "# Compute mode values for each group\n",
    "for group_indices, group_name in enumerate(groups):\n",
    "    group_rasters = [clipped_dict[i] for i in group_name]\n",
    "    mode_values = compute_mode_value(group_rasters)\n",
    "    mode_values_dict[f'Group_{group_indices + 1}'] = mode_values\n",
    "\n",
    "# The mode values for each group can be accessed as mode_values_dict['Group_1'], mode_values_dict['Group_2'], etc.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'FU_1in500'"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# now plot the 6 groups post-binarization\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.colors as colors\n",
    "import matplotlib.patches as mpatches\n",
    "import matplotlib.lines as mlines\n",
    "import matplotlib.gridspec as gridspec\n",
    "\n",
    "\n",
    "# Define the groups based on your criteria\n",
    "\n",
    "groups = [\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'DataFrame' object has no attribute 'to_dataframe'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17268\\1667624259.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;31m#print as a dataframe\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m     \u001b[0mdata_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_dataframe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[1;31m# turn each dataframe into its own variable so we don't have to call the dictionary every time\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m     \u001b[0mglobals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m   5985\u001b[0m             \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5986\u001b[0m             \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   5987\u001b[0m         ):\n\u001b[0;32m   5988\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5989\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'to_dataframe'"
     ]
    }
   ],
   "source": [
    "#print as a dataframe \n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].to_dataframe()\n",
    "    # turn each dataframe into its own variable so we don't have to call the dictionary every time\n",
    "    globals()[i] = data_dict[i]\n",
    "    print(i, 'converted to dataframe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P_1in5 converted to geodataframe\n",
      "P_1in10 converted to geodataframe\n",
      "P_1in20 converted to geodataframe\n",
      "P_1in50 converted to geodataframe\n",
      "P_1in75 converted to geodataframe\n",
      "P_1in100 converted to geodataframe\n",
      "P_1in200 converted to geodataframe\n",
      "P_1in250 converted to geodataframe\n",
      "P_1in500 converted to geodataframe\n",
      "P_1in1000 converted to geodataframe\n",
      "FU_1in5 converted to geodataframe\n",
      "FU_1in10 converted to geodataframe\n",
      "FU_1in20 converted to geodataframe\n",
      "FU_1in50 converted to geodataframe\n",
      "FU_1in75 converted to geodataframe\n",
      "FU_1in100 converted to geodataframe\n",
      "FU_1in200 converted to geodataframe\n",
      "FU_1in250 converted to geodataframe\n",
      "FU_1in500 converted to geodataframe\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "reset_index() missing 1 required positional argument: 'dims_or_levels'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mUntitled-1.ipynb Cell 4\u001b[0m line \u001b[0;36m3\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X24sdW50aXRsZWQ%3D?line=0'>1</a>\u001b[0m \u001b[39m# use x and y to make into geodataframe\u001b[39;00m\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X24sdW50aXRsZWQ%3D?line=1'>2</a>\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m data_dict:\n\u001b[1;32m----> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X24sdW50aXRsZWQ%3D?line=2'>3</a>\u001b[0m     data_dict[i] \u001b[39m=\u001b[39m data_dict[i]\u001b[39m.\u001b[39;49mreset_index()\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X24sdW50aXRsZWQ%3D?line=3'>4</a>\u001b[0m     data_dict[i] \u001b[39m=\u001b[39m gpd\u001b[39m.\u001b[39mGeoDataFrame(data_dict[i], geometry\u001b[39m=\u001b[39mgpd\u001b[39m.\u001b[39mpoints_from_xy(data_dict[i]\u001b[39m.\u001b[39mx, data_dict[i]\u001b[39m.\u001b[39my))\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X24sdW50aXRsZWQ%3D?line=4'>5</a>\u001b[0m     \u001b[39mprint\u001b[39m(i, \u001b[39m'\u001b[39m\u001b[39mconverted to geodataframe\u001b[39m\u001b[39m'\u001b[39m)\n",
      "\u001b[1;31mTypeError\u001b[0m: reset_index() missing 1 required positional argument: 'dims_or_levels'"
     ]
    }
   ],
   "source": [
    "# use x and y to make into geodataframe\n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].reset_index()\n",
    "    data_dict[i] = gpd.GeoDataFrame(data_dict[i], geometry=gpd.points_from_xy(data_dict[i].x, data_dict[i].y))\n",
    "    print(i, 'converted to geodataframe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# make a dictionary of the dataframes\n",
    "df_dict = {'P_1in5', \n",
    "           'P_1in10',}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for each dataframe, in the 'band_data' column, if the value is greater than or equal to 0.15, in a new column called 'threshold' put a 1, all else put a 0. use their actual dataframes and not the dictionary\n",
    "P_1in5['threshold'] = P_1in5['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in10['threshold'] = P_1in10['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in20['threshold'] = P_1in20['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in50['threshold'] = P_1in50['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in75['threshold'] = P_1in75['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in100['threshold'] = P_1in100['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in200['threshold'] = P_1in200['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in250['threshold'] = P_1in250['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in500['threshold'] = P_1in500['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "P_1in1000['threshold'] = P_1in1000['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in5['threshold'] = FU_1in5['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in10['threshold'] = FU_1in10['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in20['threshold'] = FU_1in20['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in50['threshold'] = FU_1in50['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in75['threshold'] = FU_1in75['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in100['threshold'] = FU_1in100['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in200['threshold'] = FU_1in200['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in250['threshold'] = FU_1in250['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in500['threshold'] = FU_1in500['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)\n",
    "FU_1in1000['threshold'] = FU_1in1000['band_data'].apply(lambda x: 1 if x >= 0.15 else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# make 6 groupings of the dataframes, where each will be a new dataframe using the x and y columns and then the threshold values from each of the dataframes in the group, with the column being named 'threshold' + the name of the dataframe\n",
    "# group 1 (P_1in5, P_1in10)\n",
    "G1 = pd.DataFrame(P_1in5, columns=['band', 'x', 'y'])\n",
    "G1['threshold_P_1in5'] = P_1in5['threshold']\n",
    "G1['threshold_P_1in10'] = P_1in10['threshold']\n",
    "\n",
    "# group 2 (P_1in20, P_1in50, P_1in75)\n",
    "G2 = pd.DataFrame(P_1in20, columns=['band', 'x', 'y'])\n",
    "G2['threshold_P_1in20'] = P_1in20['threshold']\n",
    "G2['threshold_P_1in50'] = P_1in50['threshold']\n",
    "G2['threshold_P_1in75'] = P_1in75['threshold']\n",
    "\n",
    "# group 3 (P_1in100, P_1in200, P_1in250, P_1in500, P_1in1000)\n",
    "G3 = pd.DataFrame(P_1in100, columns=['band', 'x', 'y'])\n",
    "G3['threshold_P_1in100'] = P_1in100['threshold']\n",
    "G3['threshold_P_1in200'] = P_1in200['threshold']\n",
    "G3['threshold_P_1in250'] = P_1in250['threshold']\n",
    "G3['threshold_P_1in500'] = P_1in500['threshold']\n",
    "G3['threshold_P_1in1000'] = P_1in1000['threshold']\n",
    "\n",
    "# group 4 (FU_1in5, FU_1in10)\n",
    "G4 = pd.DataFrame(FU_1in5, columns=['band', 'x', 'y'])\n",
    "G4['threshold_FU_1in5'] = FU_1in5['threshold']\n",
    "G4['threshold_FU_1in10'] = FU_1in10['threshold']\n",
    "\n",
    "# group 5 (FU_1in20, FU_1in50, FU_1in75)\n",
    "G5 = pd.DataFrame(FU_1in20, columns=['band', 'x', 'y'])\n",
    "G5['threshold_FU_1in20'] = FU_1in20['threshold']\n",
    "G5['threshold_FU_1in50'] = FU_1in50['threshold']\n",
    "G5['threshold_FU_1in75'] = FU_1in75['threshold']\n",
    "\n",
    "# group 6 (FU_1in100, FU_1in200, FU_1in250, FU_1in500, FU_1in1000)\n",
    "G6 = pd.DataFrame(FU_1in100, columns=['band', 'x', 'y'])\n",
    "G6['threshold_FU_1in100'] = FU_1in100['threshold']\n",
    "G6['threshold_FU_1in200'] = FU_1in200['threshold']\n",
    "G6['threshold_FU_1in250'] = FU_1in250['threshold']\n",
    "G6['threshold_FU_1in500'] = FU_1in500['threshold']\n",
    "G6['threshold_FU_1in1000'] = FU_1in1000['threshold']\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32mUntitled-1.ipynb Cell 8\u001b[0m line \u001b[0;36m2\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=0'>1</a>\u001b[0m \u001b[39m# take the mode of each of the threshold columns and make it the new column 'threshold' in each of the groups\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=1'>2</a>\u001b[0m G1[\u001b[39m'\u001b[39m\u001b[39mthreshold\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m=\u001b[39m G1[[\u001b[39m'\u001b[39;49m\u001b[39mthreshold_P_1in5\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mthreshold_P_1in10\u001b[39;49m\u001b[39m'\u001b[39;49m]]\u001b[39m.\u001b[39;49mmode(axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m)[\u001b[39m0\u001b[39m]\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=2'>3</a>\u001b[0m \u001b[39m# print(\"G1 threshold column created\")\u001b[39;00m\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=3'>4</a>\u001b[0m \u001b[39m# G2['threshold'] = G2[['threshold_P_1in20', 'threshold_P_1in50', 'threshold_P_1in75']].mode(axis=1)[0]\u001b[39;00m\n\u001b[0;32m      <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=4'>5</a>\u001b[0m \u001b[39m# print(\"G2 threshold column created\")\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=11'>12</a>\u001b[0m \u001b[39m# G6['threshold'] = G6[['threshold_FU_1in100', 'threshold_FU_1in200', 'threshold_FU_1in250', 'threshold_FU_1in500', 'threshold_FU_1in1000']].mode(axis=1)[0]\u001b[39;00m\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X34sdW50aXRsZWQ%3D?line=12'>13</a>\u001b[0m \u001b[39m# print(\"G6 threshold column created\")\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\frame.py:10746\u001b[0m, in \u001b[0;36mDataFrame.mode\u001b[1;34m(self, axis, numeric_only, dropna)\u001b[0m\n\u001b[0;32m  10743\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mf\u001b[39m(s):\n\u001b[0;32m  10744\u001b[0m     \u001b[39mreturn\u001b[39;00m s\u001b[39m.\u001b[39mmode(dropna\u001b[39m=\u001b[39mdropna)\n\u001b[1;32m> 10746\u001b[0m data \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39;49mapply(f, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[0;32m  10747\u001b[0m \u001b[39m# Ensure index is type stable (should always use int index)\u001b[39;00m\n\u001b[0;32m  10748\u001b[0m \u001b[39mif\u001b[39;00m data\u001b[39m.\u001b[39mempty:\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\frame.py:9423\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[1;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[0;32m   9412\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mapply\u001b[39;00m \u001b[39mimport\u001b[39;00m frame_apply\n\u001b[0;32m   9414\u001b[0m op \u001b[39m=\u001b[39m frame_apply(\n\u001b[0;32m   9415\u001b[0m     \u001b[39mself\u001b[39m,\n\u001b[0;32m   9416\u001b[0m     func\u001b[39m=\u001b[39mfunc,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   9421\u001b[0m     kwargs\u001b[39m=\u001b[39mkwargs,\n\u001b[0;32m   9422\u001b[0m )\n\u001b[1;32m-> 9423\u001b[0m \u001b[39mreturn\u001b[39;00m op\u001b[39m.\u001b[39;49mapply()\u001b[39m.\u001b[39m__finalize__(\u001b[39mself\u001b[39m, method\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mapply\u001b[39m\u001b[39m\"\u001b[39m)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\apply.py:678\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    675\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mraw:\n\u001b[0;32m    676\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_raw()\n\u001b[1;32m--> 678\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\apply.py:798\u001b[0m, in \u001b[0;36mFrameApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    797\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply_standard\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m--> 798\u001b[0m     results, res_index \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_series_generator()\n\u001b[0;32m    800\u001b[0m     \u001b[39m# wrap results\u001b[39;00m\n\u001b[0;32m    801\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwrap_results(results, res_index)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\apply.py:814\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    811\u001b[0m \u001b[39mwith\u001b[39;00m option_context(\u001b[39m\"\u001b[39m\u001b[39mmode.chained_assignment\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m):\n\u001b[0;32m    812\u001b[0m     \u001b[39mfor\u001b[39;00m i, v \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(series_gen):\n\u001b[0;32m    813\u001b[0m         \u001b[39m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[1;32m--> 814\u001b[0m         results[i] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mf(v)\n\u001b[0;32m    815\u001b[0m         \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[0;32m    816\u001b[0m             \u001b[39m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[0;32m    817\u001b[0m             \u001b[39m#  series_generator will swap out the underlying data\u001b[39;00m\n\u001b[0;32m    818\u001b[0m             results[i] \u001b[39m=\u001b[39m results[i]\u001b[39m.\u001b[39mcopy(deep\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\frame.py:10744\u001b[0m, in \u001b[0;36mDataFrame.mode.<locals>.f\u001b[1;34m(s)\u001b[0m\n\u001b[0;32m  10743\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mf\u001b[39m(s):\n\u001b[1;32m> 10744\u001b[0m     \u001b[39mreturn\u001b[39;00m s\u001b[39m.\u001b[39;49mmode(dropna\u001b[39m=\u001b[39;49mdropna)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\series.py:2122\u001b[0m, in \u001b[0;36mSeries.mode\u001b[1;34m(self, dropna)\u001b[0m\n\u001b[0;32m   2120\u001b[0m values \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values\n\u001b[0;32m   2121\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(values, np\u001b[39m.\u001b[39mndarray):\n\u001b[1;32m-> 2122\u001b[0m     res_values \u001b[39m=\u001b[39m algorithms\u001b[39m.\u001b[39;49mmode(values, dropna\u001b[39m=\u001b[39;49mdropna)\n\u001b[0;32m   2123\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m   2124\u001b[0m     res_values \u001b[39m=\u001b[39m values\u001b[39m.\u001b[39m_mode(dropna\u001b[39m=\u001b[39mdropna)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\pandas\\core\\algorithms.py:998\u001b[0m, in \u001b[0;36mmode\u001b[1;34m(values, dropna, mask)\u001b[0m\n\u001b[0;32m    996\u001b[0m npresult \u001b[39m=\u001b[39m htable\u001b[39m.\u001b[39mmode(values, dropna\u001b[39m=\u001b[39mdropna, mask\u001b[39m=\u001b[39mmask)\n\u001b[0;32m    997\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 998\u001b[0m     npresult \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49msort(npresult)\n\u001b[0;32m    999\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m   1000\u001b[0m     warnings\u001b[39m.\u001b[39mwarn(\n\u001b[0;32m   1001\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnable to sort modes: \u001b[39m\u001b[39m{\u001b[39;00merr\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m,\n\u001b[0;32m   1002\u001b[0m         stacklevel\u001b[39m=\u001b[39mfind_stack_level(),\n\u001b[0;32m   1003\u001b[0m     )\n",
      "File \u001b[1;32m<__array_function__ internals>:200\u001b[0m, in \u001b[0;36msort\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\numpy\\core\\fromnumeric.py:1029\u001b[0m, in \u001b[0;36msort\u001b[1;34m(a, axis, kind, order)\u001b[0m\n\u001b[0;32m   1027\u001b[0m     axis \u001b[39m=\u001b[39m \u001b[39m-\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m   1028\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1029\u001b[0m     a \u001b[39m=\u001b[39m asanyarray(a)\u001b[39m.\u001b[39;49mcopy(order\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mK\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m   1030\u001b[0m a\u001b[39m.\u001b[39msort(axis\u001b[39m=\u001b[39maxis, kind\u001b[39m=\u001b[39mkind, order\u001b[39m=\u001b[39morder)\n\u001b[0;32m   1031\u001b[0m \u001b[39mreturn\u001b[39;00m a\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# take the mode of each of the threshold columns and make it the new column 'threshold' in each of the groups\n",
    "G1['threshold'] = G1[['threshold_P_1in5', 'threshold_P_1in10']].mode(axis=1)[0]\n",
    "# print(\"G1 threshold column created\")\n",
    "# G2['threshold'] = G2[['threshold_P_1in20', 'threshold_P_1in50', 'threshold_P_1in75']].mode(axis=1)[0]\n",
    "# print(\"G2 threshold column created\")\n",
    "# G3['threshold'] = G3[['threshold_P_1in100', 'threshold_P_1in200', 'threshold_P_1in250', 'threshold_P_1in500', 'threshold_P_1in1000']].mode(axis=1)[0]\n",
    "# print(\"G3 threshold column created\")\n",
    "# G4['threshold'] = G4[['threshold_FU_1in5', 'threshold_FU_1in10']].mode(axis=1)[0]\n",
    "# print(\"G4 threshold column created\")\n",
    "# G5['threshold'] = G5[['threshold_FU_1in20', 'threshold_FU_1in50', 'threshold_FU_1in75']].mode(axis=1)[0]\n",
    "# print(\"G5 threshold column created\")\n",
    "# G6['threshold'] = G6[['threshold_FU_1in100', 'threshold_FU_1in200', 'threshold_FU_1in250', 'threshold_FU_1in500', 'threshold_FU_1in1000']].mode(axis=1)[0]\n",
    "# print(\"G6 threshold column created\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P_1in5 converted to variable\n",
      "P_1in10 converted to variable\n",
      "P_1in20 converted to variable\n",
      "P_1in50 converted to variable\n",
      "P_1in75 converted to variable\n",
      "P_1in100 converted to variable\n",
      "P_1in200 converted to variable\n",
      "P_1in250 converted to variable\n",
      "P_1in500 converted to variable\n",
      "P_1in1000 converted to variable\n",
      "FU_1in5 converted to variable\n",
      "FU_1in10 converted to variable\n",
      "FU_1in20 converted to variable\n",
      "FU_1in50 converted to variable\n",
      "FU_1in75 converted to variable\n",
      "FU_1in100 converted to variable\n",
      "FU_1in200 converted to variable\n",
      "FU_1in250 converted to variable\n",
      "FU_1in500 converted to variable\n",
      "FU_1in1000 converted to variable\n"
     ]
    }
   ],
   "source": [
    "# turn all dataframes into own variables\n",
    "for i in data_dict:\n",
    "    globals()[i] = data_dict[i]\n",
    "    print(i, 'converted to variable')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P_1in5 dropped na values\n",
      "P_1in10 dropped na values\n",
      "P_1in20 dropped na values\n",
      "P_1in50 dropped na values\n",
      "P_1in75 dropped na values\n",
      "P_1in100 dropped na values\n",
      "P_1in200 dropped na values\n",
      "P_1in250 dropped na values\n",
      "P_1in500 dropped na values\n",
      "P_1in1000 dropped na values\n",
      "FU_1in5 dropped na values\n",
      "FU_1in10 dropped na values\n",
      "FU_1in20 dropped na values\n",
      "FU_1in50 dropped na values\n",
      "FU_1in75 dropped na values\n",
      "FU_1in100 dropped na values\n",
      "FU_1in200 dropped na values\n",
      "FU_1in250 dropped na values\n",
      "FU_1in500 dropped na values\n",
      "FU_1in1000 dropped na values\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "6627410"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop na values\n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].dropna()\n",
    "    print(i, 'dropped na values')\n",
    "\n",
    "len(P_1in5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\jtrum\\AppData\\Local\\Temp\\ipykernel_12616\\3019214630.py:20: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n",
      "  mode_values = mode(data_array, axis=0, nan_policy='omit')\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32mUntitled-1.ipynb Cell 3\u001b[0m line \u001b[0;36m2\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X16sdW50aXRsZWQ%3D?line=17'>18</a>\u001b[0m data_array \u001b[39m=\u001b[39m data_dict[i]\u001b[39m.\u001b[39mto_array()\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X16sdW50aXRsZWQ%3D?line=18'>19</a>\u001b[0m \u001b[39m# Calculate the mode along the 'band' dimension (axis=0)\u001b[39;00m\n\u001b[1;32m---> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X16sdW50aXRsZWQ%3D?line=19'>20</a>\u001b[0m mode_values \u001b[39m=\u001b[39m mode(data_array, axis\u001b[39m=\u001b[39;49m\u001b[39m0\u001b[39;49m, nan_policy\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39momit\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X16sdW50aXRsZWQ%3D?line=20'>21</a>\u001b[0m \u001b[39m# Store the mode values in the dictionary\u001b[39;00m\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#X16sdW50aXRsZWQ%3D?line=21'>22</a>\u001b[0m mode_dict[i] \u001b[39m=\u001b[39m mode_values\u001b[39m.\u001b[39mmode[\u001b[39m0\u001b[39m]\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_stats_py.py:604\u001b[0m, in \u001b[0;36mmode\u001b[1;34m(a, axis, nan_policy, keepdims)\u001b[0m\n\u001b[0;32m    602\u001b[0m \u001b[39mif\u001b[39;00m contains_nan \u001b[39mand\u001b[39;00m nan_policy \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39momit\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m    603\u001b[0m     a \u001b[39m=\u001b[39m ma\u001b[39m.\u001b[39mmasked_invalid(a)\n\u001b[1;32m--> 604\u001b[0m     \u001b[39mreturn\u001b[39;00m mstats_basic\u001b[39m.\u001b[39;49m_mode(a, axis, keepdims\u001b[39m=\u001b[39;49mkeepdims)\n\u001b[0;32m    606\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m np\u001b[39m.\u001b[39missubdtype(a\u001b[39m.\u001b[39mdtype, np\u001b[39m.\u001b[39mnumber):\n\u001b[0;32m    607\u001b[0m     warnings\u001b[39m.\u001b[39mwarn(\u001b[39m\"\u001b[39m\u001b[39mSupport for non-numeric arrays has been deprecated \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    608\u001b[0m                   \u001b[39m\"\u001b[39m\u001b[39mas of SciPy 1.9.0 and will be removed in \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    609\u001b[0m                   \u001b[39m\"\u001b[39m\u001b[39m1.11.0. `pandas.DataFrame.mode` can be used instead, \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m    610\u001b[0m                   \u001b[39m\"\u001b[39m\u001b[39msee https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mode.html.\u001b[39m\u001b[39m\"\u001b[39m,  \u001b[39m# noqa: E501\u001b[39;00m\n\u001b[0;32m    611\u001b[0m                   \u001b[39mDeprecationWarning\u001b[39;00m, stacklevel\u001b[39m=\u001b[39m\u001b[39m2\u001b[39m)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_mstats_basic.py:354\u001b[0m, in \u001b[0;36m_mode\u001b[1;34m(a, axis, keepdims)\u001b[0m\n\u001b[0;32m    352\u001b[0m     output \u001b[39m=\u001b[39m (ma\u001b[39m.\u001b[39marray(output[\u001b[39m0\u001b[39m]), ma\u001b[39m.\u001b[39marray(output[\u001b[39m1\u001b[39m]))\n\u001b[0;32m    353\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 354\u001b[0m     output \u001b[39m=\u001b[39m ma\u001b[39m.\u001b[39;49mapply_along_axis(_mode1D, axis, a)\n\u001b[0;32m    355\u001b[0m     \u001b[39mif\u001b[39;00m keepdims \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mor\u001b[39;00m keepdims:\n\u001b[0;32m    356\u001b[0m         newshape \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(a\u001b[39m.\u001b[39mshape)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\numpy\\ma\\extras.py:440\u001b[0m, in \u001b[0;36mapply_along_axis\u001b[1;34m(func1d, axis, arr, *args, **kwargs)\u001b[0m\n\u001b[0;32m    438\u001b[0m i\u001b[39m.\u001b[39mput(indlist, ind)\n\u001b[0;32m    439\u001b[0m j\u001b[39m.\u001b[39mput(indlist, ind)\n\u001b[1;32m--> 440\u001b[0m res \u001b[39m=\u001b[39m func1d(arr[\u001b[39mtuple\u001b[39;49m(i\u001b[39m.\u001b[39;49mtolist())], \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m    441\u001b[0m outarr[\u001b[39mtuple\u001b[39m(flatten_inplace(j\u001b[39m.\u001b[39mtolist()))] \u001b[39m=\u001b[39m res\n\u001b[0;32m    442\u001b[0m dtypes\u001b[39m.\u001b[39mappend(asarray(res)\u001b[39m.\u001b[39mdtype)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_mstats_basic.py:342\u001b[0m, in \u001b[0;36m_mode.<locals>._mode1D\u001b[1;34m(a)\u001b[0m\n\u001b[0;32m    341\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_mode1D\u001b[39m(a):\n\u001b[1;32m--> 342\u001b[0m     (rep,cnt) \u001b[39m=\u001b[39m find_repeats(a)\n\u001b[0;32m    343\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m cnt\u001b[39m.\u001b[39mndim:\n\u001b[0;32m    344\u001b[0m         \u001b[39mreturn\u001b[39;00m (\u001b[39m0\u001b[39m, \u001b[39m0\u001b[39m)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\scipy\\stats\\_mstats_basic.py:186\u001b[0m, in \u001b[0;36mfind_repeats\u001b[1;34m(arr)\u001b[0m\n\u001b[0;32m    154\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Find repeats in arr and return a tuple (repeats, repeat_count).\u001b[39;00m\n\u001b[0;32m    155\u001b[0m \n\u001b[0;32m    156\u001b[0m \u001b[39mThe input is cast to float64. Masked values are discarded.\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    182\u001b[0m \n\u001b[0;32m    183\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m    184\u001b[0m \u001b[39m# Make sure we get a copy. ma.compressed promises a \"new array\", but can\u001b[39;00m\n\u001b[0;32m    185\u001b[0m \u001b[39m# actually return a reference.\u001b[39;00m\n\u001b[1;32m--> 186\u001b[0m compr \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49masarray(ma\u001b[39m.\u001b[39;49mcompressed(arr), dtype\u001b[39m=\u001b[39;49mnp\u001b[39m.\u001b[39;49mfloat64)\n\u001b[0;32m    187\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m    188\u001b[0m     need_copy \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmay_share_memory(compr, arr)\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# import mode function\n",
    "from scipy.stats import mode\n",
    "\n",
    "# create grouping categories per return period per flood type\n",
    "P_under1 = ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100']\n",
    "P_1to10 = ['P_1in75', 'P_1in50', 'P_1in20'] \n",
    "P_over10 = ['P_1in10', 'P_1in5']\n",
    "FU_under1 = ['FU_1in1000', 'FU_1in500', 'FU_1in250', 'FU_1in200', 'FU_1in100']\n",
    "FU_1to10 = ['FU_1in75', 'FU_1in50', 'FU_1in20']\n",
    "FU_over10 = ['FU_1in10', 'FU_1in5']\n",
    "\n",
    "# create empty dictionary to store mode values\n",
    "mode_dict = {}\n",
    "\n",
    "# Calculate mode for each return period per flood type\n",
    "for i in P_under1 + P_1to10 + P_over10 + FU_under1 + FU_1to10 + FU_over10:\n",
    "    # Get the data array from the xarray Dataset\n",
    "    data_array = data_dict[i].to_array()\n",
    "    # Calculate the mode along the 'band' dimension (axis=0)\n",
    "    mode_values = mode(data_array, axis=0, nan_policy='omit')\n",
    "    # Store the mode values in the dictionary\n",
    "    mode_dict[i] = mode_values.mode[0]\n",
    "\n",
    "    print(i, 'mode calculated:', mode_values.mode[0])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P_1in5 reprojected\n",
      "P_1in10 reprojected\n",
      "P_1in20 reprojected\n",
      "P_1in50 reprojected\n",
      "P_1in75 reprojected\n",
      "P_1in100 reprojected\n",
      "P_1in200 reprojected\n",
      "P_1in250 reprojected\n",
      "P_1in500 reprojected\n",
      "P_1in1000 reprojected\n",
      "FU_1in5 reprojected\n",
      "FU_1in10 reprojected\n",
      "FU_1in20 reprojected\n",
      "FU_1in50 reprojected\n",
      "FU_1in75 reprojected\n",
      "FU_1in100 reprojected\n",
      "FU_1in200 reprojected\n",
      "FU_1in250 reprojected\n",
      "FU_1in500 reprojected\n",
      "FU_1in1000 reprojected\n"
     ]
    }
   ],
   "source": [
    "# set rasters to same CRS as catchment\n",
    "for i in data_dict:\n",
    "    data_dict[i] = data_dict[i].rio.reproject(catchment.crs, resampling=0)\n",
    "    print(i, 'reprojected')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "low_prob done\n",
      "medium_prob done\n",
      "high_prob done\n"
     ]
    }
   ],
   "source": [
    "import geopandas as gpd\n",
    "from scipy.stats import mode\n",
    "\n",
    "# Set the data directory\n",
    "datadir = 'C:/Users/jtrum/world_bank/data/'\n",
    "\n",
    "# Load the catchment boundary\n",
    "catchment = gpd.read_file(datadir + 'catchment.geojson')\n",
    "\n",
    "# Define the probability groups\n",
    "groups = {\n",
    "    'low_prob': ['P_1in1000', 'P_1in500', 'P_1in250', 'P_1in200', 'P_1in100'],\n",
    "    'medium_prob': ['P_1in75', 'P_1in50', 'P_1in20'],\n",
    "    'high_prob': ['P_1in10', 'P_1in5']\n",
    "}\n",
    "\n",
    "# Create empty DataArrays to store binary classifications\n",
    "binary_classifications = {}\n",
    "\n",
    "for group_name, group_rasters in groups.items():\n",
    "    binary_data = xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{group_rasters[0]}.tif')\n",
    "    \n",
    "    for raster_name in group_rasters[1:]:\n",
    "        data = xr.open_dataarray(datadir + f'AngolaFathom/Angola/pluvial/{raster_name}.tif')\n",
    "        binary_data = binary_data + data\n",
    "    \n",
    "    # Set values to 1 if they are greater than or equal to half of the raster count\n",
    "    binary_data = (binary_data >= len(group_rasters) / 2).astype(float)\n",
    "    \n",
    "    binary_classifications[group_name] = binary_data\n",
    "    print(group_name, 'done')\n",
    "\n",
    "# Now, you have six binary classifications in the 'binary_classifications' dictionary\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "low_prob reprojected\n",
      "medium_prob reprojected\n",
      "high_prob reprojected\n"
     ]
    }
   ],
   "source": [
    "# change the crs of low_prob, medium_prob, and high_prob to match catchment\n",
    "for i in binary_classifications:\n",
    "    binary_classifications[i] = binary_classifications[i].rio.reproject(catchment.crs, resampling=0)\n",
    "    print(i, 'reprojected')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "low_prob clipped\n",
      "medium_prob clipped\n",
      "high_prob clipped\n"
     ]
    }
   ],
   "source": [
    "# crop the low_prob, medium_prob, and high_prob rasters to the extent of catchment basin\n",
    "clip = catchment.geometry\n",
    "\n",
    "for i in binary_classifications:\n",
    "    binary_classifications[i] = binary_classifications[i].rio.clip(clip, from_disk=True)\n",
    "    print(i, 'clipped')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>spatial_ref</th>\n",
       "      <th>band_data</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>band</th>\n",
       "      <th>y</th>\n",
       "      <th>x</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [spatial_ref, band_data]\n",
       "Index: []"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# view data as a dataframe\n",
    "df = binary_classifications['low_prob'].to_dataframe()\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'probability'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:1348\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m   1347\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m-> 1348\u001b[0m     variable \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_variables[name]\n\u001b[0;32m   1349\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n",
      "\u001b[1;31mKeyError\u001b[0m: 'probability'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32mUntitled-1.ipynb Cell 3\u001b[0m line \u001b[0;36m2\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W2sdW50aXRsZWQ%3D?line=21'>22</a>\u001b[0m     \u001b[39m# Group the data based on probability thresholds\u001b[39;00m\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W2sdW50aXRsZWQ%3D?line=22'>23</a>\u001b[0m     \u001b[39mfor\u001b[39;00m key, (min_prob, max_prob) \u001b[39min\u001b[39;00m thresholds\u001b[39m.\u001b[39mitems():\n\u001b[1;32m---> <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W2sdW50aXRsZWQ%3D?line=23'>24</a>\u001b[0m         mask \u001b[39m=\u001b[39m (data[\u001b[39m'\u001b[39;49m\u001b[39mprobability\u001b[39;49m\u001b[39m'\u001b[39;49m] \u001b[39m>\u001b[39m\u001b[39m=\u001b[39m min_prob) \u001b[39m&\u001b[39m (data[\u001b[39m'\u001b[39m\u001b[39mprobability\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m<\u001b[39m max_prob)\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W2sdW50aXRsZWQ%3D?line=24'>25</a>\u001b[0m         grouped_data[key][i] \u001b[39m=\u001b[39m data\u001b[39m.\u001b[39mwhere(mask, drop\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m     <a href='vscode-notebook-cell:Untitled-1.ipynb?jupyter-notebook#W2sdW50aXRsZWQ%3D?line=26'>27</a>\u001b[0m \u001b[39m# Create a dictionary to store the binary classifications\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:1439\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   1437\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39misel(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkey)\n\u001b[0;32m   1438\u001b[0m \u001b[39mif\u001b[39;00m utils\u001b[39m.\u001b[39mhashable(key):\n\u001b[1;32m-> 1439\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_construct_dataarray(key)\n\u001b[0;32m   1440\u001b[0m \u001b[39mif\u001b[39;00m utils\u001b[39m.\u001b[39miterable_of_hashable(key):\n\u001b[0;32m   1441\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_copy_listed(key)\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:1350\u001b[0m, in \u001b[0;36mDataset._construct_dataarray\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m   1348\u001b[0m     variable \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_variables[name]\n\u001b[0;32m   1349\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m-> 1350\u001b[0m     _, name, variable \u001b[39m=\u001b[39m _get_virtual_variable(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_variables, name, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdims)\n\u001b[0;32m   1352\u001b[0m needed_dims \u001b[39m=\u001b[39m \u001b[39mset\u001b[39m(variable\u001b[39m.\u001b[39mdims)\n\u001b[0;32m   1354\u001b[0m coords: \u001b[39mdict\u001b[39m[Hashable, Variable] \u001b[39m=\u001b[39m {}\n",
      "File \u001b[1;32mc:\\Users\\jtrum\\miniconda3\\envs\\wash\\lib\\site-packages\\xarray\\core\\dataset.py:186\u001b[0m, in \u001b[0;36m_get_virtual_variable\u001b[1;34m(variables, key, dim_sizes)\u001b[0m\n\u001b[0;32m    184\u001b[0m split_key \u001b[39m=\u001b[39m key\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m1\u001b[39m)\n\u001b[0;32m    185\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(split_key) \u001b[39m!=\u001b[39m \u001b[39m2\u001b[39m:\n\u001b[1;32m--> 186\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(key)\n\u001b[0;32m    188\u001b[0m ref_name, var_name \u001b[39m=\u001b[39m split_key\n\u001b[0;32m    189\u001b[0m ref_var \u001b[39m=\u001b[39m variables[ref_name]\n",
      "\u001b[1;31mKeyError\u001b[0m: 'probability'"
     ]
    }
   ],
   "source": [
    "import xarray as xr\n",
    "import geopandas as gpd\n",
    "import numpy as np\n",
    "from scipy.stats import mode\n",
    "\n",
    "# Create groups for probability thresholds\n",
    "thresholds = {\n",
    "    'low_prob': (0, 0.01),  # <1% probability\n",
    "    'medium_prob': (0.01, 0.1),  # 1-10% probability\n",
    "    'high_prob': (0.1, 1)  # >10% probability\n",
    "}\n",
    "\n",
    "# Create a dictionary to store the grouped data\n",
    "grouped_data = {key: {} for key in thresholds}\n",
    "\n",
    "# Load and crop the rasters\n",
    "for i in pluvial + fluvial_undefined:\n",
    "    data = xr.open_dataset(datadir + f'AngolaFathom/Angola/pluvial/{i}.tif')\n",
    "    # Crop the data to the extent of the catchment\n",
    "    data = data.sel(x=slice(catchment.total_bounds[0], catchment.total_bounds[2]),\n",
    "                    y=slice(catchment.total_bounds[1], catchment.total_bounds[3]))\n",
    "    # Group the data based on probability thresholds\n",
    "    for key, (min_prob, max_prob) in thresholds.items():\n",
    "        mask = (data['probability'] >= min_prob) & (data['probability'] < max_prob)\n",
    "        grouped_data[key][i] = data.where(mask, drop=True)\n",
    "\n",
    "# Create a dictionary to store the binary classifications\n",
    "binary_classification = {key: {} for key in thresholds}\n",
    "\n",
    "# Create a binary classification based on majority values\n",
    "for group, group_data in grouped_data.items():\n",
    "    for i in pluvial + fluvial_undefined:\n",
    "        binary_data = group_data[i].copy()\n",
    "        # Replace NaN values with -1 to handle them separately\n",
    "        binary_data = binary_data.fillna(-1)\n",
    "        # Compute the mode (majority value) for each cell\n",
    "        mode_result = mode(binary_data, axis=0, nan_policy='omit')\n",
    "        # Set majority value to 1, if there's a majority of 1 values\n",
    "        binary_data = np.where(mode_result.mode == 1, 1, 0)\n",
    "        # Set NaN values back to NaN\n",
    "        binary_data = binary_data.astype(float)\n",
    "        binary_classification[group][i] = xr.DataArray(binary_data, coords=binary_data.coords, dims=binary_data.dims)\n",
    "\n",
    "# Now you have `binary_classification` dictionary containing binary data for each group.\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "wash",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}