{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Elaborate statistics features for Silvereye\n",
    "\n",
    "This is the first of a series of notebooks to tease the statistics on AWRA and related data for Silvereye.\n",
    "\n",
    "The current notebook is for the initial trial-error.\n",
    "\n",
    "## Purpose\n",
    "\n",
    "Merge capabilities from:\n",
    "\n",
    "* Ben's code in the prototype app\n",
    "* Ramneek's stat on daily data.\n",
    "* possibly some of the princeton-chelsa blend code (optional)\n",
    "\n",
    "Plan is:\n",
    "\n",
    "* Subset AWRA daily data sets to a more manageable size for initial elaboration. ACT or Tassie.\n",
    "* Elaborate on use cases:\n",
    "    * Existing features infered from BoM web served maps\n",
    "    * compare periods (e.g. recent past ) to similar historical periods* Subset AWRA daily data sets to a more manageable size for initial elaboration. ACT or Tassie.\n",
    "\n",
    "## Dependencies imports\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xarray as xr\n",
    "import os\n",
    "import sys\n",
    "import pandas as pd\n",
    "from functools import wraps\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns  # noqa, pandas aware plotting library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if ('SP_SRC' in os.environ):\n",
    "    root_src_dir = os.environ['SP_SRC']\n",
    "elif sys.platform == 'win32':\n",
    "    root_src_dir = r'C:\\src\\csiro\\stash\\silverpieces'\n",
    "else:\n",
    "    root_src_dir = '/home/per202/src/csiro/stash/silverpieces'\n",
    "\n",
    "pkg_src_dir = root_src_dir\n",
    "sys.path.append(pkg_src_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if ('SP_DATA' in os.environ):\n",
    "    root_data_dir = os.environ['SP_DATA']\n",
    "elif sys.platform == 'win32':\n",
    "    root_data_dir = r'C:\\data\\silverpieces'\n",
    "else:\n",
    "    root_data_dir = '/home/per202/data/silverpieces'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#from silverpieces.blah import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'retina'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from siphon.catalog import TDSCatalog\n",
    "import requests\n",
    "# from distributed import Client, LocalCluster\n",
    "# from flask import Flask, redirect, url_for, request, render_template, jsonify, abort\n",
    "# from flask_cors import CORS\n",
    "# import intake\n",
    "# import regionmask\n",
    "# import random\n",
    "# from flask import jsonify, make_response\n",
    "# import json\n",
    "# import uuid\n",
    "# from flask.logging import default_handler\n",
    "# import geopandas as gpd\n",
    "# from owslib.wps import WebProcessingService\n",
    "# from owslib.wps import printInputOutput\n",
    "# from birdy import WPSClient\n",
    "# import birdy\n",
    "# import tarfile\n",
    "\n",
    "# import tempfile\n",
    "# import shutil\n",
    "# import urllib\n",
    "import urllib.request\n",
    "\n",
    "# from urllib.parse import urlparse\n",
    "# from datetime import datetime\n",
    "# from dateutil.relativedelta import relativedelta\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I tried to use [xarray-tips-and-tricks](https://rabernat.github.io/research_computing_2018/xarray-tips-and-tricks.html) by similarity but while this looked OK, retrieving values ended up in a `RuntimeError: NetCDF: Access failure`. Park this. This may be an issue with the thredds server serving awra data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# #base_url = 'http://www.esrl.noaa.gov/psd/thredds/dodsC/Datasets/ncep.reanalysis/surface/air.sig995'\n",
    "# base_url = 'http://data-mel.it.csiro.au/thredds/dodsC/catch_all/lw_oznome/AWRA-L_historical_data/qtot/qtot_avg_'\n",
    "# f = ['http://data-mel.it.csiro.au/thredds/dodsC/catch_all/lw_oznome/AWRA-L_historical_data/qtot/qtot_avg_' + str(yr) + '.nc' for yr in [2011, 2012]]\n",
    "# files = [f'{base_url}{year}.nc' for year in range(2010, 2015)]\n",
    "# files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ds = xr.open_mfdataset(files)\n",
    "# ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# x = ds.qtot_avg\n",
    "# x[1,1,1].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_url = 'http://data-mel.it.csiro.au/thredds/catalog/catch_all/lw_oznome/AWRA-L_historical_data/qtot/catalog.xml'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat = TDSCatalog(cat_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_names = [str(x) for x in cat.datasets]\n",
    "m_indices = [i for i in range(len(ds_names)) if ds_names[i].startswith('qtot') and ds_names[i].endswith('nc')]\n",
    "# somehow sorted\n",
    "m_indices.reverse()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = [cat.datasets[i] for i in m_indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds[0].access_urls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dsets = [cds.remote_access(use_xarray=True).reset_coords(drop=True).chunk({'latitude': 681, 'longitude': 841})  # 731, latitude: 681, longitude: 841\n",
    "         for cds in ds[:3]] # eventually want to use the whole catalog here\n",
    "#dsets = [cds.remote_access(use_xarray=True).reset_coords(drop=True)\n",
    "#         for cds in ds[:3]] # eventually want to use the whole catalog here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(dsets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah = xr.auto_combine(dsets)\n",
    "blah"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = blah.qtot_avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x['time'].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`RuntimeError: NetCDF: Access failure`. That said, it worked one time out of ~10 so this is probably more a problem with the thredds server."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random but frequently ends up RuntimeError: NetCDF: Access failure\n",
    "m = x[1,:,:].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.imshow(m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box = x.isel(latitude=slice(600,700), longitude=slice(650,750))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box.isel(time=310).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SILO data\n",
    "\n",
    "Starting from some of code J Yu had authored:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_desc = 'http://data-cbr.it.csiro.au/thredds/catalog/catch_all/Digiscape_Climate_Data_Portal/silo/climate/catalog.xml?dataset=allDatasetScan/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'\n",
    "catalog = TDSCatalog(ds_desc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "catalog.datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = catalog.datasets[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ncss = dataset.subset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(ncss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = ncss.query()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "tt = pd.to_datetime(\"2014-01-01\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query.lonlat_box(north=-40, south=-44, east=149, west=144).time(tt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query.accept('netcdf4')\n",
    "query.variables('daily_rain')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# With the following I may have bumped into https://github.com/Unidata/thredds-docker/issues/216 \n",
    "data = ncss.get_data(query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try another approach:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset\n",
    "cat = catalog\n",
    "dataset_name = sorted(cat.datasets.keys())[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = cat.datasets[dataset_name]\n",
    "ds = dataset.remote_access(service='OPENDAP')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xarray.backends import NetCDF4DataStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = NetCDF4DataStore(ds)\n",
    "ds = xr.open_dataset(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = ds.daily_rain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x.isel(time=22000).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box = x.isel(lat=slice(600,700), lon=slice(650,750))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box.isel(time=22000).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Appendix\n",
    "\n",
    "Below are attempts to use Numpy-ic ways to calculate quantile classes. No joy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)')\n",
    "convolve(np.eye(2), [1, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "convolve = np.vectorize(np.convolve)\n",
    "convolve(np.eye(2), [1, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "convolve(np.eye(2), [1, 2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "z = three_years_rains[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "z.searchsorted(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def searchsorted2d(a,b):\n",
    "    m,n = a.shape\n",
    "    max_num = np.maximum(a.max() - a.min(), b.max() - b.min()) + 1\n",
    "    r = max_num*np.arange(a.shape[0])[:,None]\n",
    "    p = np.searchsorted( (a+r).ravel(), (b+r).ravel() ).reshape(m,-1)\n",
    "    return p - n*(np.arange(m)[:,None])\n",
    "\n",
    "def searchsorted2d(mat,ensembles):\n",
    "    m,n = mat.shape\n",
    "    max_num = np.maximum(a.max() - a.min(), b.max() - b.min()) + 1\n",
    "    r = max_num*np.arange(a.shape[0])[:,None]\n",
    "    p = np.searchsorted( (a+r).ravel(), (b+r).ravel() ).reshape(m,-1)\n",
    "    return p - n*(np.arange(m)[:,None])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(123)\n",
    "mat = np.sort(np.arange(12)).reshape(3, 4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mat = mat.reshape(3, 4)\n",
    "mat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mat[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "v_searchsorted = np.vectorize(np.searchsorted, signature='(n),(m)->(k)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "v_searchsorted(mat, np.array([0.5, 5.5, 10.1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "v_searchsorted = np.vectorize(np.searchsorted, signature='(n,m),(n,m)->(n)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "v_searchsorted(mat, np.array([0.5, 5.5]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.searchsorted(np.array([-1, 0, 1, 2]), np.array([-1, 0, 1.1, 2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y.groupby_bins(group, bins, right: bool = True, labels=None, precision: int = 3, include_lowest: bool = False, squeeze: bool = True, restore_coord_dims: Optional[bool] = None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "z.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "very_drier = z <= y[0,:,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "very_drier.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "z[50,50], y[:,50, 50]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.argmin( (200 > y[:,50, 50].values) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "3000 > y[:,50, 50].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.argmin(z[50,50].values > y[:,50, 50].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.argmin(z[50,50].values < y[:,50, 50].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(z[50,50].values > y[:,50, 50].values) , (z[50,50].values < y[:,50, 50].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.eye(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.argmax(z[50,50].values > y[:,50, 50].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.argmax(z[50,50].values < y[:,50, 50].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.searchsorted(y[:,50, 50].values, z[50,50].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.searchsorted(np.arange(10), 23)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def searchsorted(a, b):\n",
    "    func = lambda x, y: np.searchsorted(x[:,,].values, y[].values)\n",
    "    return xr.apply_ufunc(func, a, b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xr.apply_ufunc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dim = 'time'\n",
    "xr.apply_ufunc(np.searchsorted,\n",
    "                   y, z,\n",
    "                   input_core_dims=[[dim], []])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.stats\n",
    "\n",
    "def earth_mover_distance(first_samples,\n",
    "                         second_samples,\n",
    "                         dim='ensemble'):\n",
    "    return apply_ufunc(scipy.stats.wasserstein_distance,\n",
    "                       first_samples, second_samples,\n",
    "                       input_core_dims=[[dim], [dim]],\n",
    "                       vectorize=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Py3 (DCX)",
   "language": "python",
   "name": "dcx"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}