{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create an ensemble forecast dataset\n",
    "\n",
    "\n",
    "## Dependencies imports\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xarray as xr\n",
    "import os\n",
    "import sys\n",
    "import pandas as pd\n",
    "from functools import wraps\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns  # noqa, pandas aware plotting library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if ('SP_SRC' in os.environ):\n",
    "    root_src_dir = os.environ['SP_SRC']\n",
    "elif sys.platform == 'win32':\n",
    "    root_src_dir = r'C:\\src\\csiro\\stash\\silverpieces'\n",
    "else:\n",
    "    root_src_dir = '/home/per202/src/csiro/stash/silverpieces'\n",
    "\n",
    "pkg_src_dir = root_src_dir\n",
    "sys.path.append(pkg_src_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if ('SP_DATA' in os.environ):\n",
    "    root_data_dir = os.environ['SP_DATA']\n",
    "elif sys.platform == 'win32':\n",
    "    root_data_dir = r'C:\\data\\silverpieces'\n",
    "else:\n",
    "    root_data_dir = '/home/per202/data/silverpieces'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#from silverpieces.blah import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'retina'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from siphon.catalog import TDSCatalog"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## subset data to tasmania\n",
    "\n",
    "I did not manage to get the [AWRA data served by thredds](http://data-mel.it.csiro.au/thredds). \n",
    "\n",
    "Try another approach:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_desc = 'http://data-cbr.it.csiro.au/thredds/catalog/catch_all/Digiscape_Climate_Data_Portal/silo/climate/catalog.xml?dataset=allDatasetScan/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'\n",
    "ds_desc = 'http://data-mel.it.csiro.au/thredds/catalog/catch_all/lw_oznome/AWRA-L_historical_data/ss/catalog.xml'\n",
    "catalog = TDSCatalog(ds_desc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset\n",
    "cat = catalog\n",
    "dataset_name = sorted(cat.datasets.keys())[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_names = [str(x) for x in cat.datasets]\n",
    "m_indices = [i for i in range(len(ds_names)) if ds_names[i].startswith('ss_avg') and ds_names[i].endswith('nc')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dataset = cat.datasets[dataset_name]\n",
    "# ds = dataset.remote_access(service='OPENDAP')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = [cat.datasets[i] for i in m_indices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah = ds[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = blah.remote_access(service='OPENDAP', use_xarray=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a.chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dsets = [cds.remote_access(service='OPENDAP', use_xarray=True).reset_coords(drop=True).chunk({'latitude': 681, 'longitude': 841})  # 731, latitude: 681, longitude: 841\n",
    "         for cds in ds] # eventually want to use the whole catalog here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dsets[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dsets[-1].ss_avg[10,:,:].plot(figsize=(12,8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "d = dsets[0]\n",
    "c = d.coords\n",
    "la = d.coords[\"latitude\"]\n",
    "ll = d.coords[\"longitude\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "subsetting = {\n",
    "    'latitude': la[np.logical_and( c['latitude'] >= -36.40, c['latitude'] <= -34.40)],\n",
    "    'longitude': ll[np.logical_and( c['longitude'] >= 148.20, c['longitude'] <= 150.20)]\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss = d.sel( subsetting ).ss_avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = ss.chunk({'latitude': 10, 'longitude': 10, 'time': 366})\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = ss.chunk({'latitude': 1, 'longitude': 1, 'time': 1})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a[1,:,:].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_ds =  xr.combine_by_coords(dsets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_sub = ss_ds.sel( subsetting )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ss_sub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "da = ss_sub.ss_avg.chunk({'latitude': 41, 'longitude': 41, 'time': 365})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "da[38000,20,20].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "da.isel(time=38350).values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xarray.backends import NetCDF4DataStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = NetCDF4DataStore(ds)\n",
    "ds = xr.open_dataset(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = ds.ss_avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x.isel(time=300).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box = x.isel(lat=slice(600,700), lon=slice(650,750))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box.isel(time=22000).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "b_box"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lc = x.coords['lon']\n",
    "la = x.coords['lat']\n",
    "lt = x.coords['time']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = pd.to_datetime('2007-01-01')\n",
    "end_time = pd.to_datetime('2018-12-31')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#query.lonlat_box(north=-40, south=-44, east=149, west=144).time(tt)\n",
    "tassie = x.loc[dict(lon=lc[(lc>144)&(lc<149)], lat=la[(la>-44)&(la<-40)], time=slice(start_time, end_time))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tassie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time tassie.isel(time=4300).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fn = os.path.join(root_data_dir, 'tassie_silo_rain.nc')\n",
    "if not os.path.exists(fn):\n",
    "    tassie.to_netcdf(os.path.join(root_data_dir, 'tassie_silo_rain.nc'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "del(tassie)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tassie = xr.open_dataset(os.path.join(root_data_dir, 'tassie_silo_rain.nc'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tassie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dr = tassie.daily_rain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dr.isel(time=4300).plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use cases\n",
    "\n",
    "### 3 year period statistic compared to all 3 years periods in the historical record\n",
    "\n",
    "We want to be able to compare a grid of statistics for a period compared to all periods of similar lengths.\n",
    "The start and end of the period should be as arbitrary as possible. The sliding window could however be limited or fixed to a year: it is probably moot to compare windows with shifted seasonality. \n",
    "\n",
    "#### How does the cumulated rainfall 2016-2018 over TAS compare with all 3 year periods over the record?\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = pd.to_datetime('2016-01-01')\n",
    "end_time = pd.to_datetime('2018-12-31')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dr.isel(time = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim='time',skipna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "TIME_DIMNAME = 'time'\n",
    "\n",
    "blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim=TIME_DIMNAME,skipna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = pd.to_datetime('2007-01-01')\n",
    "end_time = pd.to_datetime('2009-12-31')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import date\n",
    "from dateutil.relativedelta import relativedelta # $ pip install python-dateutil"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(start_time + relativedelta(years=+1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulated = [dr.loc[dict(time=slice(start_time + relativedelta(years=year), end_time+ relativedelta(years=year)))].sum(dim='time',skipna=False) for year in range(10)]\n",
    "\n",
    "for year in range(10):\n",
    "    cumulated[year].name = 'annual rainfall'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulated[0].name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "def year_end(year):\n",
    "    return pd.Timestamp(datetime(year, 12, 31))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "annual_rainfall = xr.concat(cumulated, dim=pd.Index(name=TIME_DIMNAME, data=[year_end(start_time.year + i) for i in range(10)]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# blah = xr.auto_combine([xr.Dataset(cumulated[i]) for i in range(len(cumulated))])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "g_simple = annual_rainfall.plot(x='lon', y='lat', col='time', col_wrap=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "\n",
    "\n",
    "remote_data = xr.open_dataset(\n",
    "    'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods',\n",
    "    decode_times=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "remote_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tmax = remote_data['tmax'][:500, ::3, ::3]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tmax[0].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Py3 (sv)",
   "language": "python",
   "name": "sv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}