{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Subset a test data set\n",
    "\n",
    "* Subset (AWRA?) daily data sets to a more manageable size for initial elaboration. ACT or Tassie.\n",
    "\n",
    "## Dependencies imports\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xarray as xr\n",
    "import os\n",
    "import sys\n",
    "import pandas as pd\n",
    "from functools import wraps\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns  # noqa, pandas aware plotting library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if ('SP_SRC' in os.environ):\n",
    "    root_src_dir = os.environ['SP_SRC']\n",
    "elif sys.platform == 'win32':\n",
    "    root_src_dir = r'C:\\src\\csiro\\stash\\silverpieces'\n",
    "else:\n",
    "    root_src_dir = '/home/per202/src/csiro/stash/silverpieces'\n",
    "\n",
    "pkg_src_dir = root_src_dir\n",
    "sys.path.append(pkg_src_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if ('SP_DATA' in os.environ):\n",
    "    root_data_dir = os.environ['SP_DATA']\n",
    "elif sys.platform == 'win32':\n",
    "    root_data_dir = r'C:\\data\\silverpieces'\n",
    "else:\n",
    "    root_data_dir = '/home/per202/data/silverpieces'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#from silverpieces.blah import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'retina'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from siphon.catalog import TDSCatalog"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## subset data to tasmania\n",
    "\n",
    "I did not manage to get the [AWRA data served by thredds](http://data-mel.it.csiro.au/thredds). \n",
    "\n",
    "Try another approach:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_desc = 'http://data-cbr.it.csiro.au/thredds/catalog/catch_all/Digiscape_Climate_Data_Portal/silo/climate/catalog.xml?dataset=allDatasetScan/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'\n",
    "catalog = TDSCatalog(ds_desc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset\n",
    "cat = catalog\n",
    "dataset_name = sorted(cat.datasets.keys())[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = cat.datasets[dataset_name]\n",
    "ds = dataset.remote_access(service='OPENDAP')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xarray.backends import NetCDF4DataStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = NetCDF4DataStore(ds)\n",
    "ds = xr.open_dataset(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = ds.daily_rain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x.isel(time=22000).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box = x.isel(lat=slice(600,700), lon=slice(650,750))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time b_box.isel(time=22000).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "b_box"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lc = x.coords['lon']\n",
    "la = x.coords['lat']\n",
    "lt = x.coords['time']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = pd.to_datetime('2007-01-01')\n",
    "end_time = pd.to_datetime('2018-12-31')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#query.lonlat_box(north=-40, south=-44, east=149, west=144).time(tt)\n",
    "tassie = x.loc[dict(lon=lc[(lc>144)&(lc<149)], lat=la[(la>-44)&(la<-40)], time=slice(start_time, end_time))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tassie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time tassie.isel(time=4300).plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fn = os.path.join(root_data_dir, 'tassie_silo_rain.nc')\n",
    "if not os.path.exists(fn):\n",
    "    tassie.to_netcdf(os.path.join(root_data_dir, 'tassie_silo_rain.nc'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "del(tassie)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tassie = xr.open_dataset(os.path.join(root_data_dir, 'tassie_silo_rain.nc'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tassie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dr = tassie.daily_rain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dr.isel(time=4300).plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use cases\n",
    "\n",
    "### 3 year period statistic compared to all 3 years periods in the historical record\n",
    "\n",
    "We want to be able to compare a grid of statistics for a period compared to all periods of similar lengths.\n",
    "The start and end of the period should be as arbitrary as possible. The sliding window could however be limited or fixed to a year: it is probably moot to compare windows with shifted seasonality. \n",
    "\n",
    "#### How does the cumulated rainfall 2016-2018 over TAS compare with all 3 year periods over the record?\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = pd.to_datetime('2016-01-01')\n",
    "end_time = pd.to_datetime('2018-12-31')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dr.isel(time = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim='time',skipna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "TIME_DIMNAME = 'time'\n",
    "\n",
    "\n",
    "blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim='time',skipna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = pd.to_datetime('2007-01-01')\n",
    "end_time = pd.to_datetime('2009-12-31')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import date\n",
    "from dateutil.relativedelta import relativedelta # $ pip install python-dateutil"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(start_time + relativedelta(years=+1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulated = [dr.loc[dict(time=slice(start_time + relativedelta(years=year), end_time+ relativedelta(years=year)))].sum(dim='time',skipna=False) for year in range(10)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cumulated[0].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah = xr.concat(cumulated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "blah = xr.auto_combine([xr.Dataset(cumulated[i]) for i in range(len(cumulated))])\n",
    "\n",
    "g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Py3 (DCX)",
   "language": "python",
   "name": "dcx"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}