{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Create an ensemble forecast dataset\n", "\n", "\n", "## Dependencies imports\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import os\n", "import sys\n", "import pandas as pd\n", "from functools import wraps\n", "import numpy as np\n", "\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import seaborn as sns # noqa, pandas aware plotting library" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "if ('SP_SRC' in os.environ):\n", " root_src_dir = os.environ['SP_SRC']\n", "elif sys.platform == 'win32':\n", " root_src_dir = r'C:\\src\\csiro\\stash\\silverpieces'\n", "else:\n", " root_src_dir = '/home/per202/src/csiro/stash/silverpieces'\n", "\n", "pkg_src_dir = root_src_dir\n", "sys.path.append(pkg_src_dir)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "if ('SP_DATA' in os.environ):\n", " root_data_dir = os.environ['SP_DATA']\n", "elif sys.platform == 'win32':\n", " root_data_dir = r'C:\\data\\silverpieces'\n", "else:\n", " root_data_dir = '/home/per202/data/silverpieces'\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#from silverpieces.blah import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "%config InlineBackend.figure_format = 'retina'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from siphon.catalog import TDSCatalog" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## subset data to tasmania\n", "\n", "I did not manage to get the [AWRA data served by thredds](http://data-mel.it.csiro.au/thredds). \n", "\n", "Try another approach:\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_desc = 'http://data-cbr.it.csiro.au/thredds/catalog/catch_all/Digiscape_Climate_Data_Portal/silo/climate/catalog.xml?dataset=allDatasetScan/Digiscape_Climate_Data_Portal/silo/climate/daily_rain.nc'\n", "ds_desc = 'http://data-mel.it.csiro.au/thredds/catalog/catch_all/lw_oznome/AWRA-L_historical_data/ss/catalog.xml'\n", "catalog = TDSCatalog(ds_desc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the dataset\n", "cat = catalog\n", "dataset_name = sorted(cat.datasets.keys())[-1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds_names = [str(x) for x in cat.datasets]\n", "m_indices = [i for i in range(len(ds_names)) if ds_names[i].startswith('ss_avg') and ds_names[i].endswith('nc')]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# dataset = cat.datasets[dataset_name]\n", "# ds = dataset.remote_access(service='OPENDAP')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = [cat.datasets[i] for i in m_indices]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "blah = ds[1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "a = blah.remote_access(service='OPENDAP', use_xarray=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "a.chunks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dsets = [cds.remote_access(service='OPENDAP', use_xarray=True).reset_coords(drop=True).chunk({'latitude': 681, 'longitude': 841}) # 731, latitude: 681, longitude: 841\n", " for cds in ds] # eventually want to use the whole catalog here" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dsets[-1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dsets[-1].ss_avg[10,:,:].plot(figsize=(12,8))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "d = dsets[0]\n", "c = d.coords\n", "la = d.coords[\"latitude\"]\n", "ll = d.coords[\"longitude\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "subsetting = {\n", " 'latitude': la[np.logical_and( c['latitude'] >= -36.40, c['latitude'] <= -34.40)],\n", " 'longitude': ll[np.logical_and( c['longitude'] >= 148.20, c['longitude'] <= 150.20)]\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ss = d.sel( subsetting ).ss_avg" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "a = ss.chunk({'latitude': 10, 'longitude': 10, 'time': 366})\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "a = ss.chunk({'latitude': 1, 'longitude': 1, 'time': 1})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "a[1,:,:].values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ss_ds = xr.combine_by_coords(dsets)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ss_ds" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ss_sub = ss_ds.sel( subsetting )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ss_sub" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "da = ss_sub.ss_avg.chunk({'latitude': 41, 'longitude': 41, 'time': 365})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "da[38000,20,20].values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "da.isel(time=38350).values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from xarray.backends import NetCDF4DataStore" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = NetCDF4DataStore(ds)\n", "ds = xr.open_dataset(ds)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x = ds.ss_avg" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x.isel(time=300).plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%time b_box = x.isel(lat=slice(600,700), lon=slice(650,750))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%time b_box.isel(time=22000).plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%time plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "b_box" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lc = x.coords['lon']\n", "la = x.coords['lat']\n", "lt = x.coords['time']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "start_time = pd.to_datetime('2007-01-01')\n", "end_time = pd.to_datetime('2018-12-31')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#query.lonlat_box(north=-40, south=-44, east=149, west=144).time(tt)\n", "tassie = x.loc[dict(lon=lc[(lc>144)&(lc<149)], lat=la[(la>-44)&(la<-40)], time=slice(start_time, end_time))]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tassie" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%time tassie.isel(time=4300).plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fn = os.path.join(root_data_dir, 'tassie_silo_rain.nc')\n", "if not os.path.exists(fn):\n", " tassie.to_netcdf(os.path.join(root_data_dir, 'tassie_silo_rain.nc'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "del(tassie)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tassie = xr.open_dataset(os.path.join(root_data_dir, 'tassie_silo_rain.nc'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tassie" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dr = tassie.daily_rain" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dr.isel(time=4300).plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Use cases\n", "\n", "### 3 year period statistic compared to all 3 years periods in the historical record\n", "\n", "We want to be able to compare a grid of statistics for a period compared to all periods of similar lengths.\n", "The start and end of the period should be as arbitrary as possible. The sliding window could however be limited or fixed to a year: it is probably moot to compare windows with shifted seasonality. \n", "\n", "#### How does the cumulated rainfall 2016-2018 over TAS compare with all 3 year periods over the record?\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "start_time = pd.to_datetime('2016-01-01')\n", "end_time = pd.to_datetime('2018-12-31')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dr.isel(time = 1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim='time',skipna=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "blah.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "TIME_DIMNAME = 'time'\n", "\n", "blah = dr.loc[dict(time=slice(start_time, end_time))].sum(dim=TIME_DIMNAME,skipna=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "start_time = pd.to_datetime('2007-01-01')\n", "end_time = pd.to_datetime('2009-12-31')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datetime import date\n", "from dateutil.relativedelta import relativedelta # $ pip install python-dateutil" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(start_time + relativedelta(years=+1))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cumulated = [dr.loc[dict(time=slice(start_time + relativedelta(years=year), end_time+ relativedelta(years=year)))].sum(dim='time',skipna=False) for year in range(10)]\n", "\n", "for year in range(10):\n", " cumulated[year].name = 'annual rainfall'\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cumulated[0].name" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from datetime import datetime\n", "def year_end(year):\n", " return pd.Timestamp(datetime(year, 12, 31))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "annual_rainfall = xr.concat(cumulated, dim=pd.Index(name=TIME_DIMNAME, data=[year_end(start_time.year + i) for i in range(10)]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# blah = xr.auto_combine([xr.Dataset(cumulated[i]) for i in range(len(cumulated))])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "g_simple = annual_rainfall.plot(x='lon', y='lat', col='time', col_wrap=3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "\n", "\n", "remote_data = xr.open_dataset(\n", " 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods',\n", " decode_times=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "remote_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tmax = remote_data['tmax'][:500, ::3, ::3]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tmax[0].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Py3 (sv)", "language": "python", "name": "sv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }