{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CMIP6 Precipitation Frequency Analysis\n",
    "\n",
    "This notebook shows an advanced analysis case. The calculation was inspired by Angie Pendergrass’s work on precipitation statistics, as described in the following websites / papers:\n",
    "\n",
    "- https://journals.ametsoc.org/doi/full/10.1175/JCLI-D-16-0684.1\n",
    "- https://climatedataguide.ucar.edu/climate-data/gpcp-daily-global-precipitation-climatology-project\n",
    "\n",
    "We use [xhistogram](https://xhistogram.readthedocs.io/) to calculate the distribution of precipitation intensity and its changes in a warming climate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "import fsspec\n",
    "from tqdm.autonotebook import tqdm\n",
    "\n",
    "from xhistogram.xarray import histogram\n",
    "\n",
    "%matplotlib inline\n",
    "plt.rcParams['figure.figsize'] = 12, 6\n",
    "%config InlineBackend.figure_format = 'retina' "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute Cluster\n",
    "\n",
    "Here we use a dask cluster to parallelize our analysis. The cluster scales up and down adaptively."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask_gateway import Gateway\n",
    "from dask.distributed import Client\n",
    "\n",
    "gateway = Gateway()\n",
    "cluster = gateway.new_cluster()\n",
    "cluster.adapt(minimum=1, maximum=20)\n",
    "client = Client(cluster)\n",
    "cluster"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load Data Catalog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_3hr_pr = df[(df.table_id == '3hr') & (df.variable_id == 'pr')]\n",
    "len(df_3hr_pr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_counts = df_3hr_pr.groupby(['source_id', 'experiment_id'])['zstore'].count()\n",
    "run_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "source_ids = []\n",
    "experiment_ids = ['historical', 'ssp585']\n",
    "for name, group in df_3hr_pr.groupby('source_id'):\n",
    "    if all([expt in group.experiment_id.values\n",
    "            for expt in experiment_ids]):\n",
    "        source_ids.append(name)\n",
    "source_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_pr_data(source_id, expt_id):\n",
    "    \"\"\"\n",
    "    Load 3hr precip data for given source and expt ids\n",
    "    \"\"\"\n",
    "    uri = df_3hr_pr[(df_3hr_pr.source_id == source_id) &\n",
    "                         (df_3hr_pr.experiment_id == expt_id)].zstore.values[0]\n",
    "    \n",
    "    ds = xr.open_zarr(fsspec.get_mapper(uri), consolidated=True)\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def precip_hist(ds, nbins=100, pr_log_min=-3, pr_log_max=2):\n",
    "    \"\"\"\n",
    "    Calculate precipitation histogram for a single model. \n",
    "    Lazy.\n",
    "    \"\"\"\n",
    "    assert ds.pr.units == 'kg m-2 s-1'\n",
    "    \n",
    "    # mm/day\n",
    "    bins_mm_day = np.hstack([[0], np.logspace(pr_log_min, pr_log_max, nbins)]) \n",
    "    bins_kg_m2s = bins_mm_day / (24*60*60)\n",
    "\n",
    "    pr_hist = histogram(ds.pr, bins=[bins_kg_m2s], dim=['lon']).mean(dim='time')\n",
    "    \n",
    "    log_bin_spacing = np.diff(np.log(bins_kg_m2s[1:3])).item()\n",
    "    pr_hist_norm = 100 * pr_hist / ds.dims['lon'] / log_bin_spacing\n",
    "    pr_hist_norm.attrs.update({'long_name': 'zonal mean rain frequency',\n",
    "                               'units': '%/Δln(r)'})\n",
    "    return pr_hist_norm\n",
    "\n",
    "def precip_hist_for_expts(dsets, experiment_ids):\n",
    "    \"\"\"\n",
    "    Calculate histogram for a suite of experiments.\n",
    "    Eager.\n",
    "    \"\"\"\n",
    "    # actual data loading and computations happen in this next line\n",
    "    pr_hists = [precip_hist(ds).load()\n",
    "            for ds in [ds_hist, ds_ssp]]\n",
    "    pr_hist = xr.concat(pr_hists, dim=xr.Variable('experiment_id', experiment_ids))\n",
    "    return pr_hist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = {}\n",
    "for source_id in tqdm(source_ids):\n",
    "    # get a 20 year period\n",
    "    ds_hist = load_pr_data(source_id, 'historical').sel(time=slice('1980', '2000'))\n",
    "    ds_ssp = load_pr_data(source_id, 'ssp585').sel(time=slice('2080', '2100'))\n",
    "    pr_hist = precip_hist_for_expts([ds_hist, ds_ssp], experiment_ids)\n",
    "    results[source_id] = pr_hist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_precip_changes(pr_hist, vmax=5):\n",
    "    \"\"\"\n",
    "    Visualize the output\n",
    "    \"\"\"\n",
    "    pr_hist_diff = (pr_hist.sel(experiment_id='ssp585') - \n",
    "                    pr_hist.sel(experiment_id='historical'))\n",
    "    pr_hist.sel(experiment_id='historical')[:, 1:].plot.contour(xscale='log', colors='0.5', levels=21)\n",
    "    pr_hist_diff[:, 1:].plot.contourf(xscale='log', vmax=vmax, levels=21)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "title = 'Change in Zonal Mean Rain Frequency'\n",
    "for source_id, pr_hist in results.items():\n",
    "    plt.figure()\n",
    "    plot_precip_changes(pr_hist)\n",
    "    plt.title(f'{title}: {source_id}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}