{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Authors: Mackenzie Blanusa, A.Radhakrishnan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "import xarray as xr\n",
    "import cftime\n",
    "import nc_time_axis\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import intake, intake_esm\n",
    "from dask_gateway import Gateway\n",
    "import pandas as pd\n",
    "pd.set_option(\"display.max_colwidth\", None)\n",
    "#!pip install cmip6_preprocessing\n",
    "\n",
    "%pip install git+https://github.com/jbusecke/cmip6_preprocessing.git\n",
    "\n",
    "\n",
    "from cmip6_preprocessing.preprocessing import combined_preprocessing\n",
    "from cmip6_preprocessing.preprocessing import (correct_units,rename_cmip6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def latest_version(cat):\n",
    "    \"\"\"Keep only the newest DRS version of every asset listed in a catalog.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    cat : esmdatastore\n",
    "        Catalog whose ``df`` table has ``version`` and ``path`` columns plus\n",
    "        the identifying columns listed in ``id_columns`` below.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The de-duplicated ``cat.df`` table. Note that this is the table itself,\n",
    "    not a new esmdatastore.\n",
    "    \"\"\"\n",
    "    # Columns that together identify one asset regardless of its version.\n",
    "    id_columns = ['temporal subset', 'model', 'mip_table', 'institute',\n",
    "                  'variable', 'ensemble_member', 'grid_label', 'experiment_id']\n",
    "    # After the ascending sort, the last row of each duplicate group is the\n",
    "    # one with the highest version (ties broken by path).\n",
    "    ordered = cat.df.sort_values(by=['version', 'path'])\n",
    "    return ordered.drop_duplicates(id_columns, keep='last')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_time(ds):\n",
    "    \"\"\"Force the ``time`` calendar attribute to noleap, then CF-decode.\n",
    "\n",
    "    A copy of ``ds`` is returned as-is when it has no ``time`` dimension.\n",
    "    Otherwise a missing calendar attribute, or one that is not one of\n",
    "    noleap / NOLEAP / 365_day, is overwritten with \"noleap\" before the copy\n",
    "    is passed through ``xr.decode_cf``.\n",
    "    \"\"\"\n",
    "    ds = ds.copy()  # the attribute edits below are made on this copy\n",
    "    if \"time\" not in ds.dims:\n",
    "        return ds\n",
    "\n",
    "    # .get() returns None for a missing attribute, so the absent-calendar and\n",
    "    # unexpected-calendar cases are handled by the same test.\n",
    "    if ds[\"time\"].attrs.get(\"calendar\") not in [\"noleap\", \"NOLEAP\", \"365_day\"]:\n",
    "        ds[\"time\"].attrs.update({\"calendar\": \"noleap\"})\n",
    "\n",
    "    ds = xr.decode_cf(ds)\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_units(ds):\n",
    "    \"\"\"Convert the ``lev`` coordinate from centimeters to meters if needed.\n",
    "\n",
    "    ``ds`` is modified in place and also returned. Datasets without ``lev``,\n",
    "    or whose ``lev`` has no ``units`` attribute or a unit other than\n",
    "    \"cm\" / \"centimeters\", are returned untouched.\n",
    "    \"\"\"\n",
    "    if \"lev\" not in ds:\n",
    "        # Nothing to convert; avoids a KeyError on datasets without depth.\n",
    "        return ds\n",
    "\n",
    "    lev = ds[\"lev\"]\n",
    "    if lev.attrs.get(\"units\") in [\"cm\", \"centimeters\"]:\n",
    "        # Keep the original attributes but record the new unit, so the\n",
    "        # converted coordinate is not left without (or with stale) units.\n",
    "        meter_attrs = dict(lev.attrs, units=\"m\")\n",
    "        ds[\"lev\"] = xr.DataArray(lev.values / 100., dims=lev.dims, attrs=meter_attrs)\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pp_thetao(ds):\n",
    "    \"\"\"Preprocess one thetao dataset: rename_cmip6, fix_time, correct_units.\"\"\"\n",
    "    ds = ds.copy()  # work from a copy of the incoming dataset\n",
    "    ds = rename_cmip6(ds)\n",
    "    ds = fix_time(ds)\n",
    "    # fix_units(ds) is intentionally not applied here; correct_units is used instead.\n",
    "    ds = correct_units(ds)\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the catalog "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "col_url = \"https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "col = intake.open_esm_datastore(col_url)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "debug starts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = dict(experiment_id=['historical'],\n",
    "             mip_table='Omon',\n",
    "             ensemble_member=[\"r1i1p1f1\"],\n",
    "             model=['IPSL-CM6A-LR'],\n",
    "             grid_label=['gn'],\n",
    "             variable=[\"thetao\"]\n",
    "             )\n",
    "cat_T = col.search(**query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "WHAT DOES NOT WORK: \n",
    "\n",
    "the following misses olevel_bounds (renamed to lev_bnds) and other variables after preprocessing. \n",
    "Without preprocessing, the datasets have the old dim/var names as found in the original files/objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "--> The keys in the returned dictionary of datasets are constructed as follows:\n",
      "\t'project.institute.model.experiment_id.mip_table'\n"
     ]
    },
    {
     "data": {
      "text/html": [ "\n", "
<xarray.Dataset>\n", "Dimensions: (ensemble_member: 1, lev: 75, time: 1980, x: 362, y: 332)\n", "Coordinates:\n", " lat (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n", " lon (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n", " * lev (lev) float32 0.50576 1.5558553 ... 5698.0605 5902.0576\n", " * time (time) object 1850-01-16 12:00:00 ... 2015-01-25 12:00:00\n", " * ensemble_member (ensemble_member) <U8 'r1i1p1f1'\n", "Dimensions without coordinates: x, y\n", "Data variables:\n", " thetao (ensemble_member, time, lev, y, x) float32 dask.array<chunksize=(1, 1, 1, 332, 362), meta=np.ndarray>\n", "Attributes:\n", " title: IPSL-CM6A-LR model output prepared for CMIP6 / C...\n", " intake_esm_varname: ['thetao']\n", " source: IPSL-CM6A-LR (2017): atmos: LMDZ (NPv6, N96; 14...\n", " institution_id: IPSL\n", " history: Sat Dec 1 12:16:38 2018: ncatted -O -a realizat...\n", " physics_index: [1]\n", " parent_variant_label: r1i1p1f1\n", " parent_experiment_id: piControl\n", " branch_method: standard\n", " grid: native ocean tri-polar grid with 105 k ocean cells\n", " realization_index: [1]\n", " parent_source_id: IPSL-CM6A-LR\n", " sub_experiment_id: none\n", " model_version: 6.1.5\n", " variant_label: r1i1p1f1\n", " sub_experiment: none\n", " branch_time_in_parent: [21914.]\n", " forcing_index: [1]\n", " initialization_index: [1]\n", " dr2xml_md5sum: f1e40c1fc5d8281f865f72fbf4e38f9d\n", " license: CMIP6 model data produced by IPSL is licensed un...\n", " EXPID: historical\n", " grid_label: gn\n", " Conventions: CF-1.7 CMIP-6.2\n", " source_id: IPSL-CM6A-LR\n", " description: CMIP6 historical\n", " institution: Institut Pierre Simon Laplace, Paris 75252, France\n", " experiment: all-forcing simulation of the recent past\n", " frequency: mon\n", " activity_id: CMIP\n", " parent_activity_id: CMIP\n", " contact: ipsl-cmip6@listes.ipsl.fr\n", " realm: ocean\n", " source_type: AOGCM BGC\n", " data_specs_version: 01.00.21\n", " further_info_url: 
https://furtherinfo.es-doc.org/CMIP6.IPSL.IPSL-C...\n", " dr2xml_version: 1.11\n", " variable_id: thetao\n", " parent_time_units: days since 1850-01-01 00:00:00\n", " parent_mip_era: CMIP6\n", " CMIP6_CV_version: cv=6.2.3.5-2-g63b123e\n", " product: model-output\n", " NCO: "4.6.0"\n", " experiment_id: historical\n", " branch_time_in_child: [0.]\n", " nominal_resolution: 100 km\n", " tracking_id: hdl:21.14100/2357970e-3f77-4595-80d8-e3d5c69d0bd...\n", " table_id: Omon\n", " external_variables: areacello volcello\n", " mip_era: CMIP6\n", " name: /ccc/work/cont003/gencmip6/p86caub/IGCM_OUT/IPSL...\n", " intake_esm_dataset_key: CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
array([5.057600e-01, 1.555855e+00, 2.667682e+00, 3.856280e+00, 5.140361e+00,\n", " 6.543034e+00, 8.092519e+00, 9.822750e+00, 1.177368e+01, 1.399104e+01,\n", " 1.652532e+01, 1.942980e+01, 2.275762e+01, 2.655830e+01, 3.087456e+01,\n", " 3.574020e+01, 4.118002e+01, 4.721189e+01, 5.385064e+01, 6.111284e+01,\n", " 6.902168e+01, 7.761116e+01, 8.692943e+01, 9.704131e+01, 1.080303e+02,\n", " 1.200000e+02, 1.330758e+02, 1.474062e+02, 1.631645e+02, 1.805499e+02,\n", " 1.997900e+02, 2.211412e+02, 2.448906e+02, 2.713564e+02, 3.008875e+02,\n", " 3.338628e+02, 3.706885e+02, 4.117939e+02, 4.576256e+02, 5.086399e+02,\n", " 5.652923e+02, 6.280260e+02, 6.972587e+02, 7.733683e+02, 8.566790e+02,\n", " 9.474479e+02, 1.045854e+03, 1.151991e+03, 1.265861e+03, 1.387377e+03,\n", " 1.516364e+03, 1.652568e+03, 1.795671e+03, 1.945296e+03, 2.101027e+03,\n", " 2.262422e+03, 2.429025e+03, 2.600380e+03, 2.776039e+03, 2.955570e+03,\n", " 3.138565e+03, 3.324641e+03, 3.513446e+03, 3.704657e+03, 3.897982e+03,\n", " 4.093159e+03, 4.289953e+03, 4.488155e+03, 4.687581e+03, 4.888070e+03,\n", " 5.089479e+03, 5.291683e+03, 5.494575e+03, 5.698061e+03, 5.902058e+03],\n", " dtype=float32)
array([cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0),\n", " cftime.DatetimeNoLeap(1850, 2, 15, 0, 0, 0, 0),\n", " cftime.DatetimeNoLeap(1850, 3, 16, 12, 0, 0, 0), ...,\n", " cftime.DatetimeNoLeap(2014, 11, 25, 12, 0, 0, 0),\n", " cftime.DatetimeNoLeap(2014, 12, 26, 0, 0, 0, 0),\n", " cftime.DatetimeNoLeap(2015, 1, 25, 12, 0, 0, 0)], dtype=object)
array(['r1i1p1f1'], dtype='<U8')
\n",
"
| \n",
"\n", "\n", " | \n", "
<xarray.Dataset>\n", "Dimensions: (bnds: 2, ensemble_member: 1, lev: 75, time: 1980, vertex: 4, x: 362, y: 332)\n", "Coordinates:\n", " lat (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n", " lon (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n", " * lev (lev) float32 0.50576 1.5558553 ... 5698.0605 5902.0576\n", " * time (time) object 1850-01-16 12:00:00 ... 2015-01-25 12:00:00\n", " * ensemble_member (ensemble_member) <U8 'r1i1p1f1'\n", "Dimensions without coordinates: bnds, vertex, x, y\n", "Data variables:\n", " lon_bounds (y, x, vertex) float32 dask.array<chunksize=(332, 362, 4), meta=np.ndarray>\n", " lat_bounds (y, x, vertex) float32 dask.array<chunksize=(332, 362, 4), meta=np.ndarray>\n", " area (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n", " lev_bounds (lev, bnds) float32 dask.array<chunksize=(75, 2), meta=np.ndarray>\n", " time_bounds (time, bnds) object dask.array<chunksize=(1, 2), meta=np.ndarray>\n", " thetao (ensemble_member, time, lev, y, x) float32 dask.array<chunksize=(1, 1, 75, 332, 362), meta=np.ndarray>\n", "Attributes:\n", " title: IPSL-CM6A-LR model output prepared for CMIP6 / C...\n", " intake_esm_varname: ['thetao']\n", " source: IPSL-CM6A-LR (2017): atmos: LMDZ (NPv6, N96; 14...\n", " institution_id: IPSL\n", " history: Sat Dec 1 12:16:38 2018: ncatted -O -a realizat...\n", " physics_index: [1]\n", " parent_variant_label: r1i1p1f1\n", " parent_experiment_id: piControl\n", " branch_method: standard\n", " grid: native ocean tri-polar grid with 105 k ocean cells\n", " realization_index: [1]\n", " parent_source_id: IPSL-CM6A-LR\n", " sub_experiment_id: none\n", " model_version: 6.1.5\n", " variant_label: r1i1p1f1\n", " sub_experiment: none\n", " branch_time_in_parent: [21914.]\n", " forcing_index: [1]\n", " initialization_index: [1]\n", " dr2xml_md5sum: f1e40c1fc5d8281f865f72fbf4e38f9d\n", " license: CMIP6 model data produced by IPSL is licensed un...\n", " EXPID: historical\n", " 
grid_label: gn\n", " Conventions: CF-1.7 CMIP-6.2\n", " source_id: IPSL-CM6A-LR\n", " description: CMIP6 historical\n", " institution: Institut Pierre Simon Laplace, Paris 75252, France\n", " experiment: all-forcing simulation of the recent past\n", " frequency: mon\n", " activity_id: CMIP\n", " parent_activity_id: CMIP\n", " contact: ipsl-cmip6@listes.ipsl.fr\n", " realm: ocean\n", " source_type: AOGCM BGC\n", " data_specs_version: 01.00.21\n", " further_info_url: https://furtherinfo.es-doc.org/CMIP6.IPSL.IPSL-C...\n", " dr2xml_version: 1.11\n", " variable_id: thetao\n", " parent_time_units: days since 1850-01-01 00:00:00\n", " parent_mip_era: CMIP6\n", " CMIP6_CV_version: cv=6.2.3.5-2-g63b123e\n", " product: model-output\n", " NCO: "4.6.0"\n", " experiment_id: historical\n", " branch_time_in_child: [0.]\n", " nominal_resolution: 100 km\n", " tracking_id: hdl:21.14100/2357970e-3f77-4595-80d8-e3d5c69d0bd...\n", " table_id: Omon\n", " external_variables: areacello volcello\n", " mip_era: CMIP6\n", " name: /ccc/work/cont003/gencmip6/p86caub/IGCM_OUT/IPSL...\n", " intake_esm_dataset_key: CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
array([5.057600e-01, 1.555855e+00, 2.667682e+00, 3.856280e+00, 5.140361e+00,\n", " 6.543034e+00, 8.092519e+00, 9.822750e+00, 1.177368e+01, 1.399104e+01,\n", " 1.652532e+01, 1.942980e+01, 2.275762e+01, 2.655830e+01, 3.087456e+01,\n", " 3.574020e+01, 4.118002e+01, 4.721189e+01, 5.385064e+01, 6.111284e+01,\n", " 6.902168e+01, 7.761116e+01, 8.692943e+01, 9.704131e+01, 1.080303e+02,\n", " 1.200000e+02, 1.330758e+02, 1.474062e+02, 1.631645e+02, 1.805499e+02,\n", " 1.997900e+02, 2.211412e+02, 2.448906e+02, 2.713564e+02, 3.008875e+02,\n", " 3.338628e+02, 3.706885e+02, 4.117939e+02, 4.576256e+02, 5.086399e+02,\n", " 5.652923e+02, 6.280260e+02, 6.972587e+02, 7.733683e+02, 8.566790e+02,\n", " 9.474479e+02, 1.045854e+03, 1.151991e+03, 1.265861e+03, 1.387377e+03,\n", " 1.516364e+03, 1.652568e+03, 1.795671e+03, 1.945296e+03, 2.101027e+03,\n", " 2.262422e+03, 2.429025e+03, 2.600380e+03, 2.776039e+03, 2.955570e+03,\n", " 3.138565e+03, 3.324641e+03, 3.513446e+03, 3.704657e+03, 3.897982e+03,\n", " 4.093159e+03, 4.289953e+03, 4.488155e+03, 4.687581e+03, 4.888070e+03,\n", " 5.089479e+03, 5.291683e+03, 5.494575e+03, 5.698061e+03, 5.902058e+03],\n", " dtype=float32)
array([cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0),\n", " cftime.DatetimeNoLeap(1850, 2, 15, 0, 0, 0, 0),\n", " cftime.DatetimeNoLeap(1850, 3, 16, 12, 0, 0, 0), ...,\n", " cftime.DatetimeNoLeap(2014, 11, 25, 12, 0, 0, 0),\n", " cftime.DatetimeNoLeap(2014, 12, 26, 0, 0, 0, 0),\n", " cftime.DatetimeNoLeap(2015, 1, 25, 12, 0, 0, 0)], dtype=object)
array(['r1i1p1f1'], dtype='<U8')
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "