{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Authors: Mackenzie Blanusa, A.Radhakrishnan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "import xarray as xr\n",
    "import cftime\n",
    "import nc_time_axis\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import intake, intake_esm\n",
    "from dask_gateway import Gateway\n",
    "import pandas as pd\n",
    "pd.set_option(\"display.max_colwidth\", None)\n",
    "#!pip install cmip6_preprocessing\n",
    "\n",
    "%pip install git+https://github.com/jbusecke/cmip6_preprocessing.git\n",
    "\n",
    "\n",
    "from cmip6_preprocessing.preprocessing import combined_preprocessing\n",
    "from cmip6_preprocessing.preprocessing import (correct_units,rename_cmip6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def latest_version(cat):\n",
    "    \"\"\"\n",
    "    Keep only the latest DRS version of each dataset in a catalog.\n",
    "\n",
    "    input\n",
    "        cat: esmdatastore; only its ``df`` table is read\n",
    "    output\n",
    "        de-duplicated ``cat.df`` table (not a new esmdatastore) holding, for\n",
    "        every combination of the key columns below, the row that sorts last\n",
    "        by ``version`` and then ``path``\n",
    "    \"\"\"\n",
    "    # Columns that together identify one dataset chunk, whatever its version.\n",
    "    dataset_keys = ['temporal subset', 'model', 'mip_table', 'institute',\n",
    "                    'variable', 'ensemble_member', 'grid_label', 'experiment_id']\n",
    "\n",
    "    # Ascending sort + keep='last' retains the highest version per dataset.\n",
    "    latest_cat = (cat.df.sort_values(by=['version', 'path'])\n",
    "                  .drop_duplicates(dataset_keys, keep='last'))\n",
    "    return latest_cat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_time(ds):\n",
    "    \"\"\"\n",
    "    Force the calendar of the time axis to noleap and decode the dataset.\n",
    "\n",
    "    input\n",
    "        ds: xarray dataset\n",
    "    output\n",
    "        copy of ds passed through xr.decode_cf, with time.attrs['calendar']\n",
    "        set to 'noleap' unless it already was 'noleap', 'NOLEAP' or '365_day';\n",
    "        a dataset without a time dimension is returned as a plain copy\n",
    "    \"\"\"\n",
    "    ds = ds.copy()\n",
    "    if \"time\" not in ds.dims:\n",
    "        return ds\n",
    "\n",
    "    # A missing calendar attribute is handled like any other non-noleap calendar.\n",
    "    if ds[\"time\"].attrs.get(\"calendar\") not in [\"noleap\", \"NOLEAP\", \"365_day\"]:\n",
    "        ds[\"time\"].attrs.update({\"calendar\": \"noleap\"})\n",
    "\n",
    "    ds = xr.decode_cf(ds)\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_units(ds):\n",
    "    \"\"\"\n",
    "    Convert a vertical coordinate given in centimeters to meters.\n",
    "\n",
    "    input\n",
    "        ds: xarray dataset (modified in place)\n",
    "    output\n",
    "        ds with lev divided by 100 and its units attribute set to 'm' when the\n",
    "        units were 'cm' or 'centimeters'; otherwise ds is returned untouched\n",
    "    \"\"\"\n",
    "    # Datasets without a vertical coordinate have nothing to convert.\n",
    "    if \"lev\" not in ds.variables:\n",
    "        return ds\n",
    "\n",
    "    lev = ds[\"lev\"]\n",
    "    if lev.attrs.get(\"units\") in [\"cm\", \"centimeters\"]:\n",
    "        # Carry the metadata over and record the new unit so that a second\n",
    "        # call cannot divide by 100 again.\n",
    "        ds[\"lev\"] = xr.DataArray(lev.values / 100., dims=lev.dims,\n",
    "                                 attrs=dict(lev.attrs, units=\"m\"))\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pp_thetao(ds):\n",
    "    \"\"\"\n",
    "    Preprocess one thetao dataset.\n",
    "\n",
    "    Applies, in this order, rename_cmip6, fix_time and correct_units to a\n",
    "    copy of ds and returns the result.\n",
    "    \"\"\"\n",
    "    ds = ds.copy() #the wrapper function makes a copy of the ds and works from this\n",
    "    ds = rename_cmip6(ds)\n",
    "    ds = fix_time(ds)\n",
    "    # fix_units is left disabled; correct_units from cmip6_preprocessing is applied instead\n",
    "    #ds = fix_units(ds)\n",
    "    ds = correct_units(ds)\n",
    "    return ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the catalog "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "col_url = \"https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "col = intake.open_esm_datastore(col_url)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "debug starts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = dict(experiment_id=['historical'],\n",
    "             mip_table='Omon',\n",
    "             ensemble_member=[\"r1i1p1f1\"],\n",
    "             model=['IPSL-CM6A-LR'],\n",
    "             grid_label=['gn'],\n",
    "             variable=[\"thetao\"]\n",
    "             )\n",
    "cat_T = col.search(**query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "WHAT DOES NOT WORK: \n",
    "\n",
    "the following misses olevel_bounds (renamed to lev_bnds) and other variables after preprocessing. \n",
    "Without preprocessing, the datasets have the old dim/var names as found in the original files/objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "--> The keys in the returned dictionary of datasets are constructed as follows:\n",
      "\t'project.institute.model.experiment_id.mip_table'\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "
<xarray.Dataset>\n",
"Dimensions: (ensemble_member: 1, lev: 75, time: 1980, x: 362, y: 332)\n",
"Coordinates:\n",
" lat (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n",
" lon (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n",
" * lev (lev) float32 0.50576 1.5558553 ... 5698.0605 5902.0576\n",
" * time (time) object 1850-01-16 12:00:00 ... 2015-01-25 12:00:00\n",
" * ensemble_member (ensemble_member) <U8 'r1i1p1f1'\n",
"Dimensions without coordinates: x, y\n",
"Data variables:\n",
" thetao (ensemble_member, time, lev, y, x) float32 dask.array<chunksize=(1, 1, 1, 332, 362), meta=np.ndarray>\n",
"Attributes:\n",
" title: IPSL-CM6A-LR model output prepared for CMIP6 / C...\n",
" intake_esm_varname: ['thetao']\n",
" source: IPSL-CM6A-LR (2017): atmos: LMDZ (NPv6, N96; 14...\n",
" institution_id: IPSL\n",
" history: Sat Dec 1 12:16:38 2018: ncatted -O -a realizat...\n",
" physics_index: [1]\n",
" parent_variant_label: r1i1p1f1\n",
" parent_experiment_id: piControl\n",
" branch_method: standard\n",
" grid: native ocean tri-polar grid with 105 k ocean cells\n",
" realization_index: [1]\n",
" parent_source_id: IPSL-CM6A-LR\n",
" sub_experiment_id: none\n",
" model_version: 6.1.5\n",
" variant_label: r1i1p1f1\n",
" sub_experiment: none\n",
" branch_time_in_parent: [21914.]\n",
" forcing_index: [1]\n",
" initialization_index: [1]\n",
" dr2xml_md5sum: f1e40c1fc5d8281f865f72fbf4e38f9d\n",
" license: CMIP6 model data produced by IPSL is licensed un...\n",
" EXPID: historical\n",
" grid_label: gn\n",
" Conventions: CF-1.7 CMIP-6.2\n",
" source_id: IPSL-CM6A-LR\n",
" description: CMIP6 historical\n",
" institution: Institut Pierre Simon Laplace, Paris 75252, France\n",
" experiment: all-forcing simulation of the recent past\n",
" frequency: mon\n",
" activity_id: CMIP\n",
" parent_activity_id: CMIP\n",
" contact: ipsl-cmip6@listes.ipsl.fr\n",
" realm: ocean\n",
" source_type: AOGCM BGC\n",
" data_specs_version: 01.00.21\n",
" further_info_url: https://furtherinfo.es-doc.org/CMIP6.IPSL.IPSL-C...\n",
" dr2xml_version: 1.11\n",
" variable_id: thetao\n",
" parent_time_units: days since 1850-01-01 00:00:00\n",
" parent_mip_era: CMIP6\n",
" CMIP6_CV_version: cv=6.2.3.5-2-g63b123e\n",
" product: model-output\n",
" NCO: "4.6.0"\n",
" experiment_id: historical\n",
" branch_time_in_child: [0.]\n",
" nominal_resolution: 100 km\n",
" tracking_id: hdl:21.14100/2357970e-3f77-4595-80d8-e3d5c69d0bd...\n",
" table_id: Omon\n",
" external_variables: areacello volcello\n",
" mip_era: CMIP6\n",
" name: /ccc/work/cont003/gencmip6/p86caub/IGCM_OUT/IPSL...\n",
" intake_esm_dataset_key: CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
array([5.057600e-01, 1.555855e+00, 2.667682e+00, 3.856280e+00, 5.140361e+00,\n",
" 6.543034e+00, 8.092519e+00, 9.822750e+00, 1.177368e+01, 1.399104e+01,\n",
" 1.652532e+01, 1.942980e+01, 2.275762e+01, 2.655830e+01, 3.087456e+01,\n",
" 3.574020e+01, 4.118002e+01, 4.721189e+01, 5.385064e+01, 6.111284e+01,\n",
" 6.902168e+01, 7.761116e+01, 8.692943e+01, 9.704131e+01, 1.080303e+02,\n",
" 1.200000e+02, 1.330758e+02, 1.474062e+02, 1.631645e+02, 1.805499e+02,\n",
" 1.997900e+02, 2.211412e+02, 2.448906e+02, 2.713564e+02, 3.008875e+02,\n",
" 3.338628e+02, 3.706885e+02, 4.117939e+02, 4.576256e+02, 5.086399e+02,\n",
" 5.652923e+02, 6.280260e+02, 6.972587e+02, 7.733683e+02, 8.566790e+02,\n",
" 9.474479e+02, 1.045854e+03, 1.151991e+03, 1.265861e+03, 1.387377e+03,\n",
" 1.516364e+03, 1.652568e+03, 1.795671e+03, 1.945296e+03, 2.101027e+03,\n",
" 2.262422e+03, 2.429025e+03, 2.600380e+03, 2.776039e+03, 2.955570e+03,\n",
" 3.138565e+03, 3.324641e+03, 3.513446e+03, 3.704657e+03, 3.897982e+03,\n",
" 4.093159e+03, 4.289953e+03, 4.488155e+03, 4.687581e+03, 4.888070e+03,\n",
" 5.089479e+03, 5.291683e+03, 5.494575e+03, 5.698061e+03, 5.902058e+03],\n",
" dtype=float32)array([cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(1850, 2, 15, 0, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(1850, 3, 16, 12, 0, 0, 0), ...,\n",
" cftime.DatetimeNoLeap(2014, 11, 25, 12, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(2014, 12, 26, 0, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(2015, 1, 25, 12, 0, 0, 0)], dtype=object)array(['r1i1p1f1'], dtype='<U8')
\n",
"
| \n",
"\n", "\n", " | \n", "
<xarray.Dataset>\n",
"Dimensions: (bnds: 2, ensemble_member: 1, lev: 75, time: 1980, vertex: 4, x: 362, y: 332)\n",
"Coordinates:\n",
" lat (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n",
" lon (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n",
" * lev (lev) float32 0.50576 1.5558553 ... 5698.0605 5902.0576\n",
" * time (time) object 1850-01-16 12:00:00 ... 2015-01-25 12:00:00\n",
" * ensemble_member (ensemble_member) <U8 'r1i1p1f1'\n",
"Dimensions without coordinates: bnds, vertex, x, y\n",
"Data variables:\n",
" lon_bounds (y, x, vertex) float32 dask.array<chunksize=(332, 362, 4), meta=np.ndarray>\n",
" lat_bounds (y, x, vertex) float32 dask.array<chunksize=(332, 362, 4), meta=np.ndarray>\n",
" area (y, x) float32 dask.array<chunksize=(332, 362), meta=np.ndarray>\n",
" lev_bounds (lev, bnds) float32 dask.array<chunksize=(75, 2), meta=np.ndarray>\n",
" time_bounds (time, bnds) object dask.array<chunksize=(1, 2), meta=np.ndarray>\n",
" thetao (ensemble_member, time, lev, y, x) float32 dask.array<chunksize=(1, 1, 75, 332, 362), meta=np.ndarray>\n",
"Attributes:\n",
" title: IPSL-CM6A-LR model output prepared for CMIP6 / C...\n",
" intake_esm_varname: ['thetao']\n",
" source: IPSL-CM6A-LR (2017): atmos: LMDZ (NPv6, N96; 14...\n",
" institution_id: IPSL\n",
" history: Sat Dec 1 12:16:38 2018: ncatted -O -a realizat...\n",
" physics_index: [1]\n",
" parent_variant_label: r1i1p1f1\n",
" parent_experiment_id: piControl\n",
" branch_method: standard\n",
" grid: native ocean tri-polar grid with 105 k ocean cells\n",
" realization_index: [1]\n",
" parent_source_id: IPSL-CM6A-LR\n",
" sub_experiment_id: none\n",
" model_version: 6.1.5\n",
" variant_label: r1i1p1f1\n",
" sub_experiment: none\n",
" branch_time_in_parent: [21914.]\n",
" forcing_index: [1]\n",
" initialization_index: [1]\n",
" dr2xml_md5sum: f1e40c1fc5d8281f865f72fbf4e38f9d\n",
" license: CMIP6 model data produced by IPSL is licensed un...\n",
" EXPID: historical\n",
" grid_label: gn\n",
" Conventions: CF-1.7 CMIP-6.2\n",
" source_id: IPSL-CM6A-LR\n",
" description: CMIP6 historical\n",
" institution: Institut Pierre Simon Laplace, Paris 75252, France\n",
" experiment: all-forcing simulation of the recent past\n",
" frequency: mon\n",
" activity_id: CMIP\n",
" parent_activity_id: CMIP\n",
" contact: ipsl-cmip6@listes.ipsl.fr\n",
" realm: ocean\n",
" source_type: AOGCM BGC\n",
" data_specs_version: 01.00.21\n",
" further_info_url: https://furtherinfo.es-doc.org/CMIP6.IPSL.IPSL-C...\n",
" dr2xml_version: 1.11\n",
" variable_id: thetao\n",
" parent_time_units: days since 1850-01-01 00:00:00\n",
" parent_mip_era: CMIP6\n",
" CMIP6_CV_version: cv=6.2.3.5-2-g63b123e\n",
" product: model-output\n",
" NCO: "4.6.0"\n",
" experiment_id: historical\n",
" branch_time_in_child: [0.]\n",
" nominal_resolution: 100 km\n",
" tracking_id: hdl:21.14100/2357970e-3f77-4595-80d8-e3d5c69d0bd...\n",
" table_id: Omon\n",
" external_variables: areacello volcello\n",
" mip_era: CMIP6\n",
" name: /ccc/work/cont003/gencmip6/p86caub/IGCM_OUT/IPSL...\n",
" intake_esm_dataset_key: CMIP6.IPSL.IPSL-CM6A-LR.historical.Omon\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
array([5.057600e-01, 1.555855e+00, 2.667682e+00, 3.856280e+00, 5.140361e+00,\n",
" 6.543034e+00, 8.092519e+00, 9.822750e+00, 1.177368e+01, 1.399104e+01,\n",
" 1.652532e+01, 1.942980e+01, 2.275762e+01, 2.655830e+01, 3.087456e+01,\n",
" 3.574020e+01, 4.118002e+01, 4.721189e+01, 5.385064e+01, 6.111284e+01,\n",
" 6.902168e+01, 7.761116e+01, 8.692943e+01, 9.704131e+01, 1.080303e+02,\n",
" 1.200000e+02, 1.330758e+02, 1.474062e+02, 1.631645e+02, 1.805499e+02,\n",
" 1.997900e+02, 2.211412e+02, 2.448906e+02, 2.713564e+02, 3.008875e+02,\n",
" 3.338628e+02, 3.706885e+02, 4.117939e+02, 4.576256e+02, 5.086399e+02,\n",
" 5.652923e+02, 6.280260e+02, 6.972587e+02, 7.733683e+02, 8.566790e+02,\n",
" 9.474479e+02, 1.045854e+03, 1.151991e+03, 1.265861e+03, 1.387377e+03,\n",
" 1.516364e+03, 1.652568e+03, 1.795671e+03, 1.945296e+03, 2.101027e+03,\n",
" 2.262422e+03, 2.429025e+03, 2.600380e+03, 2.776039e+03, 2.955570e+03,\n",
" 3.138565e+03, 3.324641e+03, 3.513446e+03, 3.704657e+03, 3.897982e+03,\n",
" 4.093159e+03, 4.289953e+03, 4.488155e+03, 4.687581e+03, 4.888070e+03,\n",
" 5.089479e+03, 5.291683e+03, 5.494575e+03, 5.698061e+03, 5.902058e+03],\n",
" dtype=float32)array([cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(1850, 2, 15, 0, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(1850, 3, 16, 12, 0, 0, 0), ...,\n",
" cftime.DatetimeNoLeap(2014, 11, 25, 12, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(2014, 12, 26, 0, 0, 0, 0),\n",
" cftime.DatetimeNoLeap(2015, 1, 25, 12, 0, 0, 0)], dtype=object)array(['r1i1p1f1'], dtype='<U8')
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "