{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Search and Load CMIP6 Data via ESGF / OPeNDAP\n", "\n", "This notebooks shows how to search and load data via [Earth System Grid Federation](https://esgf.llnl.gov/) infrastructure. This infrastructure works great and is the foundation of the CMIP6 distribution system.\n", "\n", "The main technologies used here are the [ESGF search API](https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API), used to figure out what data we want, and [OPeNDAP](https://www.opendap.org/), a remote data access protocol over HTTP." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "\n", "xr.set_options(display_style='html')\n", "%matplotlib inline\n", "%config InlineBackend.figure_format = 'retina' " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Search using ESGF API" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#!/usr/bin/env python\n", "from __future__ import print_function\n", "import requests\n", "import xml.etree.ElementTree as ET\n", "import numpy\n", "\n", "# Author: Unknown\n", "# I got the original version from a word document published by ESGF\n", "# https://docs.google.com/document/d/1pxz1Kd3JHfFp8vR2JCVBfApbsHmbUQQstifhGNdc6U0/edit?usp=sharing\n", "\n", "# API AT: https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API#results-pagination\n", "\n", "def esgf_search(server=\"https://esgf-node.llnl.gov/esg-search/search\",\n", " files_type=\"OPENDAP\", local_node=True, project=\"CMIP6\",\n", " verbose=False, format=\"application%2Fsolr%2Bjson\",\n", " use_csrf=False, **search):\n", " client = requests.session()\n", " payload = search\n", " payload[\"project\"] = project\n", " payload[\"type\"]= \"File\"\n", " if local_node:\n", " payload[\"distrib\"] = \"false\"\n", " if use_csrf:\n", " client.get(server)\n", " if 'csrftoken' in client.cookies:\n", " # Django 1.6 and up\n", " csrftoken = client.cookies['csrftoken']\n", " else:\n", " # older versions\n", " csrftoken = client.cookies['csrf']\n", " payload[\"csrfmiddlewaretoken\"] = csrftoken\n", "\n", " payload[\"format\"] = format\n", "\n", " offset = 0\n", " numFound = 10000\n", " all_files = []\n", " files_type = files_type.upper()\n", " while offset < numFound:\n", " payload[\"offset\"] = offset\n", " url_keys = [] \n", " for k in payload:\n", " url_keys += [\"{}={}\".format(k, payload[k])]\n", "\n", " url = \"{}/?{}\".format(server, \"&\".join(url_keys))\n", " print(url)\n", " r = client.get(url)\n", " r.raise_for_status()\n", " resp = r.json()[\"response\"]\n", " numFound = int(resp[\"numFound\"])\n", " resp = resp[\"docs\"]\n", " offset += len(resp)\n", " for d in resp:\n", " if verbose:\n", " for k in d:\n", " print(\"{}: {}\".format(k,d[k]))\n", " url = d[\"url\"]\n", " for f in d[\"url\"]:\n", " sp = f.split(\"|\")\n", " if sp[-1] == files_type:\n", " all_files.append(sp[0].split(\".html\")[0])\n", " return sorted(all_files)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&variable_id=tas&experiment_id=historical&institution_id=NCAR&source_id=CESM2&member_id=r10i1p1f1&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0\n" ] }, { "data": { "text/plain": [ "['http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_185001-189912.nc',\n", " 'http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_190001-194912.nc',\n", " 'http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_195001-199912.nc',\n", " 'http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_200001-201412.nc',\n", " 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_185001-189912.nc',\n", " 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_190001-194912.nc',\n", " 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_195001-199912.nc',\n", " 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_200001-201412.nc']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result = esgf_search(activity_id='CMIP', table_id='Amon', variable_id='tas', experiment_id='historical',\n", " institution_id=\"NCAR\", source_id=\"CESM2\", member_id=\"r10i1p1f1\")\n", "result" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Data with Xarray\n", "\n", "These are OPeNDAP endpoints. Xarray, together with the netCDF4 python library, allow lazy loading." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/rpa/Code/xarray/xarray/conventions.py:494: SerializationWarning: variable 'tas' has multiple fill values {1e+20, 1e+20}, decoding all values to NaN.\n", " use_cftime=use_cftime,\n" ] }, { "data": { "text/html": [ "
array([-90. , -89.057592, -88.115183, -87.172775, -86.230366, -85.287958,\n", " -84.34555 , -83.403141, -82.460733, -81.518325, -80.575916, -79.633508,\n", " -78.691099, -77.748691, -76.806283, -75.863874, -74.921466, -73.979058,\n", " -73.036649, -72.094241, -71.151832, -70.209424, -69.267016, -68.324607,\n", " -67.382199, -66.439791, -65.497382, -64.554974, -63.612565, -62.670157,\n", " -61.727749, -60.78534 , -59.842932, -58.900524, -57.958115, -57.015707,\n", " -56.073298, -55.13089 , -54.188482, -53.246073, -52.303665, -51.361257,\n", " -50.418848, -49.47644 , -48.534031, -47.591623, -46.649215, -45.706806,\n", " -44.764398, -43.82199 , -42.879581, -41.937173, -40.994764, -40.052356,\n", " -39.109948, -38.167539, -37.225131, -36.282723, -35.340314, -34.397906,\n", " -33.455497, -32.513089, -31.570681, -30.628272, -29.685864, -28.743455,\n", " -27.801047, -26.858639, -25.91623 , -24.973822, -24.031414, -23.089005,\n", " -22.146597, -21.204188, -20.26178 , -19.319372, -18.376963, -17.434555,\n", " -16.492147, -15.549738, -14.60733 , -13.664921, -12.722513, -11.780105,\n", " -10.837696, -9.895288, -8.95288 , -8.010471, -7.068063, -6.125654,\n", " -5.183246, -4.240838, -3.298429, -2.356021, -1.413613, -0.471204,\n", " 0.471204, 1.413613, 2.356021, 3.298429, 4.240838, 5.183246,\n", " 6.125654, 7.068063, 8.010471, 8.95288 , 9.895288, 10.837696,\n", " 11.780105, 12.722513, 13.664921, 14.60733 , 15.549738, 16.492147,\n", " 17.434555, 18.376963, 19.319372, 20.26178 , 21.204188, 22.146597,\n", " 23.089005, 24.031414, 24.973822, 25.91623 , 26.858639, 27.801047,\n", " 28.743455, 29.685864, 30.628272, 31.570681, 32.513089, 33.455497,\n", " 34.397906, 35.340314, 36.282723, 37.225131, 38.167539, 39.109948,\n", " 40.052356, 40.994764, 41.937173, 42.879581, 43.82199 , 44.764398,\n", " 45.706806, 46.649215, 47.591623, 48.534031, 49.47644 , 50.418848,\n", " 51.361257, 52.303665, 53.246073, 54.188482, 55.13089 , 56.073298,\n", " 57.015707, 57.958115, 58.900524, 59.842932, 60.78534 , 61.727749,\n", " 62.670157, 63.612565, 64.554974, 65.497382, 66.439791, 67.382199,\n", " 68.324607, 69.267016, 70.209424, 71.151832, 72.094241, 73.036649,\n", " 73.979058, 74.921466, 75.863874, 76.806283, 77.748691, 78.691099,\n", " 79.633508, 80.575916, 81.518325, 82.460733, 83.403141, 84.34555 ,\n", " 85.287958, 86.230366, 87.172775, 88.115183, 89.057592, 90. ])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, 2, 15),\n", " cftime.DatetimeNoLeap(1850, 2, 14, 0, 0, 0, 0, 4, 45),\n", " cftime.DatetimeNoLeap(1850, 3, 15, 12, 0, 0, 0, 5, 74), ...,\n", " cftime.DatetimeNoLeap(2014, 10, 15, 12, 0, 0, 0, 5, 288),\n", " cftime.DatetimeNoLeap(2014, 11, 15, 0, 0, 0, 0, 1, 319),\n", " cftime.DatetimeNoLeap(2014, 12, 15, 12, 0, 0, 0, 3, 349)], dtype=object)
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
\n",
"
| \n",
"\n", "\n", " | \n", "
array([-90. , -89.057592, -88.115183, -87.172775, -86.230366, -85.287958,\n", " -84.34555 , -83.403141, -82.460733, -81.518325, -80.575916, -79.633508,\n", " -78.691099, -77.748691, -76.806283, -75.863874, -74.921466, -73.979058,\n", " -73.036649, -72.094241, -71.151832, -70.209424, -69.267016, -68.324607,\n", " -67.382199, -66.439791, -65.497382, -64.554974, -63.612565, -62.670157,\n", " -61.727749, -60.78534 , -59.842932, -58.900524, -57.958115, -57.015707,\n", " -56.073298, -55.13089 , -54.188482, -53.246073, -52.303665, -51.361257,\n", " -50.418848, -49.47644 , -48.534031, -47.591623, -46.649215, -45.706806,\n", " -44.764398, -43.82199 , -42.879581, -41.937173, -40.994764, -40.052356,\n", " -39.109948, -38.167539, -37.225131, -36.282723, -35.340314, -34.397906,\n", " -33.455497, -32.513089, -31.570681, -30.628272, -29.685864, -28.743455,\n", " -27.801047, -26.858639, -25.91623 , -24.973822, -24.031414, -23.089005,\n", " -22.146597, -21.204188, -20.26178 , -19.319372, -18.376963, -17.434555,\n", " -16.492147, -15.549738, -14.60733 , -13.664921, -12.722513, -11.780105,\n", " -10.837696, -9.895288, -8.95288 , -8.010471, -7.068063, -6.125654,\n", " -5.183246, -4.240838, -3.298429, -2.356021, -1.413613, -0.471204,\n", " 0.471204, 1.413613, 2.356021, 3.298429, 4.240838, 5.183246,\n", " 6.125654, 7.068063, 8.010471, 8.95288 , 9.895288, 10.837696,\n", " 11.780105, 12.722513, 13.664921, 14.60733 , 15.549738, 16.492147,\n", " 17.434555, 18.376963, 19.319372, 20.26178 , 21.204188, 22.146597,\n", " 23.089005, 24.031414, 24.973822, 25.91623 , 26.858639, 27.801047,\n", " 28.743455, 29.685864, 30.628272, 31.570681, 32.513089, 33.455497,\n", " 34.397906, 35.340314, 36.282723, 37.225131, 38.167539, 39.109948,\n", " 40.052356, 40.994764, 41.937173, 42.879581, 43.82199 , 44.764398,\n", " 45.706806, 46.649215, 47.591623, 48.534031, 49.47644 , 50.418848,\n", " 51.361257, 52.303665, 53.246073, 54.188482, 55.13089 , 56.073298,\n", " 57.015707, 57.958115, 58.900524, 59.842932, 60.78534 , 61.727749,\n", " 62.670157, 63.612565, 64.554974, 65.497382, 66.439791, 67.382199,\n", " 68.324607, 69.267016, 70.209424, 71.151832, 72.094241, 73.036649,\n", " 73.979058, 74.921466, 75.863874, 76.806283, 77.748691, 78.691099,\n", " 79.633508, 80.575916, 81.518325, 82.460733, 83.403141, 84.34555 ,\n", " 85.287958, 86.230366, 87.172775, 88.115183, 89.057592, 90. ])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([[-90. , -89.528796],\n", " [-89.528796, -88.586387],\n", " [-88.586387, -87.643979],\n", " ...,\n", " [ 87.643979, 88.586387],\n", " [ 88.586387, 89.528796],\n", " [ 89.528796, 90. ]])
array([[ -0.625, 0.625],\n", " [ 0.625, 1.875],\n", " [ 1.875, 3.125],\n", " ...,\n", " [355.625, 356.875],\n", " [356.875, 358.125],\n", " [358.125, 359.375]])
array([[2.994837e+07, 2.994837e+07, 2.994837e+07, ..., 2.994837e+07,\n", " 2.994837e+07, 2.994837e+07],\n", " [2.395748e+08, 2.395748e+08, 2.395748e+08, ..., 2.395748e+08,\n", " 2.395748e+08, 2.395748e+08],\n", " [4.790848e+08, 4.790848e+08, 4.790848e+08, ..., 4.790848e+08,\n", " 4.790848e+08, 4.790848e+08],\n", " ...,\n", " [4.790848e+08, 4.790848e+08, 4.790848e+08, ..., 4.790848e+08,\n", " 4.790848e+08, 4.790848e+08],\n", " [2.395748e+08, 2.395748e+08, 2.395748e+08, ..., 2.395748e+08,\n", " 2.395748e+08, 2.395748e+08],\n", " [2.994837e+07, 2.994837e+07, 2.994837e+07, ..., 2.994837e+07,\n", " 2.994837e+07, 2.994837e+07]], dtype=float32)
\n",
"
| \n",
"\n", "\n", " | \n", "
array([cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, 2, 15),\n", " cftime.DatetimeNoLeap(1850, 2, 14, 0, 0, 0, 0, 4, 45),\n", " cftime.DatetimeNoLeap(1850, 3, 15, 12, 0, 0, 0, 5, 74), ...,\n", " cftime.DatetimeNoLeap(2014, 10, 15, 12, 0, 0, 0, 5, 288),\n", " cftime.DatetimeNoLeap(2014, 11, 15, 0, 0, 0, 0, 1, 319),\n", " cftime.DatetimeNoLeap(2014, 12, 15, 12, 0, 0, 0, 3, 349)], dtype=object)
array([284.99948, 285.23215, 285.85364, ..., 288.54376, 287.61884,\n", " 287.06284], dtype=float32)
array([cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, 2, 15),\n", " cftime.DatetimeNoLeap(1850, 2, 14, 0, 0, 0, 0, 4, 45),\n", " cftime.DatetimeNoLeap(1850, 3, 15, 12, 0, 0, 0, 5, 74), ...,\n", " cftime.DatetimeNoLeap(2014, 10, 15, 12, 0, 0, 0, 5, 288),\n", " cftime.DatetimeNoLeap(2014, 11, 15, 0, 0, 0, 0, 1, 319),\n", " cftime.DatetimeNoLeap(2014, 12, 15, 12, 0, 0, 0, 3, 349)], dtype=object)