{ "cells": [ { "cell_type": "markdown", "id": "atomic-verification", "metadata": {}, "source": [ "# Demo with esgf search for CMIP6 at DKRZ site (Subset + Average)\n", "\n", "ESGF Node at DKRZ: https://esgf-data.dkrz.de/search/cmip6-dkrz/" ] }, { "cell_type": "markdown", "id": "atmospheric-sarah", "metadata": {}, "source": [ "## Use esgf search at DKRZ ... no distributed search\n", "\n", "\n", "\n", "Using ``esgf-pyclient``: \n", "https://esgf-pyclient.readthedocs.io/en/latest/notebooks/examples/search.html" ] }, { "cell_type": "code", "execution_count": null, "id": "wound-mother", "metadata": {}, "outputs": [], "source": [ "from pyesgf.search import SearchConnection\n", "conn = SearchConnection('http://esgf-data.dkrz.de/esg-search',\n", " distrib=False)" ] }, { "cell_type": "markdown", "id": "broke-little", "metadata": {}, "source": [ "**Search only CMIP6 files locally available at DKRZ**" ] }, { "cell_type": "code", "execution_count": null, "id": "gentle-excerpt", "metadata": {}, "outputs": [], "source": [ "ctx = conn.new_context(project='CMIP6', data_node='esgf3.dkrz.de', latest=True, replica=False)\n", "ctx.hit_count" ] }, { "cell_type": "markdown", "id": "minor-lobby", "metadata": {}, "source": [ "Select only one dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "architectural-title", "metadata": {}, "outputs": [], "source": [ "results = ctx.search(\n", " institution_id='MPI-M',\n", " source_id='MPI-ESM1-2-HR',\n", " experiment_id='historical', \n", " variable='tas', \n", " frequency='day',\n", " variant_label='r1i1p1f1'\n", ")\n", "len(results)" ] }, { "cell_type": "code", "execution_count": null, "id": "former-finish", "metadata": {}, "outputs": [], "source": [ "ds = results[0]\n", "ds.json" ] }, { "cell_type": "markdown", "id": "metallic-fisher", "metadata": {}, "source": [ "Get a dataset identifier used by rook" ] }, { "cell_type": "code", "execution_count": null, "id": "native-motor", "metadata": {}, "outputs": [], "source": [ "dataset_id = ds.json['instance_id']\n", "dataset_id" ] }, { "cell_type": "markdown", "id": "revised-integral", "metadata": {}, "source": [ "Time range" ] }, { "cell_type": "code", "execution_count": null, "id": "champion-singles", "metadata": {}, "outputs": [], "source": [ "f\"{ds.json['datetime_start']}/{ds.json['datetime_stop']})\"" ] }, { "cell_type": "markdown", "id": "instrumental-crazy", "metadata": {}, "source": [ "Bounding Box: (West, Sout, East, North)" ] }, { "cell_type": "code", "execution_count": null, "id": "fewer-straight", "metadata": {}, "outputs": [], "source": [ "f\"({ds.json['west_degrees']}, {ds.json['south_degrees']},{ds.json['east_degrees']}, {ds.json['west_degrees']}, {ds.json['north_degrees']})\"\n" ] }, { "cell_type": "markdown", "id": "played-capture", "metadata": {}, "source": [ "Size in GB" ] }, { "cell_type": "code", "execution_count": null, "id": "ready-fabric", "metadata": {}, "outputs": [], "source": [ "f\"{ds.json['size'] / 1024 / 1024 / 1024} GB\"" ] }, { "cell_type": "markdown", "id": "sitting-origin", "metadata": {}, "source": [ "## Use Rook to run subset + average" ] }, { "cell_type": "code", "execution_count": null, "id": "aquatic-tsunami", "metadata": {}, "outputs": [], "source": [ "import os\n", "os.environ['ROOK_URL'] = 'http://rook.dkrz.de/wps'\n", "os.environ['ROOK_MODE'] = 'async'\n", "\n", "from rooki import operators as ops" ] }, { "cell_type": "markdown", "id": "moving-warner", "metadata": {}, "source": [ "Run subset workflow\n", "\n", "http://bboxfinder.com/" ] }, { "cell_type": "code", "execution_count": null, "id": "minute-garden", "metadata": {}, "outputs": [], "source": [ "bbox_africa = \"-23.906250,-35.746512,63.632813,37.996163\"\n", "\n", "\n", "subset = ops.Subset(\n", " ops.Input(\n", " 'tas', [dataset_id]\n", " ),\n", " time=\"1850-01-01/1850-12-31\",\n", " area=bbox_africa,\n", " )\n", "wf = ops.Average(subset, dims=\"time\")\n", "\n", "resp = wf.orchestrate()\n", "resp.ok" ] }, { "cell_type": "markdown", "id": "alternate-angle", "metadata": {}, "source": [ "### The outputs are available as a Metalink document\n", "https://github.com/metalink-dev" ] }, { "cell_type": "markdown", "id": "answering-founder", "metadata": {}, "source": [ "Metalink URL" ] }, { "cell_type": "code", "execution_count": null, "id": "exempt-space", "metadata": {}, "outputs": [], "source": [ "resp.url" ] }, { "cell_type": "markdown", "id": "partial-quantum", "metadata": {}, "source": [ "Number of files" ] }, { "cell_type": "code", "execution_count": null, "id": "willing-academy", "metadata": {}, "outputs": [], "source": [ "resp.num_files" ] }, { "cell_type": "markdown", "id": "incoming-springer", "metadata": {}, "source": [ "Total size in MB" ] }, { "cell_type": "code", "execution_count": null, "id": "legislative-projection", "metadata": {}, "outputs": [], "source": [ "resp.size_in_mb" ] }, { "cell_type": "markdown", "id": "accessory-swift", "metadata": {}, "source": [ "Download URLs" ] }, { "cell_type": "code", "execution_count": null, "id": "committed-founder", "metadata": {}, "outputs": [], "source": [ "resp.download_urls()" ] }, { "cell_type": "markdown", "id": "adult-electron", "metadata": {}, "source": [ "Download and open with xarray" ] }, { "cell_type": "code", "execution_count": null, "id": "dependent-cholesterol", "metadata": {}, "outputs": [], "source": [ "ds_0 = resp.datasets()[0]\n", "ds_0" ] }, { "cell_type": "markdown", "id": "statewide-satin", "metadata": {}, "source": [ "### Provenance\n", "\n", "Provenance information is given using the *PROV* standard.\n", "https://pypi.org/project/prov/" ] }, { "cell_type": "markdown", "id": "drawn-reliance", "metadata": {}, "source": [ "Provenance: URL to json document" ] }, { "cell_type": "code", "execution_count": null, "id": "upper-profit", "metadata": {}, "outputs": [], "source": [ "resp.provenance()" ] }, { "cell_type": "markdown", "id": "optional-russell", "metadata": {}, "source": [ "Provenance Plot" ] }, { "cell_type": "code", "execution_count": null, "id": "public-marker", "metadata": {}, "outputs": [], "source": [ "from IPython.display import Image\n", "Image(resp.provenance_image())" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }