{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Performance of concat operator on cmip6 decadal: DWD\n", "\n", "**Rooki** calls climate data operations on the **rook** processing service." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "os.environ['ROOK_URL'] = 'http://rook.dkrz.de/wps'\n", "\n", "from rooki import operators as ops" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load intake catalog for cmip6 decadal" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import intake" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/Caskroom/mambaforge/base/envs/rooki/lib/python3.11/site-packages/dask/dataframe/io/csv.py:542: UserWarning: Warning gzip compression does not support breaking apart files\n", "Please ensure that each individual file can fit in memory and\n", "use the keyword ``blocksize=None to remove this message``\n", "Setting ``blocksize=None``\n", " warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ds_idpathsizemip_eraactivity_idinstitution_idsource_idexperiment_idmember_idtable_idvariable_idgrid_labelversionstart_timeend_timebboxlevelmember_yearmember_ensembleprefix
0c3s-cmip6-decadal.DCPP.MPI-M.MPI-ESM1-2-HR.dcp...DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s2016-...492479692c3s-cmip6-decadalDCPPMPI-MMPI-ESM1-2-HRdcppA-hindcasts2016-r8i1p1f1daytasmaxgnv202101112016-11-01T12:00:002026-12-31T12:00:000.00, -89.28, 359.06, 89.282.00s2016r8i1p1f1c3s-cmip6-decadal
1c3s-cmip6-decadal.DCPP.MPI-M.MPI-ESM1-2-HR.dcp...DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s2009-...737204109c3s-cmip6-decadalDCPPMPI-MMPI-ESM1-2-HRdcppA-hindcasts2009-r8i1p1f1dayprgnv202101072009-11-01T12:00:002019-12-31T12:00:000.00, -89.28, 359.06, 89.28NaNs2009r8i1p1f1c3s-cmip6-decadal
2c3s-cmip6-decadal.DCPP.MOHC.HadGEM3-GC31-MM.dc...DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s1987...953384c3s-cmip6-decadalDCPPMOHCHadGEM3-GC31-MMdcppA-hindcasts1987-r3i1p1f2Amonprgnv202004171987-11-16T00:00:001987-12-16T00:00:000.42, -89.72, 359.58, 89.72NaNs1987r3i1p1f2c3s-cmip6-decadal
3c3s-cmip6-decadal.DCPP.MOHC.HadGEM3-GC31-MM.dc...DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s1995...603757c3s-cmip6-decadalDCPPMOHCHadGEM3-GC31-MMdcppA-hindcasts1995-r6i1p1f2Amontasgnv202004171995-11-16T00:00:001995-12-16T00:00:000.42, -89.72, 359.58, 89.721.50s1995r6i1p1f2c3s-cmip6-decadal
4c3s-cmip6-decadal.DCPP.MOHC.HadGEM3-GC31-MM.dc...DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s2004...14824913c3s-cmip6-decadalDCPPMOHCHadGEM3-GC31-MMdcppA-hindcasts2004-r10i1p1f2AERdayzg500gnv202004172004-11-01T12:00:002004-12-30T12:00:000.42, -89.72, 359.58, 89.7250000.00s2004r10i1p1f2c3s-cmip6-decadal
\n", "
" ], "text/plain": [ " ds_id \n", "0 c3s-cmip6-decadal.DCPP.MPI-M.MPI-ESM1-2-HR.dcp... \\\n", "1 c3s-cmip6-decadal.DCPP.MPI-M.MPI-ESM1-2-HR.dcp... \n", "2 c3s-cmip6-decadal.DCPP.MOHC.HadGEM3-GC31-MM.dc... \n", "3 c3s-cmip6-decadal.DCPP.MOHC.HadGEM3-GC31-MM.dc... \n", "4 c3s-cmip6-decadal.DCPP.MOHC.HadGEM3-GC31-MM.dc... \n", "\n", " path size \n", "0 DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s2016-... 492479692 \\\n", "1 DCPP/MPI-M/MPI-ESM1-2-HR/dcppA-hindcast/s2009-... 737204109 \n", "2 DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s1987... 953384 \n", "3 DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s1995... 603757 \n", "4 DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/s2004... 14824913 \n", "\n", " mip_era activity_id institution_id source_id \n", "0 c3s-cmip6-decadal DCPP MPI-M MPI-ESM1-2-HR \\\n", "1 c3s-cmip6-decadal DCPP MPI-M MPI-ESM1-2-HR \n", "2 c3s-cmip6-decadal DCPP MOHC HadGEM3-GC31-MM \n", "3 c3s-cmip6-decadal DCPP MOHC HadGEM3-GC31-MM \n", "4 c3s-cmip6-decadal DCPP MOHC HadGEM3-GC31-MM \n", "\n", " experiment_id member_id table_id variable_id grid_label version \n", "0 dcppA-hindcast s2016-r8i1p1f1 day tasmax gn v20210111 \\\n", "1 dcppA-hindcast s2009-r8i1p1f1 day pr gn v20210107 \n", "2 dcppA-hindcast s1987-r3i1p1f2 Amon pr gn v20200417 \n", "3 dcppA-hindcast s1995-r6i1p1f2 Amon tas gn v20200417 \n", "4 dcppA-hindcast s2004-r10i1p1f2 AERday zg500 gn v20200417 \n", "\n", " start_time end_time bbox \n", "0 2016-11-01T12:00:00 2026-12-31T12:00:00 0.00, -89.28, 359.06, 89.28 \\\n", "1 2009-11-01T12:00:00 2019-12-31T12:00:00 0.00, -89.28, 359.06, 89.28 \n", "2 1987-11-16T00:00:00 1987-12-16T00:00:00 0.42, -89.72, 359.58, 89.72 \n", "3 1995-11-16T00:00:00 1995-12-16T00:00:00 0.42, -89.72, 359.58, 89.72 \n", "4 2004-11-01T12:00:00 2004-12-30T12:00:00 0.42, -89.72, 359.58, 89.72 \n", "\n", " level member_year member_ensemble prefix \n", "0 2.00 s2016 r8i1p1f1 c3s-cmip6-decadal \n", "1 NaN s2009 r8i1p1f1 c3s-cmip6-decadal \n", "2 NaN s1987 r3i1p1f2 c3s-cmip6-decadal \n", "3 
1.50 s1995 r6i1p1f2 c3s-cmip6-decadal \n", "4 50000.00 s2004 r10i1p1f2 c3s-cmip6-decadal " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat_url = \"https://raw.githubusercontent.com/cp4cds/c3s_34g_manifests/master/intake/catalogs/c3s.yaml\"\n", "\n", "cat = intake.open_catalog(cat_url)\n", "df_cmip6_decadal = cat['c3s-cmip6-decadal'].read()\n", "df_cmip6_decadal.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r5i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r2i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r6i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r8i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r14i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r4i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r11i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r15i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r10i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r13i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r12i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r7i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r16i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r1i1p1f1.Amon.pr.gn.v20220126',\n", " 
'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r3i1p1f1.Amon.pr.gn.v20220126',\n", " 'c3s-cmip6-decadal.DCPP.DWD.MPI-ESM1-2-LR.dcppA-hindcast.s2018-r9i1p1f1.Amon.pr.gn.v20220126']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df_cmip6_decadal.loc[\n", " (df_cmip6_decadal.variable_id==\"pr\") \n", " & (df_cmip6_decadal.experiment_id==\"dcppA-hindcast\")\n", " & (df_cmip6_decadal.table_id==\"Amon\")\n", " & (df_cmip6_decadal.source_id==\"MPI-ESM1-2-LR\")\n", " & (df_cmip6_decadal.member_year==\"s2018\")\n", "]\n", "# ids of the datasets that feed the concat workflow; named distinctly from\n", "# `dsets`, which a later cell binds to the downloaded result datasets\n", "dataset_ids = list(df.ds_id.unique())\n", "dataset_ids" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run concat\n", "\n", "https://docs.xarray.dev/en/stable/generated/xarray.concat.html" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "duration: 12 secs\n", "Metalink URL: http://rook4.cloud.dkrz.de:80/outputs/rook/cbe2f91a-140c-11ee-be07-fa163ed6c06f/input.meta4, num files: 1\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# monotonic clock: the measured duration is unaffected by system clock adjustments\n", "start = time.perf_counter()\n", "\n", "t = \"2019/2019\"\n", "#tc = \"month:dec|day:01\"\n", "\n", "wf = ops.Concat(\n", " ops.Input('ds', dataset_ids),\n", " time=t,\n", " #time_components=tc,\n", " dims=\"realization\",\n", " #apply_average=1\n", ")\n", "\n", "# optional average\n", "wf = ops.Average(wf, dims=\"realization\")\n", "\n", "# optional subset\n", "wf = ops.Subset(\n", " wf, time=t, \n", " # time_components=tc\n", ")\n", "\n", "resp = wf.orchestrate()\n", "\n", "end = time.perf_counter()\n", "print(f\"duration: {int(end - start)} secs\")\n", "print(resp)\n", "\n", "resp.ok" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**open with xarray**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.8807840347290039" ] }, 
"execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp.size_in_mb" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['http://rook4.cloud.dkrz.de:80/outputs/rook/d1b99826-140c-11ee-a82b-fa163ed6c06f/pr_Amon_MPI-ESM1-2-LR_dcppA-hindcast_r10i1p1f1_gn_20190131-20191231.nc']" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp.download_urls()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading to /var/folders/qb/mg0csz190wd4rxybhhnwjln80000gn/T/metalink_3_p_smm3/pr_Amon_MPI-ESM1-2-LR_dcppA-hindcast_r10i1p1f1_gn_20190131-20191231.nc.\n" ] } ], "source": [ "dsets = resp.datasets()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:      (lat: 96, bnds: 2, lon: 192, time: 12)\n",
       "Coordinates:\n",
       "  * time         (time) datetime64[ns] 2019-01-31T18:00:00 ... 2019-12-31T18:...\n",
       "  * lon          (lon) float64 0.0 1.875 3.75 5.625 ... 352.5 354.4 356.2 358.1\n",
       "  * lat          (lat) float64 -88.57 -86.72 -84.86 -83.0 ... 84.86 86.72 88.57\n",
       "    reftime      datetime64[ns] ...\n",
       "    leadtime     (time) timedelta64[ns] ...\n",
       "Dimensions without coordinates: bnds\n",
       "Data variables:\n",
       "    lat_bnds     (lat, bnds) float64 ...\n",
       "    lon_bnds     (lon, bnds) float64 ...\n",
       "    pr           (time, lat, lon) float32 ...\n",
       "    realization  float64 ...\n",
       "Attributes: (12/47)\n",
       "    CDI:                         Climate Data Interface version 2.0.3 (https:...\n",
       "    cdo_openmp_thread_number:    10\n",
       "    NCO:                         netCDF Operators version 4.9.2 (Homepage = h...\n",
       "    institute_id:                DWD\n",
       "    model_id:                    MPI-ESM-LR\n",
       "    modeling_realm:              atmos\n",
       "    ...                          ...\n",
       "    license:                     CMIP6 model data produced by DWD is licensed...\n",
       "    cmor_version:                3.5.0\n",
       "    forcing_description:         f1, CMIP6 historical forcings\n",
       "    physics_description:         physics from the standard model configuratio...\n",
       "    initialization_description:  hindcast initialized based on observations a...\n",
       "    startdate:                   s201811
" ], "text/plain": [ "\n", "Dimensions: (lat: 96, bnds: 2, lon: 192, time: 12)\n", "Coordinates:\n", " * time (time) datetime64[ns] 2019-01-31T18:00:00 ... 2019-12-31T18:...\n", " * lon (lon) float64 0.0 1.875 3.75 5.625 ... 352.5 354.4 356.2 358.1\n", " * lat (lat) float64 -88.57 -86.72 -84.86 -83.0 ... 84.86 86.72 88.57\n", " reftime datetime64[ns] ...\n", " leadtime (time) timedelta64[ns] ...\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " lat_bnds (lat, bnds) float64 ...\n", " lon_bnds (lon, bnds) float64 ...\n", " pr (time, lat, lon) float32 ...\n", " realization float64 ...\n", "Attributes: (12/47)\n", " CDI: Climate Data Interface version 2.0.3 (https:...\n", " cdo_openmp_thread_number: 10\n", " NCO: netCDF Operators version 4.9.2 (Homepage = h...\n", " institute_id: DWD\n", " model_id: MPI-ESM-LR\n", " modeling_realm: atmos\n", " ... ...\n", " license: CMIP6 model data produced by DWD is licensed...\n", " cmor_version: 3.5.0\n", " forcing_description: f1, CMIP6 historical forcings\n", " physics_description: physics from the standard model configuratio...\n", " initialization_description: hindcast initialized based on observations a...\n", " startdate: s201811" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = dsets[0]\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**provenance**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'http://rook4.cloud.dkrz.de:80/outputs/rook/cbe2f91a-140c-11ee-be07-fa163ed6c06f/provenance.png'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prov_plot_url = resp.provenance_image()\n", "prov_plot_url" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# from IPython.display import Image\n", "# Image(prov_plot_url)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": 
[ { "data": { "text/plain": [ "'http://rook4.cloud.dkrz.de:80/outputs/rook/cbe2f91a-140c-11ee-be07-fa163ed6c06f/provenance.json'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resp.provenance()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3" } }, "nbformat": 4, "nbformat_minor": 4 }