{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Description of Rechunked National Water Model v2.1 Retrospective Simulation\n", "This notebook provides an overview of the metadata of the six Zarr stores that comprise the rechunked NWM v2.1 retrospective simulation. \n", "The metadata are presented from the `xarray.Dataset` point of view (as xarray would work with the data in memory). For a more detailed\n", "data description, please see the companion document \n", "[(html)](https://htmlpreview.github.io/?https://github.com/NCAR/rechunk\\_retro_nwm\\_v21/blob/main/notebooks/data_description_detail.html) \n", "[(notebook)](https://htmlpreview.github.io/?https://github.com/NCAR/rechunk\\_retro_nwm\\_v21/blob/main/notebooks/data_description_detail.ipynb)\n", "which also presents information from the Zarr perspective (storage, compression) for each variable. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/glade/work/jamesmcc/python_envs/379zr/lib/python3.7/site-packages/xarray/backends/cfgrib_.py:28: UserWarning: Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. Try `import cfgrib` to get the full error message\n", " \"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. 
\"\n" ] } ], "source": [ "import xarray as xr" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "files = {\n", " 'gwout': '/glade/p/datashare/ishitas/nwm_retro_v2.1/gwout.zarr',\n", " 'lakeout': '/glade/p/datashare/jamesmcc/nwm_retro_v2.1/lakeout.zarr',\n", " 'chrtout': '/glade/p/datashare/ishitas/nwm_retro_v2.1/chrtout.zarr',\n", " 'precip': '/glade/p/datashare/jamesmcc/nwm_retro_v2.1/precip.zarr',\n", " 'ldasout': '/glade/p/datashare/ishitas/nwm_retro_v2.1/ldasout.zarr',\n", " 'rtout': '/glade/p/datashare/jamesmcc/nwm_retro_v2.1/rtout.zarr', \n", "}\n", "\n", "message = ('Please click on the dropdown carets, metadata (file) symbols, and the data ' \n", " 'information (silos) symbols below for additional information.')\n", "\n", "def describe(tag):\n", " ds = xr.open_zarr(files[tag])\n", " print(message)\n", " display(ds)\n", " ds.close()\n", " del ds\n", " return None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## LAKEOUT: lake model output\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Please click on the dropdown carets, metadata (file) symbols, and the data information (silos) symbols below for additional information.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/glade/work/jamesmcc/python_envs/379zr/lib/python3.7/site-packages/xarray/backends/plugins.py:61: RuntimeWarning: Engine 'cfgrib' loading failed:\n", "Cannot find the ecCodes library\n", " warnings.warn(f\"Engine {name!r} loading failed:\\n{ex}\", RuntimeWarning)\n" ] }, { "data": { "text/html": [ "
<xarray.Dataset>\n",
"Dimensions: (feature_id: 5783, time: 367439)\n",
"Coordinates:\n",
" * feature_id (feature_id) int32 491 531 747 ... 947070204 1021092845\n",
" latitude (feature_id) float32 dask.array<chunksize=(5783,), meta=np.ndarray>\n",
" longitude (feature_id) float32 dask.array<chunksize=(5783,), meta=np.ndarray>\n",
" * time (time) datetime64[ns] 1979-02-01T01:00:00 ... 2020-12-31T...\n",
"Data variables:\n",
" crs |S1 ...\n",
" inflow (time, feature_id) float64 dask.array<chunksize=(8064, 500), meta=np.ndarray>\n",
" outflow (time, feature_id) float64 dask.array<chunksize=(8064, 500), meta=np.ndarray>\n",
" water_sfc_elev (time, feature_id) float32 dask.array<chunksize=(8064, 500), meta=np.ndarray>\n",
"Attributes:\n",
" Conventions: CF-1.6\n",
" TITLE: OUTPUT FROM WRF-Hydro v5.2.0-beta2\n",
" code_version: v5.2.0-beta2\n",
" featureType: timeSeries\n",
" model_configuration: retrospective\n",
" model_output_type: reservoir\n",
" proj4: +proj=lcc +units=m +a=6370000.0 +b=6370000....\n",
" reservoir_assimilated_value: Assimilation not performed\n",
" reservoir_type: 1 = level pool everywhere\n",
" station_dimension: lake_idarray([ 491, 531, 747, ..., 947070203, 947070204,\n",
" 1021092845], dtype=int32)\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
array(['1979-02-01T01:00:00.000000000', '1979-02-01T02:00:00.000000000',\n",
" '1979-02-01T03:00:00.000000000', ..., '2020-12-31T21:00:00.000000000',\n",
" '2020-12-31T22:00:00.000000000', '2020-12-31T23:00:00.000000000'],\n",
" dtype='datetime64[ns]')array(b'', dtype='|S1')
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
<xarray.Dataset>\n",
"Dimensions: (time: 367439, feature_id: 2776738)\n",
"Coordinates:\n",
" * feature_id (feature_id) int32 6635572 6635590 6635598 ... 25293410 15450136\n",
" * time (time) datetime64[ns] 1979-02-01T01:00:00 ... 2020-12-31T23:0...\n",
"Data variables:\n",
" depth (time, feature_id) float64 dask.array<chunksize=(672, 30000), meta=np.ndarray>\n",
" inflow (time, feature_id) float64 dask.array<chunksize=(672, 30000), meta=np.ndarray>\n",
" outflow (time, feature_id) float64 dask.array<chunksize=(672, 30000), meta=np.ndarray>\n",
"Attributes:\n",
" Conventions: CF-1.6\n",
" TITLE: OUTPUT FROM WRF-Hydro v5.2.0-beta2\n",
" code_version: v5.2.0-beta2\n",
" featureType: timeSeries\n",
" model_configuration: retrospective\n",
" model_output_type: groundwater_rt\n",
" station_dimension: gw_idarray([ 6635572, 6635590, 6635598, ..., 15448486, 25293410, 15450136],\n",
" dtype=int32)array(['1979-02-01T01:00:00.000000000', '1979-02-01T02:00:00.000000000',\n",
" '1979-02-01T03:00:00.000000000', ..., '2020-12-31T21:00:00.000000000',\n",
" '2020-12-31T22:00:00.000000000', '2020-12-31T23:00:00.000000000'],\n",
" dtype='datetime64[ns]')\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
<xarray.Dataset>\n",
"Dimensions: (feature_id: 2776738, time: 367439)\n",
"Coordinates:\n",
" elevation (feature_id) float32 dask.array<chunksize=(2776738,), meta=np.ndarray>\n",
" * feature_id (feature_id) int32 101 179 181 ... 1180001803 1180001804\n",
" gage_id (feature_id) |S15 dask.array<chunksize=(2776738,), meta=np.ndarray>\n",
" latitude (feature_id) float32 dask.array<chunksize=(2776738,), meta=np.ndarray>\n",
" longitude (feature_id) float32 dask.array<chunksize=(2776738,), meta=np.ndarray>\n",
" order (feature_id) int32 dask.array<chunksize=(2776738,), meta=np.ndarray>\n",
" * time (time) datetime64[ns] 1979-02-01T01:00:00 ... 2020-12-31T23:0...\n",
"Data variables:\n",
" crs |S1 ...\n",
" streamflow (time, feature_id) float64 dask.array<chunksize=(672, 30000), meta=np.ndarray>\n",
" velocity (time, feature_id) float64 dask.array<chunksize=(672, 30000), meta=np.ndarray>\n",
"Attributes:\n",
" TITLE: OUTPUT FROM WRF-Hydro v5.2.0-beta2\n",
" code_version: v5.2.0-beta2\n",
" featureType: timeSeries\n",
" model_configuration: retrospective\n",
" proj4: +proj=lcc +units=m +a=6370000.0 +b=6370000.0 +lat_1...\n",
"
| \n",
" \n", " \n", " | \n", "
array([ 101, 179, 181, ..., 1180001802, 1180001803,\n",
" 1180001804], dtype=int32)\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
array(['1979-02-01T01:00:00.000000000', '1979-02-01T02:00:00.000000000',\n",
" '1979-02-01T03:00:00.000000000', ..., '2020-12-31T21:00:00.000000000',\n",
" '2020-12-31T22:00:00.000000000', '2020-12-31T23:00:00.000000000'],\n",
" dtype='datetime64[ns]')array(b'', dtype='|S1')
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
<xarray.Dataset>\n",
"Dimensions: (time: 367440, y: 3840, x: 4608)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 1979-02-01 ... 2020-12-31T23:00:00\n",
" * x (x) float64 -2.303e+06 -2.302e+06 ... 2.303e+06 2.304e+06\n",
" * y (y) float64 -1.92e+06 -1.919e+06 ... 1.918e+06 1.919e+06\n",
"Data variables:\n",
" RAINRATE (time, y, x) float32 dask.array<chunksize=(672, 350, 350), meta=np.ndarray>\n",
" crs |S1 ...\n",
"Attributes:\n",
" NWM_version_number: v2.1\n",
" model_configuration: AORC\n",
" model_output_type: forcingarray(['1979-02-01T00:00:00.000000000', '1979-02-01T01:00:00.000000000',\n",
" '1979-02-01T02:00:00.000000000', ..., '2020-12-31T21:00:00.000000000',\n",
" '2020-12-31T22:00:00.000000000', '2020-12-31T23:00:00.000000000'],\n",
" dtype='datetime64[ns]')array([-2303499.25, -2302499.25, -2301499.25, ..., 2301500.75, 2302500.75,\n",
" 2303500.75])array([-1919500.375, -1918500.375, -1917500.375, ..., 1917499.625,\n",
" 1918499.625, 1919499.625])\n",
"
| \n",
" \n", " \n", " | \n", "
array(b'', dtype='|S1')
<xarray.Dataset>\n",
"Dimensions: (time: 122479, y: 3840, x: 4608, vis_nir: 2, soil_layers_stag: 4)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 1979-02-01T03:00:00 ... 2020-12-31T21:00:00\n",
" * x (x) float64 -2.303e+06 -2.302e+06 ... 2.303e+06 2.304e+06\n",
" * y (y) float64 -1.92e+06 -1.919e+06 ... 1.918e+06 1.919e+06\n",
"Dimensions without coordinates: vis_nir, soil_layers_stag\n",
"Data variables: (12/20)\n",
" ACCET (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" ACSNOM (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" ALBEDO (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" ALBSND (time, y, vis_nir, x) float64 dask.array<chunksize=(224, 350, 1, 350), meta=np.ndarray>\n",
" ALBSNI (time, y, vis_nir, x) float64 dask.array<chunksize=(224, 350, 1, 350), meta=np.ndarray>\n",
" COSZ (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" ... ...\n",
" SNEQV (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" SNOWH (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" SOIL_M (time, y, soil_layers_stag, x) float64 dask.array<chunksize=(224, 350, 1, 350), meta=np.ndarray>\n",
" SOIL_W (time, y, soil_layers_stag, x) float64 dask.array<chunksize=(224, 350, 1, 350), meta=np.ndarray>\n",
" TRAD (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" UGDRNOFF (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
"Attributes:\n",
" Conventions: CF-1.6\n",
" GDAL_DataType: Generic\n",
" TITLE: OUTPUT FROM WRF-Hydro v5.2.0-beta2\n",
" code_version: v5.2.0-beta2\n",
" model_configuration: retrospective\n",
" model_output_type: land\n",
" proj4: +proj=lcc +units=m +a=6370000.0 +b=6370000.0 +lat_1...array(['1979-02-01T03:00:00.000000000', '1979-02-01T06:00:00.000000000',\n",
" '1979-02-01T09:00:00.000000000', ..., '2020-12-31T15:00:00.000000000',\n",
" '2020-12-31T18:00:00.000000000', '2020-12-31T21:00:00.000000000'],\n",
" dtype='datetime64[ns]')array([-2303499.25, -2302499.25, -2301499.25, ..., 2301500.75, 2302500.75,\n",
" 2303500.75])array([-1919500.375, -1918500.375, -1917500.375, ..., 1917499.625,\n",
" 1918499.625, 1919499.625])\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "
<xarray.Dataset>\n",
"Dimensions: (time: 122479, y: 15360, x: 18432)\n",
"Coordinates:\n",
" * time (time) datetime64[ns] 1979-02-01T03:00:00 ... 2020-12-31T21...\n",
" * x (x) float64 -2.304e+06 -2.304e+06 ... 2.304e+06 2.304e+06\n",
" * y (y) float64 -1.92e+06 -1.92e+06 ... 1.92e+06 1.92e+06\n",
"Data variables:\n",
" crs |S1 ...\n",
" sfcheadsubrt (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
" zwattablrt (time, y, x) float64 dask.array<chunksize=(224, 350, 350), meta=np.ndarray>\n",
"Attributes:\n",
" Conventions: CF-1.6\n",
" GDAL_DataType: Generic\n",
" TITLE: OUTPUT FROM WRF-Hydro v5.2.0-beta2\n",
" code_version: v5.2.0-beta2\n",
" model_configuration: retrospective\n",
" model_output_type: terrain_rt\n",
" proj4: +proj=lcc +units=m +a=6370000.0 +b=6370000.0 +lat_1...array(['1979-02-01T03:00:00.000000000', '1979-02-01T06:00:00.000000000',\n",
" '1979-02-01T09:00:00.000000000', ..., '2020-12-31T15:00:00.000000000',\n",
" '2020-12-31T18:00:00.000000000', '2020-12-31T21:00:00.000000000'],\n",
" dtype='datetime64[ns]')array([-2303875., -2303625., -2303375., ..., 2303375., 2303625., 2303875.])
array([-1919875., -1919625., -1919375., ..., 1919375., 1919625., 1919875.])
array(b'', dtype='|S1')
\n",
"
| \n",
" \n", " \n", " | \n", "
\n",
"
| \n",
" \n", " \n", " | \n", "