{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Combining data from multiple WRF files and saving to CF compliant netCDF\n", "\n", "* In this example the variable data and coordinates are split between files (don't ask me why WRF does this...)\n", "* An in-memory netcdf file is created and fed into wrf-python (https://wrf-python.readthedocs.io/en/latest/)\n", "* Once the WRF data is in an Xarray DataArray there are additional tools you can use to process the data (http://xarray.pydata.org/en/stable/)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from netCDF4 import Dataset\n", "import wrf\n", "import xarray as xr\n", "import numpy as np\n", "\n", "import cartopy.crs as crs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reading a variable from a wrfout file" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "root_dir = '/data/fiss_aic/WRF/runA_2010'\n", "nc = Dataset(root_dir+'/wrfout_d02_2010-03-18_00:00:00')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Combining information from multiple WRF files\n", "\n", "e.g., where your lat/lon coordinates are stored in another file\n", "\n", "In this example we will extract infomation from two netCDF files (src + coord_file) and add to an empty netCDF Dataset (dst)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "root_dir = '/data/fiss_aic/WRF/runA_2010'\n", "my_vars = ['SPDUV10MEAN', 'V10MEAN', 'U10MEAN']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">> closing dst << \n", "\n" ] } ], "source": [ "#################################\n", "### Get variables and dimensions\n", "#################################\n", "### source\n", "src = Dataset(root_dir+'/wrfxtrm_d02') \n", "\n", "### destination (tmp netCDF file stored in memory) - close first if exists\n", "### close in-memory netCDF Dataset (if exists)\n", "try:\n", " print('>> closing dst << \\n')\n", " dst.close()\n", "except NameError:\n", " pass\n", "dst = Dataset(\"dst_tmp.nc\", \"w\", format=\"NETCDF4\", diskless=True)\n", "\n", "### copy netCDF attributes from source file to destination file\n", "dst.setncatts(src.__dict__)\n", "\n", "### copy all dimensions from src to dst\n", "for name, dimension in src.dimensions.items():\n", " dst.createDimension(name, \n", " len(dimension) if not dimension.isunlimited() else None)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#################################\n", "### Add time coordinates\n", "#################################\n", "name = 'Times'\n", "variable = src.variables[name]\n", "dst.createVariable(name, variable.datatype, variable.dimensions)\n", "dst.variables[name][:] = src.variables[name][:]\n", "dst[name].setncatts(src[name].__dict__)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#################################\n", "### Add lat/lon coordinates\n", "#################################\n", "coord_file = Dataset(root_dir+'/wrfout_d02_2010-03-18_00:00:00')\n", "coords = ['XLAT', 'XLONG']\n", "for name in coords:\n", " variable = coord_file.variables[name]\n", " dst.createVariable(name, variable.datatype, variable.dimensions)\n", " \n", " ### create coord array with the correct shape (i.e. lat x lon for all times)\n", " correct_shape_arr = np.zeros( dst.variables[name].shape )\n", " correct_shape_arr[:,:,:] = coord_file.variables[name][0,:,:]\n", " \n", " dst.variables[name][:] = correct_shape_arr\n", " dst[name].setncatts(coord_file[name].__dict__) " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "#################################\n", "### Extract any useful information \n", "### from nc file\n", "#################################\n", "nx = dst.dimensions['west_east'].size\n", "ny = dst.dimensions['south_north'].size\n", "dt, dx, dy = dst.DT, dst.DX, dst.DY\n", "cen_lat, cen_lon = dst.CEN_LAT, dst.CEN_LON\n", "truelat1, truelat2, STAND_LON = dst.TRUELAT1, dst.TRUELAT2, dst.STAND_LON\n", "pole_lat, pole_lon = dst.POLE_LAT, dst.POLE_LON" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "#################################\n", "### Add south_north and west_east - NOT NEEDED BY WRF-PYTHON\n", "#################################\n", "x0_y0 = wrf.get_cartopy(wrfin=dst).transform_point( dst.variables['XLONG'][0,0,0], \n", " dst.variables['XLAT'][0,0,0], \n", " crs.PlateCarree())\n", "\n", "west_east_points = np.arange(nx, dtype='float32') * dy + x0_y0[0]\n", "south_north_points = np.arange(ny, dtype='float32') * dy + x0_y0[1]\n", "\n", "### Create nc variable\n", "dst.createVariable('south_north', south_north_points.dtype, ('south_north') )\n", "dst.variables['south_north'][:] = south_north_points\n", "dst['south_north'].setncatts( {'units':'m', 'axis':'y'} )\n", "\n", "dst.createVariable('west_east', west_east_points.dtype, ('west_east') )\n", "dst.variables['west_east'][:] = west_east_points\n", "dst['west_east'].setncatts( {'units':'m', 'axis':'x'} )" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#################################\n", "### copy chosen variables (my_var)\n", "### from src to dst\n", "#################################\n", "for name in list(my_vars):\n", " variable = src.variables[name]\n", " dst.createVariable(name, variable.datatype, variable.dimensions)\n", " dst.variables[name][:] = src.variables[name][:]\n", " dst[name].setncatts(src[name].__dict__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create an Xarray Dataset of WRF variables" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", "Dimensions: (Time: 380, south_north: 201, west_east: 147)\n", "Coordinates:\n", " XLONG (south_north, west_east) float32 -123.254684 ... -28.57109\n", " XLAT (south_north, west_east) float32 -78.73641 ... -60.162468\n", " * Time (Time) datetime64[ns] 2010-03-18 2010-03-19 ... 2011-04-01\n", "Dimensions without coordinates: south_north, west_east\n", "Data variables:\n", " SPDUV10MEAN (Time, south_north, west_east) float32 0.0 0.0 ... 11.252552\n", " V10MEAN (Time, south_north, west_east) float32 0.0 0.0 ... 8.698959\n", " U10MEAN (Time, south_north, west_east) float32 0.0 0.0 ... -6.9788127" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "### create list of variables in the Xarray DataArray format\n", "da_list = []\n", "for my_var in my_vars:\n", " da_list.append( wrf.getvar(dst, my_var, timeidx=wrf.ALL_TIMES) )\n", "\n", "### merge all variables (Xarray DataArrays) into one Xarray Dataset\n", "ds = xr.merge(da_list)\n", " \n", "### Show me a summary of the Dataset...\n", "ds" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ">> closing dst << \n", "\n" ] } ], "source": [ "### close in-memory netCDF Dataset (if exists)\n", "try:\n", " print('>> closing dst << \\n')\n", " dst.close()\n", "except NameError:\n", " pass" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "Invalid value for attr: PolarStereographic(stand_lon=-45.0, moad_cen_lat=-75.49999237060547, truelat1=-90.0, truelat2=-90.0, pole_lat=90.0, pole_lon=0.0) must be a number string, ndarray or a list/tuple of numbers/strings for serialization to netCDF files", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_netcdf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'test.nc'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/data/clivarm/wip/jask/miniconda3/lib/python3.6/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36mto_netcdf\u001b[0;34m(self, path, mode, format, group, engine, encoding, unlimited_dims, compute)\u001b[0m\n\u001b[1;32m 1252\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1253\u001b[0m \u001b[0munlimited_dims\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munlimited_dims\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1254\u001b[0;31m compute=compute)\n\u001b[0m\u001b[1;32m 1255\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None,\n", "\u001b[0;32m/data/clivarm/wip/jask/miniconda3/lib/python3.6/site-packages/xarray/backends/api.py\u001b[0m in \u001b[0;36mto_netcdf\u001b[0;34m(dataset, path_or_file, mode, format, group, engine, writer, encoding, unlimited_dims, compute)\u001b[0m\n\u001b[1;32m 686\u001b[0m \u001b[0;31m# validate Dataset keys, DataArray names, and attr keys/values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[0m_validate_dataset_names\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 688\u001b[0;31m \u001b[0m_validate_attrs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 689\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 690\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/data/clivarm/wip/jask/miniconda3/lib/python3.6/site-packages/xarray/backends/api.py\u001b[0m in \u001b[0;36m_validate_attrs\u001b[0;34m(dataset)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mvariable\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvariables\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvariable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 120\u001b[0;31m \u001b[0mcheck_attr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 121\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/data/clivarm/wip/jask/miniconda3/lib/python3.6/site-packages/xarray/backends/api.py\u001b[0m in \u001b[0;36mcheck_attr\u001b[0;34m(name, value)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0;34m'string, ndarray or a list/tuple of '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;34m'numbers/strings for serialization to netCDF '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 111\u001b[0;31m 'files'.format(value))\n\u001b[0m\u001b[1;32m 112\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;31m# Check attrs on the dataset itself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: Invalid value for attr: PolarStereographic(stand_lon=-45.0, moad_cen_lat=-75.49999237060547, truelat1=-90.0, truelat2=-90.0, pole_lat=90.0, pole_lon=0.0) must be a number string, ndarray or a list/tuple of numbers/strings for serialization to netCDF files" ] } ], "source": [ "ds.to_netcdf('test.nc')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "OrderedDict([('FieldType', 104),\n", " ('MemoryOrder', 'XY '),\n", " ('description',\n", " 'MEAN WIND SPEED AT 10M HEIGHT IN DIAGNOSTIC OUTPUT INTERVAL'),\n", " ('units', 'm s-1'),\n", " ('stagger', ''),\n", " ('coordinates', 'XLONG XLAT XTIME'),\n", " ('projection',\n", " PolarStereographic(stand_lon=-45.0, moad_cen_lat=-75.49999237060547, truelat1=-90.0, truelat2=-90.0, pole_lat=90.0, pole_lon=0.0))])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds['SPDUV10MEAN'].attrs" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "### Fix attributes so that we can save to file...\n", "for my_var in my_vars:\n", " ds[my_var].attrs['projection'] = str(ds[my_var].attrs['projection'])\n", " ds[my_var].attrs.pop('coordinates')\n", "ds.to_netcdf('test.nc')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-rw-r--r-- 1 jask psd 129M Feb 5 16:16 test.nc\n" ] } ], "source": [ "### Success!!??\n", "!ls -ltrh test.nc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Open the netcdf file" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", "Dimensions: (Time: 380, south_north: 201, west_east: 147)\n", "Coordinates:\n", " XLONG (south_north, west_east) float32 ...\n", " XLAT (south_north, west_east) float32 ...\n", " * Time (Time) datetime64[ns] 2010-03-18 2010-03-19 ... 2011-04-01\n", "Dimensions without coordinates: south_north, west_east\n", "Data variables:\n", " SPDUV10MEAN (Time, south_north, west_east) float32 ...\n", " V10MEAN (Time, south_north, west_east) float32 ...\n", " U10MEAN (Time, south_north, west_east) float32 ..." ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds2 = xr.open_dataset('test.nc')\n", "ds2" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "OrderedDict([('FieldType', 104),\n", " ('MemoryOrder', 'XY '),\n", " ('description',\n", " 'MEAN WIND SPEED AT 10M HEIGHT IN DIAGNOSTIC OUTPUT INTERVAL'),\n", " ('units', 'm s-1'),\n", " ('stagger', ''),\n", " ('projection',\n", " 'PolarStereographic(stand_lon=-45.0, moad_cen_lat=-75.49999237060547, truelat1=-90.0, truelat2=-90.0, pole_lat=90.0, pole_lon=0.0)')])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds2['SPDUV10MEAN'].attrs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }