{ "cells": [ { "cell_type": "markdown", "id": "33ded939-7e56-4412-98bf-e4ef7217b86f", "metadata": {}, "source": [ "# Calculate monthly mean data from daily inputs" ] }, { "cell_type": "markdown", "id": "0a49237b-801a-4a88-9c37-52983414134e", "metadata": {}, "source": [ "## notes" ] }, { "cell_type": "raw", "id": "af7dbb62-4d24-430a-80f3-0e7592ec4106", "metadata": {}, "source": [ "MW5-1 19980101 to 20230603\n", "MW-IR5-1 20020601 to 20230603\n", "xr.ds.to-netcdf\n", "ds.var.attr\n", ".compute() or .load()\n", "\n", "https://github.com/PCMDI/input4MIPs_CVs/issues/5\n", "\n", "Salinity\n", "OISSS/UHawaii\n", "https://search.earthdata.nasa.gov/search/granules?p=C2589160971-POCLOUD&pg[0][v]=f&pg[0][gsk]=-start_date&tl=1700072617!3!!\n", "https://podaac.jpl.nasa.gov/dataset/OISSS_L4_multimission_7day_v2\n", "https://github.com/podaac/data-subscriber/tree/main - https://podaac.jpl.nasa.gov/legacy_retirement.html\n", "\n", "240111: updated from xcd060 to xcd061cdmcdu env\n", "(xcd061cdmcdu) bash-4.2$ cdscan -x 199801.xml v20230605/199801*.nc\n", "240112: xCDAT temporal_average issue https://github.com/xCDAT/xcdat/issues/586\n", "\n", "To-do" ] }, { "cell_type": "markdown", "id": "61c5cbe2-ca4c-4e0a-b49a-3b3801c29d80", "metadata": {}, "source": [ "## imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "f9706d3a-6ba0-4947-97dd-4285314d03e8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 7.03 s, sys: 1.56 s, total: 8.59 s\n", "Wall time: 7.5 s\n" ] } ], "source": [ "%%time\n", "import cartopy.crs as ccrs\n", "import datetime as dt\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import os\n", "import xarray as xr\n", "import xcdat as xc\n", "import cdms2 as cdm\n", "import cdutil as cdu\n", "import cdtime as cdt\n", "import cftime as cft" ] }, { "cell_type": "markdown", "id": "43892fa7-ab78-400e-8eb9-47b5e4402aa8", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "## functions" ] }, { "cell_type": "code", "execution_count": 2, "id": "e729d48b-25c4-42de-8654-6817d98d834d", "metadata": {}, "outputs": [], "source": [ "def nearestNeighbourFill(data, missingValue=0):\n", " \"\"\"\n", " Documentation for nearestNeighbourFill():\n", " -------\n", " The nearestNeighbourFill() function iteratively infills a 2D matrix\n", " with values from immediately neighbouring cells\n", "\n", " Author: Paul J. Durack : pauldurack@llnl.gov\n", "\n", " Inputs:\n", " -----\n", "\n", " | **data** - a numpy 2D array\n", " | **missingValue** - missing value of data matrix\n", "\n", " Returns:\n", " -------\n", "\n", " | **filledData** - a numpy array with no missingValues\n", "\n", " Usage:\n", " ------\n", " data = np.array([[1, 2, 3, 4],\n", " [5, 0, 7, 8],\n", " [9, 10, 11, 12]])\n", "\n", " filledData = nearestNeighborFill(data, missingValue=0)\n", " print(filledData)\n", " \n", " Notes:\n", " -----\n", " * PJD 28 Nov 2023 - Started\n", " \"\"\"\n", "\n", " # Make copy of input matrix\n", " filledData = data.copy()\n", "\n", " # Find indices of missing values\n", " missingIndices = np.argwhere(data == missingValue)\n", "\n", " for idx in missingIndices:\n", " row, col = idx\n", " neighbors = []\n", "\n", " # Iterate over neighbouring cells\n", " for i in range(max(0, row - 1), min(data.shape[0], row + 2)):\n", " for j in range(max(0, col - 1), min(data.shape[1], col + 2)):\n", " if (i, j) != (row, col) and data[i, j] != missingValue:\n", " neighbours.append(data[i, j])\n", "\n", " # Fill missing value with the mean of neighbours\n", " if neighbours:\n", " filledData[row, col] = np.mean(neighbours)\n", "\n", " return filledData\n", "\n", "\n", "def iterativeZonalFill(data, missingValue=0):\n", " \"\"\"\n", " Documentation for iterativeZonalFill():\n", " -------\n", " The iterativeZonalFill() function iteratively infills a 2D matrix\n", " with values zonal neighbouring cells\n", "\n", " Author: Paul J. Durack : pauldurack@llnl.gov\n", "\n", " Inputs:\n", " -----\n", "\n", " | **data** - a numpy 2D array\n", " | **missingValue** - missing value of data matrix\n", "\n", " Returns:\n", " -------\n", "\n", " | **filledData** - a numpy array with no missingValues\n", "\n", " Usage:\n", " ------\n", " data = np.array([[1, 2, 3, 4],\n", " [5, 0, 7, 8],\n", " [9, 10, 11, 12]])\n", "\n", " filledData = iterativeZonalFill(data, missingValue=0)\n", " print(filledData)\n", " \n", " Notes:\n", " -----\n", " * PJD 28 Nov 2023 - Started\n", " \"\"\"\n", " \n", " # Make copy of input matrix\n", " filledData = data.copy()\n", "\n", " # Find indices of missing values\n", " missingIndices = np.argwhere(data == missingValue)\n", "\n", " # Define directions for iteration (right, down, left, up)\n", " directions = [(0, 1), (1, 0), (0, -1), (-1, 0)]\n", "\n", " for direction in directions:\n", " dx, dy = direction\n", "\n", " # Iterate over the data in the specified direction\n", " for i in range(1, max(data.shape) + 1):\n", " for idx in missingIndices:\n", " row, col = idx\n", " new_row, new_col = row + i * dx, col + i * dy\n", "\n", " # Check if the new indices are within the data boundaries\n", " if 0 <= new_row < data.shape[0] and 0 <= new_col < data.shape[1]:\n", " if data[new_row, new_col] != missingValue:\n", " filledData[row, col] = data[new_row, new_col]\n", "\n", " return filledData\n", "\n" ] }, { "cell_type": "markdown", "id": "6303958e-aeb7-4956-a963-47ef1271bce9", "metadata": {}, "source": [ "## set data paths" ] }, { "cell_type": "code", "execution_count": 3, "id": "6efe82fe-42e1-4162-8e01-424936c02473", "metadata": {}, "outputs": [], "source": [ "obsPath = \"/p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/\"" ] }, { "cell_type": "markdown", "id": "677f879a-e424-46c3-8e14-dee8a1cddbce", "metadata": {}, "source": [ "# cdat reads" ] }, { "cell_type": "code", "execution_count": 4, "id": "bac8167a-0bb7-4537-b6b9-b2e79b6c3898", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['p', 'user_pub', 'PCMDIobs', 'obs4MIPs_input', 'RSS', 'RSS-MW5-1', 'v20230605', '']\n", "sst199801.getTime(): id: time\n", " Designated a time axis.\n", " units: seconds since 1981-01-01 00:00:00\n", " Length: 31\n", " First: 536500800.0\n", " Last: 539092800.0\n", " Other axis attributes:\n", " axis: T\n", " calendar: gregorian\n", " realtopology: linear\n", " Python id: 0x7f2aeb89cee0\n", "\n", "sst199801.shape: (31, 720, 1440)\n", "sst199801mean.shape: (1, 720, 1440)\n", "\n", "time199801mean:\n", " 1998-1-16 12:0:0.0\n", "time199801mean bounds:\n", " 1998-01-01 00:00:00 \n", " 1998-02-01 00:00:00\n" ] } ], "source": [ "print(obsPath.split(\"/\")[1:])\n", "obsPathXml = os.path.join(\"/\", *obsPath.split(\"/\")[1:7], \"199801.xml\")\n", "# open file handle\n", "fH = cdm.open(obsPathXml)\n", "# read sst variable\n", "sst199801 = fH(\"analysed_sst\")\n", "print(\"sst199801.getTime():\", sst199801.getTime())\n", "time199801 = sst199801.getTime().asComponentTime()\n", "#print(\"time199801:\", time199801)\n", "# assign correct bounds for daily data\n", "cdu.setTimeBoundsDaily(sst199801)\n", "time199801d = sst199801.getTime().asComponentTime()\n", "#print(\"time199801d:\", time199801d) # identical to time199801\n", "# calculate monthly mean\n", "sst199801mean = cdu.JAN(sst199801)\n", "# query array shapes\n", "print(\"sst199801.shape:\", sst199801.shape)\n", "print(\"sst199801mean.shape:\", sst199801mean.shape)\n", "# query cdat-generated time values\n", "time199801mean = sst199801mean.getTime().asComponentTime()\n", "print()\n", "print(\"time199801mean:\\n\", time199801mean[0])\n", "sst1998meanTimeBounds = sst199801mean.getTime().getBounds()[0]\n", "# map back to relative\n", "origin = dt.datetime(1981, 1, 1, 0, 0, 0)\n", "startBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[0])\n", "endBounds = origin + dt.timedelta(0, sst1998meanTimeBounds[1])\n", "print(\"time199801mean bounds:\\n\", startBounds, \"\\n\", endBounds)\n", "fH.close()\n" ] }, { "cell_type": "markdown", "id": "761422b6-ff40-47e6-9e56-1edb225c06bd", "metadata": {}, "source": [ "# xcdat reads" ] }, { "cell_type": "code", "execution_count": 4, "id": "ed5d095e-942e-4963-b413-688d312b283a", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dataPath: /p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/199801*.nc\n", "done!\n" ] } ], "source": [ "def setCalendar(ds):\n", " # https://github.com/pydata/xarray/issues/6259\n", " ds.time.attrs[\"calendar\"] = \"standard\"\n", " ds.time.attrs[\"units\"] = \"seconds since 1981-01-01 00:00:00\"\n", " return ds\n", " #return xr.decode_cf(ds)\n", "\n", "dataPath = os.path.join(obsPath, \"199801*.nc\")\n", "#dataPath = os.path.join(obsPath, \"1998*.nc\")\n", "print(\"dataPath:\", dataPath)\n", "ds = xc.open_mfdataset(dataPath, preprocess=setCalendar)\n", "print(\"done!\")" ] }, { "cell_type": "markdown", "id": "8ad5e75b-4604-4a08-aa2f-27c05728437c", "metadata": {}, "source": [ "## view dataset" ] }, { "cell_type": "code", "execution_count": 5, "id": "4b076987-b771-41c8-ad2e-c00e5598b56f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, time: 31, bnds: 2)\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " analysed_sst (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " analysis_error (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " sea_ice_fraction (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " mask (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 180.0\n", " lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 90.0\n", "Attributes: (12/46)\n", " Conventions: CF-1.8,ACDD-1.3\n", " title: Analysed foundation sea surface temperature o...\n", " summary: A merged, multi-sensor L4 foundation SST prod...\n", " references: http://www.remss.com/measurements/sea-surface...\n", " institution: REMSS\n", " history: 2021-11-11 18:18:15+0000 created by sst_fusio...\n", " ... ...\n", " project: Group for High Resolution Sea Surface Tempera...\n", " publisher_name: The GHRSST Project Office\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " publisher_url: http://www.ghrsst.org\n", " processing_level: L4\n", " cdm_data_type: grid
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, time: 31, bnds: 2)\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " analysed_sst (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " analysis_error (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " sea_ice_fraction (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " mask (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 180.0\n", " lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 90.0\n", "Attributes: (12/46)\n", " Conventions: CF-1.8,ACDD-1.3\n", " title: Analysed foundation sea surface temperature o...\n", " summary: A merged, multi-sensor L4 foundation SST prod...\n", " references: http://www.remss.com/measurements/sea-surface...\n", " institution: REMSS\n", " history: 2021-11-11 18:18:15+0000 created by sst_fusio...\n", " ... ...\n", " project: Group for High Resolution Sea Surface Tempera...\n", " publisher_name: The GHRSST Project Office\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " publisher_url: http://www.ghrsst.org\n", " processing_level: L4\n", " cdm_data_type: grid
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, time: 31, bnds: 2)\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " analysed_sst (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " analysis_error (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " sea_ice_fraction (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " mask (time, lat, lon) float32 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 180.0\n", " lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 90.0\n", " time_bnds (time, bnds) object 1998-01-01 00:00:00 ... 1998-02-01 ...\n", "Attributes: (12/46)\n", " Conventions: CF-1.8,ACDD-1.3\n", " title: Analysed foundation sea surface temperature o...\n", " summary: A merged, multi-sensor L4 foundation SST prod...\n", " references: http://www.remss.com/measurements/sea-surface...\n", " institution: REMSS\n", " history: 2021-11-11 18:18:15+0000 created by sst_fusio...\n", " ... ...\n", " project: Group for High Resolution Sea Surface Tempera...\n", " publisher_name: The GHRSST Project Office\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " publisher_url: http://www.ghrsst.org\n", " processing_level: L4\n", " cdm_data_type: grid
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, bnds: 2, time: 1)\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-01 00:00:00\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 179.8 180.0\n", " lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0\n", " analysed_sst (time, lat, lon) float64 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", "Attributes: (12/46)\n", " Conventions: CF-1.8,ACDD-1.3\n", " title: Analysed foundation sea surface temperature o...\n", " summary: A merged, multi-sensor L4 foundation SST prod...\n", " references: http://www.remss.com/measurements/sea-surface...\n", " institution: REMSS\n", " history: 2021-11-11 18:18:15+0000 created by sst_fusio...\n", " ... ...\n", " project: Group for High Resolution Sea Surface Tempera...\n", " publisher_name: The GHRSST Project Office\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " publisher_url: http://www.ghrsst.org\n", " processing_level: L4\n", " cdm_data_type: grid
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, bnds: 2, time: 1)\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-16 12:00:00\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 179.8 180.0\n", " lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0\n", " analysed_sst (time, lat, lon) float64 dask.array<chunksize=(1, 720, 1440), meta=np.ndarray>\n", " time_bnds (time, bnds) object 1998-01-01 00:00:00 1998-02-01 00:00:00\n", "Attributes: (12/46)\n", " Conventions: CF-1.8,ACDD-1.3\n", " title: Analysed foundation sea surface temperature o...\n", " summary: A merged, multi-sensor L4 foundation SST prod...\n", " references: http://www.remss.com/measurements/sea-surface...\n", " institution: REMSS\n", " history: 2021-11-11 18:18:15+0000 created by sst_fusio...\n", " ... ...\n", " project: Group for High Resolution Sea Surface Tempera...\n", " publisher_name: The GHRSST Project Office\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " publisher_url: http://www.ghrsst.org\n", " processing_level: L4\n", " cdm_data_type: grid
<xarray.Dataset>\n", "Dimensions: (lat: 720, lon: 1440, bnds: 2)\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " lon_bnds (lon, bnds) float32 -180.0 -179.8 -179.8 ... 179.8 179.8 180.0\n", " lat_bnds (lat, bnds) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0\n", " analysed_sst (lat, lon) float64 dask.array<chunksize=(720, 1440), meta=np.ndarray>\n", "Attributes: (12/46)\n", " Conventions: CF-1.8,ACDD-1.3\n", " title: Analysed foundation sea surface temperature o...\n", " summary: A merged, multi-sensor L4 foundation SST prod...\n", " references: http://www.remss.com/measurements/sea-surface...\n", " institution: REMSS\n", " history: 2021-11-11 18:18:15+0000 created by sst_fusio...\n", " ... ...\n", " project: Group for High Resolution Sea Surface Tempera...\n", " publisher_name: The GHRSST Project Office\n", " publisher_email: ghrsst-po@nceo.ac.uk\n", " publisher_url: http://www.ghrsst.org\n", " processing_level: L4\n", " cdm_data_type: grid
<xarray.DataArray 'analysed_sst' (time: 1, lat: 720, lon: 1440)>\n", "dask.array<stack, shape=(1, 720, 1440), dtype=float32, chunksize=(1, 720, 1440), chunktype=numpy.ndarray>\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 -179.1 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-31 00:00:00\n", "Attributes:\n", " units: K\n", " long_name: analysed sea surface temperature\n", " standard_name: sea_surface_foundation_temperature\n", " valid_min: -32767\n", " valid_max: 32767\n", " source: REMSS-L3C-TMI\n", " coverage_content_type: physicalMeasurement
<xarray.DataArray 'analysed_sst' (time: 31, lat: 720, lon: 1440)>\n", "dask.array<concatenate, shape=(31, 720, 1440), dtype=float32, chunksize=(1, 720, 1440), chunktype=numpy.ndarray>\n", "Coordinates:\n", " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", " * lon (lon) float32 -179.9 -179.6 -179.4 -179.1 ... 179.4 179.6 179.9\n", " * time (time) object 1998-01-01 12:00:00 ... 1998-01-31 12:00:00\n", "Attributes:\n", " units: K\n", " long_name: analysed sea surface temperature\n", " standard_name: sea_surface_foundation_temperature\n", " valid_min: -32767\n", " valid_max: 32767\n", " source: REMSS-L3C-TMI\n", " coverage_content_type: physicalMeasurement