{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Accessing Met Office COVID-19 response data on Azure\n", "\n", "This dataset contains weather data that has been curated for researchers looking to understand links between COVID-19 and environmental factors.\n", "\n", "This dataset is stored in the East US 2 Azure region, so this notebook will run most efficiently on Azure compute located in the same region. If you are using this data for environmental science applications, consider applying for an [AI for Earth grant](http://aka.ms/ai4egrants) to support your compute requirements.\n", "\n", "This dataset is documented at [ai4edata-metofficecovid19](https://aka.ms/ai4edata-metofficecovid19)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Environment setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt \n", "import datetime\n", "\n", "from azure.storage.blob import BlobClient, ContainerClient\n", "from collections import namedtuple\n", "\n", "plt.rcParams['figure.figsize'] = (20.0, 10.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set up the blob client with the connection details" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Storage account endpoint and the container that hosts the dataset\n", "account_url = 'https://metdatasa.blob.core.windows.net/'\n", "container_name = 'covid19-response'\n", "\n", "# Client for the container, used below to enumerate blobs; no credential is supplied\n", "container_client = ContainerClient(\n", "    account_url=account_url,\n", "    container_name=container_name,\n", "    credential=None,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List the files under metoffice_global_daily/t1o5m_max" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200101.nc\n", 
"metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200102.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200103.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200104.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200105.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200106.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200107.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200108.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200109.nc\n", "metoffice_global_daily/t1o5m_max/global_daily_t1o5m_max_20200110.nc\n" ] } ], "source": [ "max_blobs = 10\n", "blobs = container_client.list_blobs(\n", "    name_starts_with='metoffice_global_daily/t1o5m_max')\n", "\n", "# Test the limit before printing so that exactly max_blobs names are shown\n", "for i_blob, blob in enumerate(blobs):\n", "    if i_blob >= max_blobs:\n", "        break\n", "    print(f'{blob.name}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Choose a file based on date and time" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "covid19-response/metoffice_global_daily/precip_mean/global_daily_precip_mean_20200303.nc\n", "covid19-response/metoffice_ukv_daily/t1o5m_min/ukv_daily_t1o5m_min_20200401.nc\n", "covid19-response/metoffice_ukv_hourly/snow/ukv_hourly_snow_20200202.nc\n" ] } ], "source": [ "# Latest day accepted by url_from_properties: 9 days before today (offset kept\n", "# from the original notebook -- TODO confirm against the dataset documentation)\n", "data_end = (datetime.datetime.now() - datetime.timedelta(days=9)).date()\n", "data_start = datetime.date(2020,1,1)\n", "\n", "def url_from_properties(model, param, freq, stat=None, day=None, hour=None):\n", "    \"\"\"Return the blob URL of the NetCDF file described by the arguments.\n", "\n", "    model: 'global' or 'ukv'.\n", "    param: one of 'rain', 'sh', 'snow', 't1o5m', 'pmsl', 'precip', 'sw'.\n", "    freq:  'daily' or 'hourly'.\n", "    stat:  'max', 'min' or 'mean' for daily files; must be None for hourly files.\n", "    day:   datetime.date between data_start and data_end (inclusive); required.\n", "    hour:  accepted for interface compatibility; not used to build the URL.\n", "\n", "    Raises AssertionError when an argument fails one of these checks.\n", "    \"\"\"\n", "    assert model in [\"global\",\"ukv\"], f'unknown model: {model!r}'\n", "    assert param in [\"rain\", \"sh\", \"snow\", \"t1o5m\", \"pmsl\",\"precip\",\"sw\"], f'unknown param: {param!r}'\n", "    assert freq in [\"daily\",\"hourly\"], f'unknown freq: {freq!r}'\n", "    if freq == 'daily':\n", "        assert stat in ['max', 'min', 'mean'], f'daily files need stat max/min/mean, got {stat!r}'\n", "    else:\n", "        assert stat is None, 'hourly files take no stat'\n", "    # Without this check the default day=None fails with an opaque TypeError below\n", "    assert day is not None, 'day is required'\n", "    assert data_start <= day <= data_end, f'day must be between {data_start} and {data_end}'\n", "\n", "    stat = '_'+stat if stat else ''\n", "\n", "    filepath = f'metoffice_{model}_{freq}/{param}{stat}/{model}_{freq}_{param}{stat}_{day:%Y%m%d}.nc'\n", "    # account_url already ends with '/'; strip it so the URL has no '//' before the container\n", "    return f\"{account_url.rstrip('/')}/{container_name}/{filepath}\"\n", "\n", "Properties = namedtuple('Properties',[\"model\",\"param\",\"freq\",\"stat\",\"day\"])\n", "\n", "files = [\n", "    Properties(\"global\",\"precip\",\"daily\",\"mean\",datetime.date(2020,3,3)),\n", "    Properties(\"ukv\",\"t1o5m\",\"daily\",\"min\",datetime.date(2020,4,1)),\n", "    Properties(\"ukv\",\"snow\",\"hourly\",None,datetime.date(2020,2,2)),\n", "]\n", "\n", "# Show each path relative to the storage account\n", "for file in files:\n", "    path = url_from_properties(*file)\n", "    print(path.replace(account_url,''))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Stream blob into memory and load dataset in xarray" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
<xarray.Dataset>\n", "Dimensions: (bnds: 2, latitude: 1920, longitude: 2560)\n", "Coordinates:\n", " * latitude (latitude) float32 -89.95 -89.86 ... 89.95\n", " * longitude (longitude) float32 0.07031 0.2109 ... 359.9\n", " forecast_period timedelta64[ns] 02:00:00\n", " forecast_reference_time datetime64[ns] 2020-01-30T09:00:00\n", " time datetime64[ns] 2020-01-30T11:00:00\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " precipitation_flux (latitude, longitude) float32 ...\n", " latitude_longitude int32 -2147483647\n", " forecast_period_bnds (bnds) float64 -1.0 5.0\n", " forecast_reference_time_bnds (bnds) datetime64[ns] 2020-01-30 2020-01-30...\n", " time_bnds (bnds) datetime64[ns] 2020-01-29T23:00:00 2...\n", "Attributes:\n", " source: Data from Met Office Unified Model\n", " um_version: 11.2\n", " Conventions: CF-1.5
array([-89.953125, -89.859375, -89.765625, ..., 89.765625, 89.859375,\n", " 89.953125], dtype=float32)
array([7.031250e-02, 2.109375e-01, 3.515625e-01, ..., 3.596484e+02,\n", " 3.597891e+02, 3.599297e+02], dtype=float32)
array(7200000000000, dtype='timedelta64[ns]')
array('2020-01-30T09:00:00.000000000', dtype='datetime64[ns]')
array('2020-01-30T11:00:00.000000000', dtype='datetime64[ns]')
[4915200 values with dtype=float32]
array(-2147483647, dtype=int32)
array([-1., 5.])
array(['2020-01-30T00:00:00.000000000', '2020-01-30T18:00:00.000000000'],\n", " dtype='datetime64[ns]')
array(['2020-01-29T23:00:00.000000000', '2020-01-30T23:00:00.000000000'],\n", " dtype='datetime64[ns]')