{ "cells": [ { "cell_type": "markdown", "id": "ffcaec2c-c120-4117-b964-be49032282c8", "metadata": {}, "source": [ "# TopoPyScale Benchmarking Against Local Observation\n", "S. Filhol" ] }, { "cell_type": "code", "execution_count": 32, "id": "0f5ddb5e-d5fa-41ad-a848-6068d8bc7395", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using matplotlib backend: Qt5Agg\n" ] } ], "source": [ "import requests, os, glob\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib\n", "import numpy as np\n", "import xarray as xr\n", "import seaborn as sns\n", "plt.style.use('style_1')\n", "import rasterio\n", "from scipy import stats" ] }, { "cell_type": "code", "execution_count": 20, "id": "abfe8d77-8dad-4e11-9f15-ca7ab27ddd30", "metadata": {}, "outputs": [], "source": [ "df_stn = pd.read_csv('inputs/dem/station_list.csv')\n", "ds_stn = xr.open_dataset('outputs/stn_downscaled.nc')" ] }, { "cell_type": "code", "execution_count": 21, "id": "54170e7e-35f7-4861-8591-e010e0094609", "metadata": {}, "outputs": [], "source": [ "df_obs = pd.concat(map(pd.read_pickle, glob.glob(os.path.join('', \"inputs/obs/metno*.pckl\"))))" ] }, { "cell_type": "code", "execution_count": 22, "id": "6bcf0e15-08e2-492c-ad36-679c759f9321", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['air_temperature', 'sum(precipitation_amount PT12H)', 'wind_speed',\n", " 'wind_from_direction', 'relative_humidity',\n", " 'air_pressure_at_sea_level'], dtype=object)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_obs.elementId.unique()" ] }, { "cell_type": "code", "execution_count": 38, "id": "cc4f8053-7791-4341-9023-ad26d4461bb7", "metadata": {}, "outputs": [], "source": [ "dem = rasterio.open('inputs/dem/ASTER_Finse_Kris.tif')\n", "plt.figure()\n", "show(dem)\n", "plt.scatter(df_stn.x, df_stn.y, s=100)\n", "for i,row in df_stn.iterrows():\n", " plt.text(row.x,row.y,row.Name)" ] }, { "cell_type": "code", "execution_count": 24, "id": "92503b01-8247-4b95-bc56-74f506041232", "metadata": {}, "outputs": [], "source": [ "# Convert pandas dataframe to xarray dataset\n", "df = pd.pivot_table(df_obs, columns=['elementId'],values=['value'], index=['sourceId', 'referenceTime'])\n", "ds = df.xs('value', axis=1, level=0).to_xarray() \n", "ds['referenceTime']=pd.to_datetime(ds.referenceTime)" ] }, { "cell_type": "code", "execution_count": 61, "id": "04a93023-7708-4194-9c1a-77d76c2b0b0d", "metadata": {}, "outputs": [], "source": [ "stn_name='SN25830:0'\n", "\n", "stn = pd.DataFrame()\n", "stn['time'] = ds.referenceTime\n", "stn['Downscaled'] = (ds_stn.t.sel(point_id=df_stn.loc[df_stn.stn_number==stn_name[:-2]].index)-273.15).copy().drop('point_id').sel(time=slice('2018-01-01', '2021-08-30'))[0].values\n", "stn['Observed'] = ds.air_temperature.sel(sourceId=stn_name).dropna(dim='referenceTime').sel(referenceTime=slice('2018-01-01', '2021-08-30')).copy().values" ] }, { "cell_type": "markdown", "id": "9dbd8f90-32bb-4bcc-b92e-025838a10ae1", "metadata": {}, "source": [ "## Compare Air Temperature" ] }, { "cell_type": "code", "execution_count": 49, "id": "237d9a60-7186-453b-96b3-3b5613c523ee", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_13568/1664442083.py:8: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).\n", " plt.figure()\n" ] } ], "source": [ "# plot temperature for each station\n", "for stn_name in df_stn.stn_number:\n", " stn = ds.air_temperature.sel(sourceId=stn_name+':0').dropna(dim='referenceTime').copy().to_dataframe().resample('1H').mean()\n", " stn['Downscaled'] = (ds_stn.t.sel(point_id=df_stn.loc[df_stn.stn_number==stn_name].index)-273.15).copy().drop('point_id').sel(time=stn.index)[0].values\n", " stn.rename(columns={'air_temperature':'Observed', 'sourceId':'stn_id'}, inplace=True)\n", " #stn = stn.dropna(axis=0)\n", " stn['var_diff'] = stn.Downscaled - stn.Observed\n", "\n", " plt.figure()\n", " ax = sns.histplot(x=stn.Observed, y=stn.Downscaled, cmap='magma')\n", " ax.figure.colorbar(ax.collections[0])\n", " plt.plot([-20,20], [-20,20], c='r', label='1:1 line')\n", " reg = stats.linregress(stn.Observed, stn.Downscaled)\n", " plt.plot([-20,20], np.array([-20,20])*reg.slope + reg.intercept, c='b', label='regression')\n", " plt.ylabel('Downscaled')\n", " plt.xlabel('Observed')\n", " plt.title('Air Temperature [$^{o}C$] at ' + df_stn.Name.loc[df_stn.stn_number==stn_name].values[0])\n", " plt.legend()\n", " \n", " # Plot time series \n", " fig, ax = plt.subplots(2,1, sharex=True, gridspec_kw={'height_ratios': [3, 1]})\n", " # absolute value\n", " stn.Observed.plot(ax=ax[0], label='Obs.')\n", " stn.Downscaled.plot(ax=ax[0], label='TopoPyScale')\n", " ax[0].legend()\n", " ax[0].set_ylabel('Tair [$^{o}C$]')\n", " ax[0].set_title(df_stn.Name.loc[df_stn.stn_number==stn_name].values[0])\n", " # timeseries difference\n", " stn.var_diff.plot(ax=ax[1])\n", " (stn.Observed*0).plot(color='k', linestyle='--')\n", " ax[1].fill_between(stn.index, (stn.Observed*0)+stn.var_diff.quantile(0.05),(stn.Observed*0)+stn.var_diff.quantile(0.95), alpha=.3, color='r', zorder=5, label='90% values')\n", " ((stn.Observed*0)+stn.var_diff.median()).plot(ax=ax[1], label='median', c='r', linestyle='-', zorder=10)\n", " ax[1].legend()\n", " ax[1].set_ylabel('Tair diff [$^{o}C$]')" ] }, { "cell_type": "markdown", "id": "0af9a5fa-992b-4a3a-a2c8-978a5869f08b", "metadata": {}, "source": [ "## Compare Wind Speed" ] }, { "cell_type": "code", "execution_count": 96, "id": "425830be-13b4-49b2-84a3-fe196549bf23", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_13568/2219099889.py:10: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).\n", " plt.figure()\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "list index out of range\n" ] } ], "source": [ "# plot temperature for each station\n", "for stn_name in df_stn.stn_number:\n", " try:\n", " stn = ds.wind_speed.sel(sourceId=stn_name+':0').dropna(dim='referenceTime').copy().to_dataframe().resample('1H').mean()\n", " stn['Downscaled'] = (ds_stn.ws.sel(point_id=df_stn.loc[df_stn.stn_number==stn_name].index)).copy().drop('point_id').sel(time=stn.index)[0].values\n", " stn.rename(columns={'wind_speed':'Observed', 'sourceId':'stn_id'}, inplace=True)\n", " #stn = stn.dropna(axis=0)\n", " stn['var_diff'] = stn.Downscaled - stn.Observed\n", "\n", " plt.figure()\n", " ax = sns.histplot(x=stn.Observed, y=stn.Downscaled, cmap='magma')\n", " ax.figure.colorbar(ax.collections[0])\n", " plt.plot([0,20], [0,20], c='r', label='1:1 line')\n", " reg = stats.linregress(stn.Observed, stn.Downscaled)\n", " plt.plot([0,20], np.array([0,20])*reg.slope + reg.intercept, c='b', label='regression')\n", " plt.ylabel('Downscaled')\n", " plt.xlabel('Observed')\n", " plt.title('Wind Speed [m.s$^{-1}$] at ' + df_stn.Name.loc[df_stn.stn_number==stn_name].values[0])\n", " plt.legend()\n", "\n", " # Plot time series \n", " fig, ax = plt.subplots(2,1, sharex=True, gridspec_kw={'height_ratios': [3, 1]})\n", " # absolute value\n", " stn.Observed.plot(ax=ax[0], label='Obs.')\n", " stn.Downscaled.plot(ax=ax[0], label='TopoPyScale')\n", " ax[0].legend()\n", " ax[0].set_ylabel('Wind Speed [m.s$^{-1}$]')\n", " ax[0].set_title(df_stn.Name.loc[df_stn.stn_number==stn_name].values[0])\n", " # timeseries difference\n", " stn.var_diff.plot(ax=ax[1])\n", " (stn.Observed*0).plot(color='k', linestyle='--')\n", " ax[1].fill_between(stn.index, (stn.Observed*0)+stn.var_diff.quantile(0.05),(stn.Observed*0)+stn.var_diff.quantile(0.95), alpha=.3, color='r', zorder=5, label='90% values')\n", " ((stn.Observed*0)+stn.var_diff.median()).plot(ax=ax[1], label='median', c='r', linestyle='-', zorder=10)\n", " ax[1].legend()\n", " ax[1].set_ylabel('Ws diff [m.s$^{-1}$]')\n", " except Exception as e: \n", " print(e)\n", " " ] }, { "cell_type": "code", "execution_count": 97, "id": "4f6d8a34-ec9f-42d6-963f-031477c3aa86", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['air_pressure_at_sea_level',\n", " 'air_temperature',\n", " 'relative_humidity',\n", " 'sum(precipitation_amount PT12H)',\n", " 'wind_from_direction',\n", " 'wind_speed']" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(ds)" ] }, { "cell_type": "code", "execution_count": 100, "id": "fd55e308-00f9-479e-86f2-ce6931103ac6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
<xarray.DataArray 'sum(precipitation_amount PT12H)' (referenceTime: 2666)>\n",
"array([7.3, 4.8, 2.8, ..., 0. , 0. , 0. ])\n",
"Coordinates:\n",
" sourceId <U9 'SN53480:0'\n",
" * referenceTime (referenceTime) datetime64[ns] 2018-01-01T06:00:00 ... 202...array([7.3, 4.8, 2.8, ..., 0. , 0. , 0. ])
array('SN53480:0', dtype='<U9')array(['2018-01-01T06:00:00.000000000', '2018-01-01T18:00:00.000000000',\n",
" '2018-01-02T06:00:00.000000000', ..., '2021-08-29T18:00:00.000000000',\n",
" '2021-08-30T06:00:00.000000000', '2021-08-30T18:00:00.000000000'],\n",
" dtype='datetime64[ns]')