{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd, numpy as np\n", "from scipy import stats\n", "import random" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "stations=pd.read_csv('data/stations.csv').set_index('ID')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Setup plot params" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from matplotlib.collections import PolyCollection\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import matplotlib as mpl\n", "import matplotlib.font_manager as font_manager\n", "path = 'KulimPark-Regular.ttf'\n", "path2 = 'Symbola.ttf'\n", "prop = font_manager.FontProperties(fname=path)\n", "prop2 = font_manager.FontProperties(fname=path2)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "color_ax='#E7CFBC'\n", "color_bg='#FFF4EC'\n", "color_obs_right0='#F2B880'\n", "color_obs_left0=color_ax\n", "color_pred_right0='#C98686'\n", "color_pred_left0='#966B9D'\n", "color_pred_talalt0='#59c687'\n", "color_pred_nem_talalt0='#c95498'\n", "font_size=12\n", "s=40\n", "obs_talalt_glyph0='★'\n", "obs_nem_talalt_glyph0='☆'\n", "pred_talalt_glyph0='✔️'\n", "pred_nem_talalt_glyph0='✖️'\n", "title_icon_right={'Temp':'☼','Wind':'🌀','Hail':'⭕️','Snow':'☃️','Snow Depth':'⛄️','Rain':'☔️','Visib':'☀️'}\n", "title_icon_left={'Temp':'✨️','Wind':'☘','Hail':'⚪️','Snow':'⚪️','Snow Depth':'⚪️','Rain':'🌂','Visib':'⛈️'}\n", "title_icon={'Temp':'♨️','Rain':'☂️','Hail':'✴️','Snow':'❄️','Snow Depth':'⛷️','Wind':'⛵️','Cloud':'☁️','Visib':'⛅️'}" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def get_data(data,th):\n", " a1=pd.DataFrame(data[data<=th])\n", " a1['g']='left'\n", " a2=pd.DataFrame(data[data>th])\n", " a2['g']='right'\n", " a3=pd.concat([a1,a2])\n", " a3['x']='x'\n", " return a1,a2,a3" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def violin_plot(data,th,ax,color_left,color_right):\n", " a=0.3\n", " a1,a2,a3=get_data(data,th)\n", " a1_augment=True\n", " a2_augment=True\n", " if len(a1)==0:\n", " a1=a3.loc[[a3.index[0]]]\n", " a1['g']='left'\n", " a1[a1.columns[0]]=5\n", " if len(a2)==0:\n", " a2=a3.loc[[a3.index[0]]]\n", " a2['g']='right'\n", " a2[a2.columns[0]]=5\n", " if len(a1)>1: a1_augment=False\n", " if not a1_augment:\n", " if a1.nunique()[a1.columns[0]]==1: \n", " a1_augment=True\n", " if a1_augment:\n", " a11=a1.copy().loc[[a1.index[0]]]\n", " a11[a11.columns[0]]+=random.random()*0.1*th\n", " a11['x']='x'\n", " a12=a1.copy().loc[[a1.index[0]]]\n", " a12[a12.columns[0]]-=random.random()*0.1*th\n", " a12['x']='x'\n", " a3=pd.concat([a3,a11,a12])\n", " if len(a2)>1: a2_augment=False\n", " if not a2_augment:\n", " if a2.nunique()[a2.columns[0]]==1: \n", " a2_augment=True\n", " if a2_augment:\n", " a21=a2.copy().loc[[a2.index[0]]]\n", " a21[a21.columns[0]]+=random.random()*0.1*th\n", " a21['x']='x'\n", " a22=a2.copy().loc[[a2.index[0]]]\n", " a22[a22.columns[0]]-=random.random()*0.1*th\n", " a22['x']='x'\n", " a3=pd.concat([a3,a21,a22])\n", " a3=a3.sort_values('g')\n", " ax.axvline(0,color=color_ax)\n", " if a3.nunique()['g']>1:\n", " sns.violinplot(y=a1.columns[0], x='x',hue='g', data=a3, split=True, ax=ax,\n", " inner=None,linewidth=1, scale=\"count\", saturation=1)\n", "\n", " ax.get_children()[0].set_color(mpl.colors.colorConverter.to_rgba(color_left, alpha=a))\n", " ax.get_children()[0].set_edgecolor(color_left)\n", " ax.get_children()[1].set_color(mpl.colors.colorConverter.to_rgba(color_right, alpha=a))\n", " ax.get_children()[1].set_edgecolor(color_right)\n", " ax.legend().remove()\n", " else:\n", " if len(a1)>0: \n", " w=a1\n", " c=color_left\n", " else: \n", " w=a2\n", " c=color_right\n", " sns.violinplot(y=w.columns[0], data=w, ax=ax, \n", " inner=None,linewidth=1, scale=\"count\", saturation=1)\n", " ax.set_xlim([-1,0])\n", " ax.get_children()[0].set_color(mpl.colors.colorConverter.to_rgba(c, alpha=a))\n", " ax.get_children()[0].set_edgecolor(c)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def setup_axes():\n", " fig,axes=plt.subplots(1,3,figsize=(8,5),gridspec_kw={'width_ratios': [1, 3, 1]})\n", " axi_top= axes[2].inset_axes([0.1, 0.65, 1, 0.3])\n", " axi_top.axis('off')\n", " axi_bottom= axes[2].inset_axes([0.1, 0, 1, 0.5])\n", " axi_bottom.axis('off')\n", " axes[0].axis('off')\n", " axes[1].axis('off')\n", " axes[2].axis('off')\n", " axes[0]=axes[0].inset_axes([0, 0.15, 1, 0.85])\n", " axes[1]=axes[1].inset_axes([0, 0.15, 1, 0.85])\n", " axes[0].axis('off')\n", " axes[1].axis('off')\n", " return fig, axes, axi_top, axi_bottom" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def stem_plot(data,ax,color,s=s):\n", " data=pd.DataFrame(data)\n", " x=data.index\n", " y=data[data.columns[0]].values\n", " for i,e in enumerate(y):\n", " ax.plot([0,e],[x[i],x[i]],color=color)\n", " ax.scatter(y,x,s,color=color,zorder=10)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def stem2_plot(data,th,ax,color_left,color_right,s=s,axv_color=None):\n", " if axv_color==None:axv_color=color_right\n", " a1,a2,a3=get_data(data,th)\n", " stem_plot(a1,ax,color_left,s)\n", " stem_plot(a2,ax,color_right,s)\n", " ax.axvline(0,color=color_ax)\n", " #if th!=0:\n", " if True:\n", " ax.axvline(th,color=axv_color,ls='--',zorder=5)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def icons_plot(axes,kondicio,mennyiseg,observation_th,prediction_th):\n", " ylim=axes[0].get_ylim()\n", " xlim=axes[1].get_xlim()\n", " y_max_coord=ylim[0]+(ylim[1]-ylim[0])*1.05\n", " y_max_coord2=ylim[0]+(ylim[1]-ylim[0])*1.05 #1.04\n", " x_icon_coord_shift=(xlim[1]-xlim[0])*0.1\n", " axes[0].text(observation_th, y_max_coord, title_icon[kondicio], \n", " horizontalalignment='center', color=color_obs_right0, fontproperties=prop2, fontsize=font_size*1.5)\n", " axes[1].text(prediction_th, y_max_coord, title_icon[mennyiseg], \n", " horizontalalignment='center', color=color_ax, fontproperties=prop2, fontsize=font_size*1.5)\n", " axes[1].text(prediction_th+x_icon_coord_shift, y_max_coord2, title_icon_right[mennyiseg], \n", " horizontalalignment='center', color=color_pred_right, fontproperties=prop2, fontsize=font_size*1.5)\n", " axes[1].text(prediction_th-x_icon_coord_shift, y_max_coord2, title_icon_left[mennyiseg], \n", " horizontalalignment='center', color=color_pred_left, fontproperties=prop2, fontsize=font_size*1.5)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def talalat_plot_line(axes,n_prediction_ts_good,n_prediction_ts_bad,\n", " n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt,\n", " observation_th,prediction_th):\n", " ylim=axes[0].get_ylim()\n", " xlim=axes[0].get_xlim()\n", " y_max_coord=ylim[0]+(ylim[1]-ylim[0])*(-0.07)\n", " x_icon_coord_shift=(xlim[1]-xlim[0])*0.1\n", " x_icon_coord_shift2=(xlim[1]-xlim[0])*0.27\n", " axes[0].text(observation_th+x_icon_coord_shift, y_max_coord, obs_talalt_glyph, \n", " horizontalalignment='center', color=color_obs_right, fontproperties=prop2)\n", " axes[0].text(observation_th-x_icon_coord_shift, y_max_coord, obs_nem_talalt_glyph, \n", " horizontalalignment='center', color=color_obs_left, fontproperties=prop2)\n", " axes[0].text(observation_th+x_icon_coord_shift2, y_max_coord, n_prediction_ts_good, \n", " horizontalalignment='center', color=color_obs_right, fontproperties=prop)\n", " axes[0].text(observation_th-x_icon_coord_shift2, y_max_coord, n_prediction_ts_bad, \n", " horizontalalignment='center', color=color_obs_left, fontproperties=prop)\n", " axes[0].text(observation_th, y_max_coord, '|', \n", " horizontalalignment='center', color=color_obs_right0, fontproperties=prop,fontsize=19)\n", "\n", " xlim=axes[1].get_xlim()\n", " x_icon_coord_shift=(xlim[1]-xlim[0])*0.04\n", " x_icon_coord_shift2=(xlim[1]-xlim[0])*0.1\n", " axes[1].text(prediction_th+x_icon_coord_shift, y_max_coord, pred_talalt_glyph, \n", " horizontalalignment='center', color=color_pred_talalt, fontproperties=prop2)\n", " axes[1].text(prediction_th-x_icon_coord_shift, y_max_coord, pred_nem_talalt_glyph, \n", " horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop2)\n", " axes[1].text(prediction_th+x_icon_coord_shift2, y_max_coord, n_prediction_ts_good_talalt, \n", " horizontalalignment='center', color=color_pred_talalt, fontproperties=prop)\n", " axes[1].text(prediction_th-x_icon_coord_shift2, y_max_coord, n_prediction_ts_good_nem_talalt, \n", " horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop)\n", " axes[1].text(prediction_th, y_max_coord, '|', \n", " horizontalalignment='center', color=color_pred_right, fontproperties=prop,fontsize=19)\n", " \n", " y_max_coord=ylim[0]+(ylim[1]-ylim[0])*(-0.14)\n", " axes[0].text(observation_th, y_max_coord, 'feltétel', \n", " horizontalalignment='center', color=color_obs_right0, fontproperties=prop)\n", " axes[1].text(prediction_th, y_max_coord, 'jóslat', \n", " horizontalalignment='center', color=color_pred_right, fontproperties=prop)\n", " y_max_coord=ylim[0]+(ylim[1]-ylim[0])*(-0.13)\n", " x_coord_shift=prediction_th+(prediction_th-xlim[0])*(-0.4)\n", " axes[1].annotate('', xy=(x_coord_shift, y_max_coord),xycoords='data',annotation_clip=False,\n", " xytext=(xlim[0], y_max_coord),arrowprops=dict(arrowstyle= '->',color=color_obs_right0))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "def talalat_plot_violin(axes,n_prediction_ts_good,n_prediction_ts_bad,n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt):\n", " y_icon_obs=0.65\n", " y_icon_pred=0.5\n", " if color_obs_right==color_obs_right0: x=0.72\n", " else: x=0.47\n", " axes[2].text(0.72, y_icon_obs, obs_talalt_glyph, \n", " horizontalalignment='center', color=color_obs_right, fontproperties=prop2)\n", " axes[2].text(0.9, y_icon_obs,n_prediction_ts_good, \n", " horizontalalignment='center', color=color_obs_right, fontproperties=prop)\n", " axes[2].text(0.47, y_icon_obs, obs_nem_talalt_glyph, \n", " horizontalalignment='center', color=color_obs_left, fontproperties=prop2)\n", " axes[2].text(0.29, y_icon_obs, n_prediction_ts_bad, \n", " horizontalalignment='center', color=color_obs_left, fontproperties=prop)\n", " \n", " axes[2].text(0.72, y_icon_pred, pred_talalt_glyph,\n", " horizontalalignment='center', color=color_pred_talalt, fontproperties=prop2)\n", " axes[2].text(0.9, y_icon_pred, n_prediction_ts_good_talalt, \n", " horizontalalignment='center', color=color_pred_talalt, fontproperties=prop)\n", " axes[2].text(0.47, y_icon_pred, pred_nem_talalt_glyph, \n", " horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop2)\n", " axes[2].text(0.29, y_icon_pred, n_prediction_ts_good_nem_talalt,\n", " horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop)\n", " \n", " axes[2].annotate('', xy=(0.59, y_icon_pred*1.04),xycoords='data',\n", " xytext=(x, y_icon_obs*0.98),arrowprops=dict(arrowstyle= '->',color=color_obs_right0))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "def talalat_plot(axes,ns,observation_th,prediction_th):\n", " n_prediction_ts_good,n_prediction_ts_bad,n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt=ns\n", " talalat_plot_line(axes,n_prediction_ts_good,n_prediction_ts_bad,\n", " n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt,\n", " observation_th,prediction_th)\n", " talalat_plot_violin(axes,n_prediction_ts_good,n_prediction_ts_bad,\n", " n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "def year_plot(data,ax,k):\n", " y=data.values\n", " x=data.index\n", " ex=max(y)-min(y)\n", " text_off=abs(ex*k)\n", " text_align='left'\n", " if y[0]<0:\n", " text_off=-text_off\n", " text_align='right'\n", " ax.text(y[0]+text_off, x[0], str(int(x[0])), \n", " horizontalalignment=text_align, verticalalignment='center', \n", " color=color_ax, fontproperties=prop)\n", " text_off=abs(text_off)\n", " text_align='left'\n", " if y[-1]<0:\n", " text_off=-text_off\n", " text_align='right'\n", " ax.text(y[-1]+text_off, x[-1], str(int(x[-1])), \n", " horizontalalignment=text_align, verticalalignment='center', \n", " color=color_ax, fontproperties=prop)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "def spine_plot(datum,title,mondas,jelentes,kondicio,mennyiseg,\n", " observation_ts,observation_th,prediction_ts,prediction_th,c):\n", " \n", " #data\n", " prediction_ts_good=prediction_ts.loc[observation_ts[observation_ts>observation_th].index]\n", " prediction_ts_bad=prediction_ts.loc[observation_ts[observation_ts<=observation_th].index]\n", " n_prediction_ts_good=len(prediction_ts_good)\n", " n_prediction_ts_bad=len(prediction_ts_bad)\n", " \n", " if color_obs_right0!=color_obs_right:\n", " prediction_ts_good,prediction_ts_bad=prediction_ts_bad,prediction_ts_good\n", " prediction_ts_good_nem_talalt,prediction_ts_good_talalt,\\\n", " prediction_ts_good_joined=get_data(prediction_ts_good,prediction_th)\n", " n_prediction_ts_good_talalt=len(prediction_ts_good_talalt)\n", " n_prediction_ts_good_nem_talalt=len(prediction_ts_good_nem_talalt)\n", " ns=[n_prediction_ts_good,n_prediction_ts_bad,n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt]\n", " \n", " #plots\n", " fig, axes, axi_top, axi_bottom=setup_axes()\n", " \n", " stem2_plot(observation_ts,observation_th,axes[0],color_obs_left,color_obs_right,s/2,color_obs_right0)\n", " stem2_plot(prediction_ts_good,prediction_th,axes[1],color_pred_left,color_pred_right)\n", " stem_plot(prediction_ts_bad,axes[1],color_ax)\n", " \n", " violin_plot(observation_ts,observation_th,axi_top,color_obs_left,color_obs_right)\n", " violin_plot(prediction_ts_good,prediction_th,axi_bottom,color_pred_left,color_pred_right)\n", " \n", " #icons\n", " icons_plot(axes,kondicio,mennyiseg,observation_th,prediction_th)\n", " \n", " #talalat\n", " talalat_plot(axes,ns,observation_th,prediction_th)\n", " \n", " #years\n", " obs_year_index=observation_data[[obs_key,'pyear']].dropna().groupby('pyear').mean().index\n", " pred_year_index=prediction_data[[pred_key,'pyear']].dropna().groupby('pyear').mean().index\n", " pred_year_index_filt=prediction_ts.loc[pred_year_index].dropna().index\n", " obs_year_index_filt=observation_ts.loc[obs_year_index].dropna().index\n", " pred_year_index2=max(min(pred_year_index_filt),min(obs_year_index_filt))\n", " pred_year_index=range(pred_year_index2,max(pred_year_index_filt)+1)\n", " year_plot(observation_ts.loc[obs_year_index].dropna(),axes[0],0.09)\n", " year_plot(prediction_ts.loc[pred_year_index].dropna(),axes[1],0.03)\n", " \n", " #titles\n", " len_ratio=0.15*(-1+(len(jelentes.split(',')[0])/len(jelentes.split(',')[1])))\n", " fig.text(0.5+len_ratio,0.04,jelentes.split(',')[0]+',',color=color_obs_right0,\n", " fontproperties=prop,fontsize=font_size*0.7,horizontalalignment='right')\n", " if color_pred_talalt==color_pred_talalt0: color_pred_side=color_pred_right\n", " else: color_pred_side=color_pred_left\n", " fig.text(0.5+len_ratio,0.04,jelentes.split(',')[1],color=color_pred_side,\n", " fontproperties=prop,fontsize=font_size*0.7,horizontalalignment='left')\n", " if n_prediction_ts_good_nem_talalt>=n_prediction_ts_good_talalt:\n", " color_title=color_pred_nem_talalt\n", " else: \n", " color_title=color_pred_talalt\n", " verdict=int(100*n_prediction_ts_good_talalt/(n_prediction_ts_good_talalt+n_prediction_ts_good_nem_talalt))\n", " if color_pred_talalt!=color_pred_talalt0: verdict=100-verdict\n", " return_verdict=int(verdict)\n", " verdict=str(verdict)+'%'\n", " plt.suptitle(title,y=0.11,color=color_title,fontproperties=prop,fontsize=font_size)\n", " fig.text(0.97,0.04,verdict, fontproperties=prop,\n", " horizontalalignment='right', color=color_title, fontsize=font_size*2, )\n", " fig.text(0.03,0.04, datum, fontproperties=prop,\n", " horizontalalignment='left', color=color_obs_right0, fontsize=font_size*2, )\n", " \n", " plt.savefig(c+'/a1/'+str(mondas)+'.png',dpi=300, facecolor=color_bg)\n", " plt.show()\n", " return return_verdict" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "def filter_data(dz,observation_range,prediction_range):\n", " dgs=[]\n", " dhs=[]\n", " for year in range(int(dz.min()['year']),int(dz.max()['year'])):\n", " k=0\n", " from_date=pd.to_datetime(str(year)+'-'+str(observation_range[k].month)+'-'+str(observation_range[k].day))\n", " from_pred=pd.to_datetime(str(year)+'-'+str(prediction_range[k].month)+'-'+str(prediction_range[k].day))\n", " k=1\n", " to_date=pd.to_datetime(str(year)+'-'+str(observation_range[k].month)+'-'+str(observation_range[k].day))\n", " to_pred=pd.to_datetime(str(year)+'-'+str(prediction_range[k].month)+'-'+str(prediction_range[k].day))\n", " if to_pred' in sign)):\n", " if '<' in sign:\n", " positive=False\n", " elif '>' in sign:\n", " positive=True\n", " return positive" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "universal_normalize=['XTEMP','XVSB','XSPD']\n", "\n", "def get_ts_data(data,key,sign):\n", " ts=data.groupby('year').mean()[key]\n", " if (('-' in sign) or ('+' in sign)):\n", " th=ts.mean()\n", " else:\n", " th=float(sign[1:])\n", " if key in universal_normalize:\n", " th-=ts.mean() \n", " ts-=ts.mean()\n", " return ts,th" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "def get_comp_data(observation_data,obs_key,obs_sign,prediction_data,pred_key,pred_sign):\n", " ertek_sign=True\n", " irany_sign=True\n", " observation_ts=observation_data.groupby('year').mean()[obs_key]\n", " prediction_ts=prediction_data.groupby('year').mean()[pred_key]\n", " prediction_th=observation_ts.mean()\n", " observation_ts-=observation_ts.mean()\n", " observation_th=observation_ts.min()*1.01\n", " prediction_th-=prediction_ts.mean()\n", " prediction_ts-=prediction_ts.mean() \n", " if obs_sign=='A':\n", " if pred_sign=='A':\n", " observation_th=0\n", " prediction_th=0\n", " else:\n", " irany_sign=False\n", " return observation_ts,observation_th,prediction_ts,prediction_th,ertek_sign,irany_sign" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "mennyiseg_key={'Temp':'XTEMP','Snow Depth':'XSD','Wind':'XSPD','Rain':'YPCP','Visib':'XVSB',\n", " 'Snow':'YSNW','Hail':'YHAL'}" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "stations_to_include={'ro':[150040,151700,151450,152600,152470,150800,152300,150100,151200,152000],\n", " 'hu':[128820,128120,127720,128600,128390,128920,128430,128250,128220,128050,\n", " 129150,129420,129600,129700,129820,129920,129350,129100]}\n", "stations_to_include['huro']=stations_to_include['hu']+stations_to_include['ro']" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "def get_country(c,h='ds',plot=False):\n", " if c=='huro':\n", " hu=pd.read_csv('data/'+'hu'+'_'+h+'.csv') #daily data\n", " ro=pd.read_csv('data/'+'ro'+'_'+h+'.csv') #daily data\n", " df=pd.concat([hu,ro])\n", " else:\n", " df=pd.read_csv('data/'+c+'_'+h+'.csv') #daily data\n", " # df=pd.read_csv('data/'+c+'_hs.csv') #high_res data\n", " df=df[df['ID'].isin(stations_to_include[c])]\n", " df['time']=pd.to_datetime(df['time'])\n", " df['year']=df['time'].dt.year\n", " df['month']=df['time'].dt.month\n", " df['day']=df['time'].dt.day\n", " df['hour']=df['time'].dt.hour\n", " df=df.set_index('time')\n", " df=df.sort_index()\n", " if plot: df.groupby('year').nunique()['ID'].plot()\n", " nepi=pd.read_excel(c+'/idojaras_'+c+'.xlsx')\n", " return df,nepi" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "c='huro'\n", "df,nepi=get_country(c)\n", "dz=df.groupby(['time']).mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "color_pred_left=color_pred_left0\n", "color_pred_right=color_pred_right0\n", "mondasok=nepi['ID'].values\n", "# mondasok=[1,6]\n", "shares=[]\n", "for mondas in mondasok:\n", " nep=nepi.loc[mondas]\n", " if str(nep['Mennyiség'])!='nan':\n", " obs_key=mennyiseg_key[nep['Kondíció']]\n", " pred_key=mennyiseg_key[nep['Mennyiség']]\n", " observation_range=[nep['Dátum:mettől']+pd.to_timedelta('-1D'),nep['Dátum:meddig']+pd.to_timedelta('+2D')]\n", " prediction_range=[nep['Periódus:mettől'],nep['Periódus:meddig']+pd.to_timedelta('+1D')]\n", " observation_data,prediction_data=filter_data(dz,observation_range,prediction_range)\n", "\n", " #comparison\n", " if str(nep['Érték']) in ['A','B']:\n", " print('comp',mondas)\n", " observation_ts,observation_th,prediction_ts,prediction_th,ertek_sign,irany_sign=\\\n", " get_comp_data(observation_data,obs_key,nep['Érték'],\\\n", " prediction_data,pred_key,nep['Irány'])\n", " #time series\n", " else:\n", " print('ts',mondas)\n", " ertek_sign=get_sign(nep['Érték'],obs_key)\n", " irany_sign=get_sign(nep['Irány'],pred_key)\n", " observation_ts,observation_th=get_ts_data(observation_data,obs_key,nep['Érték'])\n", " prediction_ts,prediction_th=get_ts_data(prediction_data,pred_key,nep['Irány'])\n", "\n", " color_obs_right,color_obs_left,obs_talalt_glyph,obs_nem_talalt_glyph,\\\n", " color_pred_talalt,color_pred_nem_talalt,pred_talalt_glyph,pred_nem_talalt_glyph=\\\n", " set_direction(ertek_sign, irany_sign)\n", "\n", " #datum=str(nep['Dátums'])[:3]+'. '+str(nep['Dátum:mettől'].day)\n", " datum=nep['DS']\n", " shares.append(spine_plot(datum,nep['Mondás'].strip(),mondas,nep['Jelentés'].strip(),nep['Kondíció'],nep['Mennyiség'],\n", " observation_ts,observation_th,prediction_ts,prediction_th,c))\n", "print(np.mean(shares))" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "df.set_index('ID').loc[stations_to_include['huro']].groupby('ID').nunique()[['year']].join(stations.loc[stations_to_include['huro']]).to_csv('huro/huro_stations.csv')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "df.set_index('ID').loc[stations_to_include['huro']].groupby('ID').min()[['year']].to_csv('huro/huro_stations_years.csv')" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(stations_to_include['ro'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "White" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "def setup_axes2():\n", " fig,axes=plt.subplots(1,3,figsize=(8,5),gridspec_kw={'width_ratios': [1, 3, 1]})\n", " axi_top= axes[2].inset_axes([0.1, 0.65, 1, 0.3])\n", " axi_top.axis('off')\n", " axi_bottom= axes[2].inset_axes([0.1, 0, 1, 0.5])\n", " axi_bottom.axis('off')\n", " axes[0].axis('off')\n", " axes[1].axis('off')\n", " axes[2].axis('off')\n", " axes[0]=axes[0].inset_axes([0, 0.15, 1, 0.85])\n", " axes[1]=axes[1].inset_axes([0, 0.15, 1, 0.85])\n", " axes[0].axis('off')\n", " axes[1].axis('off')\n", " return fig, axes, axi_top, axi_bottom" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "def spine_plot2(datum,title,mondas,jelentes,kondicio,mennyiseg,\n", " observation_ts,observation_th,prediction_ts,prediction_th,c):\n", " \n", " #data\n", " prediction_ts_good=prediction_ts.loc[observation_ts[observation_ts>observation_th].index]\n", " prediction_ts_bad=prediction_ts.loc[observation_ts[observation_ts<=observation_th].index]\n", " n_prediction_ts_good=len(prediction_ts_good)\n", " n_prediction_ts_bad=len(prediction_ts_bad)\n", " \n", " if color_obs_right0!=color_obs_right:\n", " prediction_ts_good,prediction_ts_bad=prediction_ts_bad,prediction_ts_good\n", " prediction_ts_good_nem_talalt,prediction_ts_good_talalt,\\\n", " prediction_ts_good_joined=get_data(prediction_ts_good,prediction_th)\n", " n_prediction_ts_good_talalt=len(prediction_ts_good_talalt)\n", " n_prediction_ts_good_nem_talalt=len(prediction_ts_good_nem_talalt)\n", " ns=[n_prediction_ts_good,n_prediction_ts_bad,n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt]\n", " \n", " #plots\n", " fig, axes, axi_top, axi_bottom=setup_axes2()\n", " \n", " stem2_plot(observation_ts,observation_th,axes[0],color_obs_left,color_obs_right,s/2,color_obs_right0)\n", " stem2_plot(prediction_ts_good,prediction_th,axes[1],color_pred_left,color_pred_right)\n", " stem_plot(prediction_ts_bad,axes[1],color_ax)\n", " \n", " violin_plot(observation_ts,observation_th,axi_top,color_obs_left,color_obs_right)\n", " violin_plot(prediction_ts_good,prediction_th,axi_bottom,color_pred_left,color_pred_right)\n", " \n", " #icons\n", " icons_plot(axes,kondicio,mennyiseg,observation_th,prediction_th)\n", " \n", " #talalat\n", " talalat_plot(axes,ns,observation_th,prediction_th)\n", " \n", " #years\n", " obs_year_index=observation_data[[obs_key,'pyear']].dropna().groupby('pyear').mean().index\n", " pred_year_index=prediction_data[[pred_key,'pyear']].dropna().groupby('pyear').mean().index\n", " pred_year_index_filt=prediction_ts.loc[pred_year_index].dropna().index\n", " obs_year_index_filt=observation_ts.loc[obs_year_index].dropna().index\n", " pred_year_index2=max(min(pred_year_index_filt),min(obs_year_index_filt))\n", " pred_year_index=range(pred_year_index2,max(pred_year_index_filt)+1)\n", " year_plot(observation_ts.loc[obs_year_index].dropna(),axes[0],0.09)\n", " year_plot(prediction_ts.loc[pred_year_index].dropna(),axes[1],0.03)\n", " \n", " #titles\n", " len_ratio=0.15*(-1+(len(jelentes.split(',')[0])/len(jelentes.split(',')[1])))\n", " fig.text(0.5+len_ratio,0.04,jelentes.split(',')[0]+',',color=color_obs_right0,\n", " fontproperties=prop,fontsize=font_size*0.7,horizontalalignment='right')\n", " if color_pred_talalt==color_pred_talalt0: color_pred_side=color_pred_right\n", " else: color_pred_side=color_pred_left\n", " fig.text(0.5+len_ratio,0.04,jelentes.split(',')[1],color=color_pred_side,\n", " fontproperties=prop,fontsize=font_size*0.7,horizontalalignment='left')\n", " if n_prediction_ts_good_nem_talalt>=n_prediction_ts_good_talalt:\n", " color_title=color_pred_nem_talalt\n", " else: \n", " color_title=color_pred_talalt\n", " verdict=int(100*n_prediction_ts_good_talalt/(n_prediction_ts_good_talalt+n_prediction_ts_good_nem_talalt))\n", " if color_pred_talalt!=color_pred_talalt0: verdict=100-verdict\n", " return_verdict=int(verdict)\n", " verdict=str(verdict)+'%'\n", " plt.suptitle(title,y=0.11,color=color_title,fontproperties=prop,fontsize=font_size)\n", " fig.text(0.97,0.04,verdict, fontproperties=prop,\n", " horizontalalignment='right', color=color_title, fontsize=font_size*2, )\n", " fig.text(0.03,0.04, datum, fontproperties=prop,\n", " horizontalalignment='left', color=color_obs_right0, fontsize=font_size*2, )\n", " \n", " plt.savefig(c+'/a1/'+str(mondas)+'.png',dpi=300)#, facecolor=color_bg)\n", " plt.show()\n", " return return_verdict" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ts 1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:42: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "ts 6\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "79.0\n" ] } ], "source": [ "color_pred_left=color_pred_left0\n", "color_pred_right=color_pred_right0\n", "mondasok=nepi['ID'].values\n", "mondasok=[1,6]\n", "shares=[]\n", "for mondas in mondasok:\n", " nep=nepi.loc[mondas]\n", " if str(nep['Mennyiség'])!='nan':\n", " obs_key=mennyiseg_key[nep['Kondíció']]\n", " pred_key=mennyiseg_key[nep['Mennyiség']]\n", " observation_range=[nep['Dátum:mettől']+pd.to_timedelta('-1D'),nep['Dátum:meddig']+pd.to_timedelta('+2D')]\n", " prediction_range=[nep['Periódus:mettől'],nep['Periódus:meddig']+pd.to_timedelta('+1D')]\n", " observation_data,prediction_data=filter_data(dz,observation_range,prediction_range)\n", "\n", " #comparison\n", " if str(nep['Érték']) in ['A','B']:\n", " print('comp',mondas)\n", " observation_ts,observation_th,prediction_ts,prediction_th,ertek_sign,irany_sign=\\\n", " get_comp_data(observation_data,obs_key,nep['Érték'],\\\n", " prediction_data,pred_key,nep['Irány'])\n", " #time series\n", " else:\n", " print('ts',mondas)\n", " ertek_sign=get_sign(nep['Érték'],obs_key)\n", " irany_sign=get_sign(nep['Irány'],pred_key)\n", " observation_ts,observation_th=get_ts_data(observation_data,obs_key,nep['Érték'])\n", " prediction_ts,prediction_th=get_ts_data(prediction_data,pred_key,nep['Irány'])\n", "\n", " color_obs_right,color_obs_left,obs_talalt_glyph,obs_nem_talalt_glyph,\\\n", " color_pred_talalt,color_pred_nem_talalt,pred_talalt_glyph,pred_nem_talalt_glyph=\\\n", " set_direction(ertek_sign, irany_sign)\n", "\n", " #datum=str(nep['Dátums'])[:3]+'. '+str(nep['Dátum:mettől'].day)\n", " datum=nep['DS']\n", " shares.append(spine_plot2(datum,nep['Mondás'].strip(),mondas,nep['Jelentés'].strip(),nep['Kondíció'],nep['Mennyiség'],\n", " observation_ts,observation_th,prediction_ts,prediction_th,c))\n", "print(np.mean(shares))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }