{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "import pandas as pd, numpy as np\n",
    "from scipy import stats\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "stations=pd.read_csv('data/stations.csv').set_index('ID')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Helper for loading the per-country observation files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_country(c,h='hs',plot=False):\n",
    "    \"\"\"Load the observations of one country, indexed and sorted by time.\n",
    "\n",
    "    c    -- country code used in the file name; 'huro' reads both the\n",
    "            'hu' and the 'ro' file and concatenates them.\n",
    "    h    -- second part of the file name: data/<c>_<h>.csv is read.\n",
    "    plot -- if True, plot the number of distinct station IDs per year.\n",
    "\n",
    "    Returns the frame indexed by the parsed 'time' column and sorted by it,\n",
    "    with 'year', 'month', 'day' and 'hour' columns added.\n",
    "    \"\"\"\n",
    "    codes=['hu','ro'] if c=='huro' else [c]\n",
    "    df=pd.concat([pd.read_csv('data/'+code+'_'+h+'.csv') for code in codes])\n",
    "    df['time']=pd.to_datetime(df['time'])\n",
    "    df['year']=df['time'].dt.year\n",
    "    df['month']=df['time'].dt.month\n",
    "    df['day']=df['time'].dt.day\n",
    "    df['hour']=df['time'].dt.hour\n",
    "    df=df.set_index('time').sort_index()\n",
    "    # Count distinct IDs on the ID column only, not on every column.\n",
    "    if plot: df.groupby('year')['ID'].nunique().plot()\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Re-run this section for `ro`, `hu` and `huro`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 380,
   "metadata": {},
   "outputs": [],
   "source": [
    "c='huro'\n",
    "if c=='huro':\n",
    "    # Load both countries here rather than relying on df_ro/df_hu left\n",
    "    # over from earlier manual runs of this cell (fails on a fresh kernel).\n",
    "    df_ro=get_country('ro')\n",
    "    df_hu=get_country('hu')\n",
    "    df=pd.concat([df_ro,df_hu])\n",
    "else:\n",
    "    df=get_country(c,plot=True)\n",
    "    if c=='ro': df_ro=df.copy()\n",
    "    elif c=='hu': df_hu=df.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 382,
   "metadata": {},
   "outputs": [],
   "source": [
    "def haversine(coord1, coord2):\n",
    "    \"\"\"Great-circle distance in kilometres between two (lat, lon) pairs given in degrees.\"\"\"\n",
    "    R = 6372800  # Earth radius in meters\n",
    "    lat1, lon1 = coord1\n",
    "    lat2, lon2 = coord2\n",
    "\n",
    "    phi1, phi2 = math.radians(lat1), math.radians(lat2)\n",
    "    dphi = math.radians(lat2 - lat1)\n",
    "    dlambda = math.radians(lon2 - lon1)\n",
    "\n",
    "    a = (math.sin(dphi/2)**2\n",
    "         + math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2)**2)\n",
    "    # Rounding can push a marginally outside [0, 1] for (near-)antipodal\n",
    "    # points, which would make sqrt(1 - a) raise ValueError.\n",
    "    a = min(1.0, max(0.0, a))\n",
    "\n",
    "    # R is in meters, so divide by 1000 to return kilometres.\n",
    "    return 2*R*math.atan2(math.sqrt(a), math.sqrt(1 - a))/1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_close_stations(ids,stations,max_km=10):\n",
    "    \"\"\"Print station pairs closer than max_km and return an ID remapping.\n",
    "\n",
    "    ids      -- sequence of station IDs that is compared pairwise.\n",
    "    stations -- frame indexed by station ID with LAT, LON and LOC columns.\n",
    "    max_km   -- pairs whose haversine distance is strictly below this are reported.\n",
    "\n",
    "    Returns a dict that maps the later ID of each close pair to the earlier\n",
    "    one, or to whatever the earlier one is itself already mapped to.\n",
    "    \"\"\"\n",
    "    # Look every station up once instead of once per pair.\n",
    "    coords={s:stations.loc[s][['LAT','LON']].values for s in ids}\n",
    "    names={s:stations.loc[s]['LOC'] for s in ids}\n",
    "    coerce={}\n",
    "    for i1,station1 in enumerate(ids):\n",
    "        for station2 in ids[i1+1:]:\n",
    "            if station1==station2: continue\n",
    "            if haversine(coords[station1],coords[station2])<max_km:\n",
    "                print(station1,station2,names[station1],names[station2])\n",
    "                # Keep the first mapping found for station2.\n",
    "                if station2 not in coerce:\n",
    "                    coerce[station2]=coerce.get(station1,station1)\n",
    "    return coerce"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 383,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "119000 151630 BAISOARA BAISOARA\n",
      "152000 152005 ARAD ARAD\n",
      "154200 154210 AUREL VLAICU HENRI COANDA\n",
      "154200 154220 AUREL VLAICU BUCURESTI FILARET\n",
      "150230 150235 STEFAN CEL MARE SUCEAVA/SALCEA\n",
      "150320 150330 RODNEI MOUNTAIN RNG IEZER\n",
      "150400 150520 CIMPULUNG MOLDOVENE RARAU\n",
      "150890 151080 CEAHLAU CEAHLAU TOACA\n",
      "151200 151205 CLUJ NAPOCA CLUJ-NAPOCA/SOMESEN\n",
      "151400 151600 STEIU STEI\n",
      "151620 151840 CAMPENI ROSIA MONTANA\n",
      "152590 152820 POSTAVARU POIANA BRASOV\n",
      "152590 153020 POSTAVARU PREDEAL\n",
      "152960 153200 PETROSANI PARANG\n",
      "152990 153010 RUCAR FUNDATA\n",
      "153160 153170 CUNTU TARCU\n",
      "153200 153210 PARANG PARING (AUT)\n",
      "153350 153355 CATALOI TULCEA/CATALOI\n",
      "154230 154430 DILGA DOR MARUNT\n",
      "154500 154510 CRAIOVA CRAIOVA\n",
      "154930 154931 TUZLA TUZLA\n",
      "128300 697204 VESZPREM/SZENTKIRALYSZABADJA VARPALOTA TRAINING AREA / EXER\n",
      "128305 128440 TOKOL TOKOL\n",
      "128380 128400 BUDAORS BUDAPEST MET CENTER\n",
      "128390 128430 FERIHEGY BUDAPEST/PESTSZENTLORINC\n",
      "128601 128603 AZENTKILYSZABADJA SZENTKIRALYSZABADJA\n",
      "128601 697204 AZENTKILYSZABADJA VARPALOTA TRAINING AREA / EXER\n",
      "128603 697204 SZENTKIRALYSZABADJA VARPALOTA TRAINING AREA / EXER\n",
      "128605 129200 BALATON KESZTHELY\n",
      "128605 129220 BALATON SARMELLEK\n",
      "129200 129220 KESZTHELY SARMELLEK\n",
      "129255 129300 TASZAR KAPOSVAR\n",
      "129255 129320 TASZAR TASZAR\n",
      "129300 129320 KAPOSVAR TASZAR\n",
      "129400 129410 PECS PECS/ARPADTETO\n",
      "129400 129420 PECS PECS SOUTH\n"
     ]
    }
   ],
   "source": [
    "coerce=find_close_stations(stations.index,stations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 384,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['ID']=df['ID'].replace(coerce)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 385,
   "metadata": {},
   "outputs": [],
   "source": [
    "station_list=df['ID'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 386,
   "metadata": {},
   "outputs": [],
   "source": [
    "coerce=find_close_stations(station_list,stations)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If nothing is printed above, continue; otherwise re-run the previous three cells until no pairs are reported."
] },
  {
   "cell_type": "code",
   "execution_count": 398,
   "metadata": {},
   "outputs": [],
   "source": [
    "keys=['XTEMP','XSPD','XVSB']\n",
    "# Select the value columns before aggregating: only these are needed, and\n",
    "# median() over any non-numeric column raises on pandas >= 2.0.\n",
    "#median per station, month, hour and year\n",
    "dz=df.groupby(['ID','month','hour','year'])[keys].median()\n",
    "#universal baseline: median over all years\n",
    "dzm=dz.groupby(['ID','month','hour']).median()\n",
    "#or recent baseline; NOTE: range(2009,2020) covers 2009-2019, i.e. 11 years\n",
    "dzm10=df[df['year'].isin(range(2009,2020))].groupby(['ID','month','hour'])[keys].median()\n",
    "#anomalies against each baseline\n",
    "dzn=dz-dzm\n",
    "dzn10=dz-dzm10\n",
    "#one value per station and year\n",
    "dzp=dzn.groupby(['ID','year']).median()\n",
    "dzp10=dzn10.groupby(['ID','year']).median()\n",
    "#get outliers: keep the years whose XTEMP z-score within the station is below 3\n",
    "dws=[]\n",
    "for station in dzp.index.get_level_values(0).unique():\n",
    "    # Work on a copy so the new column is not written into a slice of dzp.\n",
    "    dw=dzp.loc[station][['XTEMP']].copy()\n",
    "    # A NaN in XTEMP makes every z-score of the station NaN, so NaN<3 drops all its rows.\n",
    "    dw['z']=np.abs(stats.zscore(dw['XTEMP'].values))\n",
    "    dws.append(dw[dw['z']<3].assign(ID=station))\n",
    "dws=pd.concat(dws)\n",
    "#slice under new index\n",
    "kept=dws.reset_index().set_index(['ID','year']).index\n",
    "dzp=dzp.loc[kept]\n",
    "dzp10=dzp10.loc[kept]\n",
    "#export: rescale every station to [-1, 1]\n",
    "dzp_min=dzp.groupby('ID').min()\n",
    "dzp_max=dzp.groupby('ID').max()\n",
    "dzr=(((dzp-dzp_min)/(dzp_max-dzp_min))*2)-1\n",
    "dzr.columns=[i.replace('X','N') for i in dzr.columns]\n",
    "dzp10.columns=[i+'10' for i in dzp10.columns]\n",
    "dzq=dzp.join(dzp10).join(dzr).join(stations[['LOC','LAT','LON','ELEVATION']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 399,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Neither of these depends on the threshold, so compute them once.\n",
    "years_per_station=dzp.groupby(['ID']).count()['XTEMP']\n",
    "dzq_by_id=dzq.reset_index().set_index('ID')\n",
    "for i in [10,20,30,40,50,60]:\n",
    "    # Keep the stations with more than i years of XTEMP values.\n",
    "    dk=dzq_by_id.loc[years_per_station>i]\n",
    "    dk.to_csv('stripes/'+c+'_'+str(i)+'.csv')\n",
    "    if i==40:\n",
    "        # The 40-year selection is also written as the default file and\n",
    "        # defines the stations whose names are handled below.\n",
    "        dk.to_csv('stripes/'+c+'.csv')\n",
    "        translated_stations=dk.index.unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Stations - only for `huro`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 370,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 371,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use a context manager so the file handle is closed again.\n",
    "with open('data/namer.json','r',encoding='utf-8') as f:\n",
    "    namer=json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 393,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\"BALATON\":\"Balaton\",\n",
      "\"PECS\":\"Pecs\",\n",
      "\"SIGHETUL MARMATIEI\":\"Sighetul marmatiei\",\n",
      "\"CEAHLAU\":\"Ceahlau\",\n",
      "\"ARAD\":\"Arad\",\n",
      "\"POSTAVARU\":\"Postavaru\",\n",
      "\"CRAIOVA\":\"Craiova\",\n",
      "\"TURNU-MAGURELE\":\"Turnu-magurele\",\n"
     ]
    }
   ],
   "source": [
    "# Print a ready-to-paste dict entry for every exported station name that\n",
    "# has no entry in namer yet.\n",
    "for loc_name in stations.loc[translated_stations]['LOC'].values:\n",
    "    if loc_name not in namer:\n",
    "        print('\"'+loc_name+'\":\"'+loc_name.capitalize()+'\",')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 394,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand-edited display names for the entries printed above.\n",
    "namer2={\n",
    "\"BAISOARA\":\"Băișoara\",\n",
    "\"AVRAMENI\":\"Avrameni\",\n",
    "\"BAIA MARE/MAGHERUSI\":\"Nagybánya - Miszmogyorós\",\n",
    "\"BISTRITA\":\"Beszterce\",\n",
    "\"BARNOVA\":\"Iași / Bârnova\",\n",
    "\"BATOS\":\"Bátos\",\n",
    "\"BARLAD\":\"Bârlad\",\n",
    "\"ARAD\":\"Arad\",\n",
    "\"ALBA IULIA\":\"Gyulafehérvár\",\n",
    "\"BLAJ\":\"Balázsfalva\",\n",
    "\"BARAOLT\":\"Barót\",\n",
    "\"ADJUD\":\"Adjud\",\n",
    "\"BALINTESTI\":\"Bălintești\",\n",
    "\"BALEA LAC\":\"Bâlea-tó\",\n",
    "\"BISOCA\":\"Bisoca\",\n",
    "\"BANLOC\":\"Bánlak\",\n",
    "\"APA NEAGRA\":\"Apa Neagră\",\n",
    "\"BAILE HERCULANE\":\"Herkulesfürdő\",\n",
    "\"BERZASCA\":\"Berzasca\",\n",
    "\"BACLES\":\"Bâcleș\",\n",
    "\"BAILESTI\":\"Băilești\",\n",
    "\"ADAMCLISI\":\"Adamclisi\",\n",
    "\"ALEXANDRIA\":\"Alexandria\",\n",
    "\"BECHET\":\"Bechet\",\n",
    "\"DARABANI\":\"Darabani\",\n",
    "\"BORCEA FETESTI AIR BASE\":\"Fetești légibázis\",\n",
    "\"SIGHETUL MARMATIEI\":\"Máramarossziget\",\n",
    "\"RADAUTI\":\"Rădăuți\",\n",
    "\"DOROHOI\":\"Dorohoi\",\n",
    "\"SUCEAVA/SALCEA\":\"Suceava\",\n",
    "\"BALATON\":\"Balaton\",\n",
    "\"PECS\":\"Pécs\",\n",
    "\"CEAHLAU\":\"Csalhó\",\n",
    "\"POSTAVARU\":\"Postăvaru-csúcs\",\n",
    "\"CRAIOVA\":\"Craiova\",\n",
    "\"TURNU-MAGURELE\":\"Turnu Măgurele\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 395,
   "metadata": {},
   "outputs": [],
   "source": [
    "namer.update(namer2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 397,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3256"
      ]
     },
     "execution_count": 397,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Close the file deterministically; the cell still shows the number of\n",
    "# characters written.\n",
    "with open('data/namer.json','w',encoding='utf-8') as f:\n",
    "    n_written=f.write(json.dumps(namer))\n",
    "n_written"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Carpet plots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 404,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | \n", " | XTEMP | \n", "
---|---|---|
year | \n", "month | \n", "\n", " |
1964 | \n", "1 | \n", "-4.444444 | \n", "
2 | \n", "-2.916667 | \n", "|
3 | \n", "-4.166667 | \n", "|
4 | \n", "-2.777778 | \n", "|
5 | \n", "-3.333333 | \n", "|
... | \n", "... | \n", "... | \n", "
2019 | \n", "6 | \n", "2.361111 | \n", "
7 | \n", "0.000000 | \n", "|
8 | \n", "-0.416667 | \n", "|
9 | \n", "0.000000 | \n", "|
10 | \n", "7.222222 | \n", "
457 rows × 1 columns
\n", "