{ "cells": [
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "import os\n",
  "\n",
  "import pandas as pd, numpy as np\n",
  "import matplotlib.pyplot as plt\n",
  "%matplotlib inline" ] },
 { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [
  "# Names (without the '.txt' extension) of the raw daily files to load,\n",
  "# one list per sub-folder of daily/raw/: 'ro' and 'hu'.\n",
  "ro=['CDO7252998062998','CDO5064618063001','CDO3042698063020','CDO7893378063026','CDO4604228063028','CDO7821968063031',\n",
  " 'CDO5072238063046','CDO4981038063054','CDO4725178063056','CDO5209078063060','CDO699718063062','CDO4894288063064',\n",
  " 'CDO1632508063066','CDO8765068063068','CDO9993348063070']\n",
  "hu=['CDO5941998062972','CDO5285728062974','CDO3021588062978','CDO9675788062981']" ] },
 { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [
  "# Root folder of the data set. Every path in this notebook is built as p+'<relative path>',\n",
  "# so p must end with a path separator; os.path.join(..., '') appends one only if it is missing.\n",
  "# The original location stays the default; set the KLIMA_DATA_DIR environment variable to\n",
  "# run the notebook against a copy of the data stored elsewhere.\n",
  "p=os.path.join(os.environ.get('KLIMA_DATA_DIR','C:/Users/csala/Onedrive - Lancaster University/Datarepo/szekelydata/klima/'),'')" ] },
 { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [
  "stations=pd.read_csv(p+'stations.csv')" ] },
 { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CDO5941998062972\n", "CDO5285728062974\n", "CDO3021588062978\n", "CDO9675788062981\n" ] } ], "source": [
  "def read_daily_files(folder, file_ids):\n",
  "    \"\"\"Read the raw daily files p+'daily/raw/<folder>/<id>.txt' into DataFrames.\n",
  "\n",
  "    folder   -- sub-folder of daily/raw/ that holds the files ('hu' or 'ro')\n",
  "    file_ids -- file names without the '.txt' extension\n",
  "\n",
  "    Returns a list with one DataFrame per id, in the order given. Each id is\n",
  "    printed once its file has been read, as a progress indicator.\n",
  "    \"\"\"\n",
  "    frames=[]\n",
  "    for i in file_ids:\n",
  "        # ' FRSHTT' and ' YEARMODA' (the headers carry a leading space) are read as text,\n",
  "        # so values such as '000000' keep their leading zeros.\n",
  "        frames.append(pd.read_csv(p+'daily/raw/'+folder+'/'+i+'.txt',dtype={' FRSHTT':str,' YEARMODA':str}))\n",
  "        print(i)\n",
  "    return frames\n",
  "\n",
  "# Each folder gets its own freshly built list, so re-running this cell never\n",
  "# appends duplicate frames to shared state.\n",
  "hu_frames=read_daily_files('hu',hu)" ] },
 { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CDO7252998062998\n", "CDO5064618063001\n", "CDO3042698063020\n", "CDO7893378063026\n", "CDO4604228063028\n", "CDO7821968063031\n", "CDO5072238063046\n", "CDO4981038063054\n", "CDO4725178063056\n", "CDO5209078063060\n", "CDO699718063062\n", "CDO4894288063064\n", "CDO1632508063066\n", "CDO8765068063068\n", "CDO9993348063070\n" ] } ], "source": [
  "ro_frames=read_daily_files('ro',ro)" ] },
 { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [
  "# Stack all frames, 'hu' files first and 'ro' files after them (the order they were read in).\n",
  "# The index is not reset, so every row keeps the row number it had in its source file.\n",
  "# dfs is only ever bound to the combined DataFrame, which makes this cell safe to re-run.\n",
  "dfs=pd.concat(hu_frames+ro_frames)" ] },
 { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [
  "# ' YEARMODA' values that do not fit the %Y%m%d format used below, mapped to a parseable value:\n",
  "# '199710' has no day part and is replaced by the first day of that month.\n",
  "year_fixer={'199710':'19971001'}" ] },
 { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [
  "# Strip surrounding whitespace from the date strings, apply the replacements above,\n",
  "# and parse the result into a datetime column named 'time'.\n",
  "dfs['time']=pd.to_datetime(dfs[' YEARMODA'].str.strip().replace(year_fixer),format='%Y%m%d')" ] },
 { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STN---WBANYEARMODATEMPDEWP.1SLP.2STP....5MXSPDGUSTMAXMINPRCPSNDPFRSHTTUnnamed: 22time
0127560999992015010124.26.016.96.01036.16.01016.1...5.07.8999.927.320.8*0.00I999.9000000NaN2015-01-01
1127560999992015010225.68.023.98.01030.78.01010.9...6.07.8999.931.8*21.20.02E999.9000000NaN2015-01-02
2127560999992015010334.48.030.08.01023.98.01004.6...8.015.5999.940.528.00.00I999.9000000NaN2015-01-03
3127560999992015010433.67.030.47.01016.67.0997.3...7.013.6999.940.828.4*0.04A999.9000000NaN2015-01-04
4127560999992015010527.212.024.012.01022.612.01003.0...10.011.7999.937.418.3*0.00G999.9000000NaN2015-01-05
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " STN--- WBAN YEARMODA TEMP DEWP .1 SLP .2 \\\n", "0 127560 99999 20150101 24.2 6.0 16.9 6.0 1036.1 6.0 \n", "1 127560 99999 20150102 25.6 8.0 23.9 8.0 1030.7 8.0 \n", "2 127560 99999 20150103 34.4 8.0 30.0 8.0 1023.9 8.0 \n", "3 127560 99999 20150104 33.6 7.0 30.4 7.0 1016.6 7.0 \n", "4 127560 99999 20150105 27.2 12.0 24.0 12.0 1022.6 12.0 \n", "\n", " STP ... .5 MXSPD GUST MAX MIN PRCP SNDP \\\n", "0 1016.1 ... 5.0 7.8 999.9 27.3 20.8* 0.00I 999.9 \n", "1 1010.9 ... 6.0 7.8 999.9 31.8* 21.2 0.02E 999.9 \n", "2 1004.6 ... 8.0 15.5 999.9 40.5 28.0 0.00I 999.9 \n", "3 997.3 ... 7.0 13.6 999.9 40.8 28.4* 0.04A 999.9 \n", "4 1003.0 ... 10.0 11.7 999.9 37.4 18.3* 0.00G 999.9 \n", "\n", " FRSHTT Unnamed: 22 time \n", "0 000000 NaN 2015-01-01 \n", "1 000000 NaN 2015-01-02 \n", "2 000000 NaN 2015-01-03 \n", "3 000000 NaN 2015-01-04 \n", "4 000000 NaN 2015-01-05 \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfs.head()" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stn=151700\n", "d=dfs[dfs['STN---']==stn]\n", "d.set_index('time')[' TEMP'].plot()" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "127560\n", "127660\n", "127720\n", "127860\n", "128050\n", "128120\n", "128150\n", "128220\n", "128250\n", "128300\n", "128360\n", "128390\n", "128430\n", "128460\n", "128470\n", "128510\n", "128600\n", "128605\n", "128660\n", "128700\n", "128820\n", "128920\n", "129100\n", "129150\n", "129200\n", "129220\n", "129250\n", "129300\n", "129350\n", "129420\n", "129500\n", "129600\n", "129700\n", "129820\n", "129920\n", "128603\n", "129320\n", "697204\n", "128305\n", "128380\n", "128400\n", "128601\n", "129400\n", "119000\n", "150001\n", "150010\n", "150850\n", "150940\n", "151240\n", "151500\n", "151630\n", "151970\n", "152000\n", "152005\n", "152080\n", "152090\n", "152150\n", "152190\n", "152210\n", "152790\n", "152850\n", "152890\n", "153410\n", "153660\n", "153890\n", "154120\n", "154200\n", "154650\n", "154790\n", "154890\n", "154940\n", "150105\n", "150000\n", "150002\n", "150040\n", "150070\n", "150100\n", "150140\n", "150150\n", "150200\n", "150230\n", "150250\n", "150330\n", "150420\n", "150440\n", "150470\n", "150560\n", "150630\n", "150690\n", "150730\n", "150800\n", "150830\n", "150880\n", "150900\n", "150950\n", "150990\n", "151070\n", "151080\n", "151090\n", "151110\n", "151130\n", "151170\n", "151180\n", "151190\n", "151200\n", "151230\n", "151270\n", "151360\n", "151380\n", "151430\n", "151450\n", "151480\n", "151540\n", "151600\n", "151620\n", "151650\n", "151680\n", "151700\n", "151790\n", "151820\n", "151840\n", "151890\n", "151940\n", "151990\n", "152040\n", "152060\n", "152170\n", "152300\n", "152310\n", "152350\n", "152380\n", "152450\n", "152470\n", "152540\n", "152600\n", "152610\n", "152620\n", "152640\n", "152650\n", "152700\n", 
"152770\n", "152800\n", "152840\n", "152920\n", "152960\n", "152970\n", "153000\n", "153010\n", "153020\n", "153070\n", "153100\n", "153140\n", "153150\n", "153160\n", "153170\n", "153190\n", "153200\n", "153240\n", "153250\n", "153280\n", "153330\n", "153350\n", "153360\n", "153370\n", "153380\n", "153400\n", "153440\n", "153450\n", "153460\n", "153470\n", "153490\n", "153500\n", "153600\n", "153640\n", "153690\n", "153730\n", "153750\n", "153770\n", "153870\n", "153880\n", "153950\n", "154020\n", "154050\n", "154060\n", "154080\n", "154090\n", "154100\n", "154160\n", "154190\n", "154210\n", "154215\n", "154220\n", "154240\n", "154250\n", "154280\n", "154340\n", "154440\n", "154450\n", "154500\n", "154550\n", "154600\n", "154620\n", "154690\n", "154700\n", "154750\n", "154770\n", "154800\n", "154810\n", "154820\n", "154900\n", "154910\n", "154931\n", "154980\n", "154990\n", "150090\n", "150400\n", "150410\n", "150520\n", "151400\n", "152120\n", "152410\n", "152730\n", "152820\n", "152980\n", "153550\n", "154230\n", "154290\n", "150550\n", "150890\n", "151320\n", "151340\n", "151580\n", "151740\n", "150750\n", "152590\n", "152870\n", "152990\n", "153210\n", "153560\n", "154430\n", "150320\n", "151590\n", "152670\n", "153630\n", "154760\n", "150235\n", "151205\n", "151455\n", "153355\n", "154930\n", "154470\n", "154580\n", "154510\n" ] } ], "source": [ "for stn in dfs['STN---'].unique():\n", " d=dfs[dfs['STN---']==stn]\n", " d.to_csv(p+'daily/export/'+str(stn)+'.csv')\n", " print(stn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Determine most frequent" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'127720',\n", "'128050',\n", "'128120',\n", "'128220',\n", "'128250',\n", "'128300',\n", "'128390',\n", "'128430',\n", "'128510',\n", "'128600',\n", "'128820',\n", "'128920',\n", "'129100',\n", "'129150',\n", "'150040',\n", "'150100',\n", "'150140',\n", 
"'150150',\n", "'150200',\n", "'150230',\n", "'150800',\n", "'150850',\n", "'150900',\n", "'151080',\n", "'151200',\n", "'151450',\n", "'151500',\n", "'151700',\n", "'151970',\n", "'152000',\n", "'152300',\n", "'152350',\n", "'152470',\n", "'152600',\n", "'152800',\n", "'152920',\n", "'153100',\n", "'153350',\n", "'153460',\n", "'153500',\n", "'153600',\n", "'154100',\n", "'154200',\n", "'154210',\n", "'154500',\n", "'154600',\n", "'154700',\n", "'154800',\n", "'154810',\n", "'154990',\n" ] } ], "source": [ "for i in np.sort(dfs.groupby('STN---').count()['time'].sort_values(ascending=False).head(50).index):\n", " print(\"'\"+str(i)+\"',\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }