{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# File-name prefixes of the raw downloads stored under high_res/raw/ro/ and\n",
    "# high_res/raw/hu/ (each prefix has a '<id>stn+.txt' and a '<id>dat.txt' file).\n",
    "ro=['5765358043206','9812808043220','9576658043223','1699958043227','7225068043228','265208043229']\n",
    "hu=['8073718043234','2087988043232','6247548043235']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root folder of the data. Set the KLIMA_DATA_DIR environment variable to run the\n",
    "# notebook on another machine; the default is the original local folder.\n",
    "p=os.environ.get('KLIMA_DATA_DIR','C:/Users/csala/Onedrive - Lancaster University/Datarepo/szekelydata/klima/')\n",
    "# Paths below are built by plain concatenation (p+'...'), so p must end with a separator.\n",
    "if not p.endswith(('/',os.sep)):\n",
    "    p+='/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5765358043206\n",
      "9812808043220\n",
      "9576658043223\n",
      "1699958043227\n",
      "7225068043228\n",
      "265208043229\n"
     ]
    }
   ],
   "source": [
    "def read_station_lists(country, ids):\n",
    "    \"\"\"Read the station-list file of every download in `ids` for one country folder.\n",
    "\n",
    "    Each file is p+'high_res/raw/<country>/<id>stn+.txt'. It is parsed as\n",
    "    '+'-delimited text with the first two lines skipped and no header row.\n",
    "    Prints each id once its file is loaded and returns one DataFrame per id,\n",
    "    in the order given.\n",
    "    \"\"\"\n",
    "    frames=[]\n",
    "    for i in ids:\n",
    "        frames.append(pd.read_csv(p+'high_res/raw/'+country+'/'+i+'stn+.txt',delimiter='+',skiprows=2,header=None))\n",
    "        print(i)  # progress: one line per file loaded\n",
    "    return frames\n",
    "\n",
    "stations=read_station_lists('ro',ro)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8073718043234\n",
      "2087988043232\n",
      "6247548043235\n"
     ]
    }
   ],
   "source": [
    "# Append the 'hu' station lists to the 'ro' ones loaded in the previous cell.\n",
    "stations.extend(read_station_lists('hu',hu))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the per-download station lists and drop rows that are exact duplicates.\n",
    "station=pd.concat(stations).drop_duplicates()\n",
    "# Trim surrounding whitespace in the two text columns that become LOC and COUNTRY.\n",
    "for col in (2,3):\n",
    "    station[col]=station[col].str.strip()\n",
    "# Keep six of the positional columns and give them readable names.\n",
    "station=station[[0,2,3,4,5,6]]\n",
    "station.columns=['ID','LOC','COUNTRY','LAT','LON','ELEVATION']\n",
    "station.to_csv(p+'stations.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**!!! 
16G memory required at least, 64G recommended**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "RO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (4,21,22,23,25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      " interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5765358043206\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      " interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9812808043220\n",
      "9576658043223\n",
      "1699958043227\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (4,6,11,12,21,22,23,24,25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      " interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7225068043228\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3058: DtypeWarning: Columns (4,21,22,23,24,25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      " interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "265208043229\n"
     ]
    }
   ],
   "source": [
    "# Load the whitespace-delimited data file of every 'ro' download, one DataFrame per file.\n",
    "# NOTE(review): the stored output of this cell shows pandas DtypeWarning (mixed types)\n",
    "# for several files; passing dtype=... or low_memory=False to read_csv would give\n",
    "# consistent column types -- confirm against the cells that consume dfz before changing.\n",
    "dfs=[]\n",
    "for i in ro:\n",
    "    # Raw string, so the regex backslash is not treated as a string escape.\n",
    "    df=pd.read_csv(p+'high_res/raw/ro/'+i+'dat.txt',delimiter=r'\\s+')\n",
    "    dfs.append(df)\n",
    "    print(i)  # progress: one line per file loaded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the per-file frames into one table (each file's own row index is kept).\n",
    "dfz=pd.concat(dfs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the references to the per-file frames so only the concatenated dfz stays alive.\n",
    "dfs=None #free memory\n",
    "df=None #free memory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | USAF | \n", "WBAN | \n", "YR--MODAHRMN | \n", "DIR | \n", "SPD | \n", "GUS | \n", "CLG | \n", "SKC | \n", "L | \n", "M | \n", "... | \n", "SLP | \n", "ALT | \n", "STP | \n", "MAX | \n", "MIN | \n", "PCP01 | \n", "PCP06 | \n", "PCP24 | \n", "PCPXX | \n", "SD | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "119000 | \n", "99999 | \n", "201205010000 | \n", "040 | \n", "2 | \n", "*** | \n", "*** | \n", "*** | \n", "* | \n", "* | \n", "... | \n", "1017.6 | \n", "***** | \n", "984.8 | \n", "*** | \n", "*** | \n", "***** | \n", "0.00 | \n", "***** | \n", "***** | \n", "** | \n", "
1 | \n", "119000 | \n", "99999 | \n", "201205010100 | \n", "040 | \n", "2 | \n", "*** | \n", "*** | \n", "*** | \n", "* | \n", "* | \n", "... | \n", "1017.8 | \n", "***** | \n", "984.9 | \n", "*** | \n", "*** | \n", "***** | \n", "***** | \n", "***** | \n", "***** | \n", "** | \n", "
2 | \n", "119000 | \n", "99999 | \n", "201205010200 | \n", "*** | \n", "0 | \n", "*** | \n", "*** | \n", "*** | \n", "* | \n", "* | \n", "... | \n", "1018.1 | \n", "***** | \n", "985 | \n", "*** | \n", "*** | \n", "***** | \n", "***** | \n", "***** | \n", "***** | \n", "** | \n", "
3 | \n", "119000 | \n", "99999 | \n", "201205010300 | \n", "030 | \n", "2 | \n", "*** | \n", "*** | \n", "*** | \n", "* | \n", "* | \n", "... | \n", "1018.3 | \n", "***** | \n", "985.2 | \n", "*** | \n", "*** | \n", "***** | \n", "***** | \n", "***** | \n", "***** | \n", "** | \n", "
4 | \n", "119000 | \n", "99999 | \n", "201205010400 | \n", "*** | \n", "0 | \n", "*** | \n", "*** | \n", "*** | \n", "* | \n", "* | \n", "... | \n", "1018.8 | \n", "***** | \n", "985.6 | \n", "*** | \n", "*** | \n", "***** | \n", "***** | \n", "***** | \n", "***** | \n", "** | \n", "
5 rows × 33 columns
\n", "\n", " | \n", " | month | \n", "
---|---|---|
USAF | \n", "year | \n", "\n", " |
119000 | \n", "2012 | \n", "8.0 | \n", "
2013 | \n", "12.0 | \n", "|
2014 | \n", "12.0 | \n", "|
2015 | \n", "12.0 | \n", "|
2016 | \n", "4.0 | \n", "