{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#FULL DATA MINE AND CLEAN UP\n", "\n", "#def getWeather():\n", "import requests # library for html requests\n", "from datetime import date # library for python date formats? *check*\n", "\n", "#get weather\n", "#set up daily link\n", "weather_fcast = \"http://www.metoffice.gov.uk/public/weather/forecast/gcpvj0v07\"\n", "#send url requests to data links\n", "weather_fcast_rqst = requests.get(weather_fcast)\n", "#convert web data to text strings\n", "weather_fcast_text = weather_fcast_rqst.text\n", "#for testing \n", "print(weather_fcast_text)\n", "\n", "#def cleanWeather():\n", "#clean weather\n", "import numpy as np\n", "\n", "weather_short = []\n", "\n", "weather_date = weather_fcast_text.index('data-tab-id') #finds 1st date tabs within text string (should be today)\n", "weather_fcast_chopped = weather_fcast_text #intialise weather dataframe\n", "\n", "#counter for remaining days in week\n", "days = 1\n", "while days <= 7:\n", "\n", " weather_date_short=weather_fcast_chopped[weather_date+13:weather_date+23]\n", "\n", " #chop off the head of text string, find the max \n", " weather_fcast_chopped = weather_fcast_chopped[weather_date:len(weather_fcast_chopped)]\n", " weather_max = weather_fcast_chopped.index('Maximum daytime temperature: ') #finds temp max within text string\n", "\n", " #find number of decimal places for max temp\n", " chr_adder=29\n", " while weather_fcast_chopped[weather_max+chr_adder] != '&':\n", " if chr_adder == 33:\n", " break\n", " chr_adder += 1\n", "\n", " weather_max_short=weather_fcast_chopped[weather_max+29:weather_max+chr_adder]\n", "\n", " #chop off the head of text string, find the min \n", " weather_fcast_chopped = weather_fcast_chopped[weather_max+1:len(weather_fcast_chopped)]\n", " weather_min = weather_fcast_chopped.index('Minimum nighttime temperature: ') #finds temp min within text string\n", "\n", " #find number of decimal places for max temp\n", " chr_adder=31\n", " while weather_fcast_chopped[weather_min+chr_adder] != '&':\n", " if chr_adder == 34:\n", " break\n", " chr_adder += 1\n", "\n", " weather_min_short=weather_fcast_chopped[weather_min+31:weather_min+chr_adder]\n", "\n", " #gather list data\n", " weather_short.append(days)\n", " weather_short[days-1]=[weather_date_short,weather_max_short,weather_min_short]\n", "\n", " #search the next occurence of the forecast date\n", " weather_fcast_chopped = weather_fcast_chopped[weather_min:len(weather_fcast_chopped)]\n", " if days < 7:\n", " weather_date = weather_fcast_chopped.index('data-tab-id') #finds next date tabs within text string\n", "\n", " days += 1\n", "\n", "#convert weather_short from list of lists to numpy array\n", "weather_short = np.array(weather_short)\n", "\n", "#get demand\n", "import pandas as pd\n", "data = pd.read_csv('https://api.bmreports.com/BMRS/FORDAYDEM/v1?APIKey=lg3cgfi7sanoqzm&ServiceType=csv', skiprows=[0])\n", "print(data)\n", "\n", "#clean demand\n", "\n", "import datetime\n", "\n", "data.columns = ['Demand Fcast Type', 'Date','Settlement Period','N Flag','Time of Fcast?', 'Demand MW']\n", "load_fcast_text=data.filter(items=['Date','Settlement Period','Demand MW','Demand Fcast Type'])\n", "load_fcast_text=load_fcast_text.loc[load_fcast_text['Demand Fcast Type'] == \"DATF\"]\n", "load_fcast_text.set_index('Date')\n", "print(load_fcast_text.dtypes)\n", "load_fcast_text['Date'] = pd.to_datetime(load_fcast_text['Date'],format='%Y%m%d')\n", "\n", 
"#convert load_fcast_text from list of lists to numpy array\n", "load_fcast_text = np.array(load_fcast_text)\n", "#print(load_fcast_text)\n", "\n", "load_fcast_text_D=load_fcast_text[load_fcast_text[:,0] < datetime.datetime.now()]\n", "load_fcast_text_D1=load_fcast_text[load_fcast_text[:,0] > datetime.datetime.now()]\n", "\n", "print(load_fcast_text_D1)\n", "#load_fcast_text_D+1\n", "\n", "#print(load_fcast_text[:,2])\n", "np.mean(load_fcast_text_D1[:,2])\n", "\n", "#get RES\n", "#NB skiprows below will skip number of rows specified and infer number of columns of data from the next row\n", "#NB use str(date.today() + datetime.timedelta(1)) for tomorrow \n", "RES_data = pd.read_csv('https://api.bmreports.com/BMRS/B1440/v1?ServiceType=csv&Period=*&APIKey=lg3cgfi7sanoqzm&SettlementDate=' + str(date.today()), skiprows=4)\n", "print(RES_data)\n", "#USE THIS TO JUST PRINT ONE ROW: print(data.loc[1]) NB INDEXING STARTS FROM ROW ZERO WHICH IS FIRST ROW OF DATA (HEADER DOESNT COUNT)\n", "#USE THIS TO JUST PRINT ONE VALUE WITHIN A ROW: print(data.iloc[1][5])\n", "\n", "#clean RES\n", "RES_fcast_text = RES_data[0:-1] #remove last row NaN\n", "#no need to get column names, as these were imported with csv\n", "RES_fcast_text=RES_fcast_text.filter(items=['Settlement Date','Settlement Period','Quantity','Business Type','Power System Resource Type'])\n", "RES_fcast_text.set_index('Settlement Date') #may have to combine this later with settlement period\n", "RES_fcast_text=RES_fcast_text.sort_values(by='Settlement Period')\n", "RES_fcast_text=RES_fcast_text.reset_index(drop=True)\n", "print(RES_fcast_text.dtypes)\n", "#RES_fcast_text['Settlement Date'] = pd.to_datetime(RES_fcast_text['Settlement Date'],format='%Y%m%d')\n", "\n", "#convert RES_fcast_text from list of lists to numpy array\n", "RES_fcast_text = np.array(RES_fcast_text)\n", "\n", "#print(RES_fcast_text)\n", "print(RES_fcast_text[RES_fcast_text[:,0] == str(date.today())])\n", "\n", "#get FX\n", "\n", "import requests\n", "FX = \"https://www.poundsterlinglive.com/data/currencies/gbp-pairs/GBPEUR-exchange-rate\"\n", "FX_fcast_rqst = requests.get(FX)\n", "print(FX_fcast_rqst) # should say 200 means connection is good\n", "FX_fcast_text = FX_fcast_rqst.text\n", "#print(FX_fcast_text)\n", "\n", "#truncate data pulled back, from today's price\n", "#first find \"Today’s Price\":\n", "FX_GBPEUR = FX_fcast_text.index('Latest GBP/EUR') #find \"Today’s Price\" within data\n", "#print(FX_EURGBP)\n", "FX_GBPEUR_chopped=FX_fcast_text[FX_GBPEUR:len(FX_fcast_text)]\n", "\n", "print(FX_GBPEUR_chopped)\n", "\n", "#repeat for GBPUSD\n", "FX = \"https://www.poundsterlinglive.com/data/currencies/gbp-pairs/GBPUSD-exchange-rate\"\n", "FX_fcast_rqst = requests.get(FX)\n", "print(FX_fcast_rqst) # should say 200 means connection is good\n", "FX_fcast_text = FX_fcast_rqst.text\n", "#print(FX_fcast_text)\n", "\n", "#truncate data pulled back, from today's price\n", "#first find \"Today’s Price\":\n", "FX_GBPUSD = FX_fcast_text.index('Latest GBP/USD') #find \"Today’s Price\" within data\n", "\n", "#print(FX_GBPUSD)\n", "FX_GBPUSD_chopped=FX_fcast_text[FX_GBPUSD:len(FX_fcast_text)]\n", "\n", "print(FX_GBPUSD_chopped)\n", "\n", "#clean FX\n", "\n", "FX_short = {}\n", "\n", "FX_short[0,0]=\"GBPEUR\"\n", "FX_short[0,1]=FX_GBPEUR_chopped[45:51]\n", "#NB can also store last 5, 10, 15, 20,50,100 days averages and % changes - later\n", "\n", "FX_short[1,0]=\"GBPUSD\"\n", "FX_short[1,1]=FX_GBPUSD_chopped[45:51]\n", "\n", "print(FX_short)\n", "print(FX_short[0,1])\n", 
"print(FX_short[1,1])\n", "\n", "#get elec and gas prices\n", "import pandas as pd\n", "Marex_data = pd.read_csv('http://www.marexspectron.com/intelligence/indices/historical-index')\n", "print(Marex_data)\n", "\n", "#clean elec prices\n", "\n", "#no need to get column names, as these were imported with csv\n", "Marex_fcast_text=Marex_data.filter(items=['DealDate','Average','High','Low','IndexName','InstrumentName','SequenceItemName','MarketName'])\n", "Marex_fcast_text.set_index('DealDate') \n", "#filter by elec baseload then by D-1\n", "Marex_fcast_text=Marex_fcast_text.loc[(Marex_fcast_text['IndexName'] == \"U.K Power BSLD All-Day D.A\") | (Marex_fcast_text['IndexName'] == \"U.K Power BSLD All-Day M.A\") | (Marex_fcast_text['IndexName'] == \"NBP All Day D.A\") | (Marex_fcast_text['IndexName'] == \"NBP All Day M.A\") ]\n", "\n", "Marex_fcast_text['DealDate'] = pd.to_datetime(Marex_fcast_text['DealDate'],format='%d/%m/%Y')\n", "\n", "if date.today()==datetime.date(2018, 8, 28): #bank holiday exceptions - make more robust at a later stage\n", " Marex_fcast_text=Marex_fcast_text.loc[Marex_fcast_text['DealDate'] == date.today()- datetime.timedelta(4)]\n", "elif date.today().weekday() == 0: #Monday\n", " Marex_fcast_text=Marex_fcast_text.loc[Marex_fcast_text['DealDate'] == date.today()- datetime.timedelta(3)] # NEED TO USE date.today()- timedelta(1))\n", "else: #Tue, Wed, Thu or Fri. BASICALLY DONT RUN WEEKEND\n", " Marex_fcast_text=Marex_fcast_text.loc[Marex_fcast_text['DealDate'] == date.today()- datetime.timedelta(1)] # NEED TO USE date.today()- timedelta(1))\n", "\n", "Marex_fcast_text=Marex_fcast_text.reset_index(drop=True) #reset index to 0\n", "\n", "print(Marex_fcast_text.dtypes)\n", "#RES_fcast_text['Settlement Date'] = pd.to_datetime(RES_fcast_text['Settlement Date'],format='%Y%m%d')\n", "\n", "print(Marex_fcast_text)\n", "\n", "Marex_fcast_text = np.array(Marex_fcast_text)\n", "\n", "print(Marex_fcast_text)\n", "\n", "#some error handling in case Marex prices not in datafeed (this has happened before on 23 Apr 2019)\n", "import sys\n", "\n", "if Marex_fcast_text[:1,1]=='.':\n", " print(\"No DA NBP price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "if Marex_fcast_text[1:2,1]=='.':\n", " print(\"No MA NBP price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "if Marex_fcast_text[2:3,1]=='.':\n", " print(\"No DA elec price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "if Marex_fcast_text[3:4,1]=='.':\n", " print(\"No MA elec price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "\n", "#create neural network (live) input (i.e. 
"#create neural network (live) input (i.e. test) and output (for validation) matrices\n",
"\n",
"nn_input = 0\n",
"\n",
"#nn_input = np.array() #initialise\n",
"\n",
"nn_input = np.concatenate((weather_short[:1,1], weather_short[:1,2])) #add weather high and low for D - NB np.concatenate NEEDS DOUBLE BRACKETS (( ))\n",
"nn_input = np.concatenate((nn_input, weather_short[1:2,1])) #add high D+1\n",
"nn_input = np.concatenate((nn_input, weather_short[1:2,2])) #add low D+1\n",
"#nn_input is the neural network input matrix, holding values for:\n",
"\n",
"#WeatherHigh_D\n",
"#WeatherLow_D\n",
"#WeatherHigh_D+1\n",
"#WeatherLow_D+1\n",
"#Av Demand D\n",
"#Peak Demand D\n",
"#Av Demand D+1\n",
"#Peak Demand D+1\n",
"#Av Offshore Wind D\n",
"#Peak Offshore Wind D\n",
"#Av Onshore Wind D\n",
"#Peak Onshore Wind D\n",
"#Av Solar D\n",
"#Peak Solar D\n",
"#GBPEUR rate\n",
"#GBPUSD rate\n",
"#DA NBP\n",
"#MA NBP\n",
"#DA Elec\n",
"#MA Elec\n",
"\n",
"#add demand forecast\n",
"nn_input = np.hstack((nn_input, round(np.mean(load_fcast_text_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(load_fcast_text_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.mean(load_fcast_text_D1[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(load_fcast_text_D1[:,2]), 2)))\n",
"\n",
"#add RES forecast\n",
"RES_fcast_D = RES_fcast_text[RES_fcast_text[:,0] == str(date.today())]\n",
"OFFwind_fcast_D = RES_fcast_D[RES_fcast_D[:,4] == 'Wind Offshore']\n",
"ONwind_fcast_D = RES_fcast_D[RES_fcast_D[:,4] == 'Wind Onshore']\n",
"SolarPV_fcast_D = RES_fcast_D[RES_fcast_D[:,4] == 'Solar']\n",
"nn_input = np.hstack((nn_input, round(np.mean(OFFwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(OFFwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.mean(ONwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(ONwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.mean(SolarPV_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(SolarPV_fcast_D[:,2]), 2)))\n",
"\n",
"#add FX rates\n",
"nn_input = np.hstack((nn_input, FX_short[0,1])) #GBPEUR\n",
"nn_input = np.hstack((nn_input, FX_short[1,1])) #GBPUSD\n",
"\n",
"#add NBP gas prices\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[:1,1])) #DA NBP\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[1:2,1])) #MA NBP\n",
"\n",
"#add Marex elec prices\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[2:3,1])) #DA elec\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[3:4,1])) #MA elec\n",
"\n",
"#create nn_output (the predictand is the Marex elec price)\n",
"#NB needs updating later - this is the target, so the forecast is not \"known\" until D+1\n",
"#nn_output=np.concatenate((Marex_fcast_text[2:3,1], Marex_fcast_text[3:4,1]))\n",
"#nn_output would be the neural network output (target) matrix, holding values for:\n",
"\n",
"#DA elec\n",
"#MA elec\n",
"\n",
"print(nn_input)\n",
"#print(nn_output)\n",
"\n",
"#np.savetxt('C:\\ChangingEnergy\\Beta_testing\\ML testing\\RollingTrainingData.csv', nn_input, fmt='%s', delimiter=',', header=\"WeatherHigh_D,WeatherLow_D,WeatherHigh_D+1,WeatherLow_D+1,Av Demand D,Peak Demand D,Av Demand D+1,Peak Demand D+1,Av Offshore Wind D,Peak Offshore Wind D,Av Onshore Wind D,Peak Onshore Wind D,Av Solar D,Peak Solar D,GBPEUR rate,GBPUSD rate,DA NBP,MA NBP\")\n",
"\n",
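"#Optional sanity check (a sketch): nn_input should hold exactly the 20 values listed above, and every one\n",
"#should parse as a number before it is appended to the training csv\n",
"assert len(nn_input) == 20, 'unexpected nn_input length: %d' % len(nn_input)\n",
"#nn_input.astype('float32') #uncomment to confirm every field is numeric (raises ValueError if not)\n",
"\n",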
"#convert neural network input to a pandas data frame\n",
"nn_input = pd.DataFrame(nn_input, columns=[str(date.today())])\n",
"print(str(date.today()))\n",
"nn_input = nn_input.T #transpose array for csv export\n",
"\n",
"#append today's input row to the rolling training data\n",
"with open(\"RollingTrainingData.csv\", 'a') as rolling_training_data:\n",
"    nn_input.to_csv(rolling_training_data, header=False, index=True)\n",
"\n",
"#re-read the rolling file into a numpy array for the NN algo\n",
"training_data = pd.read_csv('RollingTrainingData.csv', header=0, index_col=0).values\n",
"print(training_data)\n",
"\n",
"\n",
"#plot training data as time series plots\n",
"#see for help: https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/\n",
"\n",
"from pandas import read_csv\n",
"from matplotlib import pyplot\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# load dataset\n",
"dataset = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n",
"values = dataset.values\n",
"\n",
"#BELOW PLOTS MOVED TO END OF SCRIPT\n",
"# specify columns to plot\n",
"# groups = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]\n",
"# i = 1\n",
"# # plot each column\n",
"# pyplot.figure(0)\n",
"# # pyplot.figure(figsize=(8,20)) # commented out: when run from a python shell this errors trying to fit the plot to the window\n",
"# for group in groups:\n",
"# \tpyplot.subplot(len(groups), 1, i) #subplot(nrows, ncols, index, **kwargs)\n",
"# \tpyplot.plot(values[:, group])\n",
"# \tpyplot.title(dataset.columns[group], y=-0.05, loc='right')\n",
"# \ti += 1\n",
"# pyplot.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=1, wspace=0.2, hspace=0.1)\n",
"# pyplot.show()\n",
"\n",
"#correlation matrix - taken from https://stackoverflow.com/questions/29432629/correlation-matrix-using-pandas\n",
"#pyplot.figure(1)\n",
"#pyplot.matshow(dataset.corr())\n",
"#pyplot.show() #required to show the plot when running from a python shell\n",
"\n",
"\n",
"#Multivariate Time Series Forecasting with LSTMs in Keras\n",
"\n",
"import math\n",
"from math import sqrt\n",
"from math import gcd #greatest common divisor, used to pick a batch size that divides both the training and test sets\n",
"from numpy import concatenate\n",
"from pandas import DataFrame\n",
"from pandas import concat\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.metrics import mean_squared_error\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"from keras.layers import LSTM\n",
"\n",
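"#Reproducibility sketch (an assumption, not part of the original run): fixing the RNG seeds makes the\n",
"#shuffle and weight initialisation repeatable between runs; exact determinism still depends on the keras backend\n",
"import random\n",
"np.random.seed(7) #numpy's RNG drives np.random.shuffle below\n",
"random.seed(7)    #python's own RNG\n",
"\n",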
t+n)\n", "\tfor i in range(0, n_out):\n", "\t\tcols.append(df.shift(-i))\n", "\t\tif i == 0:\n", "\t\t\tnames += [('var%d(t)' % (j+1)) for j in range(n_vars)]\n", "\t\telse:\n", "\t\t\tnames += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]\n", "\t# put it all together\n", "\tagg = concat(cols, axis=1)\n", "\tagg.columns = names\n", "\t# drop rows with NaN values\n", "\tif dropnan:\n", "\t\tagg.dropna(inplace=True)\n", "\treturn agg\n", "\n", "\n", "#FIRST RUN PERSISTENCE MODEL\n", "\n", "# load dataset\n", "series = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n", "\n", "# split data into train and test\n", "X = series.values\n", "n_train_days = math.floor(len(X)*0.8) #use 80/20 rule train on 80% of data, test on 20%\n", "train = X[:n_train_days, -1] \n", "test = X[n_train_days:, -1] \n", "\n", "# walk-forward validation\n", "history = [x for x in train]\n", "predictions = list()\n", "for i in range(len(test)):\n", "\t# make prediction\n", "\tpredictions.append(history[-1])\n", "\t# observation\n", "\thistory.append(test[i])\n", "# report performance\n", "rmse_persistence = sqrt(mean_squared_error(test, predictions))\n", "print('RMSE: %.3f' % rmse_persistence)\n", "# line plot of observed vs predicted\n", "pyplot.plot(test)\n", "pyplot.plot(predictions)\n", "pyplot.show()\n", "\n", "#NOW RUN LSTM Model\n", "\n", "# load dataset\n", "dataset = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n", "values = dataset.values\n", "# integer encode direction\n", "#encoder = LabelEncoder()\n", "#values[:,4] = encoder.fit_transform(values[:,4]) #assigns ordinal levels to categorical data - this is for label data dont need in our case\n", "\n", "# ensure all data is float\n", "values = values.astype('float32')\n", "#print(values)\n", "#print(\"BREAK\")\n", "# normalize features\n", "scaler = MinMaxScaler(feature_range=(0, 1))\n", "scaled = scaler.fit_transform(values)\n", "print(scaled)\n", "\n", "# frame as supervised learning\n", "reframed = series_to_supervised(scaled, 1, 1)\n", "# drop columns we don't want to predict - this is basically all columns for predictor variables at t+1 (next time step)\n", "reframed.drop(reframed.columns[[20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38]], axis=1, inplace=True)\n", "\n", "# split into train and test sets\n", "values = reframed.values\n", "n_train_days = math.floor(len(values)*0.8) #use 80/20 rule train on 80% of data, test on 20%\n", "np.random.shuffle(values) #shuffling data to remove trend (will randomly assign test data and training data rather than take \n", "#e.g. 
"#FIRST RUN A PERSISTENCE MODEL (naive baseline: tomorrow's forecast is simply today's value)\n",
"\n",
"# load dataset\n",
"series = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n",
"\n",
"# split data into train and test\n",
"X = series.values\n",
"n_train_days = math.floor(len(X)*0.8) #use the 80/20 rule: train on 80% of the data, test on 20%\n",
"train = X[:n_train_days, -1]\n",
"test = X[n_train_days:, -1]\n",
"\n",
"# walk-forward validation\n",
"history = [x for x in train]\n",
"predictions = list()\n",
"for i in range(len(test)):\n",
"\t# make prediction\n",
"\tpredictions.append(history[-1])\n",
"\t# observation\n",
"\thistory.append(test[i])\n",
"# report performance\n",
"rmse_persistence = sqrt(mean_squared_error(test, predictions))\n",
"print('RMSE: %.3f' % rmse_persistence)\n",
"# line plot of observed vs predicted\n",
"pyplot.plot(test)\n",
"pyplot.plot(predictions)\n",
"pyplot.show()\n",
"\n",
"#NOW RUN THE LSTM MODEL\n",
"\n",
"# load dataset\n",
"dataset = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n",
"values = dataset.values\n",
"# integer encode direction\n",
"#encoder = LabelEncoder()\n",
"#values[:,4] = encoder.fit_transform(values[:,4]) #assigns ordinal levels to categorical data - not needed here as all inputs are numeric\n",
"\n",
"# ensure all data is float\n",
"values = values.astype('float32')\n",
"# normalize features\n",
"scaler = MinMaxScaler(feature_range=(0, 1))\n",
"scaled = scaler.fit_transform(values)\n",
"print(scaled)\n",
"\n",
"# frame as supervised learning\n",
"reframed = series_to_supervised(scaled, 1, 1)\n",
"# drop the columns we don't want to predict - i.e. all time-t columns except the last (MA elec),\n",
"# leaving the 20 lagged predictors plus the single target\n",
"reframed.drop(reframed.columns[[20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38]], axis=1, inplace=True)\n",
"\n",
"# split into train and test sets\n",
"values = reframed.values\n",
"n_train_days = math.floor(len(values)*0.8) #80/20 rule again\n",
"np.random.shuffle(values) #shuffle to remove trend: randomly assign rows to train and test rather than taking,\n",
"#e.g., the first 80 days to train on and the last 20 to test on - in a rising market that is likely to underfit\n",
"train = values[:n_train_days, :]\n",
"test = values[n_train_days:, :]\n",
"# split into inputs and outputs\n",
"train_X, train_y = train[:, :-1], train[:, -1] #stick with MA elec for now; later expand to DA and MA if the code scales\n",
"test_X, test_y = test[:, :-1], test[:, -1]\n",
"\n",
"# reshape input to be 3D [samples, timesteps, features]\n",
"print(train_X)\n",
"train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))\n",
"test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))\n",
"print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)\n",
"\n",
"# design network\n",
"model = Sequential()\n",
"model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))\n",
"model.add(Dense(1))\n",
"model.compile(loss='mae', optimizer='adam')\n",
"# fit network - NB the batch size should really be large (assuming big data is sourced); this is still a small,\n",
"# growing dataset (the reference example used batch size 72 and 50 epochs), so use the gcd of the set sizes\n",
"history = model.fit(train_X, train_y, epochs=50, batch_size=gcd(len(train_X), len(test_X)), validation_data=(test_X, test_y), verbose=2, shuffle=False)\n",
"# plot history\n",
"pyplot.plot(history.history['loss'], label='train')\n",
"pyplot.plot(history.history['val_loss'], label='test')\n",
"pyplot.legend()\n",
"pyplot.savefig('ML_Demo_TrainTestPlot.png')\n",
"pyplot.show()\n",
"\n",
"# make a prediction\n",
"yhat = model.predict(test_X)\n",
"test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))\n",
"# invert scaling for forecast\n",
"inv_yhat = concatenate((test_X[:, :-1], yhat), axis=1)\n",
"inv_yhat = scaler.inverse_transform(inv_yhat)\n",
"inv_yhat = inv_yhat[:,-1]\n",
"\n",
"# invert scaling for actual\n",
"test_y = test_y.reshape((len(test_y), 1))\n",
"inv_y = concatenate((test_X[:, :-1], test_y), axis=1)\n",
"inv_y = scaler.inverse_transform(inv_y)\n",
"inv_y = inv_y[:,-1]\n",
"# calculate RMSE\n",
"rmse = sqrt(mean_squared_error(inv_y, inv_yhat))\n",
"print(inv_y, inv_yhat)\n",
"print('Test RMSE: %.3f' % rmse)\n",
"\n",
"#write a comparison of RMSE for the LSTM model and the persistence model to csv\n",
"\n",
"RMSE_comparison = concatenate(([rmse_persistence], [rmse]), axis=0)\n",
"\n",
"#convert to a pandas data frame\n",
"latest_rmse = pd.DataFrame(RMSE_comparison, columns=[str(date.today())])\n",
"print(str(date.today()))\n",
"print(latest_rmse)\n",
"latest_rmse = latest_rmse.T #transpose for csv export\n",
"\n",
"with open('ModelComparison.csv', 'a') as model_comparison:\n",
"    latest_rmse.to_csv(model_comparison, header=False, index=True)\n",
"\n",
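"#Optional (a sketch): read the comparison file back and plot both RMSE series over time;\n",
"#assumes ModelComparison.csv holds one headerless row per run: date, persistence RMSE, LSTM RMSE\n",
"rmse_history = pd.read_csv('ModelComparison.csv', header=None, index_col=0)\n",
"rmse_history.columns = ['Persistence RMSE', 'LSTM RMSE']\n",
"rmse_history.plot(kind='line', figsize=(10, 4))\n",
"pyplot.show()\n",
"\n",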
"#latest predictor dataset only\n",
"\n",
"#first read the existing forecast v. actual file to get deltas\n",
"forecast_comparison = pd.read_csv('Elec_MA_fcast.csv', index_col=[0])\n",
"#forecast_comparison.set_index(\"Date\", drop = False)\n",
"\n",
"forecast_comparison = np.array(forecast_comparison) #turn the comparison dataframe into a numpy array\n",
"\n",
"#get latest deltas\n",
"latest_actual_delta = [float(Marex_fcast_text[3:4,1]) - forecast_comparison[len(forecast_comparison)-1,0]] #latest day-on-day move in the actual price\n",
"latest_fcast_delta = [forecast_comparison[len(forecast_comparison)-1,1] - forecast_comparison[len(forecast_comparison)-2,1]] #latest day-on-day move in the forecast\n",
"\n",
"latest_data = series_to_supervised(scaled, 0, 1) #no time shift for the last row of predictor variables, as its forecast is \"unknown\"\n",
"values = latest_data.values\n",
"latest_X = values[len(values)-1:, :]\n",
"print(latest_X.shape)\n",
"print(latest_X[:,-1].shape)\n",
"\n",
"#reshape input to be 3D [samples, timesteps, features]\n",
"latest_X = latest_X.reshape((latest_X.shape[0], 1, latest_X.shape[1]))\n",
"print(latest_X.shape)\n",
"\n",
"# make a prediction\n",
"latest_hat = model.predict(latest_X)\n",
"\n",
"#get ready to transform back\n",
"latest_X = latest_X.reshape((latest_X.shape[0], latest_X.shape[2]))\n",
"# invert scaling for forecast\n",
"inv_latest_hat = concatenate((latest_X[:, :-1], latest_hat), axis=1)\n",
"inv_latest_hat = scaler.inverse_transform(inv_latest_hat)\n",
"print(inv_latest_hat) #check: should be the latest predictor variables (20) minus the last elec MA price, plus a single forecast MA price at the end\n",
"inv_latest_hat = inv_latest_hat[:,-1]\n",
"\n",
"#export latest forecast to csv\n",
"\n",
"latest_forecast = concatenate((Marex_fcast_text[3:4,1], inv_latest_hat, latest_actual_delta, latest_fcast_delta), axis=0)\n",
"\n",
"print(Marex_fcast_text[3:4,1])\n",
"print(latest_forecast)\n",
"#convert forecast to a pandas data frame\n",
"latest_forecast = pd.DataFrame(latest_forecast, columns=[str(date.today())])\n",
"print(str(date.today()))\n",
"print(latest_forecast)\n",
"latest_forecast = latest_forecast.T #transpose for csv export\n",
"\n",
"with open('Elec_MA_fcast.csv', 'a') as rolling_forecast:\n",
"    latest_forecast.to_csv(rolling_forecast, header=False, index=True)\n",
"\n",
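"#First-run note (a sketch - the guard below is an assumption, not part of the original pipeline): the appends\n",
"#above expect Elec_MA_fcast.csv to already exist with a header row; something like this would create it once\n",
"import os\n",
"if not os.path.isfile('Elec_MA_fcast.csv'):\n",
"    pd.DataFrame(columns=['Actual MA Elec', 'Forecast MA Elec', 'Delta actual', 'Delta fcast']).to_csv('Elec_MA_fcast.csv')\n",
"\n",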
[\"THighD\",\"TLowD\",\"THighD+1\",\"TLowD+1\",\"AvDD\",\"PkDD\",\"AvDD+1\",\"PkDD+1\",\"AvOffD\",\"PkOffD\",\"AvOnD\",\"PkOnD\",\"AvPVD\",\"PkPVD\",\"GBPEUR\",\"GBBUSD\",\"NBPDA\",\"NBPMA\",\"ElecDA\",\"ElecMA\"]\n", "\n", "data = pandas.read_csv(url)\n", "data = data.drop(data.columns[0], axis=1)\n", "data.columns=names #rename columns with short name to make plots easier to read\n", "\n", "#names = [\"WeatherHigh_D\",\"WeatherLow_D\",\"WeatherHigh_D+1\",\"WeatherLow_D+1\",\"Av Demand D\",\"Peak Demand D\",\"Av Demand D+1\",\"Peak Demand D+1\",\"Av Offshore Wind D\",\"Peak Offshore Wind D\",\"Av Onshore Wind D\",\"Peak Onshore Wind D\",\"Av Solar D\",\"Peak Solar D\",\"GBPEUR rate\",\"GBBUSD rate\",\"DA NBP\",\"MA NBP\",\"DA Elec\",\"MA Elec\"]\n", "#data = pandas.read_csv(url, names=names)\n", "correlations = data.corr()\n", "# plot correlation matrix\n", "fig = plt.figure(figsize=(10,10))\n", "ax = fig.add_subplot(111)\n", "cax = ax.matshow(correlations, vmin=-1, vmax=1)\n", "fig.colorbar(cax)\n", "ticks = numpy.arange(0,20,1)\n", "ax.set_xticks(ticks)\n", "ax.set_yticks(ticks)\n", "ax.set_xticklabels(names)\n", "plt.xticks(rotation=90)\n", "ax.set_yticklabels(names)\n", "plt.savefig('ML_Demo_CorrelationMatrix.png')\n", "plt.show()\n", "\n", "# Scatterplot Matrix\n", "\n", "#some of below taken from https://stackoverflow.com/questions/32560932/how-to-customize-a-scatter-matrix-to-see-all-titles\n", "\n", "from pandas.tools.plotting import scatter_matrix\n", "\n", "sm=scatter_matrix(data,figsize=(13,13))\n", "\n", "#Change label rotation\n", "[s.xaxis.label.set_rotation(45) for s in sm.reshape(-1)]\n", "[s.yaxis.label.set_rotation(0) for s in sm.reshape(-1)]\n", "\n", "#May need to offset label when rotating to prevent overlap of figure\n", "[s.get_yaxis().set_label_coords(-0.8,0.5) for s in sm.reshape(-1)]\n", "\n", "#Hide all ticks\n", "[s.set_xticks(()) for s in sm.reshape(-1)]\n", "[s.set_yticks(()) for s in sm.reshape(-1)]\n", "\n", "plt.savefig('ML_Demo_ScatterPlots.png')\n", "plt.show()\n", "\n", "# Univariate Density Plots\n", "\n", "data.plot(kind='density', subplots=True, layout=(5,4),sharex=False,figsize=(13,13))\n", "\n", "#*CHECK* for some reason below 4 lines do not save the image, so have replaced with four lines below this\n", "# fig = plt.figure(figsize=(10,10))\n", "# plt.tight_layout()\n", "# plt.savefig('ML_Demo_DensityPlots.png')\n", "# plt.show()\n", "fig1 = plt.gcf() #get current figure\n", "plt.show()\n", "plt.draw()\n", "fig1.savefig('ML_Demo_DensityPlots.png', dpi=100)\n", "\n", "\n", "#plot actual, forecast comparison\n", "\n", "from datetime import datetime\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as ticker\n", "\n", "forecast_comparison = pd.read_csv('Elec_MA_fcast.csv')\n", "forecast_comparison = forecast_comparison.rename(columns={'Unnamed: 0': 'Date'}) #rename date column\n", "# forecast_comparison.index = forecast_comparison[\"Date\"] #set date column as index - needed to show date as x axis in charts below\n", "# forecast_comparison=forecast_comparison.drop(['Date'], axis=1)\n", "\n", "forecast_comparison['Date'] = pd.to_datetime(forecast_comparison['Date'])\n", "forecast_comparison.set_index(['Date'],inplace=True)\n", " \n", "print(forecast_comparison)\n", "\n", "\n", "#print(forecast_comparison['Unnamed: 0']) #this is the date column\n", "forecast_comparison['Actual MA Elec']=forecast_comparison['Actual MA Elec'].shift(-1) #shifts actual price back 1 day to align actual price with forecast\n", "forecast_comparison['Delta 
"#the static matplotlib version below has been replaced with the Plotly interactive version\n",
"# plt.figure(figsize=(12,5))\n",
"# plt.ylabel('£/MWh')\n",
"\n",
"# delta=abs(forecast_comparison['Forecast MA Elec']-forecast_comparison['Actual MA Elec'])\n",
"\n",
"# ax1 = forecast_comparison['Actual MA Elec'].plot(color='orange', grid=True, label='Actual')\n",
"# ax2 = forecast_comparison['Forecast MA Elec'].plot(color='blue', grid=True, label='Forecast')\n",
"# ax3 = delta.plot(color='red', grid=True, secondary_y=True, label='Delta')\n",
"# ax3.tick_params('y', colors='r')\n",
"# ax3.set_ylabel('delta £/MWh',color='r')\n",
"\n",
"# ax1.legend(loc=1)\n",
"# ax2.legend(loc=2)\n",
"# ax3.legend(loc=3)\n",
"\n",
"# plt.savefig('ML_Demo_Fcast&Actual.png')\n",
"# plt.show()\n",
"\n",
"import plotly\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"\n",
"plotly.tools.set_credentials_file(username='CE_Guest', api_key='MnTrIqXOWDmI1626tntn')\n",
"\n",
"actual = {\"x\": forecast_comparison['Actual MA Elec'].index,\n",
"          \"y\": forecast_comparison['Actual MA Elec'],\n",
"          \"marker\": {\"color\": \"blue\", \"size\": 12},\n",
"          \"mode\": \"markers\",\n",
"          \"name\": \"Actual\",\n",
"          \"type\": \"scatter\"\n",
"}\n",
"\n",
"forecast = {\"x\": forecast_comparison['Forecast MA Elec'].index,\n",
"            \"y\": forecast_comparison['Forecast MA Elec'],\n",
"            \"marker\": {\"color\": \"green\", \"size\": 12},\n",
"            \"mode\": \"markers\",\n",
"            \"name\": \"Forecast\",\n",
"            \"type\": \"scatter\"\n",
"}\n",
"\n",
"delta = {\"x\": forecast_comparison['Forecast MA Elec'].index,\n",
"         \"y\": forecast_comparison['Forecast MA Elec'] - forecast_comparison['Actual MA Elec'],\n",
"         \"marker\": {\"color\": \"pink\", \"size\": 12},\n",
"         \"mode\": \"markers\",\n",
"         \"name\": \"Delta\",\n",
"         \"type\": \"scatter\",\n",
"         \"yaxis\": \"y2\"\n",
"}\n",
"\n",
"data = [actual, forecast, delta]\n",
"\n",
"layout = go.Layout(\n",
"    title='UK Month Ahead Baseload Elec Price: Forecast v. Actual',\n",
"    titlefont=dict(\n",
"        size=14,\n",
"        color='#7f7f7f'\n",
"    ),\n",
"    yaxis=dict(\n",
"        title='£/MWh'\n",
"    ),\n",
"    yaxis2=dict(\n",
"        title='delta £/MWh',\n",
"        titlefont=dict(\n",
"            color='rgb(148, 103, 189)'\n",
"        ),\n",
"        tickfont=dict(\n",
"            color='rgb(148, 103, 189)'\n",
"        ),\n",
"        overlaying='y',\n",
"        side='right',\n",
"        zeroline=False\n",
"    )\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='CE_UK_Elec_MA')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }