{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#FULL DATA MINE AND CLEAN UP\n", "\n", "#def getWeather():\n", "import requests # library for html requests\n", "from datetime import date # library for python date formats? *check*\n", "\n", "#get weather\n", "#set up daily link\n", "weather_fcast = \"http://www.metoffice.gov.uk/public/weather/forecast/gcpvj0v07\"\n", "#send url requests to data links\n", "weather_fcast_rqst = requests.get(weather_fcast)\n", "#convert web data to text strings\n", "weather_fcast_text = weather_fcast_rqst.text\n", "#for testing \n", "print(weather_fcast_text)\n", "\n", "#def cleanWeather():\n", "#clean weather\n", "import numpy as np\n", "\n", "weather_short = []\n", "\n", "weather_date = weather_fcast_text.index('data-tab-id') #finds 1st date tabs within text string (should be today)\n", "weather_fcast_chopped = weather_fcast_text #intialise weather dataframe\n", "\n", "#counter for remaining days in week\n", "days = 1\n", "while days <= 7:\n", "\n", " weather_date_short=weather_fcast_chopped[weather_date+13:weather_date+23]\n", "\n", " #chop off the head of text string, find the max \n", " weather_fcast_chopped = weather_fcast_chopped[weather_date:len(weather_fcast_chopped)]\n", " weather_max = weather_fcast_chopped.index('Maximum daytime temperature: ') #finds temp max within text string\n", "\n", " #find number of decimal places for max temp\n", " chr_adder=29\n", " while weather_fcast_chopped[weather_max+chr_adder] != '&':\n", " if chr_adder == 33:\n", " break\n", " chr_adder += 1\n", "\n", " weather_max_short=weather_fcast_chopped[weather_max+29:weather_max+chr_adder]\n", "\n", " #chop off the head of text string, find the min \n", " weather_fcast_chopped = weather_fcast_chopped[weather_max+1:len(weather_fcast_chopped)]\n", " weather_min = weather_fcast_chopped.index('Minimum nighttime temperature: ') #finds temp min within text string\n", "\n", " #find number of decimal places for max temp\n", " chr_adder=31\n", " while weather_fcast_chopped[weather_min+chr_adder] != '&':\n", " if chr_adder == 34:\n", " break\n", " chr_adder += 1\n", "\n", " weather_min_short=weather_fcast_chopped[weather_min+31:weather_min+chr_adder]\n", "\n", " #gather list data\n", " weather_short.append(days)\n", " weather_short[days-1]=[weather_date_short,weather_max_short,weather_min_short]\n", "\n", " #search the next occurence of the forecast date\n", " weather_fcast_chopped = weather_fcast_chopped[weather_min:len(weather_fcast_chopped)]\n", " if days < 7:\n", " weather_date = weather_fcast_chopped.index('data-tab-id') #finds next date tabs within text string\n", "\n", " days += 1\n", "\n", "#convert weather_short from list of lists to numpy array\n", "weather_short = np.array(weather_short)\n", "\n", "#get demand\n", "import pandas as pd\n", "data = pd.read_csv('https://api.bmreports.com/BMRS/FORDAYDEM/v1?APIKey=lg3cgfi7sanoqzm&ServiceType=csv', skiprows=[0])\n", "print(data)\n", "\n", "#clean demand\n", "\n", "import datetime\n", "\n", "data.columns = ['Demand Fcast Type', 'Date','Settlement Period','N Flag','Time of Fcast?', 'Demand MW']\n", "load_fcast_text=data.filter(items=['Date','Settlement Period','Demand MW','Demand Fcast Type'])\n", "load_fcast_text=load_fcast_text.loc[load_fcast_text['Demand Fcast Type'] == \"DATF\"]\n", "load_fcast_text.set_index('Date')\n", "print(load_fcast_text.dtypes)\n", "load_fcast_text['Date'] = pd.to_datetime(load_fcast_text['Date'],format='%Y%m%d')\n", "\n", 
"#convert load_fcast_text from list of lists to numpy array\n", "load_fcast_text = np.array(load_fcast_text)\n", "#print(load_fcast_text)\n", "\n", "load_fcast_text_D=load_fcast_text[load_fcast_text[:,0] < datetime.datetime.now()]\n", "load_fcast_text_D1=load_fcast_text[load_fcast_text[:,0] > datetime.datetime.now()]\n", "\n", "print(load_fcast_text_D1)\n", "#load_fcast_text_D+1\n", "\n", "#print(load_fcast_text[:,2])\n", "np.mean(load_fcast_text_D1[:,2])\n", "\n", "#get RES\n", "#NB skiprows below will skip number of rows specified and infer number of columns of data from the next row\n", "#NB use str(date.today() + datetime.timedelta(1)) for tomorrow \n", "RES_data = pd.read_csv('https://api.bmreports.com/BMRS/B1440/v1?ServiceType=csv&Period=*&APIKey=lg3cgfi7sanoqzm&SettlementDate=' + str(date.today()), skiprows=4)\n", "print(RES_data)\n", "#USE THIS TO JUST PRINT ONE ROW: print(data.loc[1]) NB INDEXING STARTS FROM ROW ZERO WHICH IS FIRST ROW OF DATA (HEADER DOESNT COUNT)\n", "#USE THIS TO JUST PRINT ONE VALUE WITHIN A ROW: print(data.iloc[1][5])\n", "\n", "#clean RES\n", "RES_fcast_text = RES_data[0:-1] #remove last row NaN\n", "#no need to get column names, as these were imported with csv\n", "RES_fcast_text=RES_fcast_text.filter(items=['Settlement Date','Settlement Period','Quantity','Business Type','Power System Resource Type'])\n", "RES_fcast_text.set_index('Settlement Date') #may have to combine this later with settlement period\n", "RES_fcast_text=RES_fcast_text.sort_values(by='Settlement Period')\n", "RES_fcast_text=RES_fcast_text.reset_index(drop=True)\n", "print(RES_fcast_text.dtypes)\n", "#RES_fcast_text['Settlement Date'] = pd.to_datetime(RES_fcast_text['Settlement Date'],format='%Y%m%d')\n", "\n", "#convert RES_fcast_text from list of lists to numpy array\n", "RES_fcast_text = np.array(RES_fcast_text)\n", "\n", "#print(RES_fcast_text)\n", "print(RES_fcast_text[RES_fcast_text[:,0] == str(date.today())])\n", "\n", "#get FX\n", "\n", "import requests\n", "FX = \"https://www.poundsterlinglive.com/data/currencies/gbp-pairs/GBPEUR-exchange-rate\"\n", "FX_fcast_rqst = requests.get(FX)\n", "print(FX_fcast_rqst) # should say 200 means connection is good\n", "FX_fcast_text = FX_fcast_rqst.text\n", "#print(FX_fcast_text)\n", "\n", "#truncate data pulled back, from today's price\n", "#first find \"Today’s Price\":\n", "FX_GBPEUR = FX_fcast_text.index('Latest GBP/EUR') #find \"Today’s Price\" within data\n", "#print(FX_EURGBP)\n", "FX_GBPEUR_chopped=FX_fcast_text[FX_GBPEUR:len(FX_fcast_text)]\n", "\n", "print(FX_GBPEUR_chopped)\n", "\n", "#repeat for GBPUSD\n", "FX = \"https://www.poundsterlinglive.com/data/currencies/gbp-pairs/GBPUSD-exchange-rate\"\n", "FX_fcast_rqst = requests.get(FX)\n", "print(FX_fcast_rqst) # should say 200 means connection is good\n", "FX_fcast_text = FX_fcast_rqst.text\n", "#print(FX_fcast_text)\n", "\n", "#truncate data pulled back, from today's price\n", "#first find \"Today’s Price\":\n", "FX_GBPUSD = FX_fcast_text.index('Latest GBP/USD') #find \"Today’s Price\" within data\n", "\n", "#print(FX_GBPUSD)\n", "FX_GBPUSD_chopped=FX_fcast_text[FX_GBPUSD:len(FX_fcast_text)]\n", "\n", "print(FX_GBPUSD_chopped)\n", "\n", "#clean FX\n", "\n", "FX_short = {}\n", "\n", "FX_short[0,0]=\"GBPEUR\"\n", "FX_short[0,1]=FX_GBPEUR_chopped[45:51]\n", "#NB can also store last 5, 10, 15, 20,50,100 days averages and % changes - later\n", "\n", "FX_short[1,0]=\"GBPUSD\"\n", "FX_short[1,1]=FX_GBPUSD_chopped[45:51]\n", "\n", "print(FX_short)\n", "print(FX_short[0,1])\n", 
"print(FX_short[1,1])\n", "\n", "#get elec and gas prices\n", "import pandas as pd\n", "Marex_data = pd.read_csv('http://www.marexspectron.com/intelligence/indices/historical-index')\n", "print(Marex_data)\n", "\n", "#clean elec prices\n", "\n", "#no need to get column names, as these were imported with csv\n", "Marex_fcast_text=Marex_data.filter(items=['DealDate','Average','High','Low','IndexName','InstrumentName','SequenceItemName','MarketName'])\n", "Marex_fcast_text.set_index('DealDate') \n", "#filter by elec baseload then by D-1\n", "Marex_fcast_text=Marex_fcast_text.loc[(Marex_fcast_text['IndexName'] == \"U.K Power BSLD All-Day D.A\") | (Marex_fcast_text['IndexName'] == \"U.K Power BSLD All-Day M.A\") | (Marex_fcast_text['IndexName'] == \"NBP All Day D.A\") | (Marex_fcast_text['IndexName'] == \"NBP All Day M.A\") ]\n", "\n", "Marex_fcast_text['DealDate'] = pd.to_datetime(Marex_fcast_text['DealDate'],format='%d/%m/%Y')\n", "\n", "if date.today()==datetime.date(2018, 8, 28): #bank holiday exceptions - make more robust at a later stage\n", " Marex_fcast_text=Marex_fcast_text.loc[Marex_fcast_text['DealDate'] == date.today()- datetime.timedelta(4)]\n", "elif date.today().weekday() == 0: #Monday\n", " Marex_fcast_text=Marex_fcast_text.loc[Marex_fcast_text['DealDate'] == date.today()- datetime.timedelta(3)] # NEED TO USE date.today()- timedelta(1))\n", "else: #Tue, Wed, Thu or Fri. BASICALLY DONT RUN WEEKEND\n", " Marex_fcast_text=Marex_fcast_text.loc[Marex_fcast_text['DealDate'] == date.today()- datetime.timedelta(1)] # NEED TO USE date.today()- timedelta(1))\n", "\n", "Marex_fcast_text=Marex_fcast_text.reset_index(drop=True) #reset index to 0\n", "\n", "print(Marex_fcast_text.dtypes)\n", "#RES_fcast_text['Settlement Date'] = pd.to_datetime(RES_fcast_text['Settlement Date'],format='%Y%m%d')\n", "\n", "print(Marex_fcast_text)\n", "\n", "Marex_fcast_text = np.array(Marex_fcast_text)\n", "\n", "print(Marex_fcast_text)\n", "\n", "#some error handling in case Marex prices not in datafeed (this has happened before on 23 Apr 2019)\n", "import sys\n", "\n", "if Marex_fcast_text[:1,1]=='.':\n", " print(\"No DA NBP price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "if Marex_fcast_text[1:2,1]=='.':\n", " print(\"No MA NBP price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "if Marex_fcast_text[2:3,1]=='.':\n", " print(\"No DA elec price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "if Marex_fcast_text[3:4,1]=='.':\n", " print(\"No MA elec price in Marex Spectron datafeed\")\n", " sys.exit(0)\n", "\n", "#create neural network (live) input (i.e. 
"#create neural network (live) input (i.e. test) and output (for validation) matrices\n",
"\n",
"nn_input = 0\n",
"\n",
"#nn_input = np.array() #initialise\n",
"\n",
"nn_input = np.concatenate((weather_short[:1,1], weather_short[:1,2])) #add weather high and low for D - NB np.concatenate NEEDS DOUBLE BRACKETS (( ))\n",
"nn_input = np.concatenate((nn_input, weather_short[1:2,1])) #add high D+1\n",
"nn_input = np.concatenate((nn_input, weather_short[1:2,2])) #add low D+1\n",
"#nn_input is the neural network input matrix, holding values for:\n",
"\n",
"#WeatherHigh_D\n",
"#WeatherLow_D\n",
"#WeatherHigh_D+1\n",
"#WeatherLow_D+1\n",
"#Av Demand D\n",
"#Peak Demand D\n",
"#Av Demand D+1\n",
"#Peak Demand D+1\n",
"#Av Offshore Wind D\n",
"#Peak Offshore Wind D\n",
"#Av Onshore Wind D\n",
"#Peak Onshore Wind D\n",
"#Av Solar D\n",
"#Peak Solar D\n",
"#GBPEUR rate\n",
"#GBPUSD rate\n",
"#DA NBP\n",
"#MA NBP\n",
"#DA Elec\n",
"#MA Elec\n",
"\n",
"#add demand forecast\n",
"nn_input = np.hstack((nn_input, round(np.mean(load_fcast_text_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(load_fcast_text_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.mean(load_fcast_text_D1[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(load_fcast_text_D1[:,2]), 2)))\n",
"\n",
"#add RES forecast\n",
"RES_fcast_D = RES_fcast_text[RES_fcast_text[:,0] == str(date.today())]\n",
"OFFwind_fcast_D = RES_fcast_D[RES_fcast_D[:,4] == 'Wind Offshore']\n",
"ONwind_fcast_D = RES_fcast_D[RES_fcast_D[:,4] == 'Wind Onshore']\n",
"SolarPV_fcast_D = RES_fcast_D[RES_fcast_D[:,4] == 'Solar']\n",
"nn_input = np.hstack((nn_input, round(np.mean(OFFwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(OFFwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.mean(ONwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(ONwind_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.mean(SolarPV_fcast_D[:,2]), 2)))\n",
"nn_input = np.hstack((nn_input, round(np.max(SolarPV_fcast_D[:,2]), 2)))\n",
"\n",
"#add FX rates\n",
"nn_input = np.hstack((nn_input, FX_short[0,1])) #GBPEUR\n",
"nn_input = np.hstack((nn_input, FX_short[1,1])) #GBPUSD\n",
"\n",
"#add NBP gas prices\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[:1,1])) #DA NBP\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[1:2,1])) #MA NBP\n",
"\n",
"#add Marex elec prices\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[2:3,1])) #DA elec\n",
"nn_input = np.hstack((nn_input, Marex_fcast_text[3:4,1])) #MA elec\n",
"\n",
"#create nn_output (the predictand is the Marex elec price)\n",
"#NB needs updating later - this is the target, so the forecast is not \"known\" until D+1\n",
"#nn_output=np.concatenate((Marex_fcast_text[2:3,1], Marex_fcast_text[3:4,1]))\n",
"#nn_output would be the neural network output (target) matrix, holding values for:\n",
"\n",
"#DA elec\n",
"#MA elec\n",
"\n",
"print(nn_input)\n",
"#print(nn_output)\n",
"\n",
"#np.savetxt('C:\\ChangingEnergy\\Beta_testing\\ML testing\\RollingTrainingData.csv', nn_input, fmt='%s', delimiter=',', header=\"WeatherHigh_D,WeatherLow_D,WeatherHigh_D+1,WeatherLow_D+1,Av Demand D,Peak Demand D,Av Demand D+1,Peak Demand D+1,Av Offshore Wind D,Peak Offshore Wind D,Av Onshore Wind D,Peak Onshore Wind D,Av Solar D,Peak Solar D,GBPEUR rate,GBPUSD rate,DA NBP,MA NBP\")\n",
"\n",
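"#Optional sanity check (a sketch): nn_input should hold exactly the 20 values listed above, and every one\n",
"#should parse as a number before it is appended to the training csv\n",
"assert len(nn_input) == 20, 'unexpected nn_input length: %d' % len(nn_input)\n",
"#nn_input.astype('float32') #uncomment to confirm every field is numeric (raises ValueError if not)\n",
"\n",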
"#convert neural network input to a pandas data frame\n",
"nn_input = pd.DataFrame(nn_input, columns=[str(date.today())])\n",
"print(str(date.today()))\n",
"nn_input = nn_input.T #transpose array for csv export\n",
"\n",
"#append today's input row to the rolling training data\n",
"with open(\"RollingTrainingData.csv\", 'a') as rolling_training_data:\n",
"    nn_input.to_csv(rolling_training_data, header=False, index=True)\n",
"\n",
"#re-read the rolling file into a numpy array for the NN algo\n",
"training_data = pd.read_csv('RollingTrainingData.csv', header=0, index_col=0).values\n",
"print(training_data)\n",
"\n",
"\n",
"#plot training data as time series plots\n",
"#see for help: https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/\n",
"\n",
"from pandas import read_csv\n",
"from matplotlib import pyplot\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# load dataset\n",
"dataset = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n",
"values = dataset.values\n",
"\n",
"#BELOW PLOTS MOVED TO END OF SCRIPT\n",
"# specify columns to plot\n",
"# groups = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19]\n",
"# i = 1\n",
"# # plot each column\n",
"# pyplot.figure(0)\n",
"# # pyplot.figure(figsize=(8,20)) # commented out: when run from a python shell this errors trying to fit the plot to the window\n",
"# for group in groups:\n",
"# \tpyplot.subplot(len(groups), 1, i) #subplot(nrows, ncols, index, **kwargs)\n",
"# \tpyplot.plot(values[:, group])\n",
"# \tpyplot.title(dataset.columns[group], y=-0.05, loc='right')\n",
"# \ti += 1\n",
"# pyplot.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=1, wspace=0.2, hspace=0.1)\n",
"# pyplot.show()\n",
"\n",
"#correlation matrix - taken from https://stackoverflow.com/questions/29432629/correlation-matrix-using-pandas\n",
"#pyplot.figure(1)\n",
"#pyplot.matshow(dataset.corr())\n",
"#pyplot.show() #required to show the plot when running from a python shell\n",
"\n",
"\n",
"#Multivariate Time Series Forecasting with LSTMs in Keras\n",
"\n",
"import math\n",
"from math import sqrt\n",
"from math import gcd #greatest common divisor, used to pick a batch size that divides both the training and test sets\n",
"from numpy import concatenate\n",
"from pandas import DataFrame\n",
"from pandas import concat\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.metrics import mean_squared_error\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"from keras.layers import LSTM\n",
"\n",
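"#Reproducibility sketch (an assumption, not part of the original run): fixing the RNG seeds makes the\n",
"#shuffle and weight initialisation repeatable between runs; exact determinism still depends on the keras backend\n",
"import random\n",
"np.random.seed(7) #numpy's RNG drives np.random.shuffle below\n",
"random.seed(7)    #python's own RNG\n",
"\n",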
t+n)\n", "\tfor i in range(0, n_out):\n", "\t\tcols.append(df.shift(-i))\n", "\t\tif i == 0:\n", "\t\t\tnames += [('var%d(t)' % (j+1)) for j in range(n_vars)]\n", "\t\telse:\n", "\t\t\tnames += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]\n", "\t# put it all together\n", "\tagg = concat(cols, axis=1)\n", "\tagg.columns = names\n", "\t# drop rows with NaN values\n", "\tif dropnan:\n", "\t\tagg.dropna(inplace=True)\n", "\treturn agg\n", "\n", "\n", "#FIRST RUN PERSISTENCE MODEL\n", "\n", "# load dataset\n", "series = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n", "\n", "# split data into train and test\n", "X = series.values\n", "n_train_days = math.floor(len(X)*0.8) #use 80/20 rule train on 80% of data, test on 20%\n", "train = X[:n_train_days, -1] \n", "test = X[n_train_days:, -1] \n", "\n", "# walk-forward validation\n", "history = [x for x in train]\n", "predictions = list()\n", "for i in range(len(test)):\n", "\t# make prediction\n", "\tpredictions.append(history[-1])\n", "\t# observation\n", "\thistory.append(test[i])\n", "# report performance\n", "rmse_persistence = sqrt(mean_squared_error(test, predictions))\n", "print('RMSE: %.3f' % rmse_persistence)\n", "# line plot of observed vs predicted\n", "pyplot.plot(test)\n", "pyplot.plot(predictions)\n", "pyplot.show()\n", "\n", "#NOW RUN LSTM Model\n", "\n", "# load dataset\n", "dataset = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n", "values = dataset.values\n", "# integer encode direction\n", "#encoder = LabelEncoder()\n", "#values[:,4] = encoder.fit_transform(values[:,4]) #assigns ordinal levels to categorical data - this is for label data dont need in our case\n", "\n", "# ensure all data is float\n", "values = values.astype('float32')\n", "#print(values)\n", "#print(\"BREAK\")\n", "# normalize features\n", "scaler = MinMaxScaler(feature_range=(0, 1))\n", "scaled = scaler.fit_transform(values)\n", "print(scaled)\n", "\n", "# frame as supervised learning\n", "reframed = series_to_supervised(scaled, 1, 1)\n", "# drop columns we don't want to predict - this is basically all columns for predictor variables at t+1 (next time step)\n", "reframed.drop(reframed.columns[[20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38]], axis=1, inplace=True)\n", "\n", "# split into train and test sets\n", "values = reframed.values\n", "n_train_days = math.floor(len(values)*0.8) #use 80/20 rule train on 80% of data, test on 20%\n", "np.random.shuffle(values) #shuffling data to remove trend (will randomly assign test data and training data rather than take \n", "#e.g. 
"#FIRST RUN A PERSISTENCE MODEL (naive baseline: tomorrow's forecast is simply today's value)\n",
"\n",
"# load dataset\n",
"series = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n",
"\n",
"# split data into train and test\n",
"X = series.values\n",
"n_train_days = math.floor(len(X)*0.8) #use the 80/20 rule: train on 80% of the data, test on 20%\n",
"train = X[:n_train_days, -1]\n",
"test = X[n_train_days:, -1]\n",
"\n",
"# walk-forward validation\n",
"history = [x for x in train]\n",
"predictions = list()\n",
"for i in range(len(test)):\n",
"\t# make prediction\n",
"\tpredictions.append(history[-1])\n",
"\t# observation\n",
"\thistory.append(test[i])\n",
"# report performance\n",
"rmse_persistence = sqrt(mean_squared_error(test, predictions))\n",
"print('RMSE: %.3f' % rmse_persistence)\n",
"# line plot of observed vs predicted\n",
"pyplot.plot(test)\n",
"pyplot.plot(predictions)\n",
"pyplot.show()\n",
"\n",
"#NOW RUN THE LSTM MODEL\n",
"\n",
"# load dataset\n",
"dataset = read_csv('RollingTrainingData.csv', header=0, index_col=0)\n",
"values = dataset.values\n",
"# integer encode direction\n",
"#encoder = LabelEncoder()\n",
"#values[:,4] = encoder.fit_transform(values[:,4]) #assigns ordinal levels to categorical data - not needed here as all inputs are numeric\n",
"\n",
"# ensure all data is float\n",
"values = values.astype('float32')\n",
"# normalize features\n",
"scaler = MinMaxScaler(feature_range=(0, 1))\n",
"scaled = scaler.fit_transform(values)\n",
"print(scaled)\n",
"\n",
"# frame as supervised learning\n",
"reframed = series_to_supervised(scaled, 1, 1)\n",
"# drop the columns we don't want to predict - i.e. all time-t columns except the last (MA elec),\n",
"# leaving the 20 lagged predictors plus the single target\n",
"reframed.drop(reframed.columns[[20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38]], axis=1, inplace=True)\n",
"\n",
"# split into train and test sets\n",
"values = reframed.values\n",
"n_train_days = math.floor(len(values)*0.8) #80/20 rule again\n",
"np.random.shuffle(values) #shuffle to remove trend: randomly assign rows to train and test rather than taking,\n",
"#e.g., the first 80 days to train on and the last 20 to test on - in a rising market that is likely to underfit\n",
"train = values[:n_train_days, :]\n",
"test = values[n_train_days:, :]\n",
"# split into inputs and outputs\n",
"train_X, train_y = train[:, :-1], train[:, -1] #stick with MA elec for now; later expand to DA and MA if the code scales\n",
"test_X, test_y = test[:, :-1], test[:, -1]\n",
"\n",
"# reshape input to be 3D [samples, timesteps, features]\n",
"print(train_X)\n",
"train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))\n",
"test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))\n",
"print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)\n",
"\n",
"# design network\n",
"model = Sequential()\n",
"model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))\n",
"model.add(Dense(1))\n",
"model.compile(loss='mae', optimizer='adam')\n",
"# fit network - NB the batch size should really be large (assuming big data is sourced); this is still a small,\n",
"# growing dataset (the reference example used batch size 72 and 50 epochs), so use the gcd of the set sizes\n",
"history = model.fit(train_X, train_y, epochs=50, batch_size=gcd(len(train_X), len(test_X)), validation_data=(test_X, test_y), verbose=2, shuffle=False)\n",
"# plot history\n",
"pyplot.plot(history.history['loss'], label='train')\n",
"pyplot.plot(history.history['val_loss'], label='test')\n",
"pyplot.legend()\n",
"pyplot.savefig('ML_Demo_TrainTestPlot.png')\n",
"pyplot.show()\n",
"\n",
"# make a prediction\n",
"yhat = model.predict(test_X)\n",
"test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))\n",
"# invert scaling for forecast\n",
"inv_yhat = concatenate((test_X[:, :-1], yhat), axis=1)\n",
"inv_yhat = scaler.inverse_transform(inv_yhat)\n",
"inv_yhat = inv_yhat[:,-1]\n",
"\n",
"# invert scaling for actual\n",
"test_y = test_y.reshape((len(test_y), 1))\n",
"inv_y = concatenate((test_X[:, :-1], test_y), axis=1)\n",
"inv_y = scaler.inverse_transform(inv_y)\n",
"inv_y = inv_y[:,-1]\n",
"# calculate RMSE\n",
"rmse = sqrt(mean_squared_error(inv_y, inv_yhat))\n",
"print(inv_y, inv_yhat)\n",
"print('Test RMSE: %.3f' % rmse)\n",
"\n",
"#write a comparison of RMSE for the LSTM model and the persistence model to csv\n",
"\n",
"RMSE_comparison = concatenate(([rmse_persistence], [rmse]), axis=0)\n",
"\n",
"#convert to a pandas data frame\n",
"latest_rmse = pd.DataFrame(RMSE_comparison, columns=[str(date.today())])\n",
"print(str(date.today()))\n",
"print(latest_rmse)\n",
"latest_rmse = latest_rmse.T #transpose for csv export\n",
"\n",
"with open('ModelComparison.csv', 'a') as model_comparison:\n",
"    latest_rmse.to_csv(model_comparison, header=False, index=True)\n",
"\n",
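"#Optional (a sketch): read the comparison file back and plot both RMSE series over time;\n",
"#assumes ModelComparison.csv holds one headerless row per run: date, persistence RMSE, LSTM RMSE\n",
"rmse_history = pd.read_csv('ModelComparison.csv', header=None, index_col=0)\n",
"rmse_history.columns = ['Persistence RMSE', 'LSTM RMSE']\n",
"rmse_history.plot(kind='line', figsize=(10, 4))\n",
"pyplot.show()\n",
"\n",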
"#latest predictor dataset only\n",
"\n",
"#first read the existing forecast v. actual file to get deltas\n",
"forecast_comparison = pd.read_csv('Elec_MA_fcast.csv', index_col=[0])\n",
"#forecast_comparison.set_index(\"Date\", drop = False)\n",
"\n",
"forecast_comparison = np.array(forecast_comparison) #turn the comparison dataframe into a numpy array\n",
"\n",
"#get latest deltas\n",
"latest_actual_delta = [float(Marex_fcast_text[3:4,1]) - forecast_comparison[len(forecast_comparison)-1,0]] #latest day-on-day move in the actual price\n",
"latest_fcast_delta = [forecast_comparison[len(forecast_comparison)-1,1] - forecast_comparison[len(forecast_comparison)-2,1]] #latest day-on-day move in the forecast\n",
"\n",
"latest_data = series_to_supervised(scaled, 0, 1) #no time shift for the last row of predictor variables, as its forecast is \"unknown\"\n",
"values = latest_data.values\n",
"latest_X = values[len(values)-1:, :]\n",
"print(latest_X.shape)\n",
"print(latest_X[:,-1].shape)\n",
"\n",
"#reshape input to be 3D [samples, timesteps, features]\n",
"latest_X = latest_X.reshape((latest_X.shape[0], 1, latest_X.shape[1]))\n",
"print(latest_X.shape)\n",
"\n",
"# make a prediction\n",
"latest_hat = model.predict(latest_X)\n",
"\n",
"#get ready to transform back\n",
"latest_X = latest_X.reshape((latest_X.shape[0], latest_X.shape[2]))\n",
"# invert scaling for forecast\n",
"inv_latest_hat = concatenate((latest_X[:, :-1], latest_hat), axis=1)\n",
"inv_latest_hat = scaler.inverse_transform(inv_latest_hat)\n",
"print(inv_latest_hat) #check: should be the latest predictor variables (20) minus the last elec MA price, plus a single forecast MA price at the end\n",
"inv_latest_hat = inv_latest_hat[:,-1]\n",
"\n",
"#export latest forecast to csv\n",
"\n",
"latest_forecast = concatenate((Marex_fcast_text[3:4,1], inv_latest_hat, latest_actual_delta, latest_fcast_delta), axis=0)\n",
"\n",
"print(Marex_fcast_text[3:4,1])\n",
"print(latest_forecast)\n",
"#convert forecast to a pandas data frame\n",
"latest_forecast = pd.DataFrame(latest_forecast, columns=[str(date.today())])\n",
"print(str(date.today()))\n",
"print(latest_forecast)\n",
"latest_forecast = latest_forecast.T #transpose for csv export\n",
"\n",
"with open('Elec_MA_fcast.csv', 'a') as rolling_forecast:\n",
"    latest_forecast.to_csv(rolling_forecast, header=False, index=True)\n",
"\n",
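"#First-run note (a sketch - the guard below is an assumption, not part of the original pipeline): the appends\n",
"#above expect Elec_MA_fcast.csv to already exist with a header row; something like this would create it once\n",
"import os\n",
"if not os.path.isfile('Elec_MA_fcast.csv'):\n",
"    pd.DataFrame(columns=['Actual MA Elec', 'Forecast MA Elec', 'Delta actual', 'Delta fcast']).to_csv('Elec_MA_fcast.csv')\n",
"\n",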
[\"THighD\",\"TLowD\",\"THighD+1\",\"TLowD+1\",\"AvDD\",\"PkDD\",\"AvDD+1\",\"PkDD+1\",\"AvOffD\",\"PkOffD\",\"AvOnD\",\"PkOnD\",\"AvPVD\",\"PkPVD\",\"GBPEUR\",\"GBBUSD\",\"NBPDA\",\"NBPMA\",\"ElecDA\",\"ElecMA\"]\n", "\n", "data = pandas.read_csv(url)\n", "data = data.drop(data.columns[0], axis=1)\n", "data.columns=names #rename columns with short name to make plots easier to read\n", "\n", "#names = [\"WeatherHigh_D\",\"WeatherLow_D\",\"WeatherHigh_D+1\",\"WeatherLow_D+1\",\"Av Demand D\",\"Peak Demand D\",\"Av Demand D+1\",\"Peak Demand D+1\",\"Av Offshore Wind D\",\"Peak Offshore Wind D\",\"Av Onshore Wind D\",\"Peak Onshore Wind D\",\"Av Solar D\",\"Peak Solar D\",\"GBPEUR rate\",\"GBBUSD rate\",\"DA NBP\",\"MA NBP\",\"DA Elec\",\"MA Elec\"]\n", "#data = pandas.read_csv(url, names=names)\n", "correlations = data.corr()\n", "# plot correlation matrix\n", "fig = plt.figure(figsize=(10,10))\n", "ax = fig.add_subplot(111)\n", "cax = ax.matshow(correlations, vmin=-1, vmax=1)\n", "fig.colorbar(cax)\n", "ticks = numpy.arange(0,20,1)\n", "ax.set_xticks(ticks)\n", "ax.set_yticks(ticks)\n", "ax.set_xticklabels(names)\n", "plt.xticks(rotation=90)\n", "ax.set_yticklabels(names)\n", "plt.savefig('ML_Demo_CorrelationMatrix.png')\n", "plt.show()\n", "\n", "# Scatterplot Matrix\n", "\n", "#some of below taken from https://stackoverflow.com/questions/32560932/how-to-customize-a-scatter-matrix-to-see-all-titles\n", "\n", "from pandas.tools.plotting import scatter_matrix\n", "\n", "sm=scatter_matrix(data,figsize=(13,13))\n", "\n", "#Change label rotation\n", "[s.xaxis.label.set_rotation(45) for s in sm.reshape(-1)]\n", "[s.yaxis.label.set_rotation(0) for s in sm.reshape(-1)]\n", "\n", "#May need to offset label when rotating to prevent overlap of figure\n", "[s.get_yaxis().set_label_coords(-0.8,0.5) for s in sm.reshape(-1)]\n", "\n", "#Hide all ticks\n", "[s.set_xticks(()) for s in sm.reshape(-1)]\n", "[s.set_yticks(()) for s in sm.reshape(-1)]\n", "\n", "plt.savefig('ML_Demo_ScatterPlots.png')\n", "plt.show()\n", "\n", "# Univariate Density Plots\n", "\n", "data.plot(kind='density', subplots=True, layout=(5,4),sharex=False,figsize=(13,13))\n", "\n", "#*CHECK* for some reason below 4 lines do not save the image, so have replaced with four lines below this\n", "# fig = plt.figure(figsize=(10,10))\n", "# plt.tight_layout()\n", "# plt.savefig('ML_Demo_DensityPlots.png')\n", "# plt.show()\n", "fig1 = plt.gcf() #get current figure\n", "plt.show()\n", "plt.draw()\n", "fig1.savefig('ML_Demo_DensityPlots.png', dpi=100)\n", "\n", "\n", "#plot actual, forecast comparison\n", "\n", "from datetime import datetime\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as ticker\n", "\n", "forecast_comparison = pd.read_csv('Elec_MA_fcast.csv')\n", "forecast_comparison = forecast_comparison.rename(columns={'Unnamed: 0': 'Date'}) #rename date column\n", "# forecast_comparison.index = forecast_comparison[\"Date\"] #set date column as index - needed to show date as x axis in charts below\n", "# forecast_comparison=forecast_comparison.drop(['Date'], axis=1)\n", "\n", "forecast_comparison['Date'] = pd.to_datetime(forecast_comparison['Date'])\n", "forecast_comparison.set_index(['Date'],inplace=True)\n", " \n", "print(forecast_comparison)\n", "\n", "\n", "#print(forecast_comparison['Unnamed: 0']) #this is the date column\n", "forecast_comparison['Actual MA Elec']=forecast_comparison['Actual MA Elec'].shift(-1) #shifts actual price back 1 day to align actual price with forecast\n", "forecast_comparison['Delta 
"#the static matplotlib version below has been replaced with the Plotly interactive version\n",
"# plt.figure(figsize=(12,5))\n",
"# plt.ylabel('£/MWh')\n",
"\n",
"# delta=abs(forecast_comparison['Forecast MA Elec']-forecast_comparison['Actual MA Elec'])\n",
"\n",
"# ax1 = forecast_comparison['Actual MA Elec'].plot(color='orange', grid=True, label='Actual')\n",
"# ax2 = forecast_comparison['Forecast MA Elec'].plot(color='blue', grid=True, label='Forecast')\n",
"# ax3 = delta.plot(color='red', grid=True, secondary_y=True, label='Delta')\n",
"# ax3.tick_params('y', colors='r')\n",
"# ax3.set_ylabel('delta £/MWh',color='r')\n",
"\n",
"# ax1.legend(loc=1)\n",
"# ax2.legend(loc=2)\n",
"# ax3.legend(loc=3)\n",
"\n",
"# plt.savefig('ML_Demo_Fcast&Actual.png')\n",
"# plt.show()\n",
"\n",
"import plotly\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"\n",
"plotly.tools.set_credentials_file(username='CE_Guest', api_key='MnTrIqXOWDmI1626tntn')\n",
"\n",
"actual = {\"x\": forecast_comparison['Actual MA Elec'].index,\n",
"          \"y\": forecast_comparison['Actual MA Elec'],\n",
"          \"marker\": {\"color\": \"blue\", \"size\": 12},\n",
"          \"mode\": \"markers\",\n",
"          \"name\": \"Actual\",\n",
"          \"type\": \"scatter\"\n",
"}\n",
"\n",
"forecast = {\"x\": forecast_comparison['Forecast MA Elec'].index,\n",
"            \"y\": forecast_comparison['Forecast MA Elec'],\n",
"            \"marker\": {\"color\": \"green\", \"size\": 12},\n",
"            \"mode\": \"markers\",\n",
"            \"name\": \"Forecast\",\n",
"            \"type\": \"scatter\"\n",
"}\n",
"\n",
"delta = {\"x\": forecast_comparison['Forecast MA Elec'].index,\n",
"         \"y\": forecast_comparison['Forecast MA Elec'] - forecast_comparison['Actual MA Elec'],\n",
"         \"marker\": {\"color\": \"pink\", \"size\": 12},\n",
"         \"mode\": \"markers\",\n",
"         \"name\": \"Delta\",\n",
"         \"type\": \"scatter\",\n",
"         \"yaxis\": \"y2\"\n",
"}\n",
"\n",
"data = [actual, forecast, delta]\n",
"\n",
"layout = go.Layout(\n",
"    title='UK Month Ahead Baseload Elec Price: Forecast v. Actual',\n",
"    titlefont=dict(\n",
"        size=14,\n",
"        color='#7f7f7f'\n",
"    ),\n",
"    yaxis=dict(\n",
"        title='£/MWh'\n",
"    ),\n",
"    yaxis2=dict(\n",
"        title='delta £/MWh',\n",
"        titlefont=dict(\n",
"            color='rgb(148, 103, 189)'\n",
"        ),\n",
"        tickfont=dict(\n",
"            color='rgb(148, 103, 189)'\n",
"        ),\n",
"        overlaying='y',\n",
"        side='right',\n",
"        zeroline=False\n",
"    )\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"py.iplot(fig, filename='CE_UK_Elec_MA')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }