{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%HTML\n",
    " "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BTC-USD candles: data preparation for an LSTM forecaster\n",
    "\n",
    "Downloads historic rates from the GDAX public API, differences the close price, frames it as a supervised-learning table and carves out time-ordered train/test folds. Helpers for scaling, fitting and forecasting with a stateful LSTM are also defined here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gdax\n",
    "import pandas as pd\n",
    "from time import sleep\n",
    "import plotly.plotly as py\n",
    "from plotly.offline import init_notebook_mode, plot, iplot\n",
    "import plotly.graph_objs as go\n",
    "from datetime import datetime\n",
    "init_notebook_mode(connected=False)\n",
    "import cufflinks\n",
    "from keras.layers.core import Dense, Activation, Dropout, Flatten\n",
    "from keras.layers.recurrent import LSTM\n",
    "from keras.models import Sequential\n",
    "import time\n",
    "from keras_tqdm import TQDMNotebookCallback\n",
    "import tqdm\n",
    "\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.model_selection import TimeSeriesSplit\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_loads(symbol, start=None, end=None, granularity=86400):\n",
    "    \"\"\"Yield historic-rate rows for `symbol` from the GDAX public API, paging backwards.\n",
    "\n",
    "    The first request covers [start, end]; each later request ends at the\n",
    "    oldest timestamp in the previous response and spans\n",
    "    `granularity * len(response)` seconds before it. Iteration stops when the\n",
    "    API returns nothing, or nothing that has not already been yielded.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    symbol : str\n",
    "        Product id forwarded to the API, e.g. 'BTC-USD'.\n",
    "    start, end : pandas.Timestamp, optional\n",
    "        Bounds of the first request. `end` defaults to now; `start` defaults\n",
    "        to one `granularity` before `end`.\n",
    "    granularity : int\n",
    "        Seconds per row requested from the API (default 86400).\n",
    "\n",
    "    Yields\n",
    "    ------\n",
    "    list\n",
    "        One response row; element 0 is read as a Unix timestamp in seconds.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the API response is truthy but not a list; the response itself is\n",
    "        the exception argument.\n",
    "    \"\"\"\n",
    "    public_client = gdax.PublicClient()\n",
    "    if end is None:\n",
    "        end = pd.to_datetime('now')\n",
    "    if start is None:\n",
    "        start = end - pd.Timedelta(seconds=granularity)\n",
    "\n",
    "    seen = set()  # timestamps already yielded, so overlapping pages cannot repeat a row\n",
    "    while True:\n",
    "        response = public_client.get_product_historic_rates(\n",
    "            product_id=symbol,\n",
    "            granularity=granularity,\n",
    "            start=start.isoformat(),\n",
    "            end=end.isoformat()\n",
    "        )\n",
    "\n",
    "        if not response:\n",
    "            # A plain return ends the generator; raising StopIteration inside a\n",
    "            # generator body is turned into RuntimeError by PEP 479.\n",
    "            return\n",
    "        if not isinstance(response, list):\n",
    "            raise ValueError(response)\n",
    "\n",
    "        progressed = False\n",
    "        for r in response:\n",
    "            if r[0] in seen:\n",
    "                continue\n",
    "            seen.add(r[0])\n",
    "            progressed = True\n",
    "            yield r\n",
    "        if not progressed:\n",
    "            # The page held only rows we already have: the window has stopped\n",
    "            # moving back, so another request would repeat this one forever.\n",
    "            return\n",
    "\n",
    "        sleep(3)  # pause between requests; presumably for API rate limits - TODO confirm\n",
    "        # Use the oldest timestamp rather than the last row so the paging does\n",
    "        # not depend on the order in which the API returns rows.\n",
    "        end = pd.to_datetime(min(r[0] for r in response), unit='s')\n",
    "        start = end - pd.Timedelta(seconds=granularity * len(response))\n",
    "        print(f\"{start}-{end}\")\n",
    "\n",
    "\n",
    "def ts_df_to_supervised(df, lag=1):\n",
    "    \"\"\"Place `lag` shifted copies of `df` beside `df` itself.\n",
    "\n",
    "    Columns are ordered shift(1), ..., shift(lag), then the unshifted data.\n",
    "    Cells left empty by the shifts (and any other missing values) become 0.\n",
    "    \"\"\"\n",
    "    columns = [df.shift(i) for i in range(1, lag + 1)]\n",
    "    columns.append(df)\n",
    "    return pd.concat(columns, axis=1).fillna(0)\n",
    "\n",
    "\n",
    "def difference(X, lag=1):\n",
    "    \"\"\"Return `X.diff(lag)` with the resulting missing rows dropped.\"\"\"\n",
    "    return X.diff(lag).dropna()\n",
    "\n",
    "\n",
    "def inverse_difference(history, yhat, interval=1):\n",
    "    \"\"\"Undo `difference` for one value: add back the entry `interval` from the end of `history`.\"\"\"\n",
    "    return yhat + history[-interval]\n",
    "\n",
    "\n",
    "def twodim_to_threedim(X):\n",
    "    \"\"\"Reshape the values of a 2-D frame `X` from (rows, cols) to (rows, 1, cols).\"\"\"\n",
    "    return X.values.reshape(X.shape[0], 1, X.shape[1])\n",
    "\n",
    "\n",
    "def fit_lstm(train, batch_size, nb_epoch, neurons):\n",
    "    \"\"\"Build and train a stateful single-layer LSTM followed by Dense(1).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train : 2-D numpy array\n",
    "        The last column is the target; all preceding columns are inputs.\n",
    "    batch_size : int\n",
    "        Used both for the fixed `batch_input_shape` and for fitting.\n",
    "    nb_epoch : int\n",
    "        Number of single-epoch `fit` calls.\n",
    "    neurons : int\n",
    "        Number of LSTM units.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The compiled, trained `Sequential` model (MSE loss, adam optimizer).\n",
    "    \"\"\"\n",
    "    X, y = train[:, 0:-1], train[:, -1]\n",
    "    X = X.reshape(X.shape[0], 1, X.shape[1])  # (samples, 1 timestep, features)\n",
    "    model = Sequential()\n",
    "    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))\n",
    "    model.add(Dense(1))\n",
    "    model.compile(loss='mean_squared_error', optimizer='adam')\n",
    "    # One epoch per fit call, unshuffled, so the LSTM state can be reset\n",
    "    # explicitly after every pass over the training data.\n",
    "    for i in tqdm.tqdm_notebook(range(nb_epoch)):\n",
    "        model.fit(X, y, epochs=1, batch_size=batch_size,\n",
    "                  verbose=0, shuffle=False)\n",
    "        model.reset_states()\n",
    "    return model\n",
    "\n",
    "\n",
    "def forecast(model, batch_size, row):\n",
    "    \"\"\"Predict one value from `row`: every element but the last is used as input.\n",
    "\n",
    "    Returns element [0, 0] of `model.predict`.\n",
    "    \"\"\"\n",
    "    X = row[0:-1]\n",
    "    X = X.reshape(1, 1, len(X))\n",
    "    yhat = model.predict(X, batch_size=batch_size)\n",
    "    return yhat[0, 0]\n",
    "\n",
    "\n",
    "def scale(train, test):\n",
    "    \"\"\"Scale `train` and `test` to [-1, 1] with a MinMaxScaler fitted on `train` only.\n",
    "\n",
    "    Both arguments are 2-D frames (their `.values` and two shape entries are\n",
    "    read). Returns `(scaler, train_scaled, test_scaled)`.\n",
    "    \"\"\"\n",
    "    # fit the scaler on the training set only; the test set is transformed\n",
    "    # with the same fitted ranges\n",
    "    scaler = MinMaxScaler(feature_range=(-1, 1))\n",
    "    scaler = scaler.fit(train.values)\n",
    "    # transform train\n",
    "    train = train.values.reshape(train.shape[0], train.shape[1])\n",
    "    train_scaled = scaler.transform(train)\n",
    "    # transform test\n",
    "    test = test.values.reshape(test.shape[0], test.shape[1])\n",
    "    test_scaled = scaler.transform(test)\n",
    "    return scaler, train_scaled, test_scaled"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "This cell calls the network and sleeps 3 seconds between requests."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(\n",
    "    get_loads('BTC-USD'),\n",
    "    columns=['time', 'low', 'high', 'open', 'close', 'volume']\n",
    ")\n",
    "df['time'] = pd.to_datetime(df['time'], unit='s')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Stationarise, frame as supervised, split\n",
    "\n",
    "The folds are computed on the supervised table itself, so every index refers to a row of the time-sorted, differenced data that is actually sliced below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_X = df.set_index('time')['close'].sort_index()  # Sort Closes\n",
    "dX = difference(raw_X)                              # Stationarise\n",
    "sup_dX = ts_df_to_supervised(dX)                    # Supervise\n",
    "\n",
    "tscv = TimeSeriesSplit(max_train_size=sup_dX.shape[0] // 2, n_splits=5)\n",
    "tt_sets = []\n",
    "for train_index, test_index in tscv.split(sup_dX):\n",
    "    tt_sets.append((train_index, test_index))\n",
    "    print(f\"TRAIN:, {min(train_index)} ++ {len(train_index)}, TEST: {min(test_index)} ++ {len(test_index)}\")\n",
    "\n",
    "sup_dX_train = sup_dX.iloc[tt_sets[0][0]]  # Training Set\n",
    "sup_dX_test = sup_dX.iloc[tt_sets[0][1]]   # Test Set"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:bash]",
   "language": "python",
   "name": "conda-env-bash-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}