{ "cells": [ { "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "trusted": true, "collapsed": true }, "cell_type": "code", "source": "# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n\nimport os\n#print(os.listdir(\"../input/Data/Stocks\"))\n\n# Any results you write to the current directory are saved as output.", "execution_count": 1, "outputs": [] }, { "metadata": { "trusted": true, "_uuid": "f8325e01c1bdd8621b4c3674ad04daaceb9c64e8", "collapsed": true }, "cell_type": "code", "source": "DATA_PATH = '../input/Data/Stocks'", "execution_count": 2, "outputs": [] }, { "metadata": { "trusted": true, "_uuid": "de8db9021cb0526b3a1b069e34db4ba40dd19e20", "collapsed": true }, "cell_type": "code", "source": "class TradeEnv(): \n def reset(self):\n self.data = self.gen_universe()\n self.pos = 0\n self.game_length = self.data.shape[0]\n self.returns = []\n \n # return first state\n return self.data[0,:-1,:]\n \n def step(self,allocation):\n ret = np.sum(allocation * self.data[self.pos,-1,:])\n self.returns.append(ret)\n mean = 0\n std = 1\n if len(self.returns) >= 20:\n mean = np.mean(self.returns[-20:])\n std = np.std(self.returns[-20:]) + 0.0001\n sharpe = mean / std\n \n if (self.pos +1) >= self.game_length:\n return None, sharpe, True, {} \n else:\n self.pos +=1\n return self.data[self.pos,:-1,:], sharpe, False, {}\n \n def gen_universe(self):\n stocks = os.listdir(DATA_PATH)\n stocks = np.random.permutation(stocks)\n frames = []\n idx = 0\n while len(frames) < 100:\n try:\n stock = stocks[idx]\n frame = pd.read_csv(os.path.join(DATA_PATH,stock),index_col='Date')\n frame = frame.loc['2005-01-01':].Close\n frames.append(frame)\n except: # catch *all* exceptions\n e = sys.exc_info()[0]\n idx += 1\n\n df = pd.concat(frames,axis=1,ignore_index=False)\n df = df.pct_change()\n df = df.fillna(0)\n batch = df.values\n episodes = []\n for i in range(batch.shape[0] - 101):\n eps = batch[i:i+101]\n episodes.append(eps)\n data = np.stack(episodes)\n assert len(data.shape) == 3\n assert data.shape[-1] == 100\n return data", "execution_count": 9, "outputs": [] }, { "metadata": { "trusted": true, "collapsed": true, "_uuid": "d91c72d82ff718ce383e90d559914dd6ebd970c7" }, "cell_type": "code", "source": "class RandomTrader():\n def get_action(self):\n action = np.random.rand(100) * 2 - 1\n action = action * (np.abs(action) / np.sum(np.abs(action)))\n return action", "execution_count": 10, "outputs": [] }, { "metadata": { "trusted": true, "_uuid": "ad47a700332256944cb25d5c8d61e644d539f1a5", "collapsed": true }, "cell_type": "code", "source": "import sys\n#import gym\nimport numpy as np\nfrom scipy.stats import norm\nfrom keras.layers import Dense, Input, Lambda, LSTM\nfrom keras.models import Model\nfrom keras.optimizers import Adam\nfrom keras import backend as K\nfrom collections import deque\nimport random\n\nEPISODES = 3000\n\n\n# A2C(Advantage Actor-Critic) agent for the Cartpole\nclass A2CAgent:\n def __init__(self, state_size, state_seq_length, action_size):\n # if you 
{ "metadata": { "trusted": true, "_uuid": "ad47a700332256944cb25d5c8d61e644d539f1a5", "collapsed": true }, "cell_type": "code", "source": "import sys\n#import gym\nimport numpy as np\nfrom scipy.stats import norm\nfrom keras.layers import Dense, Input, Lambda, LSTM\nfrom keras.models import Model\nfrom keras.optimizers import Adam\nfrom keras import backend as K\nfrom collections import deque\nimport random\n\nEPISODES = 3000\n\n\n# A2C (Advantage Actor-Critic) agent with a continuous (Gaussian) policy,\n# adapted from a classic gym control example to the trading environment\nclass A2CAgent:\n    def __init__(self, state_size, state_seq_length, action_size):\n        # rendering flag kept from the original gym example; the trading env has nothing to render\n        self.render = False\n        self.state_size = state_size\n        self.state_seq_length = state_seq_length\n        self.action_size = action_size\n        self.value_size = 1\n\n        self.exp_replay = deque(maxlen=2000)\n\n        # these are the hyperparameters of the agent\n        self.actor_lr = 0.0001\n        self.critic_lr = 0.001\n        self.discount_factor = .9\n\n        # create model for actor and critic network\n        self.actor, self.critic = self.build_model()\n\n        # methods for training the actor and critic networks\n        self.optimize_actor = self.actor_optimizer()\n        self.optimize_critic = self.critic_optimizer()\n\n    def build_model(self):\n        state = Input(batch_shape=(None, self.state_seq_length, self.state_size))\n\n        x = LSTM(120, return_sequences=True)(state)\n        x = LSTM(100)(x)\n\n        actor_input = Dense(100, activation='relu', kernel_initializer='he_uniform')(x)\n        # the actor outputs the mean and variance of a Gaussian policy over allocations\n        mu = Dense(self.action_size, activation='tanh', kernel_initializer='he_uniform')(actor_input)\n        sigma_0 = Dense(self.action_size, activation='softplus', kernel_initializer='he_uniform')(actor_input)\n        sigma = Lambda(lambda x: x + 0.0001)(sigma_0)\n\n        critic_input = Dense(30, activation='relu', kernel_initializer='he_uniform')(x)\n        state_value = Dense(1, activation='linear', kernel_initializer='he_uniform')(critic_input)\n\n        actor = Model(inputs=state, outputs=(mu, sigma))\n        critic = Model(inputs=state, outputs=state_value)\n\n        actor._make_predict_function()\n        critic._make_predict_function()\n\n        actor.summary()\n        critic.summary()\n\n        return actor, critic\n\n    def actor_optimizer(self):\n        action = K.placeholder(shape=(None, self.action_size))\n        advantages = K.placeholder(shape=(None, 1))\n\n        mu, sigma_sq = self.actor.output\n\n        # Gaussian log-likelihood of the taken action plus an entropy bonus\n        pdf = 1. / K.sqrt(2. * np.pi * sigma_sq) * K.exp(-K.square(action - mu) / (2. * sigma_sq))\n        log_pdf = K.log(pdf + K.epsilon())\n        entropy = K.sum(0.5 * (K.log(2. * np.pi * sigma_sq) + 1.))\n\n        exp_v = log_pdf * advantages\n        exp_v = K.sum(exp_v + 0.01 * entropy)\n        actor_loss = -exp_v\n\n        optimizer = Adam(lr=self.actor_lr)\n        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)\n\n        train = K.function([self.actor.input, action, advantages], [], updates=updates)\n        return train\n\n    # make loss function for the value approximation\n    def critic_optimizer(self):\n        discounted_reward = K.placeholder(shape=(None, 1))\n\n        value = self.critic.output\n\n        loss = K.mean(K.square(discounted_reward - value))\n\n        optimizer = Adam(lr=self.critic_lr)\n        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)\n        train = K.function([self.critic.input, discounted_reward], [], updates=updates)\n        return train\n\n    # using the output of the policy network, pick an action stochastically\n    def get_action(self, state):\n        mu, sigma_sq = self.actor.predict(np.reshape(state, [1, self.state_seq_length, self.state_size]))\n        epsilon = np.random.randn(self.action_size)\n        action = mu + np.sqrt(sigma_sq) * epsilon\n        action = np.clip(action, -2, 2)\n        return action\n\n    # update actor and critic from one experience sampled from the replay buffer\n    def train_model(self, state, action, reward, next_state, done):\n        self.exp_replay.append((state, action, reward, next_state, done))\n\n        (state, action, reward, next_state, done) = random.sample(self.exp_replay, 1)[0]\n\n        target = np.zeros((1, self.value_size))\n        # the advantage is a scalar per step; it broadcasts over the action dimensions\n        advantages = np.zeros((1, 1))\n\n        value = self.critic.predict(state)[0]\n        next_value = self.critic.predict(next_state)[0]\n\n        if done:\n            advantages[0] = reward - value\n            target[0][0] = reward\n        else:\n            advantages[0] = reward + self.discount_factor * next_value - value\n            target[0][0] = reward + self.discount_factor * next_value\n\n        self.optimize_actor([state, action, advantages])\n        self.optimize_critic([state, target])", "execution_count": 11, "outputs": [] },
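{ "metadata": { "trusted": true, "collapsed": true }, "cell_type": "code", "source": "# Added for clarity (not part of the original training flow): the actor loss above\n# uses the Gaussian log-density and entropy in closed form. This small check\n# reproduces both quantities with NumPy and compares them against scipy.stats.norm\n# on toy values, so the Keras expressions are easier to verify. All numbers here\n# are illustrative, not taken from the notebook's data.\n\nfrom scipy.stats import norm\n\nmu_demo = 0.1\nsigma_sq_demo = 0.5\naction_demo = 0.3\n\nlog_pdf_manual = np.log(1. / np.sqrt(2. * np.pi * sigma_sq_demo)\n                        * np.exp(-(action_demo - mu_demo) ** 2 / (2. * sigma_sq_demo)))\nlog_pdf_scipy = norm.logpdf(action_demo, loc=mu_demo, scale=np.sqrt(sigma_sq_demo))\n\nentropy_manual = 0.5 * (np.log(2. * np.pi * sigma_sq_demo) + 1.)\nentropy_scipy = norm.entropy(loc=mu_demo, scale=np.sqrt(sigma_sq_demo))\n\nprint('log pdf : manual {:.6f} vs scipy {:.6f}'.format(log_pdf_manual, log_pdf_scipy))\nprint('entropy : manual {:.6f} vs scipy {:.6f}'.format(entropy_manual, entropy_scipy))", "execution_count": null, "outputs": [] },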
{ "metadata": { "trusted": true, "_uuid": "64e1f7c58461094e08bdebaab5cc4056c20cbf92", "collapsed": true }, "cell_type": "code", "source": "state_size = 100\nstate_seq_length = 100\naction_size = 100", "execution_count": 12, "outputs": [] },
{ "metadata": { "trusted": true, "collapsed": true, "_uuid": "dd9f5a8d682bf91161b7b8967c885219bc0b98a4" }, "cell_type": "code", "source": "import time", "execution_count": 13, "outputs": [] },
{ "metadata": { "trusted": true, "_uuid": "2544132fb716e5567889e80bfbd34be798e89224", "scrolled": true, "collapsed": true }, "cell_type": "code", "source": "def run_experiment():\n    start = time.time()\n    env = TradeEnv()\n    agent = A2CAgent(state_size, state_seq_length, action_size)\n    epochs = 10\n    reward_hist = []\n\n    print('Setup: {:.4f}'.format(time.time() - start))\n\n    for e in range(epochs):\n\n        start = time.time()\n        state = env.reset()\n        state = np.reshape(state, [1, state_seq_length, state_size])\n        done = False\n        total_reward = 0\n        print('Game Start: {:.4f}'.format(time.time() - start))\n\n        while not done:\n\n            start = time.time()\n            action = agent.get_action(state)\n            print('Get Action: {:.4f}'.format(time.time() - start))\n\n            start = time.time()\n            next_state, reward, done, info = env.step(action)\n            print('Step: {:.4f}'.format(time.time() - start))\n\n            start = time.time()\n            if done:\n                # the environment returns no state on the final step; reuse the\n                # current state, which the agent does not use when done is True\n                next_state = state\n            else:\n                next_state = np.reshape(next_state, [1, state_seq_length, state_size])\n            agent.train_model(state, action, reward, next_state, done)\n            print('Train: {:.4f}'.format(time.time() - start))\n\n            total_reward += reward\n            state = next_state\n\n        print(total_reward)\n        reward_hist.append(total_reward)\n    return reward_hist", "execution_count": 1, "outputs": [] },
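{ "metadata": { "trusted": true, "collapsed": true }, "cell_type": "code", "source": "# Optional shape check (added; not required for training): feed a random dummy state\n# through an untrained agent to confirm the actor and critic output shapes before\n# launching the slow run_experiment loop. Building the Keras models takes a moment,\n# so this is left commented out by default; the names agent_check and dummy_state\n# are purely illustrative.\n\n#agent_check = A2CAgent(state_size, state_seq_length, action_size)\n#dummy_state = np.random.randn(1, state_seq_length, state_size)\n#mu_out, sigma_out = agent_check.actor.predict(dummy_state)\n#value_out = agent_check.critic.predict(dummy_state)\n#print(mu_out.shape, sigma_out.shape, value_out.shape)  # expected: (1, 100) (1, 100) (1, 1)", "execution_count": null, "outputs": [] },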
{ "metadata": { "trusted": true, "collapsed": true, "_uuid": "d41408802fcb5b48eae55409638e1f1bbdd0b19e" }, "cell_type": "code", "source": "# Running the full training takes very long; uncomment the lines below to train\n# and plot the reward history\n\n#import matplotlib.pyplot as plt\n#reward_hist = run_experiment()\n#plt.plot(reward_hist)", "execution_count": null, "outputs": [] },
{ "metadata": { "trusted": true, "_uuid": "1b803c08f8165217248959fff7b4bea51adb524f", "collapsed": true }, "cell_type": "code", "source": "", "execution_count": null, "outputs": [] },
{ "metadata": { "trusted": true, "collapsed": true, "_uuid": "8bc07bf631b369dc9539e58530c4a04aecb24ecd" }, "cell_type": "code", "source": "", "execution_count": null, "outputs": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.6.5", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" } }, "nbformat": 4, "nbformat_minor": 1 }