{
  "cells": [
    {
      "metadata": {
        "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
        "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n\nimport os\n#print(os.listdir(\"../input/Data/Stocks\"))\n\n# Any results you write to the current directory are saved as output.",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "_uuid": "f8325e01c1bdd8621b4c3674ad04daaceb9c64e8",
        "collapsed": true
      },
      "cell_type": "code",
      "source": "DATA_PATH = '../input/Data/Stocks'",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "_uuid": "de8db9021cb0526b3a1b069e34db4ba40dd19e20",
        "collapsed": true
      },
      "cell_type": "code",
      "source": "class TradeEnv():    \n    def reset(self):\n        self.data = self.gen_universe()\n        self.pos = 0\n        self.game_length = self.data.shape[0]\n        self.returns = []\n        \n        # return first state\n        return self.data[0,:-1,:]\n    \n    def step(self,allocation):\n        ret = np.sum(allocation * self.data[self.pos,-1,:])\n        self.returns.append(ret)\n        mean = 0\n        std = 1\n        if len(self.returns) >= 20:\n            mean = np.mean(self.returns[-20:])\n            std = np.std(self.returns[-20:]) + 0.0001\n        sharpe = mean / std\n        \n        if (self.pos +1) >= self.game_length:\n            return None, sharpe, True, {}  \n        else:\n            self.pos +=1\n            return self.data[self.pos,:-1,:], sharpe, False, {}\n        \n    def gen_universe(self):\n        stocks = os.listdir(DATA_PATH)\n        stocks = np.random.permutation(stocks)\n        frames = []\n        idx = 0\n        while len(frames) < 100:\n            try:\n                stock = stocks[idx]\n                frame = pd.read_csv(os.path.join(DATA_PATH,stock),index_col='Date')\n                frame = frame.loc['2005-01-01':].Close\n                frames.append(frame)\n            except: # catch *all* exceptions\n                e = sys.exc_info()[0]\n            idx += 1\n\n        df = pd.concat(frames,axis=1,ignore_index=False)\n        df = df.pct_change()\n        df = df.fillna(0)\n        batch = df.values\n        episodes = []\n        for i in range(batch.shape[0] - 101):\n            eps = batch[i:i+101]\n            episodes.append(eps)\n        data = np.stack(episodes)\n        assert len(data.shape) == 3\n        assert data.shape[-1] == 100\n        return data",
      "execution_count": 9,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true,
        "_uuid": "d91c72d82ff718ce383e90d559914dd6ebd970c7"
      },
      "cell_type": "code",
      "source": "class RandomTrader():\n    def get_action(self):\n        action = np.random.rand(100) * 2 - 1\n        action = action * (np.abs(action) / np.sum(np.abs(action)))\n        return action",
      "execution_count": 10,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "_uuid": "ad47a700332256944cb25d5c8d61e644d539f1a5",
        "collapsed": true
      },
      "cell_type": "code",
      "source": "import sys\n#import gym\nimport numpy as np\nfrom scipy.stats import norm\nfrom keras.layers import Dense, Input, Lambda, LSTM\nfrom keras.models import Model\nfrom keras.optimizers import Adam\nfrom keras import backend as K\nfrom collections import deque\nimport random\n\nEPISODES = 3000\n\n\n# A2C(Advantage Actor-Critic) agent for the Cartpole\nclass A2CAgent:\n    def __init__(self, state_size, state_seq_length, action_size):\n        # if you want to see Cartpole learning, then change to True\n        self.render = False\n        self.state_size = state_size\n        self.state_seq_length = state_seq_length\n        self.action_size = action_size\n        self.value_size = 1\n        \n        self.exp_replay = deque(maxlen=2000)\n\n        # get gym environment name\n        # these are hyper parameters for the A3C\n        self.actor_lr = 0.0001\n        self.critic_lr = 0.001\n        self.discount_factor = .9\n\n        # create model for actor and critic network\n        self.actor, self.critic = self.build_model()\n\n        # method for training actor and critic network\n        #self.optimizer = [self.actor_optimizer(), self.critic_optimizer()]\n        \n        self.optimize_actor = self.actor_optimizer() #5\n        self.optimize_critic = self.critic_optimizer() \n\n\n    def build_model(self):\n        state = Input(batch_shape=(None, self.state_seq_length, self.state_size))\n        \n        x = LSTM(120,return_sequences=True)(state)\n        x = LSTM(100)(x)\n        \n        actor_input = Dense(100, activation='relu', kernel_initializer='he_uniform')(x)\n        # actor_hidden = Dense(self.hidden2, activation='relu')(actor_input)\n        mu = Dense(self.action_size, activation='tanh', kernel_initializer='he_uniform')(actor_input)\n        sigma_0 = Dense(self.action_size, activation='softplus', kernel_initializer='he_uniform')(actor_input)\n        sigma = Lambda(lambda x: x + 0.0001)(sigma_0)\n\n        critic_input = Dense(30, activation='relu', kernel_initializer='he_uniform')(x)\n        # value_hidden = Dense(self.hidden2, activation='relu')(critic_input)\n        state_value = Dense(1, activation='linear', kernel_initializer='he_uniform')(critic_input)\n\n        actor = Model(inputs=state, outputs=(mu, sigma))\n        critic = Model(inputs=state, outputs=state_value)\n\n        actor._make_predict_function()\n        critic._make_predict_function()\n\n        actor.summary()\n        critic.summary()\n\n        return actor, critic\n\n    def actor_optimizer(self):\n        action = K.placeholder(shape=(None, 1))\n        advantages = K.placeholder(shape=(None, 1))\n\n        # mu = K.placeholder(shape=(None, self.action_size))\n        # sigma_sq = K.placeholder(shape=(None, self.action_size))\n\n        mu, sigma_sq = self.actor.output\n\n        pdf = 1. / K.sqrt(2. * np.pi * sigma_sq) * K.exp(-K.square(action - mu) / (2. * sigma_sq))\n        log_pdf = K.log(pdf + K.epsilon())\n        entropy = K.sum(0.5 * (K.log(2. * np.pi * sigma_sq) + 1.))\n\n        exp_v = log_pdf * advantages\n\n        exp_v = K.sum(exp_v + 0.01 * entropy)\n        actor_loss = -exp_v\n\n        optimizer = Adam(lr=self.actor_lr)\n        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)\n\n        train = K.function([self.actor.input, action, advantages], [], updates=updates)\n        return train\n\n    # make loss function for Value approximation\n    def critic_optimizer(self):\n        discounted_reward = K.placeholder(shape=(None, 1))\n\n        value = self.critic.output\n\n        loss = K.mean(K.square(discounted_reward - value))\n\n        optimizer = Adam(lr=self.critic_lr)\n        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)\n        train = K.function([self.critic.input, discounted_reward], [], updates=updates)\n        return train\n\n    # using the output of policy network, pick action stochastically\n    def get_action(self, state):\n        mu, sigma_sq = self.actor.predict(np.reshape(state, [1, self.state_seq_length,self.state_size]))\n        # sigma_sq = np.log(np.exp(sigma_sq + 1))\n        epsilon = np.random.randn(self.action_size)\n        # action = norm.rvs(loc=mu, scale=sigma_sq,size=1)\n        action = mu + np.sqrt(sigma_sq) * epsilon\n        action = np.clip(action, -2, 2)\n        return action\n\n    # update policy network every episode\n    def train_model(self, state, action, reward, next_state, done):\n        self.exp_replay.append((state, action, reward, next_state, done))\n        \n        (state, action, reward, next_state, done) = random.sample(self.exp_replay,1)[0]\n      \n        target = np.zeros((1, self.value_size))\n        advantages = np.zeros((1, self.action_size))\n\n        value = self.critic.predict(state)[0]\n        next_value = self.critic.predict(next_state)[0]\n\n        if done:\n            advantages[0] = reward - value\n            target[0][0] = reward\n        else:\n            advantages[0] = reward + self.discount_factor * (next_value) - value\n            target[0][0] = reward + self.discount_factor * next_value\n\n        self.optimize_actor([state, action, advantages])\n        self.optimize_critic([state, target])",
      "execution_count": 11,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "_uuid": "64e1f7c58461094e08bdebaab5cc4056c20cbf92",
        "collapsed": true
      },
      "cell_type": "code",
      "source": "state_size = 100\nstate_seq_length = 100\naction_size = 100",
      "execution_count": 12,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true,
        "_uuid": "dd9f5a8d682bf91161b7b8967c885219bc0b98a4"
      },
      "cell_type": "code",
      "source": "import time",
      "execution_count": 13,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "_uuid": "2544132fb716e5567889e80bfbd34be798e89224",
        "scrolled": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "def run_experiment():\n    start = time.time()\n    env = TradeEnv()\n    agent = A2CAgent(state_size, state_seq_length, action_size)\n    epochs = 10\n    reward_hist = []\n\n    print('Setup: {:.4f}'.format(time.time() - start))\n\n    for e in range(epochs):\n\n        start = time.time()\n        state = env.reset()\n        state = np.reshape(state, [1,state_seq_length, state_size])\n        done = False\n        total_reward = 0\n        print('Game Start: {:.4f}'.format(time.time() - start))\n\n        while not done:\n\n            start = time.time()\n            action = agent.get_action(state)\n            print('Get Action: {:.4f}'.format(time.time() - start))\n\n            start = time.time()\n            next_state, reward, done, info = env.step(action)\n            print('Step: {:.4f}'.format(time.time() - start))\n\n            start = time.time()\n            next_state = np.reshape(next_state, [1,state_seq_length, state_size])\n            agent.train_model(state, action, reward, next_state, done)\n            print('Train: {:.4f}'.format(time.time() - start))\n\n            total_reward += reward\n            state = next_state\n\n        print(total_reward)\n        reward_hist.append(total_reward)\n    return reward_hist",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true,
        "_uuid": "d41408802fcb5b48eae55409638e1f1bbdd0b19e"
      },
      "cell_type": "code",
      "source": "# Running training takes very long\n\n#import matplotlib.pyplot as plt\n#reward_hist = run_experiment()\n#plt.plot(reward_hist)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "_uuid": "1b803c08f8165217248959fff7b4bea51adb524f",
        "collapsed": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true,
        "_uuid": "8bc07bf631b369dc9539e58530c4a04aecb24ecd"
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.5",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 1
}