{
  "cells": [
    {
      "metadata": {
        "_uuid": "eb62cdc7d86d0a84821bf36ea7673effd6e9f77d"
      },
      "cell_type": "markdown",
      "source": "# Neural nets for time series\nNote that all networks only train one epoch to save compute time. Increase the number of epochs for more accurate results."
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
        "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np\n%matplotlib inline\n",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "9192a75f-766e-4b82-b978-8710f91a0876",
        "_uuid": "8f4fe85fe679d66bafc72b34ad57321a49b61587",
        "trusted": true
      },
      "cell_type": "code",
      "source": "train = pd.read_csv('../input/train_1.csv').fillna(0)\ntrain.head()",
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "                                                Page  2015-07-01  2015-07-02  \\\n0            2NE1_zh.wikipedia.org_all-access_spider        18.0        11.0   \n1             2PM_zh.wikipedia.org_all-access_spider        11.0        14.0   \n2              3C_zh.wikipedia.org_all-access_spider         1.0         0.0   \n3         4minute_zh.wikipedia.org_all-access_spider        35.0        13.0   \n4  52_Hz_I_Love_You_zh.wikipedia.org_all-access_s...         0.0         0.0   \n\n   2015-07-03  2015-07-04  2015-07-05  2015-07-06  2015-07-07  2015-07-08  \\\n0         5.0        13.0        14.0         9.0         9.0        22.0   \n1        15.0        18.0        11.0        13.0        22.0        11.0   \n2         1.0         1.0         0.0         4.0         0.0         3.0   \n3        10.0        94.0         4.0        26.0        14.0         9.0   \n4         0.0         0.0         0.0         0.0         0.0         0.0   \n\n   2015-07-09     ...      2016-12-22  2016-12-23  2016-12-24  2016-12-25  \\\n0        26.0     ...            32.0        63.0        15.0        26.0   \n1        10.0     ...            17.0        42.0        28.0        15.0   \n2         4.0     ...             3.0         1.0         1.0         7.0   \n3        11.0     ...            32.0        10.0        26.0        27.0   \n4         0.0     ...            48.0         9.0        25.0        13.0   \n\n   2016-12-26  2016-12-27  2016-12-28  2016-12-29  2016-12-30  2016-12-31  \n0        14.0        20.0        22.0        19.0        18.0        20.0  \n1         9.0        30.0        52.0        45.0        26.0        20.0  \n2         4.0         4.0         6.0         3.0         4.0        17.0  \n3        16.0        11.0        17.0        19.0        10.0        11.0  \n4         3.0        11.0        27.0        13.0        36.0        10.0  \n\n[5 rows x 551 columns]",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Page</th>\n      <th>2015-07-01</th>\n      <th>2015-07-02</th>\n      <th>2015-07-03</th>\n      <th>2015-07-04</th>\n      <th>2015-07-05</th>\n      <th>2015-07-06</th>\n      <th>2015-07-07</th>\n      <th>2015-07-08</th>\n      <th>2015-07-09</th>\n      <th>...</th>\n      <th>2016-12-22</th>\n      <th>2016-12-23</th>\n      <th>2016-12-24</th>\n      <th>2016-12-25</th>\n      <th>2016-12-26</th>\n      <th>2016-12-27</th>\n      <th>2016-12-28</th>\n      <th>2016-12-29</th>\n      <th>2016-12-30</th>\n      <th>2016-12-31</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2NE1_zh.wikipedia.org_all-access_spider</td>\n      <td>18.0</td>\n      <td>11.0</td>\n      <td>5.0</td>\n      <td>13.0</td>\n      <td>14.0</td>\n      <td>9.0</td>\n      <td>9.0</td>\n      <td>22.0</td>\n      <td>26.0</td>\n      <td>...</td>\n      <td>32.0</td>\n      <td>63.0</td>\n      <td>15.0</td>\n      <td>26.0</td>\n      <td>14.0</td>\n      <td>20.0</td>\n      <td>22.0</td>\n      <td>19.0</td>\n      <td>18.0</td>\n      <td>20.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2PM_zh.wikipedia.org_all-access_spider</td>\n      <td>11.0</td>\n      <td>14.0</td>\n      <td>15.0</td>\n      <td>18.0</td>\n      <td>11.0</td>\n      <td>13.0</td>\n      <td>22.0</td>\n      <td>11.0</td>\n      <td>10.0</td>\n      <td>...</td>\n      <td>17.0</td>\n      <td>42.0</td>\n      <td>28.0</td>\n      <td>15.0</td>\n      <td>9.0</td>\n      <td>30.0</td>\n      <td>52.0</td>\n      <td>45.0</td>\n      <td>26.0</td>\n      <td>20.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3C_zh.wikipedia.org_all-access_spider</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>4.0</td>\n      <td>0.0</td>\n      <td>3.0</td>\n      <td>4.0</td>\n      <td>...</td>\n      <td>3.0</td>\n      <td>1.0</td>\n      <td>1.0</td>\n      <td>7.0</td>\n      <td>4.0</td>\n      <td>4.0</td>\n      <td>6.0</td>\n      <td>3.0</td>\n      <td>4.0</td>\n      <td>17.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4minute_zh.wikipedia.org_all-access_spider</td>\n      <td>35.0</td>\n      <td>13.0</td>\n      <td>10.0</td>\n      <td>94.0</td>\n      <td>4.0</td>\n      <td>26.0</td>\n      <td>14.0</td>\n      <td>9.0</td>\n      <td>11.0</td>\n      <td>...</td>\n      <td>32.0</td>\n      <td>10.0</td>\n      <td>26.0</td>\n      <td>27.0</td>\n      <td>16.0</td>\n      <td>11.0</td>\n      <td>17.0</td>\n      <td>19.0</td>\n      <td>10.0</td>\n      <td>11.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>52_Hz_I_Love_You_zh.wikipedia.org_all-access_s...</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>...</td>\n      <td>48.0</td>\n      <td>9.0</td>\n      <td>25.0</td>\n      <td>13.0</td>\n      <td>3.0</td>\n      <td>11.0</td>\n      <td>27.0</td>\n      <td>13.0</td>\n      <td>36.0</td>\n      <td>10.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 551 columns</p>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "97715368-4aa6-4f37-abec-cdf44f6787af",
        "_uuid": "fd103a1544c1acc2c4ae0df826d732191aa67701",
        "trusted": true
      },
      "cell_type": "code",
      "source": "def parse_page(page):\n    x = page.split('_')\n    return ' '.join(x[:-3]), x[-3], x[-2], x[-1]",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "3ee5cb06-92a8-485c-bf51-1ea9539aac93",
        "_uuid": "6205fd2096816414d337f6b3333c80c7a3651ea6",
        "trusted": true
      },
      "cell_type": "code",
      "source": "l = list(train.Page.apply(parse_page))\ndf = pd.DataFrame(l)\ndel l\ndf.columns = ['Subject','Sub_Page','Access','Agent']\ndf.head()",
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "            Subject          Sub_Page      Access   Agent\n0              2NE1  zh.wikipedia.org  all-access  spider\n1               2PM  zh.wikipedia.org  all-access  spider\n2                3C  zh.wikipedia.org  all-access  spider\n3           4minute  zh.wikipedia.org  all-access  spider\n4  52 Hz I Love You  zh.wikipedia.org  all-access  spider",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Subject</th>\n      <th>Sub_Page</th>\n      <th>Access</th>\n      <th>Agent</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2NE1</td>\n      <td>zh.wikipedia.org</td>\n      <td>all-access</td>\n      <td>spider</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2PM</td>\n      <td>zh.wikipedia.org</td>\n      <td>all-access</td>\n      <td>spider</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3C</td>\n      <td>zh.wikipedia.org</td>\n      <td>all-access</td>\n      <td>spider</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4minute</td>\n      <td>zh.wikipedia.org</td>\n      <td>all-access</td>\n      <td>spider</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>52 Hz I Love You</td>\n      <td>zh.wikipedia.org</td>\n      <td>all-access</td>\n      <td>spider</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "ad310f5c-2373-4863-84fc-8c48fa67e1ac",
        "_uuid": "10688f9591b5552e972daf3fd687282c231486ec",
        "trusted": true
      },
      "cell_type": "code",
      "source": "train = pd.concat([train,df],axis=1)\ndel train['Page']\ndel df",
      "execution_count": 5,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "c05b8623-55ef-4922-b708-45299c852e6e",
        "_uuid": "e9281dafe71f600d1618da4eae97bdc9855a6c9d",
        "trusted": true
      },
      "cell_type": "code",
      "source": "import datetime\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.preprocessing import OneHotEncoder",
      "execution_count": 6,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "7b2c24c5-4639-49a9-9bf6-1be4bc7f68a2",
        "_uuid": "5611c14837704989e1b6d0cdabc3223c5eb1222d",
        "trusted": true
      },
      "cell_type": "code",
      "source": "def lag_arr(arr, lag,fill):\n    filler = np.full((arr.shape[0],lag,1),-1)\n    comb = np.concatenate((filler,arr),axis=1)\n    result = comb[:,:arr.shape[1]]\n    return result",
      "execution_count": 7,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "4cc60126-73cb-4eca-b5a8-7ba2cf4e8d2e",
        "_uuid": "b9fc5e09157453104fe9aa2bc0ed329b6346b85f",
        "trusted": true
      },
      "cell_type": "code",
      "source": "def single_autocorr(series, lag):\n    \"\"\"\n    Autocorrelation for single data series\n    :param series: traffic series\n    :param lag: lag, days\n    :return:\n    \"\"\"\n    s1 = series[lag:]\n    s2 = series[:-lag]\n    ms1 = np.mean(s1)\n    ms2 = np.mean(s2)\n    ds1 = s1 - ms1\n    ds2 = s2 - ms2\n    divider = np.sqrt(np.sum(ds1 * ds1)) * np.sqrt(np.sum(ds2 * ds2))\n    return np.sum(ds1 * ds2) / divider if divider != 0 else 0",
      "execution_count": 8,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "8ebf6580-c475-47e1-ac21-0bdfcbc04cee",
        "_uuid": "0ecfe613c7d95d0795f63bb5696bb98a857e2fab",
        "trusted": true
      },
      "cell_type": "code",
      "source": "def batc_autocorr(data,lag,series_length):\n    corrs = []\n    for i in range(data.shape[0]):\n        c = single_autocorr(data, lag)\n        corrs.append(c)\n    corr = np.array(corrs)\n    corr = corr.reshape(-1,1)\n    corr = np.expand_dims(corr,-1)\n    corr = np.repeat(corr,series_length,axis=1)\n    return corr",
      "execution_count": 9,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "97cd32b5-565b-4265-9a84-b14e4b962c7c",
        "_uuid": "721542f10a9ee51b4c0f800bced787763460fe53",
        "trusted": true
      },
      "cell_type": "code",
      "source": "datetime.datetime.strptime(train.columns.values[0], '%Y-%m-%d').strftime('%a')\nweekdays = [datetime.datetime.strptime(date, '%Y-%m-%d').strftime('%a') \n            for date in train.columns.values[:-4]]\n\nday_one_hot = LabelEncoder().fit_transform(weekdays)\nday_one_hot = day_one_hot.reshape(-1, 1)\nday_one_hot = OneHotEncoder(sparse=False).fit_transform(day_one_hot)\nday_one_hot = np.expand_dims(day_one_hot,0)\n\n",
      "execution_count": 10,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "f3fe8c58-5643-4147-b165-b71d4863c883",
        "_uuid": "ee8bb28ed57d8858881b56dea64eacab80c362c1",
        "trusted": true
      },
      "cell_type": "code",
      "source": "agent_int = LabelEncoder().fit(train['Agent'])\nagent_enc = agent_int.transform(train['Agent'])\nagent_enc = agent_enc.reshape(-1, 1)\nagent_one_hot = OneHotEncoder(sparse=False).fit(agent_enc)\n\ndel agent_enc",
      "execution_count": 11,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "cac63457-7bdc-4602-bd77-04faa36684e3",
        "_uuid": "4e8ba967ad100e242901e5097ed884d7a2796309",
        "trusted": true
      },
      "cell_type": "code",
      "source": "page_int = LabelEncoder().fit(train['Sub_Page'])\npage_enc = page_int.transform(train['Sub_Page'])\npage_enc = page_enc.reshape(-1, 1)\npage_one_hot = OneHotEncoder(sparse=False).fit(page_enc)\n\ndel page_enc",
      "execution_count": 12,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "8d612753-2b4b-45f7-bfac-c30becd5e378",
        "_uuid": "9e31bba31ad67686dbfcc633475fa1e049e0bf5f",
        "trusted": true
      },
      "cell_type": "code",
      "source": "acc_int = LabelEncoder().fit(train['Access'])\nacc_enc = acc_int.transform(train['Access'])\nacc_enc = acc_enc.reshape(-1, 1)\nacc_one_hot = OneHotEncoder(sparse=False).fit(acc_enc)\n\ndel acc_enc",
      "execution_count": 13,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "0e123aec-24fb-4688-b0e8-aaf8d106c458",
        "_uuid": "c4ba6c494beac030d7eac3dadcdb85d7f784b39a",
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "f2b98941-4744-49ad-be61-e171a8e6f09b",
        "_uuid": "c8e7a9580983f56953ee5aa4d79dbe4c44e93597",
        "trusted": true
      },
      "cell_type": "code",
      "source": "def get_batch(train,start=0,lookback = 100):\n    assert((start + lookback) <= (train.shape[1] - 5)) , 'End of lookback would be out of bounds'\n    \n    data = train.iloc[:,start:start + lookback].values\n    target = train.iloc[:,start + lookback].values\n    target = np.log1p(target)\n    \n    log_view = np.log1p(data)\n    log_view = np.expand_dims(log_view,axis=-1)\n    \n    days = day_one_hot[:,start:start + lookback]\n    days = np.repeat(days,repeats=train.shape[0],axis=0)\n    \n    year_lag = lag_arr(log_view,365,-1)\n    halfyear_lag = lag_arr(log_view,182,-1)\n    quarter_lag = lag_arr(log_view,91,-1)\n    \n    agent_enc = agent_int.transform(train['Agent'])\n    agent_enc = agent_enc.reshape(-1, 1)\n    agent_enc = agent_one_hot.transform(agent_enc)\n    agent_enc = np.expand_dims(agent_enc,1)\n    agent_enc = np.repeat(agent_enc,lookback,axis=1)\n    \n    page_enc = page_int.transform(train['Sub_Page'])\n    page_enc = page_enc.reshape(-1, 1)\n    page_enc = page_one_hot.transform(page_enc)\n    page_enc = np.expand_dims(page_enc, 1)\n    page_enc = np.repeat(page_enc,lookback,axis=1)\n    \n    acc_enc = acc_int.transform(train['Access'])\n    acc_enc = acc_enc.reshape(-1, 1)\n    acc_enc = acc_one_hot.transform(acc_enc)\n    acc_enc = np.expand_dims(acc_enc,1)\n    acc_enc = np.repeat(acc_enc,lookback,axis=1)\n    \n    year_autocorr = batc_autocorr(data,lag=365,series_length=lookback)\n    halfyr_autocorr = batc_autocorr(data,lag=182,series_length=lookback)\n    quarter_autocorr = batc_autocorr(data,lag=91,series_length=lookback)\n    \n    medians = np.median(data,axis=1)\n    medians = np.expand_dims(medians,-1)\n    medians = np.expand_dims(medians,-1)\n    medians = np.repeat(medians,lookback,axis=1)\n    \n    \n    '''\n    print(log_view.shape)\n    print(days.shape)\n    print(year_lag.shape)\n    print(halfyear_lag.shape)\n    print(page_enc.shape)\n    print(agent_enc.shape)\n    print(acc_enc.shape)'''\n    \n    batch = np.concatenate((log_view,\n                            days, \n                            year_lag, \n                            halfyear_lag, \n                            quarter_lag,\n                            page_enc,\n                            agent_enc,\n                            acc_enc, \n                            year_autocorr, \n                            halfyr_autocorr,\n                            quarter_autocorr, \n                            medians),axis=2)\n    \n    return batch, target",
      "execution_count": 14,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "903d1b39-4e52-4701-bdfa-3de83eb7fa2c",
        "_uuid": "07a50ee3178189fe8013a4c6e15473f05b42e994",
        "trusted": true
      },
      "cell_type": "code",
      "source": "def generate_batches(train,batch_size = 32, lookback = 100):\n    num_samples = train.shape[0]\n    num_steps = train.shape[1] - 5\n    while True:\n        for i in range(num_samples // batch_size):\n            batch_start = i * batch_size\n            batch_end = batch_start + batch_size\n\n            seq_start = np.random.randint(num_steps - lookback)\n            X,y = get_batch(train.iloc[batch_start:batch_end],start=seq_start)\n            yield X,y",
      "execution_count": 15,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "6aec1d3d-4a7c-420c-8bf7-2bf61ab35d50",
        "_uuid": "36bdaf7ed73ce30ca2552b516620a4af7a3564eb",
        "trusted": true
      },
      "cell_type": "code",
      "source": "from keras.models import Sequential\nfrom keras.layers import Conv1D, MaxPool1D, Dense, Activation, GlobalMaxPool1D, Flatten",
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n  from ._conv import register_converters as _register_converters\nUsing TensorFlow backend.\n",
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "62a3d56a-3cf8-4765-89a1-6925d6e40be1",
        "_uuid": "92324440e7b3a39440d3a65376282293093ad97d",
        "trusted": true
      },
      "cell_type": "code",
      "source": "max_len = 100\nn_features = 29",
      "execution_count": 17,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "defb6cfa-af9d-4526-a581-2c4bc162c6bd",
        "_uuid": "1c385ea6686a6350a963ac965c120e2cab0fcf7c",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model = Sequential()\n\nmodel.add(Conv1D(16,5, input_shape=(max_len,n_features)))\nmodel.add(Activation('relu'))\nmodel.add(MaxPool1D(5))\n\nmodel.add(Conv1D(16,5))\nmodel.add(Activation('relu'))\nmodel.add(MaxPool1D(5))\n\nmodel.add(Flatten())\nmodel.add(Dense(1))",
      "execution_count": 18,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "3475b817-b7f2-4819-aa69-57ad23d6e644",
        "_uuid": "278c549f576b648175cde75a3765bfdbe974418f",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.compile(optimizer='adam',loss='mean_absolute_percentage_error')",
      "execution_count": 19,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "12c9ab21-6ff3-4337-bacf-47bcb578803b",
        "_uuid": "07f79203f4e964b5ff2bf8eecc13d13abbc71882",
        "trusted": true
      },
      "cell_type": "code",
      "source": "from sklearn.model_selection import train_test_split",
      "execution_count": 20,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "5d83c087-d814-4693-86f2-6ea870b9300e",
        "_uuid": "c4bb2b93638f5c05d9721d9015be840b33631a74",
        "trusted": true
      },
      "cell_type": "code",
      "source": "batch_size = 128\ntrain_df, val_df = train_test_split(train, test_size=0.1)\ntrain_gen = generate_batches(train_df,batch_size=batch_size)\nval_gen = generate_batches(val_df, batch_size=batch_size)\n\nn_train_samples = train_df.shape[0]\nn_val_samples = val_df.shape[0]",
      "execution_count": 21,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "2e8bbd33-45ed-4cc3-9723-17cb603b759f",
        "_uuid": "a8569107915a6055e58a2921470886e813f1f51f",
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "546f48b2-6c81-4af7-a6a7-2d4773e8d635",
        "_uuid": "b89eb7f40af698b1f2e5d3b990780b8d4e3d20ea",
        "trusted": true
      },
      "cell_type": "code",
      "source": "a,b = next(train_gen)",
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n  out=out, **kwargs)\n/opt/conda/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n  ret = ret.dtype.type(ret / rcount)\n",
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "_cell_guid": "c82a220a-054b-41df-91dc-c149b41ddbf1",
        "_uuid": "ff828efc68dfa21cc072daf3bacd25ddf9af85cc",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.fit_generator(train_gen, \n                    epochs=1,\n                    steps_per_epoch=n_train_samples // batch_size, \n                    validation_data= val_gen, \n                    validation_steps=n_val_samples // batch_size)",
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Epoch 1/1\n",
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n  out=out, **kwargs)\n/opt/conda/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n  ret = ret.dtype.type(ret / rcount)\n",
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": "1019/1019 [==============================] - 52s 51ms/step - loss: 431502.3788 - val_loss: 107665.1747\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 23,
          "data": {
            "text/plain": "<keras.callbacks.History at 0x7f0803fe7630>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "db4c2ad8-5e07-4e91-9936-aee2063ac80e",
        "_uuid": "07fdb0422f4e8c8b1938aa7d189dde2941031511",
        "trusted": true
      },
      "cell_type": "code",
      "source": "from keras.layers import SimpleRNN\n\nmodel = Sequential()\nmodel.add(SimpleRNN(16,input_shape=(max_len,n_features)))\nmodel.add(Dense(1))\n\nmodel.compile(optimizer='adam',loss='mean_absolute_percentage_error')",
      "execution_count": 24,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "7e82ee22-90f9-4e41-8e01-8dc9d52c7d37",
        "_uuid": "2f63f5a0bb2485156abd7db20d8a9bc1ea7b8f9e",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.fit_generator(train_gen, \n                    epochs=1,\n                    steps_per_epoch=n_train_samples // batch_size, \n                    validation_data= val_gen, \n                    validation_steps=n_val_samples // batch_size)",
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Epoch 1/1\n",
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n  out=out, **kwargs)\n/opt/conda/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n  ret = ret.dtype.type(ret / rcount)\n",
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": "1019/1019 [==============================] - 82s 81ms/step - loss: 4612880.6155 - val_loss: 973000.9942\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 25,
          "data": {
            "text/plain": "<keras.callbacks.History at 0x7f0802940278>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "_cell_guid": "121e0926-88a2-4b39-a84d-77dabadd37da",
        "_uuid": "069032a724720dc86e59bc27946ec85b37dbcb58",
        "trusted": true
      },
      "cell_type": "code",
      "source": "from keras.layers import SimpleRNN\n\nmodel = Sequential()\nmodel.add(SimpleRNN(32,return_sequences=True,input_shape=(max_len,n_features)))\nmodel.add(SimpleRNN(16, return_sequences = True))\nmodel.add(SimpleRNN(16))\nmodel.add(Dense(1))\n\nmodel.compile(optimizer='adam',loss='mean_absolute_percentage_error')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "adb3e3e1-377b-4158-bff5-78e618e5e569",
        "_uuid": "79fc0f4e25d3e2c422587979668267feedf56307",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.fit_generator(train_gen, \n                    epochs=1,\n                    steps_per_epoch=n_train_samples // batch_size, \n                    validation_data= val_gen, \n                    validation_steps=n_val_samples // batch_size)",
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Epoch 1/1\n",
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n  out=out, **kwargs)\n/opt/conda/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n  ret = ret.dtype.type(ret / rcount)\n",
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": "1019/1019 [==============================] - 171s 168ms/step - loss: 3398789.7302 - val_loss: 1034468.5364\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 27,
          "data": {
            "text/plain": "<keras.callbacks.History at 0x7f0802940198>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "_cell_guid": "5f44c453-5f9c-4e13-ba2c-8ab36a268c62",
        "_uuid": "c48edcfe20c92443f4e6c64ed42a8846bb3485c8",
        "trusted": true
      },
      "cell_type": "code",
      "source": "from keras.layers import CuDNNLSTM\n\nmodel = Sequential()\nmodel.add(CuDNNLSTM(16,input_shape=(max_len,n_features)))\nmodel.add(Dense(1))",
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": "WARNING:tensorflow:From /opt/conda/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\nInstructions for updating:\nUse the retry module or similar alternatives.\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "_cell_guid": "0ac66d54-86b6-4cec-9a26-36f31ac98f76",
        "collapsed": true,
        "_uuid": "3a3bd4ceadd1fb3d7a1972ab146c93c902123fe3",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.compile(optimizer='adam',loss='mean_absolute_percentage_error')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "8eee4ae3-19f7-4e96-9e88-7d66852fe637",
        "_uuid": "f8b6859f15cbec0acb6a6c71d0695c304a666530",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.fit_generator(train_gen, \n                    epochs=1,\n                    steps_per_epoch=n_train_samples // batch_size, \n                    validation_data= val_gen, \n                    validation_steps=n_val_samples // batch_size)",
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Epoch 1/1\n",
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n  out=out, **kwargs)\n/opt/conda/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n  ret = ret.dtype.type(ret / rcount)\n",
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": "1019/1019 [==============================] - 52s 51ms/step - loss: 918992.5830 - val_loss: 283646.8184\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 30,
          "data": {
            "text/plain": "<keras.callbacks.History at 0x7f0728f436d8>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "_cell_guid": "7a45c4be-888d-4951-b5df-5af5cb4adb20",
        "collapsed": true,
        "_uuid": "a6227c476c7cc9a07149e4d6586ac44d9eaf306b",
        "trusted": true
      },
      "cell_type": "code",
      "source": "from keras.layers import LSTM\n\nmodel = Sequential()\nmodel.add(LSTM(16, \n               recurrent_dropout=0.1,\n               return_sequences=True,\n               input_shape=(max_len,n_features)))\n\nmodel.add(LSTM(16,recurrent_dropout=0.1))\n\nmodel.add(Dense(1))",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "3f84368b-3ca2-47c6-ba67-a8f5e876ccf5",
        "collapsed": true,
        "_uuid": "651ac976647a9bd5da60a286201a6b9cf1acfc42",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.compile(optimizer='adam',loss='mean_absolute_percentage_error')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "_cell_guid": "2a4f1cf0-8041-436a-9047-4bca3c000199",
        "_uuid": "dcbd1f0eb743031e1ebc8cb0d0796d197b82965e",
        "trusted": true
      },
      "cell_type": "code",
      "source": "model.fit_generator(train_gen, \n                    epochs=1,\n                    steps_per_epoch=n_train_samples // batch_size, \n                    validation_data= val_gen, \n                    validation_steps=n_val_samples // batch_size)",
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": "Epoch 1/1\n",
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": "/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n  out=out, **kwargs)\n/opt/conda/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n  ret = ret.dtype.type(ret / rcount)\n",
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": "1019/1019 [==============================] - 443s 435ms/step - loss: 516063.6124 - val_loss: 87626.7314\n",
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "execution_count": 33,
          "data": {
            "text/plain": "<keras.callbacks.History at 0x7f0726750898>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "_cell_guid": "4932a2bf-54fc-4498-9a46-2240ba360788",
        "collapsed": true,
        "_uuid": "1fb64d33b126aee1827f404c474899e455db5401",
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.4",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 1
}