{
 "metadata": {
  "name": "",
  "signature": "sha256:a127b5e035d0b64fb162511b97690025c0f8e4b4b53f5597b8d5fe17d1f91dfe"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import print_function\n",
      "import numpy as np\n",
      "import googleprediction"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Google's APL library is setup to work well with command line applications. Mimic some of that behavior here."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Initialize Model\n",
      "=============="
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model = googleprediction.GooglePredictor(\n",
      "    \"myproject\",\n",
      "    \"mybucket/X_train_spectra_ave_goog.csv\",\n",
      "    \"tswift_fft_ave\",\n",
      "    \"client_secrets.json\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "List Available Models\n",
      "============"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model.list()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Fit Model to Training Data in the Storage Bucket\n",
      "============="
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model.fit('CLASSIFICATION')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "List Model Features\n",
      "======"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model.get_params()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Load Data Files for Testing\n",
      "==========="
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "with np.load(\"data_files.npz\") as data:\n",
      "    X_train = data['X_train']\n",
      "    Y_train = data['Y_train']\n",
      "    X_test = data['X_test']\n",
      "    Y_test = data['Y_test']\n",
      "    X_comp = data['X_comp']\n",
      "del data"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "X_train = np.float64(X_train)\n",
      "X_test = np.float64(X_test)\n",
      "X_comp = np.float64(X_comp)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Compute Frequency Spectra for Input Features\n",
      "============="
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def convert_to_spectra(X):\n",
      "    out = []\n",
      "    for row in X:\n",
      "        xfft = np.fft.fft(row)\n",
      "        n = len(xfft)\n",
      "        half_n = np.ceil(n/2.0)\n",
      "        xfft = (2.0 / n) * xfft[1:half_n]\n",
      "        out.append(np.abs(xfft))\n",
      "    out = np.array(out)\n",
      "    return out"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "X_train_spectra = convert_to_spectra(X_train)\n",
      "X_test_spectra = convert_to_spectra(X_test)\n",
      "X_comp_spectra = convert_to_spectra(X_comp)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Smooth the Spectra\n",
      "========"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def moving_average(X, n=3):\n",
      "    ret = []\n",
      "    for row in X:\n",
      "        row = np.cumsum(row)\n",
      "        row[n:] = row[n:] - row[:-n]\n",
      "        row = row[n - 1:] / n\n",
      "        ret.append(row)\n",
      "    ret = np.array(ret)\n",
      "    return ret"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "X_train_spectra = moving_average(X_train_spectra, n=5)\n",
      "X_test_spectra = moving_average(X_test_spectra, n=5)\n",
      "X_comp_spectra = moving_average(X_comp_spectra, n=5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "X_train_spectra = np.int16(X_train_spectra)\n",
      "X_test_spectra = np.int16(X_test_spectra)\n",
      "X_comp_spectra = np.int16(X_comp_spectra)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Y_test.shape"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 15,
       "text": [
        "(6720,)"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Predict on the Test Set\n",
      "================"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "out = model.predict(X_test_spectra)\n",
      "print(out)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "=======================\n",
        "Making some predictions\n",
        "=======================\n",
        "[u'1' u'0' u'1' ..., u'1' u'1' u'1']"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "out = np.int16(out)\n",
      "out.shape"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 17,
       "text": [
        "(6720,)"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Compute Performance on the Test Set\n",
      "==============="
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print(classification_report(np.int16(Y_test), out))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "             precision    recall  f1-score   support\n",
        "\n",
        "          0       0.96      0.93      0.94      3381\n",
        "          1       0.93      0.96      0.94      3339\n",
        "\n",
        "avg / total       0.94      0.94      0.94      6720\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "confusion_matrix(Y_test, out, labels=[0, 1])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 20,
       "text": [
        "array([[3141,  240],\n",
        "       [ 145, 3194]])"
       ]
      }
     ],
     "prompt_number": 20
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print(accuracy_score(Y_test, out))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "0.942708333333\n"
       ]
      }
     ],
     "prompt_number": 21
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Save Predicitons\n",
      "=========="
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "np.savetxt(\"gpapi_test_pred_fft.csv\", np.array(out,dtype=int), delimiter=',', fmt='%i')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 22
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}