{
 "metadata": {
  "name": "",
  "signature": "sha256:72f14f716341fe8617ac394b427ca7959f6e4a62f0e386665d146c552d2c7ccd"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os, sys, itertools, pickle\n",
      "\n",
      "import joblib\n",
      "\n",
      "import sklearn.cross_validation\n",
      "import sklearn.metrics"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from simplednn.nets import *"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def oprint(s):\n",
      "    os.system('echo \"%s\"'%s)\n",
      "\n",
      "def train_net(label, w, n):\n",
      "    \"\"\"\n",
      "        Given a sample label and filter set label w, train a neural network and make predictions. \n",
      "        Output is saved to a pickle indexed by parameter n.\n",
      "    \"\"\"\n",
      "    \n",
      "    d = joblib.load('./data/cov_opt_%s_%s.pickle'%(label,w))\n",
      "\n",
      "    X = d['covs'].astype('float32')\n",
      "    y = d['y'].astype('int32')\n",
      "   \n",
      "    N_CHANNELS = 12\n",
      "\n",
      "    # Select N_CHANNELS at random from the full covariance matrices\n",
      "    channelidx = range(X.shape[2])\n",
      "    numpy.random.shuffle(channelidx)\n",
      "    channelidx = channelidx[:N_CHANNELS]\n",
      "    X = X[:,:,channelidx,:][:,:,:,channelidx]\n",
      "    X = X.reshape((X.shape[0],-1))\n",
      "    \n",
      "    # Partition the labeled data and split into train/test\n",
      "    X_traintest = X[y!=-1]\n",
      "    y_traintest = y[y!=-1]\n",
      "\n",
      "    test_size = 0.1\n",
      "    rseed = randint(2**32)\n",
      "    tri, tei = next(iter(sklearn.cross_validation.ShuffleSplit(\n",
      "                         X_traintest.shape[0], n_iter=1, test_size=test_size, random_state=rseed)))\n",
      "    X_train = X_traintest[tri]\n",
      "    X_test = X_traintest[tei]\n",
      "    y_train = y_traintest[tri]\n",
      "    y_test = y_traintest[tei]\n",
      "    \n",
      "    oprint(\"%s, %s\"%(label, w))\n",
      "    oprint(\"Selecting EEG channels: %s\"%channelidx)\n",
      "    oprint(\"Total dataset size:\")\n",
      "    oprint(\"n labels: %d\" % y_traintest.shape[0])\n",
      "    oprint(\"n samples: %d\" % X_traintest.shape[0])\n",
      "    oprint(\"n features: %d\" % X_traintest.shape[1])\n",
      "    oprint(\"n classes: %d\" % len(set(y_traintest)))\n",
      "    \n",
      "    # Initialize and train network\n",
      "    dnn1 = DropoutNet(numpy_rng=numpy.random.RandomState(rseed), n_ins=X_traintest.shape[1],\n",
      "                    layers_types=[ReLU, ReLU, LogisticRegression],\n",
      "                    layers_sizes=[200, 100],\n",
      "                    trainables=[1, 1, 1, 1, 1, 1],\n",
      "                    dropout_rates=[0., 0.5, 0.5],\n",
      "                    # TODO if you have a big enough GPU, use these:\n",
      "                    #layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],\n",
      "                    #layers_sizes=[2000, 2000, 2000, 2000],\n",
      "                    #dropout_rates=[0., 0.5, 0.5, 0.5, 0.5],\n",
      "                    n_outs=2,\n",
      "                    max_norm=4.,\n",
      "                    fast_drop=True,\n",
      "                    debugprint=0)\n",
      "\n",
      "    dnn1.fit(X_train, y_train, max_epochs=100, method='adadelta', verbose=False, test=(X_test is not None), plot=False, save=None)\n",
      "    \n",
      "    # Test on CV data. Fails if no seizure clips in split.\n",
      "    try:\n",
      "        cv = sklearn.metrics.roc_auc_score(y_test, dnn1.predict_proba(X_test)[:,1])\n",
      "        os.system('echo \"%s\"'%('%s %s CV AUC: %f'%(label, w, cv)))\n",
      "    except:\n",
      "        pass\n",
      "    \n",
      "    # Make predictions on full dataset and save to pickle\n",
      "    pred = dnn1.predict_proba(X)[:,1]\n",
      "    d = {'fns':d['fns'], 'y':d['y'], 'pred':pred, 'rseed':rseed}\n",
      "    pickle.dump(d, open('./output/pred_%d_%s_%s'%(n, label, w),'w'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "labels = ['dog_0', 'dog_1', 'dog_2', 'dog_3', 'patient_0', 'patient_1', 'patient_2', 'patient_3', 'patient_4',\n",
      "          'patient_5', 'patient_6', 'patient_7']\n",
      "\n",
      "ws = 'ABC'\n",
      "\n",
      "# Train multiple networks on each processed subject dataset (50 x 12 x 3)\n",
      "# This will take several hours\n",
      "r = joblib.Parallel(n_jobs=-1)(joblib.delayed(train_net)(label, w, n) for n, label, w in \n",
      "                              itertools.product(range(50),array(labels),ws))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}