{
 "metadata": {
  "name": "",
  "signature": "sha256:72f14f716341fe8617ac394b427ca7959f6e4a62f0e386665d146c552d2c7ccd"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Train dropout networks on EEG covariance features\n",
      "\n",
      "For every subject in `labels` and every filter set in `ws`, this notebook trains several `DropoutNet` models, each on a random subset of channels, and pickles the predictions for the full dataset to `./output/`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import itertools\n",
      "import os\n",
      "import pickle\n",
      "import struct\n",
      "import subprocess\n",
      "import sys\n",
      "\n",
      "import joblib\n",
      "import numpy\n",
      "\n",
      "import sklearn.cross_validation\n",
      "import sklearn.metrics"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# The star import is kept so that any name it used to inject is still available;\n",
      "# the names this notebook actually uses are imported explicitly below.\n",
      "from simplednn.nets import *\n",
      "from simplednn.nets import DropoutNet, LogisticRegression, ReLU"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def oprint(s):\n",
      "    \"\"\"\n",
      "    Print a message by running the external `echo` command.\n",
      "\n",
      "    The message is handed to `echo` as a single argument without going through\n",
      "    a shell, so quotes, `$` or backticks inside `s` are printed literally.\n",
      "    NOTE(review): presumably `echo` is used instead of `print` so that output\n",
      "    from joblib worker processes reaches the terminal -- confirm.\n",
      "    \"\"\"\n",
      "    subprocess.call(['echo', str(s)])\n",
      "\n",
      "\n",
      "def train_net(label, w, n, n_channels=12, test_size=0.1, max_epochs=100):\n",
      "    \"\"\"\n",
      "    Given a sample label and filter set label w, train a neural network and make predictions.\n",
      "    Output is saved to a pickle indexed by parameter n.\n",
      "\n",
      "    Parameters\n",
      "    ----------\n",
      "    label : str\n",
      "        Subject name; together with `w` it selects the input file\n",
      "        ./data/cov_opt_<label>_<w>.pickle.\n",
      "    w : str\n",
      "        Filter set label.\n",
      "    n : int\n",
      "        Run index, only used in the name of the output file.\n",
      "    n_channels : int\n",
      "        Number of channels drawn at random from the covariance matrices.\n",
      "    test_size : float\n",
      "        Fraction of the labeled samples held out for the AUC check.\n",
      "    max_epochs : int\n",
      "        Passed to `DropoutNet.fit`.\n",
      "\n",
      "    Writes ./output/pred_<n>_<label>_<w>, a pickled dict with the keys\n",
      "    'fns', 'y', 'pred' and 'rseed'. Returns None.\n",
      "    \"\"\"\n",
      "    data = joblib.load('./data/cov_opt_%s_%s.pickle' % (label, w))\n",
      "\n",
      "    X = data['covs'].astype('float32')\n",
      "    y = data['y'].astype('int32')\n",
      "\n",
      "    # Take the seed from OS entropy rather than from the global numpy RNG:\n",
      "    # joblib workers created by fork start with a copy of the parent's global\n",
      "    # RNG state, so draws from `numpy.random` can repeat across workers.\n",
      "    rseed = int(struct.unpack('<I', os.urandom(4))[0])\n",
      "    rng = numpy.random.RandomState(rseed)\n",
      "\n",
      "    # Select n_channels at random from the full covariance matrices.\n",
      "    # The same indices are applied to axes 2 and 3.\n",
      "    channelidx = rng.permutation(X.shape[2])[:n_channels].tolist()\n",
      "    X = X[:, :, channelidx, :][:, :, :, channelidx]\n",
      "    X = X.reshape((X.shape[0], -1))\n",
      "\n",
      "    # Partition the labeled data (y != -1) and split into train/test\n",
      "    labeled = y != -1\n",
      "    X_traintest = X[labeled]\n",
      "    y_traintest = y[labeled]\n",
      "\n",
      "    splitter = sklearn.cross_validation.ShuffleSplit(\n",
      "        X_traintest.shape[0], n_iter=1, test_size=test_size, random_state=rseed)\n",
      "    tri, tei = next(iter(splitter))\n",
      "    X_train = X_traintest[tri]\n",
      "    X_test = X_traintest[tei]\n",
      "    y_train = y_traintest[tri]\n",
      "    y_test = y_traintest[tei]\n",
      "\n",
      "    oprint('%s, %s' % (label, w))\n",
      "    oprint('Selecting EEG channels: %s' % channelidx)\n",
      "    oprint('Total dataset size:')\n",
      "    oprint('n labels: %d' % y_traintest.shape[0])\n",
      "    oprint('n samples: %d' % X_traintest.shape[0])\n",
      "    oprint('n features: %d' % X_traintest.shape[1])\n",
      "    oprint('n classes: %d' % len(set(y_traintest)))\n",
      "\n",
      "    # Initialize and train network\n",
      "    dnn1 = DropoutNet(numpy_rng=numpy.random.RandomState(rseed), n_ins=X_traintest.shape[1],\n",
      "        layers_types=[ReLU, ReLU, LogisticRegression],\n",
      "        layers_sizes=[200, 100],\n",
      "        trainables=[1, 1, 1, 1, 1, 1],\n",
      "        dropout_rates=[0., 0.5, 0.5],\n",
      "        # TODO if you have a big enough GPU, use these:\n",
      "        #layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],\n",
      "        #layers_sizes=[2000, 2000, 2000, 2000],\n",
      "        #dropout_rates=[0., 0.5, 0.5, 0.5, 0.5],\n",
      "        n_outs=2,\n",
      "        max_norm=4.,\n",
      "        fast_drop=True,\n",
      "        debugprint=0)\n",
      "\n",
      "    # `test` is always True here because X_test is never None.\n",
      "    dnn1.fit(X_train, y_train, max_epochs=max_epochs, method='adadelta', verbose=False,\n",
      "             test=(X_test is not None), plot=False, save=None)\n",
      "\n",
      "    # Test on CV data. Fails if no seizure clips in split; in that case the\n",
      "    # score is skipped and the reason is logged instead of being swallowed.\n",
      "    try:\n",
      "        cv = sklearn.metrics.roc_auc_score(y_test, dnn1.predict_proba(X_test)[:, 1])\n",
      "    except ValueError as err:\n",
      "        oprint('%s %s CV AUC unavailable: %s' % (label, w, err))\n",
      "    else:\n",
      "        oprint('%s %s CV AUC: %f' % (label, w, cv))\n",
      "\n",
      "    # Make predictions on full dataset and save to pickle\n",
      "    pred = dnn1.predict_proba(X)[:, 1]\n",
      "    result = {'fns': data['fns'], 'y': data['y'], 'pred': pred, 'rseed': rseed}\n",
      "    # Binary mode and a context manager: pickle data is bytes, and the handle\n",
      "    # is closed even if dumping fails.\n",
      "    with open('./output/pred_%d_%s_%s' % (n, label, w), 'wb') as f:\n",
      "        pickle.dump(result, f)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "labels = ['dog_0', 'dog_1', 'dog_2', 'dog_3', 'patient_0', 'patient_1', 'patient_2', 'patient_3', 'patient_4',\n",
      "          'patient_5', 'patient_6', 'patient_7']\n",
      "\n",
      "ws = 'ABC'\n",
      "\n",
      "# Number of networks trained per (subject, filter set) pair\n",
      "N_RUNS = 50\n",
      "\n",
      "# train_net writes its results here; create the directory up front so the\n",
      "# workers do not fail on a missing path.\n",
      "if not os.path.isdir('./output'):\n",
      "    os.makedirs('./output')\n",
      "\n",
      "# Train multiple networks on each processed subject dataset (50 x 12 x 3)\n",
      "# This will take several hours\n",
      "r = joblib.Parallel(n_jobs=-1)(\n",
      "    joblib.delayed(train_net)(label, w, n)\n",
      "    for n, label, w in itertools.product(range(N_RUNS), labels, ws))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}