{ "metadata": { "name": "TEST_theanoml_autoencoder" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import theanoml\n", "reload(theanoml)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 1, "text": [ "" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "import cPickle\n", "import numpy as np\n", "X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))\n", "y = y - 1\n", "classes = np.unique(y)\n", "print X.shape, y.shape, classes" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1000, 1875) (1000,) [0 1 2 3 4 5 6 7 8]\n" ] } ], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "***TEST Contractive AutoEncoder***" ] }, { "cell_type": "code", "collapsed": false, "input": [ "reload(theanoml.autoencoder)\n", "ac = theanoml.autoencoder.ContractiveAutoEncoder(n_epochs=5)\n", "ac.fit(X)\n", "print ac.transform(X).shape" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "training epoch 0, recall cost -849.060751, jacobian norm 13.992968 \n", "training epoch 1, recall cost -2802.754321, jacobian norm 8.055183 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 2, recall cost -4695.691037, jacobian norm 7.194211 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 3, recall cost -6594.667194, jacobian norm 7.036029 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 4, recall cost -8497.628880, jacobian norm 7.056526 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "(1000, 500)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "## TEST the usage of auto 
encoder for classification\n", "from sklearn.cross_validation import train_test_split\n", "from sklearn.linear_model import SGDClassifier\n", "train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)\n", "ac = theanoml.autoencoder.ContractiveAutoEncoder(n_hidden=30, n_epochs=20)\n", "train_feats = ac.fit_transform(train_X)\n", "validation_feats = ac.transform(validation_X)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "training epoch 0, recall cost 453.759859, jacobian norm 4.880746 \n", "training epoch 1, recall cost 17.325294, jacobian norm 3.223397 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 2, recall cost -67.488043, jacobian norm 2.688757 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 3, recall cost -125.443729, jacobian norm 2.494923 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 4, recall cost -176.106676, jacobian norm 2.418065 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 5, recall cost -223.721742, jacobian norm 2.400184 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 6, recall cost -269.771063, jacobian norm 2.415523 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 7, recall cost -314.896781, jacobian norm 2.443972 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 8, recall cost -359.440429, jacobian norm 2.482995 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 9, recall cost -403.649780, jacobian norm 2.558611 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 10, recall cost -448.440946, jacobian norm 2.665677 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 11, recall cost 
-496.511160, jacobian norm 2.693180 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 12, recall cost -548.328873, jacobian norm 2.634594 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 13, recall cost -603.231024, jacobian norm 2.638994 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 14, recall cost -661.238554, jacobian norm 2.588580 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 15, recall cost -721.428754, jacobian norm 2.518980 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 16, recall cost -783.781057, jacobian norm 2.535105 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 17, recall cost -847.916318, jacobian norm 2.404655 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 18, recall cost -913.003163, jacobian norm 2.371111 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 19, recall cost -978.504146, jacobian norm 2.369204 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "sgd = SGDClassifier()\n", "sgd.fit(train_feats, train_y)\n", "print sgd.score(train_feats, train_y)\n", "print sgd.score(validation_feats, validation_y)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "0.11875\n", "0.09\n" ] } ], "prompt_number": 17 }, { "cell_type": "markdown", "metadata": {}, "source": [ "***Test Denoising AutoEncoder***" ] }, { "cell_type": "code", "collapsed": false, "input": [ "reload(theanoml.autoencoder)\n", "da = theanoml.autoencoder.DenoisingAutoEncoder()\n", "da.fit(X)\n", "print da.transform(X).shape" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", 
"text": [ "training epoch 0, recall cost -799.749661 \n", "training epoch 1, recall cost -2749.752624 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 2, recall cost -4658.635129 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 3, recall cost -6579.628540 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 4, recall cost -8506.663734 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 5, recall cost -10438.382075 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 6, recall cost -12365.957260 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 7, recall cost -14292.152982 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 8, recall cost -16226.490023 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 9, recall cost -18152.938796 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 10, recall cost -20078.178948 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 11, recall cost -22015.874000 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 12, recall cost -23934.141165 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 13, recall cost -25865.363523 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 14, recall cost -27787.282287 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 15, recall cost -29725.064009 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 16, recall cost -31659.581391 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 17, recall cost -33561.392486 " ] }, { "output_type": "stream", "stream": 
"stdout", "text": [ "\n", "training epoch 18, recall cost -35497.663851 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 19, recall cost -37422.808957 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "(1000, 500)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "## TEST the usage of denoising autoencoder for classification\n", "reload(theanoml.autoencoder)\n", "from sklearn.cross_validation import train_test_split\n", "from sklearn.linear_model import SGDClassifier\n", "train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)\n", "da = theanoml.autoencoder.DenoisingAutoEncoder(corruption_level=0.1)\n", "train_feats = da.fit_transform(train_X)\n", "validation_feats = da.transform(validation_X)\n", "sgd = SGDClassifier()\n", "sgd.fit(train_feats, train_y)\n", "print sgd.score(train_feats, train_y)\n", "print sgd.score(validation_feats, validation_y)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "training epoch 0, recall cost -609.777560 \n", "training epoch 1, recall cost -2195.633734 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 2, recall cost -3720.939261 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 3, recall cost -5256.150622 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 4, recall cost -6800.977186 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 5, recall cost -8352.417585 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 6, recall cost -9909.995321 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 7, recall cost -11465.122129 " ] }, { "output_type": "stream", "stream": "stdout", 
"text": [ "\n", "training epoch 8, recall cost -13014.628828 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 9, recall cost -14579.630394 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 10, recall cost -16126.183992 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 11, recall cost -17668.635624 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 12, recall cost -19238.311686 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 13, recall cost -20767.353782 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 14, recall cost -22341.993979 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 15, recall cost -23895.572684 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 16, recall cost -25444.877052 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 17, recall cost -27005.653110 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 18, recall cost -28509.056846 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 19, recall cost -30077.161623 " ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "0.08625" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "0.085\n" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }