{
 "metadata": {
  "name": "TEST_theanoml_autoencoder"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import theanoml\n",
      "reload(theanoml)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "pyout",
       "prompt_number": 1,
       "text": [
        "<module 'theanoml' from 'theanoml/__init__.pyc'>"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import cPickle\n",
      "import numpy as np\n",
      "X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))\n",
      "y = y - 1\n",
      "classes = np.unique(y)\n",
      "print X.shape, y.shape, classes"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "(1000, 1875) (1000,) [0 1 2 3 4 5 6 7 8]\n"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "***TEST Contractive AutoEncoder***"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reload(theanoml.autoencoder)\n",
      "ac = theanoml.autoencoder.ContractiveAutoEncoder(n_epochs=5)\n",
      "ac.fit(X)\n",
      "print ac.transform(X).shape"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "training epoch 0, recall cost -849.060751, jacobian norm 13.992968 \n",
        "training epoch 1, recall cost -2802.754321, jacobian norm 8.055183 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 2, recall cost -4695.691037, jacobian norm 7.194211 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 3, recall cost -6594.667194, jacobian norm 7.036029 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 4, recall cost -8497.628880, jacobian norm 7.056526 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "(1000, 500)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "## TEST the usage of auto encoder for classification\n",
      "from sklearn.cross_validation import train_test_split\n",
      "from sklearn.linear_model import SGDClassifier\n",
      "train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)\n",
      "ac = theanoml.autoencoder.ContractiveAutoEncoder(n_hidden=30, n_epochs=20)\n",
      "train_feats = ac.fit_transform(train_X)\n",
      "validation_feats = ac.transform(validation_X)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "training epoch 0, recall cost 453.759859, jacobian norm 4.880746 \n",
        "training epoch 1, recall cost 17.325294, jacobian norm 3.223397 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 2, recall cost -67.488043, jacobian norm 2.688757 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 3, recall cost -125.443729, jacobian norm 2.494923 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 4, recall cost -176.106676, jacobian norm 2.418065 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 5, recall cost -223.721742, jacobian norm 2.400184 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 6, recall cost -269.771063, jacobian norm 2.415523 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 7, recall cost -314.896781, jacobian norm 2.443972 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 8, recall cost -359.440429, jacobian norm 2.482995 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 9, recall cost -403.649780, jacobian norm 2.558611 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 10, recall cost -448.440946, jacobian norm 2.665677 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 11, recall cost -496.511160, jacobian norm 2.693180 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 12, recall cost -548.328873, jacobian norm 2.634594 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 13, recall cost -603.231024, jacobian norm 2.638994 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 14, recall cost -661.238554, jacobian norm 2.588580 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 15, recall cost -721.428754, jacobian norm 2.518980 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 16, recall cost -783.781057, jacobian norm 2.535105 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 17, recall cost -847.916318, jacobian norm 2.404655 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 18, recall cost -913.003163, jacobian norm 2.371111 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 19, recall cost -978.504146, jacobian norm 2.369204 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sgd = SGDClassifier()\n",
      "sgd.fit(train_feats, train_y)\n",
      "print sgd.score(train_feats, train_y)\n",
      "print sgd.score(validation_feats, validation_y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "0.11875\n",
        "0.09\n"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "***Test Denoising AutoEncoder***"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "reload(theanoml.autoencoder)\n",
      "da = theanoml.autoencoder.DenoisingAutoEncoder()\n",
      "da.fit(X)\n",
      "print da.transform(X).shape"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "training epoch 0, recall cost -799.749661 \n",
        "training epoch 1, recall cost -2749.752624 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 2, recall cost -4658.635129 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 3, recall cost -6579.628540 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 4, recall cost -8506.663734 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 5, recall cost -10438.382075 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 6, recall cost -12365.957260 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 7, recall cost -14292.152982 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 8, recall cost -16226.490023 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 9, recall cost -18152.938796 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 10, recall cost -20078.178948 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 11, recall cost -22015.874000 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 12, recall cost -23934.141165 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 13, recall cost -25865.363523 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 14, recall cost -27787.282287 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 15, recall cost -29725.064009 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 16, recall cost -31659.581391 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 17, recall cost -33561.392486 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 18, recall cost -35497.663851 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 19, recall cost -37422.808957 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "(1000, 500)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n"
       ]
      }
     ],
     "prompt_number": 18
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "## TEST the usage of denoising auto encoder for classification\n",
      "reload(theanoml.autoencoder)\n",
      "from sklearn.cross_validation import train_test_split\n",
      "from sklearn.linear_model import SGDClassifier\n",
      "train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.2)\n",
      "da = theanoml.autoencoder.DenoisingAutoEncoder(corruption_level=0.1)\n",
      "train_feats = da.fit_transform(train_X)\n",
      "validation_feats = da.transform(validation_X)\n",
      "sgd = SGDClassifier()\n",
      "sgd.fit(train_feats, train_y)\n",
      "print sgd.score(train_feats, train_y)\n",
      "print sgd.score(validation_feats, validation_y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "training epoch 0, recall cost -609.777560 \n",
        "training epoch 1, recall cost -2195.633734 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 2, recall cost -3720.939261 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 3, recall cost -5256.150622 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 4, recall cost -6800.977186 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 5, recall cost -8352.417585 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 6, recall cost -9909.995321 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 7, recall cost -11465.122129 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 8, recall cost -13014.628828 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 9, recall cost -14579.630394 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 10, recall cost -16126.183992 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 11, recall cost -17668.635624 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 12, recall cost -19238.311686 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 13, recall cost -20767.353782 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 14, recall cost -22341.993979 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 15, recall cost -23895.572684 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 16, recall cost -25444.877052 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 17, recall cost -27005.653110 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 18, recall cost -28509.056846 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "training epoch 19, recall cost -30077.161623 "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "0.08625"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "0.085\n"
       ]
      }
     ],
     "prompt_number": 23
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}