{ "metadata": { "name": "BASIC_theano_II" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "***Test Input and Output Binding in Theano***" ] }, { "cell_type": "code", "collapsed": false, "input": [ "## Patterns for writing theano formulas \n", "import theano\n", "import theano.tensor as T\n", "import numpy as np\n", "\n", "class Computation(object):\n", " def __init__(self, x = None, y = None):\n", " self.x = x or T.matrix('x')\n", " self.y = y or T.matrix('y')\n", " ## for partial_fit mode\n", " def bind_input(self, x):\n", " self.x = x\n", " def bind_output(self, y):\n", " self.y = y\n", " def error(self):\n", " return T.mean((self.y - self.x)**2)\n", " \n", " \n", "data_x = np.random.random((1000, 10))\n", "data_y = data_x + np.random.random((1000, 10)) * 0.01\n", "model = Computation()\n", "error1 = theano.function(inputs = [],\n", " outputs = model.error(),\n", " givens = {\n", " model.x: data_x[:10, :10],\n", " model.y: data_y[:10, :10]\n", " })\n", "error2 = theano.function(inputs = [],\n", " outputs = model.error(),\n", " givens = {\n", " model.x: data_x[:15, :10],\n", " model.y: data_y[:15, :10]\n", " })\n", "print error1(), error2()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "3.43315307605e-05 3.45271127376e-05\n" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "***Contractive Auto Encoder***" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import theano \n", "import theano.tensor as T\n", "import numpy as np\n", "import time" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "class ContractiveAutoEncoder(object):\n", " \"\"\"\n", " Contractive Auto Encoder.\n", " References :\n", " - S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive\n", " Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11\n", "\n", " - S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, and Pascal\n", " Vincent. Learning invariant features through local space\n", " contraction. Technical Report 1360, Universite de Montreal\n", "\n", " - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise\n", " Training of Deep Networks, Advances in Neural Information Processing\n", " Systems 19, 2007\n", " \"\"\"\n", " def __init__(self, rng, n_visible, n_hidden, batch_size = 1, \n", " X = None, W = None, bhid = None, bvis = None):\n", " \"\"\"\n", " Initialize the ConctractiveAutoEncoder class by specifying\n", " the number of visible units, the number of hidden units, \n", " and the contraction level. The constructor also receives\n", " symbolic variables for the input, weights and bias.\n", " rng = np.random.RandomState\n", " input = symbolic description of input or None for standalone encoder\n", " W, bhid, bvis theano variable pointing to a set of variables that should\n", " be shared among dA and another architecture; if dA is standalone, \n", " set these to None\n", " \"\"\"\n", " self.n_visible = n_visible\n", " self.n_hidden = n_hidden\n", " self.batch_size = batch_size\n", " if not W:\n", " W_bound = 4 * np.sqrt(6. 
{ "cell_type": "markdown", "metadata": {}, "source": [ "***Contractive Auto Encoder***" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import theano\n", "import theano.tensor as T\n", "import numpy as np\n", "import time" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "class ContractiveAutoEncoder(object):\n", "    \"\"\"\n", "    Contractive Auto Encoder.\n", "    References :\n", "    - S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive\n", "    Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11\n", "\n", "    - S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, and Pascal\n", "    Vincent. Learning invariant features through local space\n", "    contraction. Technical Report 1360, Universite de Montreal\n", "\n", "    - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise\n", "    Training of Deep Networks, Advances in Neural Information Processing\n", "    Systems 19, 2007\n", "    \"\"\"\n", "    def __init__(self, rng, n_visible, n_hidden, batch_size = 1,\n", "                 X = None, W = None, bhid = None, bvis = None):\n", "        \"\"\"\n", "        Initialize the ContractiveAutoEncoder class by specifying the\n", "        number of visible units and the number of hidden units (the\n", "        contraction level is passed to get_cost_updates). The constructor\n", "        also receives symbolic variables for the input, weights and biases.\n", "        rng = np.random.RandomState\n", "        X = symbolic description of the input, or None for a standalone encoder\n", "        W, bhid, bvis = Theano variables pointing to a set of parameters that\n", "        should be shared with another architecture; if the cA is standalone,\n", "        set these to None\n", "        \"\"\"\n", "        self.n_visible = n_visible\n", "        self.n_hidden = n_hidden\n", "        self.batch_size = batch_size\n", "        if not W:\n", "            ## standard initialization range for sigmoid units\n", "            W_bound = 4 * np.sqrt(6. / (n_hidden + n_visible))\n", "            W = theano.shared(value = np.asarray(rng.uniform(\n", "                                  low = -W_bound,\n", "                                  high = W_bound,\n", "                                  size = (n_visible, n_hidden)),\n", "                                  dtype = theano.config.floatX),\n", "                              name = 'W',\n", "                              borrow = True)\n", "        if not bvis:\n", "            bvis = theano.shared(value = np.zeros(n_visible,\n", "                                     dtype = theano.config.floatX),\n", "                                 name = 'bvis',\n", "                                 borrow = True)\n", "        if not bhid:\n", "            bhid = theano.shared(value = np.zeros(n_hidden,\n", "                                     dtype = theano.config.floatX),\n", "                                 name = 'bhid',\n", "                                 borrow = True)\n", "        self.W = W\n", "        self.b = bhid\n", "        self.b_prime = bvis\n", "        ## tied weights: the decoder uses the transpose of the encoder weights\n", "        self.W_prime = self.W.T\n", "        self.X = X or T.matrix(name = 'X')\n", "        self.params = [self.W, self.b, self.b_prime]\n", "    def get_hidden_values(self, X):\n", "        return T.nnet.sigmoid(T.dot(X, self.W) + self.b)\n", "    def get_jacobian(self, hidden, W):\n", "        \"\"\"\n", "        Compute the Jacobian of the hidden layer with respect to the input;\n", "        the reshapes are necessary for broadcasting the element-wise\n", "        product along the right axis\n", "        \"\"\"\n", "        reshaped_hidden = T.reshape(hidden * (1-hidden), (self.batch_size, 1, self.n_hidden))\n", "        reshaped_W = T.reshape(W, (1, self.n_visible, self.n_hidden))\n", "        return reshaped_hidden * reshaped_W\n", "    def get_reconstructed_input(self, hidden):\n", "        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)\n", "    def get_cost_updates(self, contraction_level, learning_rate):\n", "        y = self.get_hidden_values(self.X)\n", "        z = self.get_reconstructed_input(y)\n", "        J = self.get_jacobian(y, self.W)\n", "        ## Sum over the size of a datapoint; if minibatches are used,\n", "        ## L will be a vector with one entry per example in the minibatch.\n", "        ## NOTE: this cross-entropy form assumes inputs scaled to [0, 1];\n", "        ## on unscaled data the cost can go negative, as in the run below.\n", "        self.L_rec = - T.sum(self.X*T.log(z) + (1-self.X)*T.log(1-z), axis = 1)\n", "        ## Compute the Jacobian penalty, averaged over the minibatch\n", "        self.L_jacob = T.sum(J ** 2) / self.batch_size\n", "        ## L is now a vector, where each element is the cross-entropy\n", "        ## cost of the reconstruction of the corresponding example of the\n", "        ## minibatch. 
We need to compute the average of all these to get\n", "        ## the cost of the minibatch\n", "        cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)\n", "        gparams = T.grad(cost, self.params)\n", "        ## one gradient-descent update per parameter\n", "        updates = [(param, param-learning_rate*gparam)\n", "                   for (param, gparam) in zip(self.params, gparams)]\n", "        return (cost, updates)\n", "\n", "\n", "def share_data(data, dtype = theano.config.floatX):\n", "    ## store as floatX (GPU-friendly), then cast to the requested dtype\n", "    shared_data = theano.shared(np.asarray(data, dtype = theano.config.floatX),\n", "                                borrow = True)\n", "    return T.cast(shared_data, dtype = dtype)\n", "\n", "def run_cA(v_train_X,\n", "           learning_rate = 0.01, n_epochs = 20, batch_size = 10, contraction_level = 0.1):\n", "    n_train_batches = v_train_X.get_value(borrow = True).shape[0] / batch_size\n", "    n_feats = v_train_X.get_value(borrow = True).shape[1]\n", "    print 'building model'\n", "    index = T.lscalar()\n", "    x = T.matrix('x')\n", "    rng = np.random.RandomState(0)\n", "    ca = ContractiveAutoEncoder(rng = rng, X = x, n_visible = n_feats,\n", "                                n_hidden = 500, batch_size = batch_size)\n", "    cost, updates = ca.get_cost_updates(contraction_level, learning_rate)\n", "    train_ca = theano.function(inputs = [index],\n", "                               outputs = [T.mean(ca.L_rec), ca.L_jacob],\n", "                               updates = updates,\n", "                               givens = {\n", "                                   x: v_train_X[index*batch_size:(index+1)*batch_size],\n", "                               })\n", "    start_time = time.clock()\n", "    print 'training model'\n", "    for epoch in xrange(n_epochs):\n", "        c = [train_ca(i) for i in xrange(n_train_batches)]\n", "        c_array = np.vstack(c)\n", "        print c_array.shape\n", "        print 'training epoch %d, reconstruction cost %f, jacobian norm %f' % (\n", "            epoch, np.mean(c_array[:, 0]), np.mean(np.sqrt(c_array[:, 1])))\n", "    end_time = time.clock()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 },
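{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick shape check (illustrative; not part of the original run): `get_jacobian` returns one `n_visible x n_hidden` Jacobian per example, and `T.sum(J ** 2)` in `get_cost_updates` is its squared Frobenius norm, i.e. the contraction penalty of Rifai et al." ] }, { "cell_type": "code", "collapsed": false, "input": [ "## Sketch: inspect the per-example Jacobian d(hidden)/d(input) on a tiny cA\n", "rng = np.random.RandomState(0)\n", "tiny = ContractiveAutoEncoder(rng = rng, n_visible = 4, n_hidden = 3, batch_size = 2)\n", "h = tiny.get_hidden_values(tiny.X)\n", "jac = theano.function(inputs = [tiny.X], outputs = tiny.get_jacobian(h, tiny.W))\n", "J = jac(np.random.random((2, 4)).astype(theano.config.floatX))\n", "## expect (batch_size, n_visible, n_hidden) = (2, 4, 3)\n", "print J.shape" ], "language": "python", "metadata": {}, "outputs": [] },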
{ "cell_type": "code", "collapsed": false, "input": [ "import cPickle\n", "from sklearn.cross_validation import train_test_split\n", "#X, y = cPickle.load(open('data/digits.pkl', 'rb'))\n", "X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))\n", "## labels in the pickle are 1-based; shift them to 0-based\n", "y = y - 1\n", "print X.shape, y.shape\n", "train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.9)\n", "print train_X.shape, train_y.shape\n", "v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)\n", "v_train_y = share_data(train_y, dtype = 'int32')\n", "v_validation_y = share_data(validation_y, dtype = 'int32')\n", "run_cA(v_train_X)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1000, 1875) (1000,)\n", "(100, 1875)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " (100,)\n", "building model\n", "training model" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 0, reconstruction cost 424.612998, jacobian norm 23.137957\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 1, reconstruction cost -172.349470, jacobian norm 19.565774\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 2, reconstruction cost -416.215884, jacobian norm 16.351329\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 3, reconstruction cost -635.762077, jacobian norm 14.059150\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 4, reconstruction cost -846.087529, jacobian norm 12.458559\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 5, reconstruction cost -1052.016090, jacobian norm 11.318715\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 6, reconstruction cost -1256.081126, jacobian norm 10.447896\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 7, reconstruction cost -1459.182969, jacobian norm 9.759262\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 8, reconstruction cost -1661.702354, jacobian norm 9.216226\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 9, reconstruction cost -1863.872129, jacobian norm 8.774608\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 10, reconstruction cost -2065.774331, jacobian norm 8.418967\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 11, reconstruction cost -2267.598844, jacobian norm 8.129463\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 12, reconstruction cost -2469.441701, jacobian norm 7.887607\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 13, reconstruction cost -2671.284029, jacobian norm 7.679948\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 14, reconstruction cost -2873.096477, jacobian norm 7.500753\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 15, reconstruction cost -3074.903276, jacobian norm 7.346815\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 16, reconstruction cost -3276.734967, jacobian norm 7.213882\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 17, reconstruction cost -3478.606484, jacobian norm 7.100257\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 18, reconstruction cost -3680.525140, jacobian norm 7.005262\n", "(10, 2)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "training epoch 19, reconstruction cost -3882.509590, jacobian norm 6.924966\n" ] } ], "prompt_number": 9 },
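{ "cell_type": "markdown", "metadata": {}, "source": [ "Note that the reconstruction cost above goes negative: the cross-entropy term in `get_cost_updates` assumes inputs scaled to [0, 1], and the blackbox features are not normalized here. The shrinking Jacobian norm still shows the mapping becoming more contractive. The sketch below (illustrative; not part of the original run) compiles the hidden-layer mapping of a cA as a feature encoder; since `run_cA` does not return the fitted model, a freshly initialized cA is used." ] }, { "cell_type": "code", "collapsed": false, "input": [ "## Sketch: compile the hidden-layer mapping of a cA as a feature encoder.\n", "## NOTE: run_cA above does not return the trained model, so this uses a\n", "## freshly initialized cA; to encode trained features, have run_cA return ca.\n", "rng = np.random.RandomState(0)\n", "ca = ContractiveAutoEncoder(rng = rng, n_visible = train_X.shape[1], n_hidden = 500)\n", "encode = theano.function(inputs = [ca.X], outputs = ca.get_hidden_values(ca.X))\n", "hidden = encode(np.asarray(train_X[:5], dtype = theano.config.floatX))\n", "## expect (5, 500)\n", "print hidden.shape" ], "language": "python", "metadata": {}, "outputs": [] },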
{ "cell_type": "markdown", "metadata": {}, "source": [ "***Stacked Denoising Auto Encoder***" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import time\n", "import numpy as np\n", "import theano\n", "import theano.tensor as T\n", "from theano.tensor.shared_randomstreams import RandomStreams\n", "from theanoml.formula import share_data, LogisticRegressionFormula, HiddenLayerFormula\n", "from theanoml.autoencoder import DenoisingAutoEncoderFormula" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "class SdA(object):\n", "    \"\"\"\n", "    Stacked denoising auto-encoder class.\n", "    A stacked denoising autoencoder model is obtained by stacking several\n", "    dAs. The hidden layer of the dA at layer i becomes the input of the dA at\n", "    layer i+1. The first layer dA gets as input the input of the SdA, and the\n", "    hidden layer of the last dA represents the output. Note that after\n", "    pretraining, the SdA is treated as a normal MLP; the dAs are only used\n", "    to initialize the weights.\n", "    \"\"\"\n", "    def __init__(self, n_in, n_hiddens, n_out, corruption_levels = [0.1, 0.1]):\n", "        self.sigmoid_layers = []\n", "        self.dA_layers = []\n", "        self.params = []\n", "        self.n_layers = len(n_hiddens)\n", "        theano_rng = RandomStreams(np.random.randint(2 ** 30))\n", "        self.x = T.matrix('x')\n", "        self.y = T.ivector('y')\n", "        ## TODO: stack HiddenLayerFormula / DenoisingAutoEncoderFormula layers\n", "        ## and a LogisticRegressionFormula output layer (left unfinished; see\n", "        ## the failed run below and the sketch after this cell)\n", "    def pretraining_functions(self, X, batch_size):\n", "        ## TODO: one compiled training function per dA layer (sketch below)\n", "        pass\n", "    def build_finetune_functions(self, train_X, train_y,\n", "                                 validation_X, validation_y, batch_size,\n", "                                 learning_rate):\n", "        ## TODO: compiled train / validation functions for the finetuning MLP\n", "        pass\n", "\n", "def run_sda(v_train_X, v_train_y, v_validation_X, v_validation_y,\n", "            finetune_lr = 0.1, pretraining_epochs = 15,\n", "            pretrain_lr = 0.001, training_epochs = 1000,\n", "            batch_size = 5):\n", "    n_samples, n_feats = v_train_X.get_value(borrow=True).shape\n", "    n_train_batches = n_samples / batch_size\n", "    print 'building the model '\n", "    sda = SdA(n_in = n_feats, n_hiddens = [1000, 1000, 1000], n_out = 10)\n", "    print 'getting the pretraining functions '\n", "    pretraining_fns = sda.pretraining_functions(X = v_train_X, batch_size = batch_size)\n", "    print 'pretraining the model'\n", "    corruption_levels = [.1, .2, .3]\n", "    for i in xrange(sda.n_layers):\n", "        for epoch in xrange(pretraining_epochs):\n", "            cost = np.mean([pretraining_fns[i](\n", "                index = bindex,\n", "                corruption = corruption_levels[i],\n", "                lr = pretrain_lr)\n", "                for bindex in xrange(n_train_batches)])\n", "            print 'pre-training layer %i, epoch %d, cost %f' % (i, epoch, cost)\n", "    print 'getting the finetuning functions '\n", "    train_fn, validate_model = sda.build_finetune_functions(\n", "        v_train_X, v_train_y, v_validation_X, v_validation_y,\n", "        batch_size = batch_size, learning_rate = finetune_lr\n", "    )\n", "    print 'finetuning the model '\n", "    ## early stopping: raise patience whenever validation improves enough\n", "    patience = 10 * n_train_batches\n", "    patience_increase = 2\n", "    improvement_threshold = 0.995\n", "    validation_frequency = min(n_train_batches, patience / 2)\n", "    best_params = None\n", "    best_validation_loss = np.inf\n", "    done_looping = False\n", "    epoch = 0\n", "    while (epoch < training_epochs) and (not done_looping):\n", "        epoch = epoch + 1\n", "        for minibatch_index in xrange(n_train_batches):\n", "            train_fn(minibatch_index)\n", "            iter = (epoch - 1) * n_train_batches + minibatch_index\n", "            if (iter+1) % validation_frequency == 0:\n", "                this_validation_loss = np.mean(validate_model())\n", "                print 'epoch %i, minibatch %i/%i, validation error %f %%' % (\n", "                    epoch, minibatch_index+1, n_train_batches, this_validation_loss * 100.\n", "                )\n", "                if this_validation_loss < best_validation_loss:\n", "                    if this_validation_loss < best_validation_loss * improvement_threshold:\n", "                        patience = max(patience, iter * patience_increase)\n", "                    best_validation_loss = this_validation_loss\n", "                    best_params = sda.params\n", "            if patience <= iter:\n", "                done_looping = True\n", "                break" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 },
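{ "cell_type": "markdown", "metadata": {}, "source": [ "The pretraining functions above are left as a TODO. For reference, below is a hedged sketch in the style of the deeplearning.net SdA tutorial this class follows. It assumes the `__init__` TODO has been completed so that `self.dA_layers` holds one denoising autoencoder per hidden layer, each exposing `get_cost_updates(corruption_level, learning_rate) -> (cost, updates)` -- an assumed interface, not the (unfinished) one above." ] }, { "cell_type": "code", "collapsed": false, "input": [ "## Hedged sketch of SdA.pretraining_functions: one compiled training\n", "## function per dA layer, each taking a minibatch index plus corruption\n", "## level and learning rate (keyword names must match the call in run_sda).\n", "## Assumes each dA exposes get_cost_updates(corruption_level, learning_rate).\n", "def pretraining_functions(self, X, batch_size):\n", "    index = T.lscalar('index')\n", "    corruption_level = T.scalar('corruption')  ## keyword name 'corruption'\n", "    learning_rate = T.scalar('lr')             ## keyword name 'lr'\n", "    batch_begin = index * batch_size\n", "    batch_end = batch_begin + batch_size\n", "    fns = []\n", "    for dA in self.dA_layers:\n", "        cost, updates = dA.get_cost_updates(corruption_level, learning_rate)\n", "        fns.append(theano.function(\n", "            inputs = [index,\n", "                      theano.Param(corruption_level, default = 0.2),\n", "                      theano.Param(learning_rate, default = 0.1)],\n", "            outputs = cost,\n", "            updates = updates,\n", "            givens = {self.x: X[batch_begin:batch_end]}))\n", "    return fns\n", "## could be patched in with: SdA.pretraining_functions = pretraining_functions" ], "language": "python", "metadata": {}, "outputs": [] },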
{ "cell_type": "code", "collapsed": false, "input": [ "import cPickle\n", "from sklearn.cross_validation import train_test_split\n", "#X, y = cPickle.load(open('data/digits.pkl', 'rb'))\n", "X, y = cPickle.load(open('data/blackbox.pkl', 'rb'))\n", "y = y - 1\n", "print X.shape, y.shape\n", "train_X, validation_X, train_y, validation_y = train_test_split(X, y, test_size = 0.9)\n", "print train_X.shape, train_y.shape\n", "v_train_X, v_validation_X = share_data(train_X), share_data(validation_X)\n", "v_train_y = share_data(train_y, dtype = 'int32')\n", "v_validation_y = share_data(validation_y, dtype = 'int32')\n", "run_sda(v_train_X, v_train_y, v_validation_X, v_validation_y)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1000, 1875) (1000,)\n", "(100, 1875)" ] }, { "ename": "AttributeError", "evalue": "'SdA' object has no attribute 'n_layers'", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mv_train_y\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mshare_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'int32'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mv_validation_y\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mshare_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalidation_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'int32'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mrun_sda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv_train_X\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_train_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_validation_X\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv_validation_y\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36mrun_sda\u001b[0;34m(v_train_X, v_train_y, v_validation_X, v_validation_y, finetune_lr, pretraining_epochs, pretrain_lr, training_epochs, batch_size)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'pretraining the model'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mcorruption_levels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m.3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_layers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpretraining_epochs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 34\u001b[0m cost = np.mean([pretraining_fns[i](\n", "\u001b[0;31mAttributeError\u001b[0m: 'SdA' object has no attribute 'n_layers'" ] }, { "output_type": "stream", "stream": "stdout", "text": [ " (100,)\n", "building the model \n", "getting the pretraining functions \n", "pretraining the model\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }