{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import gzip\n",
    "import pickle\n",
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.datasets import fetch_mldata\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "from vanilla_neural_nets.neural_network.training_batch_generator import MiniBatchGenerator\n",
    "from vanilla_neural_nets.neural_network.optimization_algorithm import GradientDescent\n",
    "from vanilla_neural_nets.neural_network.activation_function import SigmoidActivationFunction\n",
    "from vanilla_neural_nets.neural_network.loss_function import MeanSquaredError, BinaryCrossEntropyLoss\n",
    "from vanilla_neural_nets.neural_network.parameter_initialization import GaussianBiasInitializer, GaussianWeightInitializer\n",
    "from vanilla_neural_nets.neural_network.network import VanillaNeuralNetwork\n",
    "from vanilla_neural_nets.neural_network.data_object import HoldoutData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Single seed used for the train/holdout split, the parameter initializers\n",
    "# and the network, so it has to be defined before the data is split.\n",
    "RANDOM_STATE = 123"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load MNIST data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# NOTE(review): fetch_mldata was removed in scikit-learn 0.22; on newer versions\n",
    "# this cell has to be ported to fetch_openml.\n",
    "mnist = fetch_mldata('MNIST original', data_home='.')\n",
    "# One-hot encode the labels: one indicator column per distinct target value.\n",
    "mnist.target = np.array(pd.get_dummies(mnist.target))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Separate and scale train and holdout data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Flag one row in seven, then shuffle the flags with a fixed seed so that the\n",
    "# same rows are held out on every run.\n",
    "holdout_set_mask = np.arange(len(mnist.data)) % 7 == 0\n",
    "np.random.RandomState(RANDOM_STATE).shuffle(holdout_set_mask)\n",
    "\n",
    "X = mnist.data[~holdout_set_mask].astype(float)\n",
    "y = mnist.target[~holdout_set_mask].astype(float)\n",
    "X_holdout = mnist.data[holdout_set_mask].astype(float)\n",
    "y_holdout = mnist.target[holdout_set_mask].astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Fit the scaler on the training data only and reuse it for the holdout data,\n",
    "# so both sets go through exactly the same feature transformation.\n",
    "scaler = MinMaxScaler().fit(X)\n",
    "X = scaler.transform(X)\n",
    "X_holdout = scaler.transform(X_holdout)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Initialize hyper-parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "HIDDEN_LAYER_SIZE = 50\n",
    "MSE_LEARNING_RATE = 3.\n",
    "N_EPOCHS = 10\n",
    "TRAINING_BATCH_SIZE = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "GAUSSIAN_INITIALIZER_STANDARD_DEVIATION = 1.\n",
    "\n",
    "GAUSSIAN_WEIGHT_INITIALIZER = GaussianWeightInitializer(\n",
    "    standard_deviation=GAUSSIAN_INITIALIZER_STANDARD_DEVIATION,\n",
    "    random_state=RANDOM_STATE\n",
    ")\n",
    "GAUSSIAN_BIAS_INITIALIZER = GaussianBiasInitializer(\n",
    "    standard_deviation=GAUSSIAN_INITIALIZER_STANDARD_DEVIATION,\n",
    "    random_state=RANDOM_STATE\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "LAYER_SIZES = [X.shape[1], HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE, y.shape[1]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train network"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### with mean squared error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vanilla_neural_net = VanillaNeuralNetwork(\n",
    "    layer_sizes=LAYER_SIZES,\n",
    "    training_batch_generator_class=MiniBatchGenerator,\n",
    "    loss_function_class=MeanSquaredError,\n",
    "    activation_function_class=SigmoidActivationFunction,\n",
    "    optimization_algorithm_class=GradientDescent,\n",
    "    learning_rate=MSE_LEARNING_RATE,\n",
    "    n_epochs=N_EPOCHS,\n",
    "    training_batch_size=TRAINING_BATCH_SIZE,\n",
    "    random_state=RANDOM_STATE,\n",
    "    weight_initializer=GAUSSIAN_WEIGHT_INITIALIZER,\n",
    "    bias_initializer=GAUSSIAN_BIAS_INITIALIZER,\n",
    "    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 0 | Accuracy: 0.9132\n",
      "Epoch: 1 | Accuracy: 0.9247\n",
      "Epoch: 2 | Accuracy: 0.9315\n",
      "Epoch: 3 | Accuracy: 0.9438\n",
      "Epoch: 4 | Accuracy: 0.9463\n",
      "Epoch: 5 | Accuracy: 0.9454\n",
      "Epoch: 6 | Accuracy: 0.9494\n",
      "Epoch: 7 | Accuracy: 0.9514\n",
      "Epoch: 8 | Accuracy: 0.9527\n",
      "Epoch: 9 | Accuracy: 0.9553\n"
     ]
    }
   ],
   "source": [
    "vanilla_neural_net.fit(X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### with cross entropy loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Kept under its own name so that re-running the mean squared error cells\n",
    "# afterwards does not silently pick up this value.\n",
    "CROSS_ENTROPY_LEARNING_RATE = .5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vanilla_neural_net = VanillaNeuralNetwork(\n",
    "    layer_sizes=LAYER_SIZES,\n",
    "    training_batch_generator_class=MiniBatchGenerator,\n",
    "    loss_function_class=BinaryCrossEntropyLoss,\n",
    "    activation_function_class=SigmoidActivationFunction,\n",
    "    optimization_algorithm_class=GradientDescent,\n",
    "    learning_rate=CROSS_ENTROPY_LEARNING_RATE,\n",
    "    n_epochs=N_EPOCHS,\n",
    "    training_batch_size=TRAINING_BATCH_SIZE,\n",
    "    random_state=RANDOM_STATE,\n",
    "    weight_initializer=GAUSSIAN_WEIGHT_INITIALIZER,\n",
    "    bias_initializer=GAUSSIAN_BIAS_INITIALIZER,\n",
    "    holdout_data=HoldoutData(X=X_holdout, y=y_holdout)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 0 | Accuracy: 0.9222\n",
      "Epoch: 1 | Accuracy: 0.9317\n",
      "Epoch: 2 | Accuracy: 0.9355\n",
      "Epoch: 3 | Accuracy: 0.9484\n",
      "Epoch: 4 | Accuracy: 0.9493\n",
      "Epoch: 5 | Accuracy: 0.9548\n",
      "Epoch: 6 | Accuracy: 0.9551\n",
      "Epoch: 7 | Accuracy: 0.9575\n",
      "Epoch: 8 | Accuracy: 0.9579\n",
      "Epoch: 9 | Accuracy: 0.9591\n"
     ]
    }
   ],
   "source": [
    "vanilla_neural_net.fit(X, y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}