import numpy as np import unittest import os import sys import h5py import random sys.path.append('../../../CORE') from fTheanoNNclassCORE import OptionsStore, TheanoNNclass, NNsupport, FunctionModel, LayerNN from fGraphBuilderCORE import Graph #Sampling minibatch from whole data set def getBatch(d, n, i): idx = random.sample(i, n) idx = np.sort(idx) #Remove labels and read data res = d[idx, 1:] #Normalise output from 0..255 to 0..1 return res.T / 255.0 #We use HDF because of its speed and convenience #Set data's file names and path srcFolder = './Data/src/' hdf_type = '.hdf5' train_set = 'mnist_train' test_set = 'mnist_test' #Read train data f_train = h5py.File(srcFolder + train_set + hdf_type, 'r+') DATA = f_train['/hdfDataSet'] #Read CV data f_test = h5py.File(srcFolder + test_set + hdf_type, 'r+') DATA_CV = f_test['/hdfDataSet'] #Print out shapes of loaded data print 'Data shape:', DATA.shape, '\n', 'CV shape:', DATA_CV.shape #Extract some useful data dataSize = DATA.shape[0] cvSize = DATA_CV.shape[0] validDataIndexes = xrange(0, dataSize) # As we have all data we need for Auto Encoder (AE), # let's create an appropriate NN # Set few additional options numberOfFeatures = 196 batchSize = 200 inputSize = DATA.shape[1] - 1 # Subtract label iterations = 10000 checkCvEvery = 500 #Common options for whole NN options = OptionsStore(learnStep=0.005, rmsProp=0.9, mmsmin=1e-20, minibatch_size=batchSize, CV_size=cvSize) #First layer L1 = LayerNN(size_in=inputSize, size_out=numberOfFeatures, sparsity=0.1, beta=3, weightDecay=3e-3, activation=FunctionModel.Sigmoid) #Second layer L2 = LayerNN(size_in=numberOfFeatures, size_out=inputSize, weightDecay=3e-3, activation=FunctionModel.Sigmoid) #Compile all together AE = TheanoNNclass(options, (L1, L2)) #Compile train and predict functions AE.trainCompile() AE.predictCompile() #Normalise CV data from 0..255 to 0..1 X_CV = DATA_CV[:, 1:].T / 255.0 #Empty list to collect CV errors CV_error = [] #Let's iterate! for i in xrange(iterations): #Get miniBatch of defined size from whole DATA X = getBatch(DATA, batchSize, validDataIndexes) #Train on given data/labels AE.trainCalc(X, X, iteration=1, debug=True, errorCollect=True) #Check CV error every *checkCvEvery* cycles if i % checkCvEvery == 0: #Caclculate CV error give CV data/labels CV_error.append(NNsupport.crossV(X_CV, X_CV, AE)) #Print current CV error print 'CV error: ', CV_error[-1] #Draw how error and accuracy evolves vs iterations Graph.Builder(name='AE_error.png', error=AE.errorArray, cv=CV_error, legend_on=True) #Visualise hidden layers weights AE.weightsVisualizer(folder='.', size=(28, 28))