In [14]:
import mxnet as mx
from mxnet import gluon, autograd, ndarray
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [102]:
from sklearn.datasets import load_boston
data = load_boston()
df = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

### Normalize data for the NN

In [103]:
df_norm = (df - df.mean()) / (df.max() - df.min())

In [104]:
df_norm.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.040322,0.066364,-0.323562,-0.06917,-0.034352,0.055636,-0.034757,0.026822,-0.371713,-0.214193,-0.335695,0.101432,-0.211729
1,-0.040086,-0.113636,-0.149075,-0.06917,-0.176327,0.026129,0.106335,0.106581,-0.328235,-0.317246,-0.069738,0.101432,-0.096939
2,-0.040086,-0.113636,-0.149075,-0.06917,-0.176327,0.172517,-0.076981,0.106581,-0.328235,-0.317246,-0.069738,0.091169,-0.237943
3,-0.040029,-0.113636,-0.328328,-0.06917,-0.198961,0.136686,-0.234551,0.206163,-0.284757,-0.355414,0.026007,0.095708,-0.268021
4,-0.039617,-0.113636,-0.328328,-0.06917,-0.198961,0.165236,-0.148042,0.206163,-0.284757,-0.355414,0.026007,0.101432,-0.202071


### Split the data

In [106]:
X_train, X_test, y_train, y_test = train_test_split(df_norm, y,
                                                    test_size=0.2, random_state=1111)

### Gluon Model

In [132]:
BATCH_SIZE = 32
LEARNING_R = 0.01
EPOCHS = 300

### Prepare data 

In [133]:
train_dataset = mx.gluon.data.ArrayDataset(X_train.as_matrix(),y_train)
test_dataset = mx.gluon.data.ArrayDataset(X_test.as_matrix(),y_test)


In [134]:
train_data = mx.gluon.data.DataLoader(train_dataset,
                                      batch_size=BATCH_SIZE, shuffle=True)

test_data = mx.gluon.data.DataLoader(test_dataset,
                                     batch_size=BATCH_SIZE, shuffle=False)

### Initialize the model

In [135]:
net = gluon.nn.Sequential()

# Define the model architecture
with net.name_scope(): 
    net.add(gluon.nn.Dense(16, activation="relu") ) 
    net.add(gluon.nn.BatchNorm())    
    net.add(gluon.nn.Dense(8, activation="relu") ) 
    net.add(gluon.nn.BatchNorm())    
    net.add(gluon.nn.Dense(1))

# Intitalize parametes of the model
net.collect_params().initialize(mx.init.Uniform())

# Add L2 loss function
l2loss = gluon.loss.L2Loss()

trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': LEARNING_R})


In [136]:
for e in range(EPOCHS):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(mx.cpu()).astype('float32')
        label = label.as_in_context(mx.cpu()).astype('float32')
        with autograd.record(): # Start recording the derivatives
            output = net(data) # the forward iteration
            loss = l2loss(output, label)
            loss.backward()
        trainer.step(data.shape[0])
        # Provide stats on the improvement of the model over each epoch
        curr_loss = ndarray.mean(loss).asscalar()
    if e % 20 == 0:
        print("Epoch {}. Current Loss: {}.".format(e, curr_loss))
#     if curr_loss < 2.0:
#             break

Epoch 0. Current Loss: 245.78768920898438.
Epoch 20. Current Loss: 7.980735778808594.
Epoch 40. Current Loss: 4.9582600593566895.
Epoch 60. Current Loss: 6.980565547943115.
Epoch 80. Current Loss: 0.960381031036377.
Epoch 100. Current Loss: 7.814993381500244.
Epoch 120. Current Loss: 9.498841285705566.
Epoch 140. Current Loss: 17.053531646728516.
Epoch 160. Current Loss: 5.0172576904296875.
Epoch 180. Current Loss: 7.0766472816467285.
Epoch 200. Current Loss: 2.061584949493408.
Epoch 220. Current Loss: 4.6772260665893555.
Epoch 240. Current Loss: 4.503413200378418.
Epoch 260. Current Loss: 6.620635986328125.
Epoch 280. Current Loss: 3.283313751220703.


### Predict

In [137]:
y_pred = np.array([])
for data,label in test_data:
        data = data.as_in_context(mx.cpu()).astype('float32')
        label = label.as_in_context(mx.cpu()).astype('float32')
        output = net(data)
        y_pred = np.append(y_pred, output.asnumpy())

In [138]:
mean_squared_error(y_test, y_pred)

5.9256329511598826