{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"# train_test_split moved from sklearn.cross_validation to\n",
"# sklearn.model_selection in scikit-learn 0.18 (the old module was\n",
"# removed in 0.20); try the new location first so the notebook runs\n",
"# on both old and new versions.\n",
"try:\n",
"    from sklearn.model_selection import train_test_split\n",
"except ImportError:\n",
"    from sklearn.cross_validation import train_test_split\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"from statsmodels.tools.eval_measures import rmse\n",
"# Import pyplot directly; the pylab module is a legacy shim.\n",
"import matplotlib.pyplot as plt"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 325
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Render matplotlib figures inline and set the theme to 'ggplot'.\n",
"# %matplotlib inline is used instead of %pylab inline: pylab does a\n",
"# wildcard import that clobbers explicitly imported names (the old run\n",
"# of this cell warned that 'plt' and 'mod' were overwritten).\n",
"plt.style.use('ggplot')\n",
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 326
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read Boston Housing Data\n",
"data = pd.read_csv('Datasets/Housing.csv')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 327
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Create a data frame with all the independent features\n",
"data_indep = data.drop('medv', axis = 1)\n",
"\n",
"# Create a target vector(vector of dependent variable, i.e. 'medv')\n",
"data_dep = data['medv']\n",
"\n",
"# Split data into training and test sets\n",
"train_X, test_X, train_y, test_y = train_test_split(data_indep, data_dep,\n",
" test_size = 0.20,\n",
" random_state = 42)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 328
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Regression without any Outliers:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Now let's fit a GradientBoostingRegressor with a L1(Least Absolute Deviation) loss function\n",
"# Set a random seed so that we can reproduce the results\n",
"np.random.seed(32767)\n",
"\n",
"# A GradientBoostingRegressor with L1(Least Absolute Deviation) as the loss function\n",
"mod = GradientBoostingRegressor(loss='lad')\n",
"\n",
"fit = mod.fit(train_X, train_y)\n",
"predict = fit.predict(test_X)\n",
"\n",
"# Root Mean Squared Error\n",
"print \"RMSE -> %f\" % rmse(predict, test_y)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"RMSE -> 3.440147\n"
]
}
],
"prompt_number": 329
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Suppress printing numpy array in scientific notation\n",
"np.set_printoptions(suppress=True)\n",
"\n",
"error = predict - test_y\n",
"\n",
"# Print squared errors in all test samples \n",
"np.around(error ** 2, decimals = 2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 330,
"text": [
"array([ 0.03, 0.09, 13.16, 0.08, 0. , 1.49, 0.52,\n",
" 0.31, 2.2 , 11.76, 0.39, 0. , 0.71, 0.01,\n",
" 6.73, 62.99, 2.71, 0.08, 13.18, 3.96, 2.2 ,\n",
" 13.17, 0.18, 0.63, 3.91, 4.46, 2.2 , 1.9 ,\n",
" 2.26, 6.1 , 7.01, 0.24, 111.68, 0.44, 31.79,\n",
" 4.04, 0.06, 0.01, 11.08, 0.06, 6.94, 5.05,\n",
" 16.06, 12.71, 0.04, 0. , 4.9 , 0.21, 10.28,\n",
" 10.61, 5.29, 0.04, 1.43, 6.06, 3.39, 0.11,\n",
" 7.18, 20.04, 1.45, 0.09, 4.82, 2.39, 2.19,\n",
" 0.05, 0.81, 0.63, 4.83, 4.14, 4. , 0.75,\n",
" 0.91, 0.65, 0.42, 1.17, 0.63, 12.26, 0.41,\n",
" 3.02, 0.89, 51.75, 0.16, 18.5 , 0.36, 1.43,\n",
" 3.31, 8.56, 1.62, 2.8 , 0.73, 1.59, 0.75,\n",
" 2.85, 0.55, 4.29, 48.63, 3.88, 476.98, 59.54,\n",
" 12.18, 22.68, 2.86, 0.45])"
]
}
],
"prompt_number": 330
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# A GradientBoostingRegressor with L2(Least Squares) as the loss function\n",
"mod = GradientBoostingRegressor(loss='ls')\n",
"\n",
"fit = mod.fit(train_X, train_y)\n",
"predict = fit.predict(test_X)\n",
"\n",
"# Root Mean Squared Error\n",
"print \"RMSE -> %f\" % rmse(predict, test_y)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"RMSE -> 2.542019\n"
]
}
],
"prompt_number": 331
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"error = predict - test_y\n",
"\n",
"# Print squared errors in all test samples \n",
"np.around(error ** 2, decimals = 2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 332,
"text": [
"array([ 0.02, 1.51, 17.29, 1.49, 2.5 , 5.38, 0.12, 0.03,\n",
" 1.03, 18. , 2.44, 1.11, 5.32, 0.31, 1.68, 16.66,\n",
" 1.54, 1.86, 24.29, 3.47, 1. , 14.71, 0.03, 2.27,\n",
" 0.61, 2.82, 3.62, 2.67, 5.87, 9.84, 11.36, 0.11,\n",
" 39.7 , 2.7 , 21.12, 6.05, 2.57, 0.09, 13.48, 0.56,\n",
" 1.96, 4.68, 32.54, 11.9 , 0. , 0.24, 6.82, 0. ,\n",
" 3.35, 15.8 , 1.78, 0.07, 2.35, 2.28, 18.14, 0.04,\n",
" 3.54, 14.06, 3.48, 0.17, 0.57, 0.92, 0.7 , 1.01,\n",
" 0.33, 9.48, 1.98, 1.3 , 7.62, 7.32, 1.62, 1.61,\n",
" 0.06, 0.58, 6.42, 1.81, 0.03, 8.83, 0.81, 34.72,\n",
" 0.51, 15.03, 0.94, 0.73, 0.89, 4.12, 0.92, 1.91,\n",
" 0.39, 0.2 , 0.7 , 0.16, 0.1 , 3.2 , 25.71, 12.62,\n",
" 84.07, 30.38, 5.81, 7.08, 3.31, 2.21])"
]
}
],
"prompt_number": 332
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As apparent from RMSE errors of L1 and L2 loss functions, Least Squares(L2) outperform L1, when there are no outliers in the data."
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Regression with Outliers:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Some statistics about the Housing Data\n",
"data.describe()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" crim | \n",
" zn | \n",
" indus | \n",
" chas | \n",
" nox | \n",
" rm | \n",
" age | \n",
" dis | \n",
" rad | \n",
" tax | \n",
" ptratio | \n",
" black | \n",
" lstat | \n",
" medv | \n",
"\n",
" \n",
" \n",
" \n",
" count | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
" 506.000000 | \n",
"\n",
" \n",
" mean | \n",
" 3.613524 | \n",
" 11.363636 | \n",
" 11.136779 | \n",
" 0.069170 | \n",
" 0.554695 | \n",
" 6.284634 | \n",
" 68.574901 | \n",
" 3.795043 | \n",
" 9.549407 | \n",
" 408.237154 | \n",
" 18.455534 | \n",
" 356.674032 | \n",
" 12.653063 | \n",
" 22.532806 | \n",
"\n",
" \n",
" std | \n",
" 8.601545 | \n",
" 23.322453 | \n",
" 6.860353 | \n",
" 0.253994 | \n",
" 0.115878 | \n",
" 0.702617 | \n",
" 28.148861 | \n",
" 2.105710 | \n",
" 8.707259 | \n",
" 168.537116 | \n",
" 2.164946 | \n",
" 91.294864 | \n",
" 7.141062 | \n",
" 9.197104 | \n",
"\n",
" \n",
" min | \n",
" 0.006320 | \n",
" 0.000000 | \n",
" 0.460000 | \n",
" 0.000000 | \n",
" 0.385000 | \n",
" 3.561000 | \n",
" 2.900000 | \n",
" 1.129600 | \n",
" 1.000000 | \n",
" 187.000000 | \n",
" 12.600000 | \n",
" 0.320000 | \n",
" 1.730000 | \n",
" 5.000000 | \n",
"\n",
" \n",
" 25% | \n",
" 0.082045 | \n",
" 0.000000 | \n",
" 5.190000 | \n",
" 0.000000 | \n",
" 0.449000 | \n",
" 5.885500 | \n",
" 45.025000 | \n",
" 2.100175 | \n",
" 4.000000 | \n",
" 279.000000 | \n",
" 17.400000 | \n",
" 375.377500 | \n",
" 6.950000 | \n",
" 17.025000 | \n",
"\n",
" \n",
" 50% | \n",
" 0.256510 | \n",
" 0.000000 | \n",
" 9.690000 | \n",
" 0.000000 | \n",
" 0.538000 | \n",
" 6.208500 | \n",
" 77.500000 | \n",
" 3.207450 | \n",
" 5.000000 | \n",
" 330.000000 | \n",
" 19.050000 | \n",
" 391.440000 | \n",
" 11.360000 | \n",
" 21.200000 | \n",
"\n",
" \n",
" 75% | \n",
" 3.677082 | \n",
" 12.500000 | \n",
" 18.100000 | \n",
" 0.000000 | \n",
" 0.624000 | \n",
" 6.623500 | \n",
" 94.075000 | \n",
" 5.188425 | \n",
" 24.000000 | \n",
" 666.000000 | \n",
" 20.200000 | \n",
" 396.225000 | \n",
" 16.955000 | \n",
" 25.000000 | \n",
"\n",
" \n",
" max | \n",
" 88.976200 | \n",
" 100.000000 | \n",
" 27.740000 | \n",
" 1.000000 | \n",
" 0.871000 | \n",
" 8.780000 | \n",
" 100.000000 | \n",
" 12.126500 | \n",
" 24.000000 | \n",
" 711.000000 | \n",
" 22.000000 | \n",
" 396.900000 | \n",
" 37.970000 | \n",
" 50.000000 | \n",
"\n",
" \n",
"\n",
"",
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 333,
"text": [
" crim zn indus chas nox rm \\\n",
"count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n",
"mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634 \n",
"std 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617 \n",
"min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000 \n",
"25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500 \n",
"50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500 \n",
"75% 3.677082 12.500000 18.100000 0.000000 0.624000 6.623500 \n",
"max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000 \n",
"\n",
" age dis rad tax ptratio black \\\n",
"count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 \n",
"mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032 \n",
"std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864 \n",
"min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000 \n",
"25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500 \n",
"50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000 \n",
"75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000 \n",
"max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000 \n",
"\n",
" lstat medv \n",
"count 506.000000 506.000000 \n",
"mean 12.653063 22.532806 \n",
"std 7.141062 9.197104 \n",
"min 1.730000 5.000000 \n",
"25% 6.950000 17.025000 \n",
"50% 11.360000 21.200000 \n",
"75% 16.955000 25.000000 \n",
"max 37.970000 50.000000 "
]
}
],
"prompt_number": 333
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After looking at the minimum and maximum values of 'medv' column, we can see that the range of values in 'medv' is [5, 50].\n",
"Let's add a few Outliers in this Dataset, so that we can see some significant differences with L1 and L2 loss functions."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Get upper and lower bounds[min, max] of all the features\n",
"stats = data.describe()\n",
"extremes = stats.loc[['min', 'max'],:].drop('medv', axis = 1)\n",
"extremes"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" crim | \n",
" zn | \n",
" indus | \n",
" chas | \n",
" nox | \n",
" rm | \n",
" age | \n",
" dis | \n",
" rad | \n",
" tax | \n",
" ptratio | \n",
" black | \n",
" lstat | \n",
"\n",
" \n",
" \n",
" \n",
" min | \n",
" 0.00632 | \n",
" 0 | \n",
" 0.46 | \n",
" 0 | \n",
" 0.385 | \n",
" 3.561 | \n",
" 2.9 | \n",
" 1.1296 | \n",
" 1 | \n",
" 187 | \n",
" 12.6 | \n",
" 0.32 | \n",
" 1.73 | \n",
"\n",
" \n",
" max | \n",
" 88.97620 | \n",
" 100 | \n",
" 27.74 | \n",
" 1 | \n",
" 0.871 | \n",
" 8.780 | \n",
" 100.0 | \n",
" 12.1265 | \n",
" 24 | \n",
" 711 | \n",
" 22.0 | \n",
" 396.90 | \n",
" 37.97 | \n",
"\n",
" \n",
"\n",
"",
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 334,
"text": [
" crim zn indus chas nox rm age dis rad tax \\\n",
"min 0.00632 0 0.46 0 0.385 3.561 2.9 1.1296 1 187 \n",
"max 88.97620 100 27.74 1 0.871 8.780 100.0 12.1265 24 711 \n",
"\n",
" ptratio black lstat \n",
"min 12.6 0.32 1.73 \n",
"max 22.0 396.90 37.97 "
]
}
],
"prompt_number": 334
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we are going to generate 5 random samples, such that their values lies in the [min, max] range of respective features."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Set a random seed\n",
"np.random.seed(1234)\n",
"\n",
"# Create 5 random values \n",
"rands = np.random.rand(5, 1)\n",
"rands"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 335,
"text": [
"array([[ 0.19151945],\n",
" [ 0.62210877],\n",
" [ 0.43772774],\n",
" [ 0.78535858],\n",
" [ 0.77997581]])"
]
}
],
"prompt_number": 335
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Get the 'min' and 'max' rows as numpy array\n",
"min_array = np.array(extremes.loc[['min'], :])\n",
"max_array = np.array(extremes.loc[['max'], :])\n",
"\n",
"# Find the difference (range) of 'max' and 'min' per feature.\n",
"# Named 'feature_range' rather than 'range' so the Python builtin\n",
"# range() is not shadowed in the notebook namespace.\n",
"feature_range = max_array - min_array\n",
"feature_range"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 336,
"text": [
"array([[ 88.96988, 100. , 27.28 , 1. , 0.486 ,\n",
" 5.219 , 97.1 , 10.9969 , 23. , 524. ,\n",
" 9.4 , 396.58 , 36.24 ]])"
]
}
],
"prompt_number": 336
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Generate 5 outlier samples: scale each random value in 'rands' into\n",
"# the [min, max] interval of every feature.\n",
"outliers_X = (rands * feature_range) + min_array\n",
"outliers_X"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 337,
"text": [
"array([[ 17.04578252, 19.15194504, 5.68465061, 0.19151945,\n",
" 0.47807845, 4.56054001, 21.49653863, 3.23572024,\n",
" 5.40494736, 287.356192 , 14.40028283, 76.27278363,\n",
" 8.67066488],\n",
" [ 55.35526271, 62.2108771 , 17.43112727, 0.62210877,\n",
" 0.68734486, 6.80778568, 63.30676167, 7.97086794,\n",
" 15.30850173, 512.98499602, 18.44782245, 247.03589642,\n",
" 24.27522186],\n",
" [ 38.95090441, 43.7727739 , 12.40121272, 0.43772774,\n",
" 0.59773568, 5.84550107, 45.40336346, 5.94324817,\n",
" 11.067738 , 416.36933524, 16.71464075, 173.91406674,\n",
" 17.59325326],\n",
" [ 69.87957895, 78.53585837, 21.88458216, 0.78535858,\n",
" 0.76668427, 7.65978645, 79.15831848, 9.76610981,\n",
" 19.06324743, 598.52789787, 19.98237069, 311.77750713,\n",
" 30.19139507],\n",
" [ 69.40067405, 77.99758081, 21.73774005, 0.77997581,\n",
" 0.76406824, 7.63169374, 78.63565097, 9.70691596,\n",
" 18.93944359, 595.70732345, 19.9317726 , 309.64280598,\n",
" 29.99632329]])"
]
}
],
"prompt_number": 337
},
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# We will also create some hard coded outliers for 'medv', i.e. our target\n",
"medv_outliers = np.array([0, 0, 600, 700, 600])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 338
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Let's have a look at the data types of all the columns\n",
"# so that we can create our new Dataset compatible with the original one\n",
"data_indep.dtypes"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 339,
"text": [
"crim float64\n",
"zn float64\n",
"indus float64\n",
"chas int64\n",
"nox float64\n",
"rm float64\n",
"age float64\n",
"dis float64\n",
"rad int64\n",
"tax int64\n",
"ptratio float64\n",
"black float64\n",
"lstat float64\n",
"dtype: object"
]
}
],
"prompt_number": 339
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Change the type of 'chas', 'rad' and 'tax' to rounded of Integers\n",
"outliers_X[:, [3, 8, 9]] = np.int64(np.round(outliers_X[:, [3, 8, 9]]))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 340
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Finally concatenate our existing 'train_X' and 'train_y' with these outliers\n",
"train_X = np.append(train_X, outliers_X, axis = 0)\n",
"train_y = np.append(train_y, medv_outliers, axis = 0)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 341
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Plot a histogram of 'medv' in train_y\n",
"fig = plt.figure(figsize=(13,7))\n",
"plt.hist(train_y, bins=50, range = (-10, 800))\n",
"fig.suptitle('medv Count', fontsize = 20)\n",
"plt.xlabel('medv', fontsize = 16)\n",
"plt.ylabel('count', fontsize = 16)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 342,
"text": [
""
]
},
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAxoAAAHjCAYAAACkQmmqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucVfV97//3nhkRgWG4KIKgBYUcdTwYI3hDES8nbdRE\nYy0ejYmiPWm8VlJrjNUEQ6MmVtQajT3HqInNSSWnB8zFxpxWvFRSBYOxQYwSJUoNIsNwMyIOs35/\n5MfUyQxmhr24jD6fjwePx+y19tr7uz8zPJgXe6+9K0VRFAEAAChRzfZeAAAA8N4jNAAAgNIJDQAA\noHRCAwAAKJ3QAAAASic0AACA0gkNgPepkSNHZtSoUdt7GQC8RwkNgPexSqWy1e/jueeey8UXX5wD\nDjggDQ0N2XnnnTN8+PCcdNJJueuuu7Jhw4atvoZq3XPPPampqck3v/nN7b0UgB6jbnsvAID3ri99\n6Uu55pprUhRFjjjiiBx//PGpr6/PsmXL8uijj+ZP//RP8/Wvfz3z5s3b3kvtkm0RZgDvFUIDgK3i\n2muvzbRp07LXXnvlu9/9bsaPH9/hOg8++GBuuOGG7bC6LVMUxfZeAkCP4aVTAFvJkiVLUlNTkylT\npuSXv/xlTjvttAwePDj9+/fPhz/84fz85z9Pkrz++uv50z/90wwbNiy77LJLxo8fn4cffrjT22xp\nacntt9+eww47LP3790/fvn3zoQ99KLfddttmfwn+2te+lsbGxuyyyy4ZMWJELr744qxevbrD9a6/\n/vrU1NTkb//2bzu9nVdffTV1dXWdBkNnj33atGnp1atXHnjggc0e84d/+Id54IEHOmyfOXNmJk6c\nmIaGhvTp0ydjx47N9ddf3+nLrGpqanLMMcd0evvnnHNOampq8vLLL7db26bvy5IlS/Lf//t/z667\n7to2+x/+8IftbmPSpEk599xzkyRTpkxJTU1N25933i4A7XlGA2ArW7JkSQ477LDsv//+Offcc/PS\nSy9l1qxZmTRpUv71X/81J5xwQgYOHJgzzjgjTU1N+Yd/+Id85CMfyfPPP58999yz7XbefvvtfPSj\nH82Pf/zj7LvvvjnrrLPSu3fvPPTQQ7n44ovzxBNP5Fvf+la7+/7zP//z3Hrrrdljjz3yZ3/2Z6mr\nq8v999+fJ554Im+//XZ23nnntut+6lOfylVXXZVvfetbueSSSzo8jr//+79Pa2trpkyZ8nsf8913\n352WlpacccYZ2X///d/1ur169Wp3+corr8z111+f3XbbLWeddVb69euXBx54IFdeeWUefPDB/PjH\nP85OO+3U7ph3e0nT5vb96le/yqGHHpp99tknZ599dpqamnLffffl5JNPzj//8z9n0qRJSX4bFwMH\nDsz999+fU045JR/84AfbbqOhoeFdHxvA+1oBwFbx0ksvFZVKpahUKsW1117bbt/06dOLSqVSNDQ0\nFOeff367fffee29RqVSKqVOnttv+xS9+sahUKsUll1xStLa2tm3fuHFjcd555xWVSqW4//7727Y/\n/vjjRaVSKcaMGVM0Nze3bV+/fn1x+OGHF5VKpRg1alS7+/jDP/zDolKpFD//+c87PJ7999+/6N27\nd7Fy5crf+9iPPfbYolKpFN/4xjd+73Xfae7cuUWlUin+4A/+oHjttdfatre0tBQf/ehHO51lpVIp\njjnmmE5v7+yzzy4qlUrxq1/9qm3bO78vX/rSl9pd/8EHHywqlUpxwgkntNt+9913F5VKpfjmN7/Z\nrccD8H7mpVMAW9moUaNyxRVXtNt29tlnJ0k2btzY4RyFM888M3V1dfnZz37Wtq21tTW33nprhg0b\nlptuuqnd/9LX1NTkb/7mb1KpVPLtb3+7bfvdd9+dJPmrv/qrDBgwoG37zjvvnOuuu67TtW5a1+++\nu9L8+fOzaNGinHjiiRk4cODvfcy//vWvkyQjRoz4vd
d9p7vuuitJctVVV2XIkCFt22tra3PjjTem\npqYmd955Z7duc3NGjhyZq666qt22D3/4w9lzzz17zMnpADsyL50C2Mo++MEPdnj5zrBhw5IkH/jA\nB9K3b992+2pqajJkyJAsXbq0bdvzzz+f5ubmjBkzJl/60pc6vZ/evXtn0aJFbZd/+tOfplKp5Oij\nj+5w3QkTJqSmpuP/NX384x9PQ0NDvv3tb7eds5H8Z3icc845XXjEW27Tmo899tgO+8aMGZPhw4dn\nyZIlWbt2berr66u6r86+L0my55575oknnqjqtgEQGgBbXWev46+rq9vsvk3733777bbLTU1NSZIX\nXnhhs6FRqVTyxhtvtF3edML37rvv3unt77rrrh229+7dO5MnT87/+l//Kz/+8Y/zR3/0R9mwYUO+\n853vZMiQIfnIRz6yuYfZzrBhw/Lcc8+1i6Wu2LTmTSHW2e0uXbo0q1atqjo03vkszzvV1dWltbW1\nqtsGwLtOAfQIm4Lk1FNPTWtra6d/Nm7cmF/+8pcdjlm2bFmH22tpacmKFSs6va/fffnUD3/4w6xc\nuTJnnnlmamtru7Teo446KknyL//yL118hO3XvOmlV79r0/bfDbSWlpZOr79q1apu3T8A5REaAD3A\nfvvtlwEDBuQnP/nJZn+p/l0HH3xwiqLII4880mHfv/7rv272f+2POOKIjBkzJt/73veyZs2atuDY\nFCBdMWXKlOy00075x3/8x3Yv5+rMO9+y9kMf+lCKouj07X0XL16cpUuXZtSoUenfv3/b9oEDB+aV\nV17pcP2NGzfm6aefLuVD9jYF1saNG6u+LYD3C6EB0APU1tbm4osvzq9//etccsklWb9+fYfr/PrX\nv273S/2m8ym+/OUvp7m5uW37+vXr8/nPf/5d7+/ss8/Om2++mdtvvz0PPPBADjzwwBx44IFdXu8f\n/MEfZNq0admwYUNOPPHEPPXUU51e75/+6Z/yR3/0R22XN31exV//9V+3e8Zl48aNueyyy1IURc47\n77x2t3HooYfmV7/6Vf7f//t/7bb/9V//dWmfczF48OAkv31LXAC6xjkaAD3E1VdfnZ/97Ge54447\n8v3vfz/HHHNMhg8fnuXLl+eFF17I3Llzc+2112a//fZL8ttnJi6++OLceuutOeCAA/LHf/zH2Wmn\nnXL//fdn8ODBGTZs2GY/5O+Tn/xkvvCFL+SLX/xiWlpauvVsxiaf//zn09LSkmuuuSbjx4/PEUcc\nkYMPPjj9+vXLa6+9lkcffTSLFy9u92F+hx9+eC6//PJ89atfzQEHHJDTTjstffr0yT/90z9l4cKF\nOeqoo/KXf/mX7e7nsssuy4MPPpiTTz45p59+egYOHJi5c+dmyZIlmTRp0mY//PDd/O5cjjjiiPTp\n0yc333xzmpqa2s57ueSSS9o9uwLAO2zXN9cFeA/b9HkNU6ZM6XT/u33+w8iRIzt8xsUm9957b3Hc\ncccVgwYNKnr16lWMGDGiOOqoo4rrrruuWLp0aYfrf+1rXyv222+/Yueddy6GDx9eXHTRRcXq1avf\n9T6KoiiOP/74olKpFL169SqWL1/ehUfcuUWLFhUXX3xxccABBxT9+/cvevXqVeyxxx7FCSecUNx1\n113Fhg0bOhzzD//wD8WRRx5Z1NfXF7179y4OOOCA4tprry3eeuutTu/je9/7XjFu3Liid+/exa67\n7lqcccYZxcsvv1ycc845RU1NTaefo7G578ukSZOKmpqaDtt/9KMfFYcffnjRr1+/olKpdLhdANqr\nFMVm/jsLAABgCzlHAwAAKJ3QAAAASic0AACA0gkNAACgdEIDAAAondAAAABKJzQAAIDSCQ0AAKB0\nQgMAACid0AAAAEonNAAAgNIJDQAAoHRCAwAAKJ3QAAAASic0AACA0gkNAACgdEIDAAAondAAAABK\nJzQAAIDSCQ0AAK
B02zQ0VqxYkWuuuSaf/exn8xd/8Rd54IEHkiQzZ87MZz7zmVx++eW5/PLLs2DB\ngrZjZs2alUsuuSSXXnppfvazn3XpfhYuXLhV1v9+YX7VM8PqmWF1zK96Zlgd86ueGVbPDKtT7fy2\naWjU1dXl7LPPzowZM/LlL385Dz74YJYuXZpKpZKTTjopX/3qV/PVr341Bx10UJJk6dKlmTt3bmbM\nmJErr7wyd955Z1pbW3/v/fihqo75Vc8Mq2eG1TG/6plhdcyvemZYPTOsTo8KjQEDBmTkyJFJkt69\ne2f48OFZuXJlkqQoig7XnzdvXiZMmJC6uroMGTIkQ4cOzeLFi7flkgEAgC2w3c7RWL58eZYsWZIP\nfOADSZIf/ehH+cu//Mt8/etfzxtvvJEkaW5uzuDBg9uOGTx4cFuYAAAAO65K0dlTCVvZ+vXrM23a\ntJx66qk55JBDsnr16vTv3z9Jct9996W5uTnnn39+7rrrrowZMyZHHXVUkuSOO+7IQQcdlEMPPbTd\n7S1cuLDdUzuTJ0/edg8GAADeo2bOnNn2dWNjYxobG7t8bN3WWNC7aWlpyY033pijjjoqhxxySJKk\noaGhbf+xxx6br3zlK0mSQYMGpampqW1fU1NTBg0a1OE2O3vQr7766tZY/vtCfX191q5du72X0aOZ\nYfXMsDrmVz0zrI75Vc8Mq2eG1dljjz2q+g/8bfrSqaIocscdd2T48OE58cQT27Y3Nze3ff3kk09m\nr732SpKMGzcujz/+eFpaWrJ8+fIsW7Yso0eP3pZLBgAAtsA2fUbjF7/4RR577LHstddeufzyy5Mk\nZ5xxRh5//PEsWbIklUolu+22Wz796U8nSUaMGJHDDz88U6dOTW1tbc4777xUKpVtuWQAAGALbJdz\nNLYFL53acp5mrJ4ZVs8Mq2N+1TPD6phf9cywemZYnT322KOq430yOAAAUDqhAQAAlE5oAAAApRMa\nAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqh\nAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUT\nGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClq9veC6BnqFu9MsWK\n17p1TGXX3dPSMGgrrQgAgB2Z0KBLihWvZcP1n+vWMb2u+EoiNAAA3pe8dAoAACid0AAAAEonNAAA\ngNIJDQAAoHRCAwAAKJ3QAAAASic0AACA0gkNAACgdEIDAAAondAAAABKJzQAAIDSCQ0AAKB0QgMA\nACid0AAAAEonNAAAgNIJDQAAoHRCAwAAKJ3QAAAASic0AACA0gkNAACgdEIDAAAondAAAABKJzQA\nAIDSCQ0AAKB0QgMAACid0AAAAEonNAAAgNIJDQAAoHRCAwAAKJ3QAAAASic0AACA0gkNAACgdEID\nAAAondAAAABKJzQAAIDSCQ0AAKB0ddvyzlasWJHbbrstq1evTqVSyXHHHZcTTjgh69aty0033ZQV\nK1Zkt912y9SpU9O3b98kyaxZszJnzpzU1NRkypQpOfDAA7flkgEAgC2wTUOjrq4uZ599dkaOHJn1\n69fnc5/7XMaOHZuHH344Y8eOzcknn5zZs2dn9uzZ+cQnPpGlS5dm7ty5mTFjRlauXJnp06fnlltu\nSU2NJ2IAAGBHtk1/Yx8wYEBGjhyZJOndu3eGDx+elStXZv78+Tn66KOTJJMmTcq8efOSJPPmzcuE\nCRNSV1eXIUOGZOjQoVm8ePG2XDIAALAFtttTA8uXL8+SJUsyZsyYrF69OgMGDEiSNDQ0ZPXq1UmS\n5ubmDB48uO2YwYMHZ+XKldtlvQAAQNdt05dObbJ+/frceOONOeecc7LLLru021ep
VN712M72L1y4\nMAsXLmy7PHny5NTX15ez2PehXr16dZjfW7Xd/1Gpra1Ln/fp96GzGdI9Zlgd86ueGVbH/KpnhtUz\nw+rNnDmz7evGxsY0NjZ2+dhtHhotLS258cYbM3HixBxyyCFJfvssxqpVqzJgwIA0NzenoaEhSTJo\n0KA0NTW1HdvU1JRBgwZ1uM3OHvTatWu34qN4b6uvr+8wv9qNLd2+nY0bW96334fOZkj3mGF1zK96\nZlgd86ueGVbPDKtTX1+fyZMnb/Hx2/SlU0VR5I477sjw4cNz4okntm0fN25cHn744STJI488kvHj\nx7dtf/zxx9PS0pLly5dn2bJlGT169LZcMgAAsAW26TMav/jFL/LYY49lr732yuWXX54kOfPMM3PK\nKafkpptuypw5c9re3jZJRowYkcMPPzxTp05NbW1tzjvvvN/70ioAAGD726ahse++++a+++7rdN/V\nV1/d6fZTTz01p5566tZcFgAAUDIfSAEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMa\nAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqh\nAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUT\nGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6\noQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAACl\nExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQ\nOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAA\npRMaAABA6YQGAABQOqEBAACUrm5b3+Htt9+eBQsWpH///rnxxhuTJDNnzsxDDz2U/v37J0nOOOOM\nHHTQQUmSWbNmZc6cOampqcmUKVNy4IEHbuslAwAA3bTNQ+OYY47JRz7ykXzta19r21apVHLSSSfl\npJNOanfdpUuXZu7cuZkxY0ZWrlyZ6dOn55ZbbklNjSdiAABgR7bNf2Pfb7/90rdv3w7bi6LosG3e\nvHmZMGFC6urqMmTIkAwdOjSLFy/eFssEAACqsM2f0dicH/3oR3n00Uez995751Of+lT69u2b5ubm\njBkzpu06gwcPzsqVK7fjKgEAgK7YIULjwx/+cE477bQkyX333ZdvfetbOf/88zu9bqVS6bBt4cKF\nWbhwYdvlyZMnp76+fuss9n2gV69eHeb3Vm33f1Rqa+vS5336fehshnSPGVbH/KpnhtUxv+qZYfXM\nsHozZ85s+7qxsTGNjY1dPnaHCI2Ghoa2r4899th85StfSZIMGjQoTU1NbfuampoyaNCgDsd39qDX\nrl27lVb73ldfX99hfrUbW7p9Oxs3trxvvw+dzZDuMcPqmF/1zLA65lc9M6yeGVanvr4+kydP3uLj\nd4izqpubm9u+fvLJJ7PXXnslScaNG5fHH388LS0tWb58eZYtW5bRo0dvr2UCAABdtM2f0bj55puz\naNGirFmzJueff37+5E/+JM8++2yWLFmSSqWS3XbbLZ/+9KeTJCNGjMjhhx+eqVOnpra2Nuedd16n\nL50CAAB2LNs8NC699NIO24499tjNXv/UU0/NqaeeujWXBAAAlGyHOEeD96aaXjsnv1zU7eMqu+6e\nloaO5+IAANBzCA22mmJ1czbcck23j+t1xVcSoQEA0KPtECeDAwAA7y1CAwAAKJ3QAAAASic0AACA\n0gkNAACgdEIDAAAondAAAABKJzQAAIDSCQ0A
AKB0QgMAACid0AAAAEonNAAAgNJ1KTQuuuiiLFmy\npNN9L7/8ci666KIy1wQAAPRwXQqN119/PS0tLZ3u27BhQ15//fVSFwUAAPRsVb906sUXX0yfPn3K\nWAsAAPAeUbe5HT/4wQ/ywx/+sO3yV77yldTVtb/6hg0bsm7duhxxxBFbb4UAAECPs9nQGDJkSA44\n4IAkyaOPPpp99tkn9fX17a6z0047ZcSIETnuuOO27ioBAIAeZbOhccghh+SQQw5pu3zaaadl9913\n3yaLAgAAerbNhsY7XXjhhVt7HQAAwHtIl0IjSZYtW5af/OQnaWpqyoYNGzrsv+CCC0pdGAAA0HN1\nKTSefPLJ3HTTTSmKIg0NDR1OCgcAAHinLhXDfffdl8bGxlxyySXp37//1l4TAADQw3XpczSWL1+e\nk046SWQAAABd0qXQ2GOPPbJu3bqtvRYAAOA9okuh8YlPfCKzZs3KsmXLtvZ6AACA94AunaPxf/7P\n/8m6devy2c9+NsOGDUu/fv3a9hVFkUqlkmuuuWarLRIAAOhZuhQaNTU12WOPPVIURYd9lUollUql\n9IUBAAA9V5dCY9q0aVt5GQAAwHtJl87RAAAA6I4uPaPx7LPP/t7r7L///lUvBgAAeG/oUmh05UTv\n++67r+rFAAAA7w1dCo0vfOELHbatXbs2P/3pT7No0aJMmTKl9IUBAAA9V5dCo7GxsdPthx12WO65\n55789Kc/zYc+9KFSFwYAAPRcVZ8M/qEPfShz584tYy0AAMB7RNWh8eqrr/ocDQAAoJ0uvXTq4Ycf\n7hATLS0tefnll/PQQw/l0EMP3SqLAwAAeqYuhcbXv/71zg+uq8sRRxzhZHAAAKCdLoXGrbfe2mFb\nr1690tDQ4GVTAABAB10KjSFDhmztdQAAAO8hXQqNJCmKIk899VQWLVqUdevWpV+/fmlsbPS2tgAA\nQAddCo0333wz119/fZ577rnU1NSkvr4+a9euzQ9+8IPsu++++fznP5/evXtv7bUCAAA9RJdC4zvf\n+U5eeumlXHTRRTniiCNSW1ubjRs3Zu7cubnzzjvzv//3/8655567tdcKAAD0EF36HI0nnngip59+\neo466qjU1tYmSWpra3PUUUfl9NNPzxNPPLFVFwkAAPQsXQqNtWvXZs899+x03/Dhw7N27dpSFwUA\nAPRsXQqN3XbbLfPnz+9034IFC7wrFQAA0E6XztH4b//tv+Xee+/N+vXrM3HixAwYMCCrVq3K448/\nnoceeiif+tSntvY6AQCAHqRLoXHiiSdmzZo1+cEPfpBHHnnkPw+uq8spp5ySE088castEAAA6Hm6\nFBqVSiVnnnlmPvaxj+X5559v+xyND3zgA+nXr9/WXiMAANDDdCk0Zs+enZUrV+bcc8/t8AF9d911\nV3bdddd87GMf2yoLBAAAep4unQz+8MMPZ6+99up038iRIzNnzpxSFwUAAPRsXQqNFStWZNiwYZ3u\nGzJkSF5//fVSFwUAAPRsXQqNnXfeOU1NTZ3uW7lyZXbaaadSFwUAAPRsXQqNfffdN9///vezYcOG\ndts3bNiQH/zgB9l33323yuIAAICeqUsng//Jn/xJrrrqqlx66aU58sgjM3jw4DQ1NeWxxx7LunXr\ncsEFF2ztdQIAAD1Il0Jj5MiRmTZtWu69995873vfS1EUqVQq2XfffXPZZZdl5MiRW3mZAABAT9Kl\n0EiS0aNH55prrslbb72VN954I3379s3OO++8NdcGAAD0UF0OjU123nlngQEAALyrLp0MDgAA0B1C\nAwAAKJ3QAAAASic0AACA0gkNAACgdEIDAAAondAAAABKJzQAAIDSCQ0AAKB0QgMAAChd3ba+w9tv\nvz0LFixI//79c+ONNyZJ1q1bl5tuuikrVqzIbrvtlqlTp6Zv375JklmzZmXOnDmpqanJlClTcuCB\nB27rJQMA
AN20zZ/ROOaYY3LllVe22zZ79uyMHTs2t9xySw444IDMnj07SbJ06dLMnTs3M2bMyJVX\nXpk777wzra2t23rJAABAN23z0Nhvv/3anq3YZP78+Tn66KOTJJMmTcq8efOSJPPmzcuECRNSV1eX\nIUOGZOjQoVm8ePG2XjIAANBNO8Q5GqtXr86AAQOSJA0NDVm9enWSpLm5OYMHD2673uDBg7Ny5crt\nskYAAKDrdojQeKdKpVLVfgAAYPvb5ieDd6ahoSGrVq3KgAED0tzcnIaGhiTJoEGD0tTU1Ha9pqam\nDBo0qMPxCxcuzMKFC9suT548OfX19Vt/4e9RvXr16jC/t2q7/6OypVFYW1uXPj38+9fZDOkeM6yO\n+VXPDKtjftUzw+qZYfVmzpzZ9nVjY2MaGxu7fOwOERrjxo3Lww8/nFNOOSWPPPJIxo8f37b9lltu\nyUknnZSVK1dm2bJlGT16dIfjO3vQa9eu3SZrfy+qr6/vML/ajS3dvp2iKLbo/jdubOnx37/OZkj3\nmGF1zK96Zlgd86ueGVbPDKtTX1+fyZMnb/Hx2zw0br755ixatChr1qzJ+eefn8mTJ+eUU07JTTfd\nlDlz5rS9vW2SjBgxIocffnimTp2a2tranHfeeV46BQAAPcA2D41LL7200+1XX311p9tPPfXUnHrq\nqVtzSQAAQMl2uJPBAQCAnk9oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6\noQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAACl\nExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQ\nOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAA\npRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAA\nUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAA\nAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYA\nAFA6oQEAAJSubnsv4J0uvPDC7LLLLqmpqUltbW2uu+66rFu3LjfddFNWrFiR3XbbLVOnTk3fvn23\n91IBAIAhh9IsAAAR70lEQVR3sUOFRpJMmzYt/fr1a7s8e/bsjB07NieffHJmz56d2bNn5xOf+MR2\nXCEAAPD77HAvnSqKot3l+fPn5+ijj06STJo0KfPmzdseywIAALphh3pGo1KpZPr06ampqcnxxx+f\n448/PqtXr86AAQOSJA0NDVm9evV2XiUAAPD77FChMX369AwcODBr1qzJ9OnTM3z48Hb7K5XKdloZ\nAADQHTtUaAwcODBJ0r9//xxyyCFZvHhxGhoasmrVqgwYMCDNzc1paGjocNzChQuzcOHCtsuTJ09O\nfX39Nlv3e02vXr06zO+t2u7/qGxpGNbW1qVPD//+dTZDuscMq2N+1TPD6phf9cywemZYvZkzZ7Z9\n3djYmMbGxi4fu8OExltvvZXW1tbssssuWb9+fZ555pmcdtppGTduXB5++OGccsopeeSRRzJ+/PgO\nx3b2oNeuXbutlv6eU19f32F+tRtbun07v3u+TVdt3NjS479/nc2Q7jHD6phf9cywOuZXPTOsnhlW\np76+PpMnT97i43eY0Fi9enVuuOGGJElra2uOPPLIHHjggdlnn31y0003Zc6cOW1vbwsAAOzYdpjQ\nGDJkSFtovFO/fv1y9dVXb4cVAQAAW2qHe3tbAACg5xMaAABA6YQGAABQOq
EBAACUTmgAAAClExoA\nAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAABA6YQGAABQOqEB\nAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJROaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMa\nAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDp6rb3AuB31fTaOfnlom4dU9l197Q0DNpKKwIAoLuE\nBjucYnVzNtxyTbeO6XXFVxKhAQCww/DSKQAAoHRCAwAAKJ3QAAAASic0AACA0gkNAACgdEIDAAAo\nndAAAABKJzQAAIDSCQ0AAKB0QgMAACid0AAAAEonNAAAgNIJDQAAoHRCAwAAKJ3QAAAASic0AACA\n0gkNAACgdEIDAAAondAAAABKJzQAAIDSCQ0AAKB0QgMAACid0AAAAEonNAAAgNIJDQAAoHRCAwAA\nKJ3QAAAASic0AACA0gkNAACgdEIDAAAondAAAABKJzQAAIDSCQ0AAKB0QgMAACid0AAAAEpXt70X\nwLZXt6opxermze7fUFeX2paWdtsqG1s2c20AAOhIaLwPFa+8mA1/O32z+zd0sm3nqddsvQUBAPCe\n0yNC4+mnn84999yT1tbWHHvssTnllFO295IAAIB3scOfo9Ha2ppvfOMbufLKKzNjxow8/vjjWbp0\n6fZeFgAA8C52+Gc0Fi9enKFDh2bIkCFJkgkTJmT+/PkZMWLEdl4ZAMCWqVu9MsWK1971Om/V1qX2\nd86RrOy6e1oaBm3NpUFpdvjQWLlyZQYPHtx2edCgQVm8ePF2XBEAQHWKFa9lw/Wf6/Zxva74SvI+\nD42uRNomm2JNoG0fO3xoUL6aYXtmpzP/bPP7K5W0FkX7jbW1W3lVAAC/35ZEmkDbPipF8bu/Ue5Y\nnn/++Xz3u9/NX/3VXyVJZs2alUql0u6E8IULF2bhwoVtlydPnrzN1wkAAO81M2fObPu6sbExjY2N\nXT52hz8ZfJ999smyZcuyfPnytLS0ZO7cuRk3bly76zQ2Nmby5Mltf945ELrP/KpnhtUzw+qYX/XM\nsDrmVz0zrJ4ZVmfmzJntfsfuTmQkPeClU7W1tTn33HPz5S9/ue3tbZ0IDgAAO7YdPjSS5KCDDspB\nBx20vZcBAAB0Ue20adOmbe9FbA2b3g6XLWN+1TPD6plhdcyvemZYHfOrnhlWzwyrU838dviTwQEA\ngJ5nhz8ZHAAA6HmEBgAAULoecTL45vzkJz/Jd7/73fzHf/xHrrvuuuy9995t+2bNmpU5c+akpqYm\nU6ZMyYEHHpgkefHFF3Pbbbfl7bffzkEHHZQpU6Zsr+XvkJ5++uncc889be/w9c7PK+G3br/99ixY\nsCD9+/fPjTfemCRZt25dbrrppqxYsSK77bZbpk6dmr59+ybZ/M/i+9mKFSty2223ZfXq1alUKjnu\nuONywgknmGMXbdiwIdOmTcvbb7+dlpaWjB8/Pmeeeab5bYHW1tZcccUVGTRoUK644goz7KYLL7ww\nu+yyS2pqalJbW5vrrrvODLvhjTfeyB133JGlS5cmSS644IIMGzbM/Lro1Vdfzc0339x2+bXXXsvp\np5+eiRMnmmEXzZo1K4899lgqlUr22muvXHDBBXnrrbfKm1/Rgy1durT4j//4j2LatGnFL3/5y7bt\nr7zySnHZZZcVb7/9dvHaa68VF110UdHa2loURVFcccUVxQsvvFAURVFce+21xYIFC7bL2ndEGzdu\nLC666KLitddeK95+++3isssuK1555ZXtvawdzrPPPlu8+OKLxWc/+9m2bffee28xe/bsoiiKYtas\nWcXf//3fF0XR+c/ixo0bt8u6dyTNzc3FSy+9VBRFUbz55pvFJZdcUrzyyivm2A3r168viqIoWlpa\niiuvvLJYtGiR+W2B73//+8Utt9xSXH
/99UVR+LvcXRdccEGxdu3adtvMsOtuvfXW4l/+5V+Kovjt\n3+U33njD/LbQxo0bi//xP/5H8frrr5thF7322mvFhRdeWGzYsKEoiqKYMWNGMWfOnFLn16NfOjV8\n+PDsscceHbbPmzcvEyZMSF1dXYYMGZKhQ4fmhRdeSHNzc9avX5/Ro0cnSSZOnJgnn3xyWy97h7V4\n8eIMHTo0Q4YMSV1dXSZMmJD58+dv72XtcPbbb7+2st9k/vz5Ofroo5MkkyZNyrx585J0/rO4ePHi\nbb7mHc2AAQMycuTIJEnv3r0zfPjwrFy50hy7Yeedd06StLS0pLW1NX379jW/bmpqasqCBQty7LHH\npvj/3xfFDLuv+J33lDHDrvnNb36T5557Lscee2yS335uWJ8+fcxvC/37v/97hg4dml133dUMu6hP\nnz6pra3NW2+9lY0bN+att97KoEGDSp1fj37p1OY0NzdnzJgxbZcHDx6clStXpq6uLoMGDWrbPmjQ\noKxcuXJ7LHGHtHLlygwePLjt8qBBg97XfwG7Y/Xq1RkwYECSpKGhIatXr06y+Z9F/tPy5cuzZMmS\njBkzxhy7obW1NZ/73Ofy2muv5cMf/nD23HNP8+umb37zmznrrLPy5ptvtm0zw+6pVCqZPn16ampq\ncvzxx+f44483wy5avnx5+vfvn9tvvz2/+tWvMmrUqJxzzjnmt4Uef/zxTJgwIYm/x13Vr1+/fPSj\nH80FF1yQXr165cADD8zYsWNLnd8OHxrTp0/PqlWrOmw/44wzMm7cuO2wInh3lUqlqv3vJ+vXr8+N\nN96Yc845J7vssku7feb47mpqanLDDTfkN7/5Tb785S/n5z//ebv95vfunnrqqfTv3z+jRo3KwoUL\nO72OGf5+06dPz8CBA7NmzZpMnz49w4cPb7ffDDdv48aNeemll3Luuedm9OjRueeeezJ79ux21zG/\nrmlpaclTTz2Vs846q8M+M9y8ZcuW5Yc//GFuu+229OnTJzNmzMijjz7a7jrVzm+HD42rr76628cM\nGjQoTU1NbZebmpoyePDgDs9gNDU1tXuG4/2us7mZT9c0NDRk1apVGTBgQJqbm9PQ0JDETN9NS0tL\nbrzxxkycODGHHHJIEnPcEn369MlBBx2UF1980fy64Re/+EWeeuqpLFiwIG+//XbefPPN3HrrrWbY\nTQMHDkyS9O/fP4ccckgWL15shl206feSTS/nPuywwzJr1qwMGDDA/LppwYIF2XvvvdO/f/8k/i3p\nqhdffDH/5b/8l9TX1ydJDj300Dz//POl/gz26HM0NmfcuHF5/PHH09LSkuXLl2fZsmUZPXp0BgwY\nkF122SUvvPBCiqLIY4891vYLDsk+++yTZcuWZfny5WlpacncuXM9a9RF48aNy8MPP5wkeeSRRzJ+\n/Pi27Z39LL7fFUWRO+64I8OHD8+JJ57Ytt0cu2bNmjV54403kvz2Haj+/d//PaNGjTK/bjjzzDPz\n9a9/PbfddlsuvfTSNDY25uKLLzbDbnjrrbfaXna2fv36PPPMM9lrr73MsIsGDBiQXXfdNa+++mqS\n5Jlnnsmee+6Zgw8+2Py66Z0vm0r8W9JVe+yxR1544YVs2LAhRVHkmWeeyYgRI0r9GezRnwz+5JNP\n5u67786aNWvSp0+fjBo1KldeeWWS5P/+3/+bOXPmpLa2Nuecc04++MEPJvnPt7fdsGFDDjrooJx7\n7rnb8yHscBYsWNDu7W0//vGPb+8l7XBuvvnmLFq0KGvWrMmAAQMyefLkjB8/frNvBbe5n8X3s+ee\ney5f/OIXs9dee7U97XrmmWdm9OjR5tgFL7/8cm677ba0tramKIpMnDgxH/vYx971bUXNb/OeffbZ\nfP/738/nPvc5M+yG5cuX54Ybbkjy23OGjjzyyHz84x83w25YsmRJ/u7v/i4tLS3Zfffdc8EFF6S1\ntd
X8umH9+vW58MIL87Wvfa3tJbh+Brvu/vvvzyOPPJJKpZJRo0blM5/5TNavX1/a/Hp0aAAAADum\n9+RLpwAAgO1LaAAAAKUTGgAAQOmEBgAAUDqhAQAAlE5oAAAApRMaAOyQFi5cmNNPPz3PPvvs9l4K\nAFtAaAAAAKUTGgAAQOnqtvcCANixzZw5M//4j/+YGTNm5K677srzzz+f/v3757TTTssxxxyTOXPm\nZPbs2Wlubs4+++yTz3zmM9l9993bjv/nf/7nPPjgg3n11VfTu3fvjB8/PmeddVb69evXdp01a9bk\n7rvvzk9/+tPU1NTk4IMPziGHHNJuHXfeeWeeeOKJ/N3f/V1qav7z/8nefvvtfPrTn87RRx+dc845\nZ6vPA4Cu8YwGAF0yY8aMjB8/PpdffnlGjRqVO+64I9/61rfy0EMP5ZOf/GQuuOCCvPrqq/nbv/3b\ntmO+/e1v5xvf+EbGjh2bz33uc/nkJz+Zp59+Otddd11aW1vbrvc3f/M3WbBgQc4888xMnTo1tbW1\nufvuu9vd/9FHH501a9bkZz/7WbvtTz31VH7zm9/k6KOP3roDAKBbPKMBQJecfPLJmThxYpJk7733\nzvz58/PII4/ktttuS+/evZMkzc3Nueeee7JixYq0trbme9/7XiZPnpw//uM/brudYcOG5Qtf+EKe\neuqpjB8/Ps8880x+8Ytf5M///M9zxBFHJEnGjh2b6667LitXrmw7bsyYMRk6dGgeffTRHHTQQW3b\nH3300YwYMSKjRo3aFmMAoIs8owFAl7zzl/u+ffumoaEhY8aMaYuMJNljjz2SJE1NTXnmmWeSJEce\neWQ2btzY9mf06NHp3bt3Fi1alCR5/vnnU1NTk8MOO6zd/W2KjneaOHFi5s+fn/Xr1ydJ1q5dm6ef\nfjpHHXVUuQ8WgKp5RgOALunbt2+7y3V1de3Os9i0LUk2bNiQNWvWJEkuueSSTm9v3bp1SX77LEjf\nvn3bnXeRJA0NDR2OOeqoozJz5sz827/9WyZNmpS5c+dm48aNbc+0ALDjEBoAlK5SqaS+vj5JctVV\nV3WIlCRt+wcOHJg33ngjra2t7WJj1apVHY4ZMmRI9t133zz22GOZNGlSHnvssRxwwAEZNGjQVnok\nAGwpL50CYKsYO3ZsKpVKXn/99ey9994d/uy2225Jkg984ANpbW3Nv/3bv7U7fu7cuZ3e7sSJE7Nw\n4cIsXLgwL7zwgpdNAeygPKMBwFax++675+STT85dd92VV199Nfvvv3922mmntvM3jjvuuDQ2Nmbs\n2LHZd9998z//5//MmjVrMnTo0MydOzevvPJKp7d7+OGH56677sqtt96aXr16dTi3A4Adg9AA4F1V\nKpUtPvaMM87I8OHD8+CDD+bBBx9MpVLJ4MGD81//63/NsGHD2q73F3/xF7n77rvzne98JzU1NRk3\nblzOO++83HDDDR1us0+fPjn44IPzxBNPZMKECe1ORgdgx1EpiqLY3osAAADeW5yjAQAAlE5oAAAA\npRMaAABA6YQGAABQOqEBAACUTmgAAAClExoAAEDphAYAAFA6oQEAAJTu/wOvjuOyg29YWQAAAABJ\nRU5ErkJggg==\n",
"text": [
""
]
}
],
"prompt_number": 342
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can see that there are some clear outliers around 600 and 700, and even one or two 'medv' values of 0.
\n",
"Now that our outliers are in place, we will once again fit the GradientBoostingRegressor with the L1 and L2 loss functions to see the contrast in their performance in the presence of outliers."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# So let's fit a GradientBoostingRegressor with a L1(Least Absolute Deviation) loss function\n",
"np.random.seed(9876)\n",
"\n",
"# A GradientBoostingRegressor with L1(Least Absolute Deviation) as the loss function\n",
"mod = GradientBoostingRegressor(loss='lad')\n",
"\n",
"fit = mod.fit(train_X, train_y)\n",
"predict = fit.predict(test_X)\n",
"\n",
"# Root Mean Squared Error\n",
"print \"RMSE -> %f\" % rmse(predict, test_y)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"RMSE -> 7.055568\n"
]
}
],
"prompt_number": 343
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"error = predict - test_y\n",
"\n",
"# Print squared errors in all test samples \n",
"np.around(error ** 2, decimals = 2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 344,
"text": [
"array([ 0.04, 0.69, 17.13, 0.01, 0.62, 0.01,\n",
" 1.88, 0.17, 0.54, 16.12, 2.4 , 0.23,\n",
" 0.04, 0. , 1.62, 83.27, 1.58, 0.23,\n",
" 17.81, 1.3 , 2.77, 12.79, 0.07, 0.45,\n",
" 1.59, 7.27, 1.3 , 2.45, 5.99, 7.88,\n",
" 9.02, 0.49, 3396.2 , 0.01, 28.32, 12.75,\n",
" 0.45, 0. , 15.67, 0. , 4.2 , 3.72,\n",
" 30.43, 5.71, 0.01, 0.18, 7.47, 0.09,\n",
" 1.28, 12.06, 2.49, 0.13, 6.22, 2.76,\n",
" 2.29, 0.02, 4.91, 16.74, 1.94, 0.27,\n",
" 0.25, 363.92, 2.42, 0.63, 0.58, 4.43,\n",
" 4.28, 3.15, 1.17, 0.97, 0.73, 1.43,\n",
" 0.53, 0.49, 0.81, 16.5 , 0.07, 14.49,\n",
" 1.84, 38.94, 0.17, 16.95, 0.45, 1.42,\n",
" 1.97, 6.45, 1.08, 3.42, 0.36, 0.46,\n",
" 1.1 , 3.07, 0.12, 2.18, 46.74, 1.48,\n",
" 652.32, 66.97, 9.32, 46.2 , 3.05, 0.66])"
]
}
],
"prompt_number": 344
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# A GradientBoostingRegressor with L2(Least Squares) as the loss function\n",
"mod = GradientBoostingRegressor(loss='ls')\n",
"\n",
"fit = mod.fit(train_X, train_y)\n",
"predict = fit.predict(test_X)\n",
"\n",
"# Root Mean Squared Error\n",
"print \"RMSE -> %f\" % rmse(predict, test_y)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"RMSE -> 9.806251\n"
]
}
],
"prompt_number": 345
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"error = predict - test_y\n",
"\n",
"# Print squared errors in all test samples \n",
"np.around(error ** 2, decimals = 2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 346,
"text": [
"array([ 0. , 6.18, 11. , 0.36, 0.45, 1.62,\n",
" 0.28, 0.02, 0.59, 22.96, 2.24, 0.06,\n",
" 0.98, 0.02, 0.64, 6.52, 1.72, 0.04,\n",
" 7.4 , 3.76, 0.76, 15.28, 0.38, 1.95,\n",
" 3.7 , 9. , 4.28, 6.69, 5.83, 10.83,\n",
" 17.79, 0.57, 121.92, 2.3 , 15.77, 4.94,\n",
" 0.02, 0.6 , 13.59, 0.03, 11.32, 1.51,\n",
" 16.45, 6.85, 0.07, 0.21, 6.32, 0.12,\n",
" 3.41, 12.18, 7.96, 2.12, 2.95, 9.69,\n",
" 17.15, 0.4 , 2.43, 4.46, 1.58, 1.59,\n",
" 2.45, 0.66, 2.56, 3.33, 0.43, 13.59,\n",
" 6.76, 1.15, 2.87, 3.3 , 4.7 , 0.01,\n",
" 1.3 , 0.02, 0.86, 8597.78, 0.07, 1.7 ,\n",
" 0. , 33.7 , 0.71, 10.75, 0.03, 0.63,\n",
" 1.55, 13.66, 0.89, 2.61, 0.27, 0.05,\n",
" 1.12, 0.08, 0. , 3.76, 63.48, 10.78,\n",
" 475.58, 111.95, 12.11, 5.93, 1.53, 1.99])"
]
}
],
"prompt_number": 346
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"With outliers in the dataset, an L2 loss function tries to adjust the model coefficients to fit these outliers at the expense of the other samples, since the squared error is going to be huge for these outliers (for error > 1). On the other hand, L1 (least absolute deviation) is quite robust to outliers.
\n",
"As a result, an L2 loss function may produce huge deviations on some of the samples, which results in reduced accuracy."
]
}
],
"metadata": {}
}
]
}