{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [
{ "cell_type": "code", "collapsed": false, "input": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.utils import shuffle\n",
"from sklearn.base import BaseEstimator, TransformerMixin\n",
"from sklearn.cross_validation import train_test_split\n",
"from sklearn.preprocessing import StandardScaler, normalize\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.grid_search import GridSearchCV\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.decomposition import PCA\n",
"\n",
"\n",
"from autodiff import optimize\n",
"from scipy import stats\n",
"import pylab as pl\n",
"\n",
"%pylab inline --no-import-all"
], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "prompt_number": 2 },
{ "cell_type": "code", "collapsed": false, "input": [
"def soft_absolute(u):\n",
"    \"\"\"Smooth stand-in for abs(u): sqrt(u*u + epsilon) with epsilon = 1e-8.\"\"\"\n",
"    epsilon = 1e-8\n",
"    return np.sqrt(u*u + epsilon)\n",
"\n",
"def logistic(u):\n",
"    \"\"\"Element-wise logistic sigmoid, 1 / (1 + exp(-u)).\"\"\"\n",
"    return 1. / (1. + np.exp(-u))\n",
"\n",
"class SparseFilter(BaseEstimator, TransformerMixin):\n",
"    \"\"\"Sparse-filtering feature learner with a fit/transform interface.\n",
"\n",
"    Learns a linear map (W_, b_) by minimising the sum of activations after\n",
"    they have been L2-normalised per feature and then per sample\n",
"    (see get_objective_fn).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    n_features : int\n",
"        Number of learned features (columns of the initial W).\n",
"    n_iterations : int\n",
"        Forwarded to the optimiser as maxfun.\n",
"    activate : callable\n",
"        Element-wise non-linearity applied to np.dot(X, W) + b.\n",
"    epsilon : float\n",
"        Constant added under each square root of the normalisation.\n",
"    random_state : None, int or numpy.random.RandomState\n",
"        Source of the random initial weights. None (the default) draws from\n",
"        NumPy's global random state, exactly as before this parameter existed.\n",
"\n",
"    Attributes\n",
"    ----------\n",
"    W_, b_ : values returned by the optimiser, set by fit.\n",
"        NOTE(review): presumably shaped like the initial W (n_dim, n_features)\n",
"        and b (n_features,) -- confirm against autodiff's return value.\n",
"    \"\"\"\n",
"    def __init__(self, n_features = 200, n_iterations = 300, activate=soft_absolute,\n",
"                 epsilon = 1e-8, random_state = None):\n",
"        # store constructor arguments only, so get_params()/clone() see all of them\n",
"        self.epsilon = epsilon\n",
"        self.n_features = n_features\n",
"        self.n_iterations = n_iterations\n",
"        self.activate = activate\n",
"        self.random_state = random_state\n",
"    def fit(self, X, y = None):\n",
"        \"\"\"Optimise W_ and b_ on the 2-D array X; y is ignored. Returns self.\"\"\"\n",
"        # function-scope import so this cell does not depend on editing the import cell\n",
"        from sklearn.utils import check_random_state\n",
"        rng = check_random_state(self.random_state)\n",
"        # unpacking also rejects inputs that are not 2-D\n",
"        _, n_dim = X.shape\n",
"        W = rng.randn(n_dim, self.n_features)\n",
"        b = rng.randn(self.n_features)\n",
"        obj_fn = self.get_objective_fn(X)\n",
"        self.W_, self.b_ = optimize.fmin_l_bfgs_b(obj_fn, (W, b),\n",
"                                                  iprint = 1,\n",
"                                                  maxfun = self.n_iterations)\n",
"        return self\n",
"    def get_objective_fn(self, X):\n",
"        \"\"\"Return a function (W, b) -> scalar sparse-filtering loss on X.\"\"\"\n",
"        # NOTE(review): the body is kept inline (not shared with transform)\n",
"        # because it is handed to autodiff; confirm before refactoring.\n",
"        def _objective_fn(W, b):\n",
"            Y = self.activate(np.dot(X, W) + b)\n",
"            # normalise each feature (column) across samples ...\n",
"            Y = Y / np.sqrt(np.sum(Y*Y, axis = 0) + self.epsilon)\n",
"            # ... then each sample (row) across features\n",
"            Y = Y / np.sqrt(np.sum(Y*Y, axis = 1)[:, np.newaxis] + self.epsilon)\n",
"            return np.sum(Y)\n",
"        return _objective_fn\n",
"    def transform(self, X):\n",
"        \"\"\"Map X to normalised activations using the fitted W_ and b_.\n",
"\n",
"        The per-feature normalisation (axis=0) is computed from the rows of the\n",
"        X passed in, so a sample's output depends on the rest of the batch.\n",
"        \"\"\"\n",
"        Y = self.activate(np.dot(X, self.W_) + self.b_)\n",
"        Y = Y / np.sqrt(np.sum(Y*Y, axis=0) + self.epsilon)\n",
"        Y = Y / np.sqrt(np.sum(Y*Y, axis=1)[:, np.newaxis] + self.epsilon)\n",
"        return Y"
], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## load data" ] },
{ "cell_type": "code", "collapsed": false, "input": [
"\n",
"train_data = pd.read_csv('data/train.csv', header=None)\n",
"train_labels = pd.read_csv('data/trainLabels.csv', header=None)\n",
"test_data = pd.read_csv('data/test.csv', header=None)\n",
"\n",
"train_X = np.asarray(train_data)\n",
"train_y = np.asarray(train_labels).ravel()\n",
"test_X = np.asarray(test_data)\n",
"\n",
"## shuffle train data\n",
"train_X, train_y = shuffle(train_X, train_y)\n",
"\n",
"print train_X.shape, test_X.shape, train_y.shape"
], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1000, 40) (9000, 40) (1000,)\n" ] } ], "prompt_number":
19 },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## unsupervised feature engineering" ] },
{ "cell_type": "code", "collapsed": false, "input": [
"## train a sparse filter on both train and test data\n",
"sf = SparseFilter(n_features=50, n_iterations=1000)\n",
"sf.fit(np.r_[train_X, test_X])\n",
"train_sf_X = sf.transform(train_X)\n",
"test_sf_X = sf.transform(test_X)"
], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 44 },
{ "cell_type": "code", "collapsed": false, "input": [
"## train a PCA on both train and test data\n",
"## but before that, see how many components are needed to explain 85% of the variance\n",
"pca = PCA()\n",
"pca.fit(np.r_[train_X, test_X])\n",
"pd.DataFrame(pca.explained_variance_ratio_).plot(kind = 'bar')\n",
"cumulative_variance = np.cumsum(pca.explained_variance_ratio_)\n",
"## np.where gives the zero-based index of the first component at which the\n",
"## cumulative ratio reaches the threshold; the component count is index + 1\n",
"n_components = np.where(cumulative_variance >= 0.85)[0][0] + 1\n",
"## stored outputs below predate this fix; re-run the notebook to refresh them\n",
"print n_components\n",
"\n",
"pca = PCA(n_components=n_components)\n",
"pca.fit(np.r_[train_X, test_X])\n",
"train_pca_X = pca.transform(train_X)\n",
"test_pca_X = pca.transform(test_X)"
], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "15\n" ] }, { "metadata": {}, "output_type": "display_data", "png":
"iVBORw0KGgoAAAANSUhEUgAAAXYAAAEACAYAAACnJV25AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9UVHX6B/A3BrtW/BAtzRhyVJCfCqwIVvqNkybpMUpt\nC1NLQyOLLc/RUttt1dqTUtnmj9PKtqUnNbLMxHZ11kxuWilkarRhpq0E4s8UQ9FExuf7BzLLDHBn\nmBnwc+H9Ouee5s7nc+/zzGemZ4aHy+gjIgIiImozOlztBIiIyLtY2ImI2hgWdiKiNoaFnYiojWFh\nJyJqY1jYiYjaGKeF3WKxIDIyEuHh4cjOzm4wnpeXh7i4OCQkJKB///7YunWry8cSEZH3+ehdx261\nWhEREYEtW7YgJCQEAwYMQG5uLqKiomxzqqqqcP311wMAvv32W4waNQoHDx506VgiIvI+3U/shYWF\nCAsLg9lshp+fH9LT05GXl2c3p66oA8C5c+dwww03uHwsERF5n6/eYHl5OUJDQ237JpMJBQUFDeat\nX78es2fPxtGjR7F58+ZmHevj4+N28kRE7VlTDRfdT+yuFt377rsP+/btw8cff4wJEyY0GUwvufrb\nnDlzGtzX3DmejjOGWjFUyIExGEOlHPToFvaQkBCUlZXZ9svKymAymZqcP3jwYNTU1OD06dMwmUzN\nOra+kpISj+d4Os4YasVQIQfGYAxVc3CkW9gTExNx4MABlJSUoLq6GmvWrEFaWprdnB9//NH27rF7\n924AQJcuXVw6loiIWoA4sXHjRunTp4/07t1bXnrpJRERWbZsmSxbtkxERLKzsyUmJkbi4+Nl0KBB\nUlhYqHuso8ZSyM/Pd5aW0zmejjOGWjFUyIExGEOlHPTKt+7ljq3Bx8fHab+IiIjs6dVOJf/yVNM0\nj+d4Os4YasVQIQfGMF6MoKAg+Pj4GHrr3LmzS2tRn+7ljkRERlZZWWn4joA7l4SzFUNEbVZbqC9N\nPQbDtWKIiMh9ShZ2o/TvGKP1YqiQA2MYL0Zb0dzHqWRhJyIi97HHTkRtVluoL+yxExE5ERjYuUUv\nTwwM7OxSHqdPn8aoUaPg7+8Ps9mM3Nxcrz1GJQu7Ufp3jNF6MVTIgTGMF6MxZ89WAJAW22rP79yT\nTz6Jjh074sSJE1i9ejWmTp2K4uJirzxOJQs7EVFbVlVVhXXr1uHFF1/Eddddh9tvvx333nsvVq5c\n6ZXzs8dORG1WY/Wl9g9+WrLmOK9pe/bswaBBg1BVVWW777XXXoOmadiwYYP92dhjJyJS37lz5xAY\nGGh3X0BAAM6ePeuV8ytZ2I3Sv2OM1ouhQg6MYbwYqvL390dlZaXdfb/88gsCAgIanc8eOxGR4vr0\n6YOamhocPHjQdt8333yD2NhYr5yfPXYiarNU7bEDwNixY+Hj44N//OMf2L17N0aOHIkdO3YgKirK\n/mzssRMR6QsICAbg02Jb7fmde+ONN3DhwgV07doV48ePx7JlyxoUdXcpWdiN0r9jjNaLoUIOjGG8\nGI2prDzt9B+P9mSrrDztUh7BwcH46KOPcO7cOZSUlCA9Pd1rj1PJwk5ERO5jj52I2qy2UF/YYyci\nIjULu1H6d4zRejFUyIExjBcjMDDwqv+bpZ5uwcHB7LETEdXJy8vT/UVnfn6+01+GOpvT0uOnT7v2\ny9j62GMnIjIgQ/TY9b4j2dXvNyYiIoUKu/13JOfD2fcbq9C/Y4zWi6FCDozBGKrm4EiZwk5ERN6h\nTI9d//sb2IcnIqrPED12IiLyDqeF3WKxIDIyEuHh4cjOzm4wvnr1asTFxaFfv364/fbbUVRUZBsz\nm83o168fEhISkJSU1Iy0NOczFOhrMUbrxVAhB8ZgDFVzcOSrN2i1WpGVlYUtW7YgJCQEAwYMQFpa\nmt03kPXq1Qvbtm1DUFAQLBYLHnvsMezcuRNA7Y8Kmqahc2de1
UJE1Fp0e+w7duzAvHnzYLFYAAAL\nFiwAAMyaNavR+RUVFejbty8OHz4MAOjZsyd27dqFLl26NJ0Ae+xERM2m12PX/cReXl6O0NBQ277J\nZEJBQUGT89966y2MGDHCLvDQoUNxzTXXIDMzE1OmTGn0uIkTJ165NRdAJwDxAFKu3KfZza37kSQl\nJYX73Oc+99vNvqZpWLFiBYDaNrcu0bF27VqZPHmybX/lypWSlZXV6NytW7dKVFSUnD592nbfkSNH\nRETkxIkTEhcXJ9u2bWtwXF0KAASQK1t+vdv/m1Nffn6+XuoejzOGWjFUyIExGEOlHPTKt+4vT0NC\nQlBWVmbbLysrg8lkajCvqKgIU6ZMwYYNGxAc/L9/PaR79+4AgBtvvBGjRo1CYWGh/rsMERF5TLfH\nXlNTg4iICHz66ae4+eabkZSUhNzcXLtfnpaWluLOO+/EqlWrMHDgQNv958+fh9VqRUBAAKqqqjBs\n2DDMmTMHw4YNs0+APXYiomZzu8fu6+uLpUuXIjU1FVarFRkZGYiKikJOTg4AIDMzEy+88AIqKiow\ndepUAICfnx8KCwtx7NgxjB49GkDtG8S4ceMaFHUiImoBTps7LQzssTOGQXJgDMZQKQe98s2/PCUi\namP4XTFERAbE74ohImpHFC3smvMZCnw/A2O0XgwVcmAMxlA1B0eKFnYiInIXe+xERAbEHjsRUTui\naGHXnM9QoK/FGK0XQ4UcGIMxVM3BkaKFnYiI3MUeOxGRAbHHTkTUjiha2DXnMxToazFG68VQIQfG\nYAxVc3CkaGEnIiJ3scdORGRA7LETEbUjihZ2zfkMBfpajNF6MVTIgTEYQ9UcHCla2ImIyF3ssRMR\nGRB77ERE7YiihV1zPkOBvhZjtF4MFXJgDMZQNQdHihZ2IiJyF3vsREQGxB47EVE7omhh15zPUKCv\nxRitF0OFHBiDMVTNwZGihZ2IiNzFHjsRkQGxx05E1I4oWtg15zMU6GsxRuvFUCEHxmAMVXNwpGhh\nJyIidzntsVssFkybNg1WqxWTJ0/GzJkz7cZXr16Nl19+GSKCgIAA/O1vf0O/fv1cOhZoXo89MLAz\nzp6taDAaEBCMysrTrj1iIqI2QK/HDtFRU1MjvXv3lkOHDkl1dbXExcVJcXGx3Zwvv/xSzpw5IyIi\nmzZtkuTkZJePvfKmYvsvIE1szuboPgwiojZHr+7ptmIKCwsRFhYGs9kMPz8/pKenIy8vz27Orbfe\niqCgIABAcnIyDh8+7PKxTdM8nmOE3hpjGCsHxmAMVXNw5Ks3WF5ejtDQUNu+yWRCQUFBk/Pfeust\njBgxotnHTpw48cqtuQA6XbmdcuW/msNs/X3HBajbT0lJadZ4a+3v3bu3Rcc1TcPevXtbdLy+qzXO\n58v18fr4fFz958PV50vTNKxYsQIAYDaboUe3x/7hhx/CYrHgzTffBACsWrUKBQUFWLJkSYO5+fn5\nePLJJ/HFF18gODjY5WOb02Nveg6vcyei9kWvx677iT0kJARlZWW2/bKyMphMpgbzioqKMGXKFFgs\nFgQHBzfrWCIi8i7dHntiYiIOHDiAkpISVFdXY82aNUhLS7ObU1paitGjR2PVqlUICwtr1rFN0zye\n4/gjY3PHvXEOxmhbOTAGY6iagyPdT+y+vr5YunQpUlNTYbVakZGRgaioKOTk5AAAMjMz8cILL6Ci\nogJTp04FAPj5+aGwsLDJY4mIqGUZ6rti2GMnIqrF74ohImpHFC3smsdzjNBbYwxj5cAYjKFqDo4U\nLexEROQu9tiJiAyIPXYionZE0cKueTzHCL01xjBWDozBGKrm4EjRwk5ERO5ij52IyIDYYyciakcU\nLeyax3OM0FtjDGPlwBiMoWoOjhQt7ERE5C722ImIDIg9diKidkTRwq55PMcIvTXGMFYOjMEYqubg\nSNHCTkRE7mKPnYjIgNhjJ
yJqRxQt7JrHc4zQW2MMY+XAGIyhag6OFC3sRETkLvbYiYgMiD12IqJ2\nRNHCrnk8xwi9NcYwVg6MwRiq5uBI0cJORETuYo+diMiA2GMnImpHFC3smsdzjNBbYwxj5cAYjKFq\nDo4ULexEROQu9tiJiAzIox67xWJBZGQkwsPDkZ2d3WD8+++/x6233oqOHTti4cKFdmNmsxn9+vVD\nQkICkpKS3EyfiIiaQ7ewW61WZGVlwWKxoLi4GLm5udi3b5/dnC5dumDJkiWYMWNGg+N9fHygaRr2\n7NmDwsLCZqSleTzHCL01xjBWDozBGKrm4Ei3sBcWFiIsLAxmsxl+fn5IT09HXl6e3Zwbb7wRiYmJ\n8PPza/QcbJEQEbUuX73B8vJyhIaG2vZNJhMKCgpcPrmPjw+GDh2Ka665BpmZmZgyZUqj8yZOnHjl\n1lwAnQDE1xvVHGbX7ac0Ol73zpaSkuLxfkpKSouO18+5pcYd3+lbalyFfT5faj1ffD68+3xpmoYV\nK1YAqG1z69H95emHH34Ii8WCN998EwCwatUqFBQUYMmSJQ3mzps3D/7+/pg+fbrtvqNHj6J79+44\nefIk7rrrLixZsgSDBw+2T4C/PCUiaja3f3kaEhKCsrIy235ZWRlMJpPLgbt37w6gtl0zatSoZvTZ\nNY/nOL4TNnfcG+dgjLaVA2Mwhqo5ONIt7ImJiThw4ABKSkpQXV2NNWvWIC0trdG5ju8c58+fx9mz\nZwEAVVVV2Lx5M/r27dus5IiIqPmcXse+adMmTJs2DVarFRkZGZg9ezZycnIAAJmZmTh27BgGDBiA\nyspKdOjQAQEBASguLsaJEycwevRoAEBNTQ3GjRuH2bNnN0yArRgiombTa8XwD5SIiAzIgF8Cpnk8\nxwi9NcYwVg6MwRiq5uBI0cJORETuYiuGiMiADNiKISIidyla2DWP5xiht8YYxsqBMRhD1RwcKVrY\niYjIXeyxExEZEHvsRETtiKKFXfN4jhF6a4xhrBwYgzFUzcGRooWdiIjcxR47EZEBscdORNSOKFrY\nNY/nGKG3xhjGyoExGEPVHBwpWtiJiMhd7LETERkQe+xERO2IooVd83iOEXprjGGsHBiDMVTNwZGi\nhZ2IiNzFHjsRkQGxx05E1I4oWtg1j+cYobfGGMbKgTEYQ9UcHCla2ImIyF3ssRMRGRB77ERE7Yii\nhV3zeI4RemuMYawcGIMxVM3BkaKFnYiI3MUeOxGRAbHHTkTUjjgt7BaLBZGRkQgPD0d2dnaD8e+/\n/x633norOnbsiIULFzbr2KZpHs8xQm+NMYyVA2Mwhqo5ONIt7FarFVlZWbBYLCguLkZubi727dtn\nN6dLly5YsmQJZsyY0exjiYjI+3R77Dt27MC8efNgsVgAAAsWLAAAzJo1q8HcefPmwd/fH9OnT2/W\nseyxExE1n16P3VfvwPLycoSGhtr2TSYTCgoKXAranGMnTpx45dZcAJ0AxANIuXKf5jC7br/x8bof\nWVJSUrjPfe5zv83sa5qGFStWAADMZjN0iY61a9fK5MmTbfsrV66UrKysRufOnTtXXn311WYfW5cC\nAAHkypZf77Yrcxo+jPz8fL2H5nTcG+dgjLaVA2Mwhko56JVv3R57SEgIysrKbPtlZWUwmUz67xRe\nOJaIiNyn22OvqalBREQEPv30U9x8881ISkpCbm4uoqKiGsydO3cuAgICbD12V49lj52IqPnc7rH7\n+vpi6dKlSE1NhdVqRUZGBqKiopCTkwMAyMzMxLFjxzBgwABUVlaiQ4cOWLRoEYqLi+Hv79/osURE\n1MKcNndaGNhjZwyD5MAYjKFSDnrlm395SkTUxvC7YoiIDIjfFUNE1I4oWtg1j+fUXdjv7rg3zsEY\nbSsHxmAMVXNwpGhhJyIid7HHTkRkQOyxExG1I4oWds3jOUborTGGsXJgDMZQNQdHihZ2IiJ
yF3vs\nREQG1G567IGBneHj49NgCwzsfLVTIyJqNYoWds2tOWfPVqD2E70AyLfdrr3f4WgFemuMYawcGIMx\nVM3BkaKFnYiI3NWmeuzswRNRe9FueuxERKRsYde8MEd/XIXeGmMYKwfGYAxVc3CkaGEnIiJ3scdO\nRGRA7LETEbUjihZ2zQtz9MdV6K0xhrFyYAzGUDUHR4oWdiIichd77EREBsQeOxFRO6JoYde8MEd/\nXIXeGmMYKwfGYAxVc3CkaGEnIiJ3scdORGRA7LETEbUjihZ2zQtz9MdV6K0xhrFyYAzGUDUHR04L\nu8ViQWRkJMLDw5Gdnd3onKeeegrh4eGIi4vDnj17bPebzWb069cPCQkJSEpKalZiRETkHt0eu9Vq\nRUREBLZs2YKQkBAMGDAAubm5iIqKss3ZuHEjli5dio0bN6KgoABPP/00du7cCQDo2bMnvv76a3Tu\n3PQ/TcceOxFR87ndYy8sLERYWBjMZjP8/PyQnp6OvLw8uzkbNmzAI488AgBITk7GmTNncPz4cds4\nCyoRUevy1RssLy9HaGiobd9kMqGgoMDpnPLycnTr1g0+Pj4YOnQorrnmGmRmZmLKlCmNxpk4ceKV\nW3MBdLpye9qV/2oOs+vvpzR7vH6vKiUlxbafkpJiN15/rKXGAeD1119HfHx8i41rmoa9e/di2rRp\nLTZe/zFerXE+X66P13+MfD6u/vPh6vMFACtWrABQ2+bWJTrWrl0rkydPtu2vXLlSsrKy7OaMHDlS\nPv/8c9v+kCFD5OuvvxYRkfLychEROXHihMTFxcm2bdsaxKhLAYAAcmXLr3fblTmujdeXn5+v99Bd\nmuPpOGMYKwfGYAyVctAr37o99p07d2Lu3LmwWCwAgPnz56NDhw6YOXOmbc7jjz+OlJQUpKenAwAi\nIyPx2WefoVu3bnbnmjdvHvz9/TF9+nS7+1uzxx4Y2Blnz1Y0GA0ICEZl5ekmYhMRqcftHntiYiIO\nHDiAkpISVFdXY82aNUhLS7Obk5aWhnfeeQdA7RtBp06d0K1bN5w/fx5nz54FAFRVVWHz5s3o27ev\nNx6P22qLujTYGiv2RERGpVvYfX19sXTpUqSmpiI6OhoPPvggoqKikJOTg5ycHADAiBEj0KtXL4SF\nhSEzMxNvvPEGAODYsWMYPHgw4uPjkZycjJEjR2LYsGEupqV5YY5744GBneHj49NgCwzs7NK4XQQF\nrpFtKzFUyIExGEPVHBzp/vIUAIYPH47hw4fb3ZeZmWm3v3Tp0gbH9erVC3v37m1WMir436d6oLb4\np1y538elcSKiq61dfVeMK9e581p5IjICfldMK2pOq4aIqCUoWtg1L8zxdNy9c9j/gjYfzn5Bq0J/\nzggxVMiBMRhD1RwcKVrYiYjIXeyx1xuvy6elYxAReYo9diKidkTRwq55YY6n460TQ4X+nBFiqJAD\nYzCGqjk4UrSwExGRu9hjrzdelw977ESkOvbYFcNr3YmoJSla2DUvzPF0vOViNOdadyP0CFsjhgo5\nMAZjqJqDI0ULOxERuYs99nrjdflc7RhERM6wx05E1I4oWtg1L8zxdPzqxeB3vquZA2Mwhqo5OFK0\nsLdvzf0iMSKi+thjrzdel4/qMYiI2GNvY3gdPBHpUbSwa16Y4+m4ujHa43e+q5ADYzCGqjk4cvpv\nnpLxBAZ2brLIBwQEo7LydCtnREStiT32euN1+Rg9hitrSUTGxh47EVE7omhh17wwx9Pxth3DaNfK\nq5ADYzCGqjk4UrSwU0vjtfJEbRd77PXG6/IxegxvrCURqY09diKidkTRwq55YY6n4+07hrMefFPj\nrszxdJwxmj/eVmK4spZ2r+x22mOHXGV1KQAQQK5sf61325U53hlvKzHsx9tKjNZ+TbSVGO3vtR0Q\nEHxlnv0WEBDs0XhzzlHfX//61wb3NWe8qTl65dvpJ3a
LxYLIyEiEh4cjOzu70TlPPfUUwsPDERcX\nhz179jTr2Mad8cIcT8cZQ60YKuTAGEaIYX9hwBzb7boLA5o37t457DI8o/84nI27Oqc+3cJutVqR\nlZUFi8WC4uJi5ObmYt++fXZzNm7ciIMHD+LAgQP4+9//jqlTp7p8LBEReZ9uYS8sLERYWBjMZjP8\n/PyQnp6OvLw8uzkbNmzAI488AgBITk7GmTNncOzYMZeObVqJF+Z4Os4YasVQIQfGYIzmj5eUeDbu\n6hw7en2dDz74QCZPnmzbX7lypWRlZdnNGTlypHzxxRe2/SFDhsiuXbtk7dq1To+t6xNx48aNG7fm\nb03R/RKw2mudnautz+7x5FgiImpIt7CHhISgrKzMtl9WVgaTyaQ75/DhwzCZTLh06ZLTY4mIyPt0\ne+yJiYk4cOAASkpKUF1djTVr1iAtLc1uTlpaGt555x0AwM6dO9GpUyd069bNpWOJiMj7dD+x+/r6\nYunSpUhNTYXVakVGRgaioqKQk5MDAMjMzMSIESOwceNGhIWF4frrr8fy5ct1jyUiopZ11b8rBgD2\n7duHvLw8lJeXAwBMJhPS0tKa9Uawb98+HDlyBMnJyfD397fdb7FYcPfdd+Pzzz9H586dER0dDU3T\nsGvXLiQkJGDIkCFu5bx9+3YUFhaib9++GDZsGHbu3ImoqCgEBQXh/PnzWLBgAXbv3o2YmBg899xz\nCAoKwuLFizFq1CiEhoY2es6LFy/ivffeQ0hICIYOHYrVq1fjyy+/RHR0NB577DH4+fnhxx9/xLp1\n63D48GF06NABEREReOihhxAYGOjW4yCitueqf6VAdnY2xo4dC6D2csnk5GRcvnwZY8eOxfz5850e\nv3z5cixevBj33XcflixZgpiYGKxfv942Pnv2bMyePRszZszAI488gmeffRazZs3ChQsXMG/ePLzy\nyitNnvvhhx+23U5KSrLdfvPNN/GHP/wB586dw7x58zB//nw8+uijuP766wEATz/9NCorKzFr1ixc\ne+21mDRpEgDg+eefR1JSEgYNGoQ33ngDJ0+etIs3adIkbNy4EYsWLcKECROwdu1aDBw4EIWFhZg8\neTIWLVqExx9/HBcvXkRhYSEuXryI0tJSJCcnIz8/34XVvnpOnDjh0fGnTp3yUiatY/jw4fjll18w\na9YsjB8/Hu+++67d+BNPPOH2uT1dS8BY6zl8+HAAaJH1bLNrqXe5Y2sICwuT6urqBvdfvHhRevfu\n7fR4k8kkMTExcvbsWREROXTokPTv39/2J7jx8fESFRUlly5dkqqqKvH395czZ86IiMj58+elb9++\nIlJ72eY999wjI0eOtG3XXXed7f74+HhbzP79+8uJEydEROTcuXMSExMjkZGRtvGEhAS7HPv162fL\nxWq1yr///W+ZNGmS3HDDDZKamiorVqyQyspKiY2NFRGRS5cuyY033iiXLl0SEZHLly9LbGysxMTE\nSE1NjYiIVFVVyf/93/+JiMhPP/0kcXFxIiJSUVEhM2fOlIiICOnUqZMEBwdLRESEzJw5UyoqKnTX\n8u6775YzZ87IzJkzZdy4cbJ69Wq78alTp4qISGlpqWRkZNjOOXHiRImJiZHx48fL8ePH5dSpU3bb\nzz//LD169LDtb9q0yXbOiooKefTRRyU2NlbGjh0rx44dk2effda2vl999ZX07NlTevfuLaGhoZKf\nn29byxdffFEOHjzY6GMpLCyUlJQUGTdunJSWlsrQoUMlMDBQEhMTZffu3VJZWSnPP/+8REdHS0BA\ngHTp0kWSkpJk+fLldrnpreXXX3/d6LZr1y7p1q2bjBo1SmbOnCnr1q2TkSNHyujRo+XChQu2/Ftj\nLUXE6Xp6upYi4nQ9PV1LEXG6np6upYh4/NpsjbV0xVUv7BEREXLo0KEG9x86dEj69OkjIiKxsbFN\nbr/5zW8kOjra7ti
zZ8/KsGHDZNq0aRIXF2creiJid7v+fnx8vDz00EOydetW0TRN8vPz5aabbhJN\n00TTNOnbt6/txVC/yNedY8yYMfLWW2+JiMjEiROlsLBQRET2798viYmJthj1Xbx4UdavXy8PPvig\ndOnSRaKjo+XXX3+V06dPi7+/v/z8888iUvsGFB0dLbGxsbYX8qlTp6R///62c9WtwV133SULFiyQ\no0ePyuXLl0VE5MiRIzJ//ny56667vFKM7rzzTlm8eLG89NJLEhERIfPnz5effvpJFi9eLKNHjxYf\nHx8xm812m6+vr5jNZunZs6fdOjz66KPyxz/+UQ4dOiSvvfaa3HvvvRITE2Mbv+OOO+zW8ne/+52I\niJjNZpk+fbqEhoZKYmKivPbaa1JeXm47LjExUTZu3CjvvvuuhISEyPvvvy+XL1+WLVu2yMCBA+We\ne+6Rt99+W0pLS2XhwoUyb9482b9/v0yYMEFmz57t0lp26NBBUlJSGt06duxoe0Ov85e//EVuu+02\nOXnyZKutpYg4XU9P11JEnK6np2spIk7X09O1rP8ad/e12Rpr6YqrXtg3bdokvXv3ltTUVJk8ebJM\nnjxZUlNTpVevXrJx40YREenatavs3r1bDh061GDr3r27pKSkyJ49e+zOW11dLRMmTBAfHx9JSkqS\nqqoqERGxWq22ORUVFbZP1zU1NbJw4UIZMmSI7Z3TbDbb5vbo0cP2YujZs6ccOXJERGrfXePi4qSi\nokIefvhh6dmzpyQlJdleMIMHD5a9e/eKSMPCXt+5c+fkpZdekp49e0qfPn0kJydHoqKiJCMjQ2Ji\nYiQ7O1tef/11iY2NlYyMDOnTp4/tjeT48eMyePBgEREJDw9vMkZ4eLhXilH9N8fQ0FC7+XFxcfLq\nq69KamqqfPPNN7b7669l/XXo16+f7X/0uv3IyEjbT3HJycl256/7qabuHJcvX5bPPvtMHn/8cenW\nrZukpKRITk6OXYzGcqz7Sa1O3Zuk1Wq1faBwtpbR0dGyf//+RsdNJpNERkbavd5ERJYvXy7R0dFy\nyy232HLRy9PTtRQRp+vp6VqKiNP19HQt6x6H3np6upYinr82W2MtXXHVC7tIbVH98ssv5YMPPpC1\na9fKjh07bG0IEZFJkybJtm3bGj02PT1dSktL5ejRow3GLl++LNu3b7d94nR08uRJKSoqsruvrKxM\n7r//fnniiSdsLyg9VVVV8t///te2f+bMGdmzZ4989dVXDXL6/vvvnZ7v0KFDth8LDx48KO+9957t\njUFE5Nt6UwlJAAAFLUlEQVRvv5UPPvhA9u3b1+jxQ4cOlezsbNuP4SIiR48elQULFsiQIUO8Uozq\nF//nnnvObm5d4S0tLZX7779fpk2bJr/88ovd/0AhISGycOFCefXVV6VHjx52//P07dtXFi9eLEOH\nDpVPP/1U5syZI0899ZRomiZ//vOfZfz48SLS+JvkpUuXZNOmTTJx4kQZMGCAWCwWWbNmjZhMJlm3\nbp2IiGiaJklJSTJw4EDba2r9+vUybNgw23nq/udxtpbvv/9+k8/DRx99JDNmzJDNmzc3GNu0aZOE\nhYW1ylqKiNP19HQtRcTpenqylnXxnK2np2vpynqqsJauUKKwq+jjjz92+ccelZw6dUqeeeYZWy+z\nU6dOEhERIc8884ycOnXKK8XoT3/6k1RWVjaY88MPP8iYMWPs7lu/fr0kJSVJ165dbffNmTNH5s6d\na9uOHz8uIrU/mk+YMEFERLZu3Sq///3vJT4+XmJjY+Xuu++WZcuW2T4tPfjgg7rrUFBQIHfccYek\np6dLSUmJDBkyRAICAiQhIUG++uor2bt3ryQmJkpQUJDcdttttjfdEydOyKJFi1xaSxGR4uJi2bJl\ni+13PHXqftp0Nt4aa1m3ng888ECj6/nAAw94tJYi4nQ9XVnL7du3y3fffSciIvn5+
fLKK6/Ili1b\n7HLRm9PUWh44cKDRtUxOTrZbS731PHr0qN1r05O1TElJcXstX3/9dd3z12Fhb0fefvtt3fG61o67\n403Nqaqqsv1k5GkOzo53ZU5zciguLpZPPvmkQcHYtGmTLFq0SPr06SP33nuv3HLLLfLRRx/ZxuPj\n452O14/h6ptD/bWsP/7JJ580ebyzx6GXQ/1xd2PUzdm+fbv85z//EZHa4li/KM+aNUuSk5MlMTFR\nnnnmGUlOTpYXXnhBBg8eLC+//LLLc5y9OdQf/9e//iUvvvhio28edXk2dQ5n485ycDz+k08+aVYO\nzrCwtyPOWkuejrdGjNbMwVlhdnY1lrNxV2J4481DhRjOirIrV645m+MshjfePFp63JUcXKHEHyiR\n9/Tt27fJsf379yMiIsKj8erqao9i/PDDD+jTp49Hx1+8eFE3B09j1D3O2NhY7Ny5E/7+/igpKcGY\nMWMwYcIETJs2DQkJCaiursZ3331nO+7cuXMYM2YMoqOjkZ+fj0uXLumO7927t0GM+++/H+PHj7fF\nuHTpkkfje/bsUSLGxYsXUVRUhOrqanTr1g2HDx9GUFAQLly4gOTkZHTo0AF79+4FAMTHx9tu19+v\nf39jc6qrq3Vj1NTU6I4XFRUhOjrao3N4Ou5KDkVFRU2+duvofqUAGc+JEydgsVgQHBzcYOy2227z\neLw1YqiQA1D7zaN1f8VsNpvx2WefYcyYMfjpp58gIujataut4ACAv78//vnPfyIjIwNFRUW44447\ndMcbi6Fpml0MT8dVifGb3/wGvr6+8PX1Re/evREUFAQAuPbaa9GhQwf89re/xfnz53Hddddh9+7d\ntufizJkz6NCh9u8onc1xFsPZOACPz9EaObjEpc/1ZBjOriDydLw1YqiQg4g4vYzW2dVYzsZdieHp\nuCoxnF1y7MqVa87mOIvhymXPnp6jNXJwBVsxRE0oKyuDn58fbrrpJrv7RQRffPEFBg0a1OIxevTo\n4dH4oEGDlIiRmJiIjh07Nnj8P//8M44eParbWnPVr7/+qhsjPDzcaQ6ensPTcVdycGWtWNiJiNqY\nq/4lYERE5F0s7EREbQwLOxFRG8PCTkTUxvw/LC3SVFwuywcAAAAASUVORK5CYII=\n", "text": [ "" ] } ], "prompt_number": 45 }, { "cell_type": "code", "collapsed": false, "input": [ "## combine two features\n", "## standarization\n", "ss = StandardScaler()\n", "train_combined_X = ss.fit_transform(np.c_[train_sf_X, train_pca_X])\n", "test_combined_X = ss.transform(np.c_[test_sf_X, test_pca_X])\n", "print train_combined_X.shape, test_combined_X.shape\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1000, 65) (9000, 65)\n" ] } ], "prompt_number": 46 }, { "cell_type": "code", "collapsed": false, "input": [ "## filter features by forest model\n", "trees = ExtraTreesClassifier(n_estimators=100)\n", "trees.fit(train_combined_X, train_y)\n", "pd.DataFrame(trees.feature_importances_).plot(kind='bar')\n", "selected_features = 
np.where(trees.feature_importances_ > 0.005)[0]\n", "\n", "train_selected_X = train_combined_X[:, selected_features]\n", "test_selected_X = test_combined_X[:, selected_features]\n", "print train_selected_X.shape, test_selected_X.shape" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1000, 49) (9000, 49)\n" ] }, { "metadata": {}, "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEBCAYAAACDu+UiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX9UVOeZxx8S0JRfIlEgAWVUkN8MGHQSe0zGxYRqCdbE\nbjBZVyPsobbEuJvY6G7a/GiCuGqMHrdbzEnt0TRqtWm0FWmTLRM1ldAk2O4G22AXgsEf8QcoIW5Q\nfPaPCZcZ5s6dO3PvzLx3+H7OmaP34X2f9/vee+e5w/e+3AljZiYAAAAhxU3BFgAAAEB/UNwBACAE\nQXEHAIAQBMUdAABCEBR3AAAIQVDcAQAgBPFY3BsaGigzM5PS09Np3bp1sm1WrFhB6enpZDabqaWl\nRYqbTCbKz8+nwsJCmjFjhn6qAQAAKBKu9MOBgQGqrq6mt99+m5KTk2n69OlUVlZGWVlZUpv6+no6\nefIktbW10XvvvUfLly+npqYmIiIKCwsjm81G8fHx/p0FAAAAJxQ/uTc3N1NaWhqZTCaKiIig8vJy\n2r9/v1ObAwcO0JIlS4iIyGKxUE9PD507d076Of5GCgAAAo9ice/q6qIJEyZI2ykpKdTV1aW6TVhY\nGM2ZM4eKiorolVde0VM3AAAABRRtmbCwMFVJ3H06P3r0KN1+++10/vx5uvfeeykzM5NmzZrl0xgA\nAACcUXJGFD+5Jycn06lTp6TtU6dOUUpKimKbTz/9lJKTk4mI6PbbbyciovHjx9OCBQuoubnZrcDB\n1zPPPOO0LVpMND1G0CiaHmgMTT1G0KhnTk8oFveioiJqa2ujjo4O6u/vpz179lBZWZlTm7KyMtqx\nYwcRETU1NVFcXBwlJibSF198Qb29vURE1NfXR7/73e8oLy/Po6COjg6hY6LpMYJG0fRAY2jqMYJG\nf+WUQ9GWCQ8Pp61bt1JJSQkNDAxQRUUFZWVlUV1dHRERVVVV0bx586i+vp7S0tIoKiqKtm/fTkRE\nZ8+epQceeICIiK5fv06PPPII3XfffaqFAQAA0AAHmeESGhsbXdqIFBNNjxE0iqYHGkNTjxE06pnT\nU/kO+6pR0AgLC1PlHwEAABjCU+0U7vEDNptN6JhoeoygUTQ90BiaeuRi8fHxFBYWZuiX4x+BujsO\ncih67gAAYGS6u7sN7wz4ulwctgwAIGQJhfribg6Gs2UAAABoR7jiLpJfZ1SfEXq8j4mmxwgaRdPj\njR9tVLyZo3DFHQAAgHbguQMAQha5+hIbG0+9vd1+GzMmZixduXLJY7tLly5RRUUFvfXWWzRu3Dha\nu3YtLVq0yKWdr547VssAAEYU9sLuvw+Uvb3qVrd873vfo1tuuYU+++wzamlpoW9+85tkNpspOztb\nFx3C2TIi+XVG9hmhx7uYaHqMoFE0PUby3Pv6+uiNN96gH/3oRxQZGUlf//rXaf78+bRz507FfvDc\nAQBAYD7++GMKDw+ntLQ0KWY2m+mjjz7SbQx47gCAkEWuvtj/KMifNcdzTTty5Aj9/d//PZ05c0aK\nvfLKK
/T6669TY2OjczascwcjgdhY5z8nj43F9/MC4xEdHU1Xrlxxil2+fJliYmJ0G0O44i6SX2dk\nnzFU9QzdDGskInZa9SCKRn/FoMf7mKhMnTqVrl+/TidPnpRif/rTnyg3N1exHzx3AAAQmKioKHrg\ngQfohz/8IX3xxRd09OhR+vWvf02LFy/WbQx47sBQuPqlOH+Ae0T13InsDzVbtmyZtM69traWysvL\nXbP56LmjuANDgeIOvEHkP2JSS8jcUBXJrzOyzxjqeoj0zifWPjOCRtH0qPWjr1y5JPvF1Xq99Czs\nvs6RSMDiDgAAQDuwZYChgC0DvCEU6kvI2DIAAAC0I1xxF8mvM7LPGOp64LkHPyaaHrnY2LFjg/4d\nqFpfY8eOVZyjO4Qr7gAAoBeXLl2ixsZGl5uewYr50v/SJd9u0MJzB4YCnjsAduC5AwDACES44i6S\nX2dUn3Ek6IHnHvyYaHqMoNFfOeUQrrgDAADQDjx3YCjguQNgB547AACMQIQr7iL5Y0bw8Iyg0T85\n9c4n1j4zgkbR9BhBo79yyiFccQcAAKAdeO7AUMBzNxbDH6+r9+NwRzJ4njsIKVDcjQWOl/8w3A1V\nkfwxI3h4RtDon5x65xNrnxlBo3dt/a8n1PYZPHcAAAAuwJYBhgK/5hsLHC//odmWaWhooMzMTEpP\nT6d169bJtlmxYgWlp6eT2WymlpYWp58NDAxQYWEh3X///V5KBwAA4CuKxX1gYICqq6upoaGBWltb\nadeuXXTixAmnNvX19XTy5Elqa2ujbdu20fLly51+vnnzZsrOzv7qCu4ZkfwxI3h4RtDon5x65xNr\nnxlBo3dt/a8n1PaZXz335uZmSktLI5PJRBEREVReXk779+93anPgwAFasmQJERFZLBbq6emhc+fO\nERHRp59+SvX19VRZWYlfxQAAIJCwAnv37uXKykppe+fOnVxdXe3UprS0lN99911pu7i4mD/44ANm\nZl64cCF/+OGHbLPZuLS0VHYMDxIAcIKImIgdXjh/9CImZuxX+9f+iokZqzknjpf/8LQvw5UKv1or\nhYd9Kmdm+s1vfkMJCQlUWFjo8VeJpUuXkslkIiKiuLg4KigoIKvVSkRDv4ZgG9tD2IhoaNtmswmj\nz8jb9j82aiQ7VurtDcPxEmjbZrPRz372MyIiqV4qolT5jx07xiUlJdJ2TU0N19bWOrWpqqriXbt2\nSdsZGRl85swZXrNmDaekpLDJZOKkpCSOjIzkxYsXe7z6NDY2urQRKSaaHiNo1DMnSZ8EG10+CYqi\n0V8xf4/j7b71R06j7bNg6vFQvlnRcy8qKqK2tjbq6Oig/v5+2rNnD5WVlTm1KSsrox07dhARUVNT\nE8XFxVFSUhLV1NTQqVOnqL29nXbv3k1/93d/J7UDAADgXzyucz906BCtXLmSBgYGqKKigtasWUN1\ndXVERFRVVUVEJK2oiYqKou3bt9O0adOccrzzzju0ceNGOnDggKsArHMHXoB10/7DH/sWx8t/4Nky\nIKRAsfAfKO7GAs+WMcA61VDX6J+ceucTa58FU6PafeuPnEbdZ8F9L8ijuFoGAKOCR82CkQ5sGWAo\n1P6aDzvAe2DLGAvD2TIAAAC0I1xxF8kfM4KHZwSN/smpdzux9hk8d31igRpHRM9duOIOAABAO/Dc\ngaGA5+4/4LkbC3juAAAwAhGuuIvkjxnBwzOCRv/k1LudWPsslD332Nh4CgsLk16xsfFe5xNtn8Fz\nBwCMeOx/f8BkfwIlO/09AtAPeO7AUMBz9x+B8tzt4NhoBZ47AACMQIQr7iL5Y0bw8Iyg0T859W4n\n1j4LZc/d23GMsM/guQMAAAgI8NyBoYDn7j/guRsLeO4AOOC4DM9xCR4AoYZwxV0kf2x4TKkwiKJR\n75x6rEnWU49D1Kd2Q18C7boET6RjCM9dn5g/coqmxx0h+zx3fzzPe2h
9ro16e2drE2gQHOdMZKXe\n3jDlDgAAIQhZz93//uHI8AlF8661eu4j8RiqBZ67sYDnDgAAIxDhinuw/EPv/C19cxrD1zOmHrVe\nr1Y9Rj2u8NwDG/NXTjmEK+4AAAC0A8/d55wjwyeE5z5ygOduLOC5AwDACES44i6KV6z8xy76ajSG\nr2dMPfDcfRlHfz3w3P2bUw7hirsoKP2xCwAAiA48d1X9R65fC8995ADP3VjAcwcAgBGIcMVdLK9Y\nLuZbTqN/byQ89+DHAjeO/nrgufs3pxzCFfdQBd8bCQAIJPDcVfXX7teK5l2rRTTdRvXcvXmQnWNb\nPR54pxZ47sbCU+1EcVfVH8XdIYLi7gPe7EcjaNSS044455RRMdwNVbG8YrmY/jmN4esZU49onrs2\n3aJphOcugh53CFfcAQAAaAe2jKr+sGUcIrBlfAC2zFBOO+KcU0ZFsy3T0NBAmZmZlJ6eTuvWrZNt\ns2LFCkpPTyez2UwtLS1ERPR///d/ZLFYqKCggLKzs2nNmjU+TgEAAIDXsALXr1/nKVOmcHt7O/f3\n97PZbObW1lanNgcPHuS5c+cyM3NTUxNbLBbpZ319fczMfO3aNbZYLHzkyBGXMYZLaGxsdGnjS4yI\nmIiZqPGrf0l136H+zn2dc3qnW6seb2J65tRDdzD0uGsnd1z11qhFt1E0aslp1PeCaHo8lG9W/OTe\n3NxMaWlpZDKZKCIigsrLy2n//v1ObQ4cOEBLliwhIiKLxUI9PT107tw5IiKKjIwkIqL+/n4aGBig\n+Hh82zwAAAQEpcq/d+9erqyslLZ37tzJ1dXVTm1KS0v53XfflbaLi4v5/fffZ2b7J3+z2czR0dG8\natUq2TE8SPAZkj4dsOwnIO/6D//krjWf/+atN6LpVqvHXTstxzAQuo2iUUtO0c4po+Jpv4UrFX77\nzRBVFwjZfjfffDMdP36cLl++TCUlJWSz2chqtbr0X7p0KZlMJiIiiouLo4KCAqnd4NIfb7eHcN7W\n2l/vfL7OL1DbQ5qHth2Po6h6nNsOZ6h/oPS70+O5fWD395A+vfIN5nTcpmE/c9gS7PwXadtms9HP\nfvYzIiKpXiqiVPmPHTvGJSUl0nZNTQ3X1tY6tamqquJdu3ZJ2xkZGXz27FmXXM8//zyvX7/e49VH\nFK+Y4LkzMzx3eO7u+/qa06jvBdH0eCjfyp57UVERtbW1UUdHB/X399OePXuorKzMqU1ZWRnt2LGD\niIiampooLi6OEhMT6cKFC9TT00NERFevXqW33nqLCgsLPV9tAAAAaMbjOvdDhw7RypUraWBggCoq\nKmjNmjVUV1dHRERVVVVERFRdXU0NDQ0UFRVF27dvp2nTptF///d/05IlS+jGjRt048YNWrx4Ma1a\ntcpVANa5C41ourHO3X9gnbuxwLNlhiIo7j4gmm4Ud/+B4m4s8GwZDc+vcPd8D71z6j9n0fZj8PSo\nfaaJVj2B0S2aRq3vJXV9RXsviKbHHcIVdwAAANqBLaOqP2wZhwhsGR+ALTOU044455RRMZwtAwAA\nQDvCFXexvGK5mP45jeHrGVMPPHfvY/DcjaXHHcIVdwAAANqB566qPzx3h0jAdMt97+jQF40r64Hn\n7j3w3I2Fp9qp+GwZAILJ8ELe26vuWUcAAAFtGbG8YrmY/jmN4esFT48Wvxaeu/cxeO7G0uMO4Yo7\nAAAA7cBzV9UfnrtDJKgesB147v4AnruxwDp3AAAYgQhX3MXyiuVi+uc0hq8XPD3w3P2nB557YGP+\nyimHcMUdAACAduC5q+oPz90hAs/dB+C5D+W0Y7z3gmjAcwcAgBGIcMVdLK9YLqZ/TmP4esHTA8/d\nf3rguQc25q+ccuAvVAEAQiL3+IkrVy4FUZGxgOeuqj88d4cIPHcfgOc+lNOO78cQ2IHnDgAAIxDh\nirtYXrFcTP+cxvD1gqcnGJ57bGw
8hYWFSa/Y2HjFceC5e59Ty76A5+4Z4Yo7ACIw9ETKRiJiJ+8X\nACMAz11Vf3juDpER4bnrPW947kM57cBz1wo8dwAAGIEIV9zF8orlYvrnNIavFzw9wfDcvR0Hnrv3\nOeG565dTDuGKOwAAAO3Ac1fVH567QwSeuw/Acx/KaQeeu1bguQMAwAhEuOIullcsF9M/pzF8veDp\ngeeuz9jw3IMf81dOOYQr7gAAALQDz11Vf3juDhF47j4Az30opx147lqB5w4AACMQ4Yq7WF6xXEz/\nnMbw9YKnB567PmPDcw9+zF855RCuuAMAANAOPHdV/QPnuYv2BQXw3If6x8SM9fnYwHMfymkHnrtW\nPNVOFHdV/QNX3EU7oVHcvR9bbT4Ud+VxRHsviIYuN1QbGhooMzOT0tPTad26dbJtVqxYQenp6WQ2\nm6mlpYWIiE6dOkWzZ8+mnJwcys3NpS1btngcSyyvWC6mf04j+Izw3L0fG567ck6jvhdCxnMfGBig\n6upqamhooNbWVtq1axedOHHCqU19fT2dPHmS2traaNu2bbR8+XIiIoqIiKBNmzbRRx99RE1NTfQf\n//EfLn0BAADoj0db5tixY/Tcc89RQ0MDERHV1tYSEdHq1aulNt/5zndo9uzZ9NBDDxERUWZmJr3z\nzjuUmJjolOtb3/oWPfbYY1RcXDwkALaMrrr1BraM92OrzQdbRnkc0d4LoqHZlunq6qIJEyZI2ykp\nKdTV1eWxzaeffurUpqOjg1paWshisagWDwAAwDfCPTWwXz09M/wK4tjv888/p4ULF9LmzZspOjra\npe/SpUvJZDIREdGFCxdo4cKFZLVaicjuMR0/fpxWrlwpbQ9itVqlbcf/O/MyERVIW2rzfdVaJp+N\niI475dNTj/O4VqnN8HyD++fll1+mgoICp/2lRo9Sf5H0DO1v7/QM4by/h2LOx199f2c9/jsfbS7z\nI/J8vPTe30r51OgZyknSfFzjQ2jRIzc/vfeP1nxa+ttsNqqtraWkpCSpXirCHjh27BiXlJRI2zU1\nNVxbW+vUpqqqinft2iVtZ2Rk8NmzZ5mZub+/n++77z7etGmTbP7hEhobG13a+BIjIiZiJmr86l9S\n3Xeov3Nf55ze6VarR6tub9v6ez/qPbbW/Sh3XPUe2xs9cn3VatQ75u38fM1p1PeCHjE9c3oq3x6L\n+7Vr13jy5Mnc3t7OX375JZvNZm5tbXVqc/DgQZ47dy4z2y8GFouFmZlv3LjBixcv5pUrV7oX8JXA\nmJix0oGPiRnrSZZHhk4Mln2TeNd/eHHXms99f6269SaYeuTG1rof1R5DLWN7o8dz2+Dub3/kNOp7\nQTQ87Q+Ptkx4eDht3bqVSkpKaGBggCoqKigrK4vq6uqIiKiqqormzZtH9fX1lJaWRlFRUbR9+3Yi\nInr33Xfptddeo/z8fCosLCQiorVr19I3vvENl3GGvm2eqLdXnRUEAADADQG6yLhlUAI5/Qqm/VdR\n53ze/0pHMr8aa9GoVo9W3d62hS2j/9je6JHrq1aj3jFv5+drTqO+F/SI6ZnTU/nGs2UAACAEEebx\nA3qv7cU6d33AOnfvx1abT93YWOeup55QwtDPc4+NjaewsDDpFRsbH2xJAABgCAQs7jbpf0M3WRuJ\niJ2eyKf+uQvq2qnt64+c+usO3vNGhsccL9DDL87ajoO+ff0xtt7H35txAnGsteY06ntB2771T045\nPK6WAaFJoB4tPHSBtlFv72zd8wMA5BHac9fiucFzF0OP3vvMDjx3fwDP3VgY2nMHAADgGwIWd5uq\nGDx3pb7i6dH/OOjb1x9jw3NXzinaewGeOwBAWET7mkYQPOC5q+oPz91XPfDclfV4buuf80zvvt7k\ntCPOe8GowHMHAIARiIDF3aYqBs9dqa94euC5e9dXq+ceGI3w3EXQ4w547iMAOR8WABDawHNX1d/Y\
nnnswfU947sp6PLeF566nnlACnjsAAIxABCzuNlUxeO5KfcXzPeG5e9cXnrv37eC5OyNgcQcAAKAV\neO6q+o8Mzz0mZqzLjVfHrz/0RQ88d2U9ntvCc9dTTyjhyXPHahkgMbyQ47tsATAuAtoyNlUxeO5K\nfbX6z/rrgefuXV947t63g+fujIDFHQAAgFbguavqPzI8d7UxeO7KY6vNB89deRx47spgnTsAAQbf\n/QtEQMDiblMVg+eu1Fdsz12p+IWC56703b/B8Nz12N/w3PWJ+SunHAIWdxDqKBc/oDfY3yMTeO6q\n+sNz91WPluNqVM9dq18v2n0KLYimJ5SA5z7CgN8LACASsrjbVMXgucvHvPV7g7nOXW/fXBTP3X99\nxfK4RdMDz90ZAYs7AAAArcBzV9XfOJ673v66XEzuGTTuvoQZnrt3fV37w3PXU08oAc8d6M6Q9WN/\nYfUFIML9HtEQsLjbVMXguSv1le8fiBg8d3/2FcvjHh7X434PPHffcsqBp0ICoIFAfT+t3DjurDAA\niOC5q+wPz91TzBv/OJQ8dy0xo+wztWjZZ/DcvQeeuwPwBAEAIwUBi7tNVcwX71HJE4Tnrk9spHru\n+u8zuZjW/r739cY/DsS+gOfuGY/FvaGhgTIzMyk9PZ3WrVsn22bFihWUnp5OZrOZWlpapPiyZcso\nMTGR8vLyVAsCAACgA6zA9evXecqUKdze3s79/f1sNpu5tbXVqc3Bgwd57ty5zMzc1NTEFotF+tnh\nw4f5ww8/5NzcXLdjDEogIibir15ysaG4GuT6epPPsx71WryZi5Y5u+sfiJg6Pd4dVy1ju2un9hiG\n+j6LiRnr8DPH7eCce2rzgSE87Q/FT+7Nzc2UlpZGJpOJIiIiqLy8nPbv3+/U5sCBA7RkyRIiIrJY\nLNTT00Nnz54lIqJZs2bR2LH+WT0AnO8h4P4B8Ab8rULoo7gUsquriyZMmCBtp6Sk0HvvveexTVdX\nFyUlJakWsXTp0q/+9ywRXRj2UxsRHSeilfYtB8/JarVK247/d+ZlIirwOt9Q2+EM9idd9NhsNjp+\n/DitXLlSZlyr1GZ4PqvV+tUbchMRFVBv72y3/ZXno7x/vM338ssvU0FBAVmtVtl2nvava3/PeuT2\nj0NGcp7fYMz5+Pt6/sjpcY27zzf8+A9psJHj/NT216rHm/cbkW/HS0mPp3xq32+D/d3vX3X9h+vR\nmk9Lf5vNRrW1tZSUlEQmk4k8ovSxft++fVxZWSlt79y5k6urq53alJaW8tGjR6Xt4uJi/uCDD6Tt\n9vZ2L22ZRjcx519DGhsbXXI5xuT6epPP3ta5nZxGuf7Kv/Iqj62vRvfz1jumrNG346plbHft5PaZ\nlrH9t888H9dA6HanUe+xvTmG7vSo0RjsmJ45PZRvVvzpsWPHuKSkRNquqanh2tpapzZVVVW8a9cu\naTsjI4PPnj0rbXtf3N3FPE9meN7hfb3J51mP2r7eja2vRvdj6x3TqjFQ+1HvY2jUfaZVo95jq80H\nhvC0PxQ996KiImpra6OOjg7q7++nPXv2UFlZmVObsrIy2rFjBxERNTU1UVxcHCUmJiqlBQAA4GcU\ni3t4eDht3bqVSkpKKDs7mx566CHKysqiuro6qqurIyKiefPm0eTJkyktLY2qqqroxz/+sdR/0aJF\nNHPmTPr4449pwoQJtH37dhWSbKpi6teA+p7PGGuN5WLqx8Y6d736+x4L5rmn9bjqPbaW90Kg1pVr\nXZPuj5xyeHy2zNy5c2nu3LlOsaqqKqftrVu3yvbdtWuXaiEAAAD0Y0Q9W8aOunxqny0j90CnoWVm\n3o/tzZzV7jO5sfWOadWodl+oHRvPlvGvRs+6tY+DZ8sog2fL+JlArBfGM3EAAN4iYHG3qYqJ5rn7\n02f0/jnZ2vTAc/elv+8xeO7e54Pn7hk8zx0AMGIYSc/Fh+eu0
XMPhM/ori88d+/6wnPXT6Nn3drH\nCdR3uhrVx4fnDgAAIxABi7tNVWwkee7ex7T29z0Gz937GDx37/Np9bi15DSK5y5gcQcAAKAVeO7w\n3HWJwXP3PgbPXXkceO7KwHMXGKxfBwD4CwGLu01VLBQ8d+/Xr6uNae3vewyeu/cxeO7e54Pn7hkB\nizsAAACtwHMPoueuJSaiHjnguXun23VseO6O7Yae3WRn8I+QHP84SS42GJd77hM8dwAEReu9C9z7\n8J5gfX+vu2c5OcblYq62Z+gjYHG3qYqFgufuv1jwxg6G56713oX/7n2oixnRc3fcZ65FU79xtObT\nOraRPXc8WwYAAIYRCs+ggecOz103PXKI5h9jnwVWo5ZxAvU3H0Z9ljw8dwAAEIBA36cQsLjbVMXg\nuSvFgjd2MNe5h9Y+k4tp7e97TOtxNarn7qmd0s14b+5TYJ07AAAIhPLN+OBiOM/d3TpXubzw3AOr\nRw7R/GPsM+9i7t5v8Ny9a+faVp/n5ISU5z7S164CEEgC9X4L5t8ahOrfOQhY3G0+x+C5B39seO7q\nY8pFRa6vfDwU9lkwn7Okdmxvj5c37wWsc3eD3JpULX2Ntp41FNByDI3KUFGxEZGVenvDlDsMYyTu\ns2Ci9niJUlMM57nbEcHPNoLGwOkJlDerJSbaPoPG0NTjjUYtFwJPnntIfHIHwWf4A5m8/RQKwEjE\nn++bkPLcjeYz+i8WzLHlYsEcW20smGOrjQVzbLlYMMdWGwvm2HIxz23lvH1fbvoKWNwBAGDkIneD\n15f19PDcZWLw3ENTo2h6oDE09QRSY0itcwcAAOAZAYu7TfBYMMdWGwvm2HKxYI6tNhbMsdXGgjm2\nXCyYY6uNBXNsuZi/croiYHEHAACgFXjuMjF47qGpUTQ90BiaegKpEZ47AACMMAQs7jbBY8EcW20s\nmGPLxYI5ttpYMMdWGwvm2HKxYI6tNhbMseVi/srpisfi3tDQQJmZmZSenk7r1q2TbbNixQpKT08n\ns9lMLS0tXvV15bjgMdH0GEGjaHqgMTT1GEGjv3K6oljcBwYGqLq6mhoaGqi1tZV27dpFJ06ccGpT\nX19PJ0+epLa2Ntq2bRstX75cdV95egSPiabHCBpF0wONoanHCBr9ldMVxeLe3NxMaWlpZDKZKCIi\ngsrLy2n//v1ObQ4cOEBLliwhIiKLxUI9PT109uxZVX0BAAD4B8Xi3tXVRRMmTJC2U1JSqKurS1Wb\n06dPe+wrT4fgsWCOrTYWzLHlYsEcW20smGOrjQVzbLlYMMdWGwvm2HIxf+WUgRXYt28fV1ZWSts7\nd+7k6upqpzalpaV89OhRabu4uJjff/99VX2/WoaJF1544YWXDy8lFB/5m5ycTKdOnZK2T506RSkp\nKYptPv30U0pJSaFr16557Et2dUoSAAAA+ICiLVNUVERtbW3U0dFB/f39tGfPHiorK3NqU1ZWRjt2\n7CAioqamJoqLi6PExERVfQEAAPgHxU/u4eHhtHXrViopKaGBgQGqqKigrKwsqqurIyKiqqoqmjdv\nHtXX11NaWhpFRUXR9u3bFfsCAADwP0F//MCJEydo//790s3WlJQUKisrc7kQnDhxgk6fPk0Wi4Wi\no6OJiOjo0aP00UcfUVVVFdlsNnr//fepsLCQiouL3Y535MgR2rt3L82ePZsWLFhAX3zxBdXW1tIv\nf/lLuvurIX9jAAAQcElEQVTuu6m2tpbGjBkjtf/yyy9p9+7dlJycTHPmzKGf//zn9Ic//IHGjx9P\nt9xyC505c4ZuuukmysjIoIcffphiY2P9sJcAAMA7gvoXquvWraNFixYRkX0ZpcVioRs3btCiRYto\n7dq1UrstW7bQt771LVq1ahXl5OTQm2++SWvWrKEnn3yS/vmf/5m+//3v0+rVq+nq1av03HPP0fr1\n66W+48aNk/7/yiuv0GOPP
Ua7d++m9evX09q1a+nxxx+nK1eu0CeffEKvvfYaTZ48mX784x/T+fPn\niYjo0Ucfpfr6etq8eTMtXryY9u3bR5cvX6Zt27bRnj17qLm5mb788kvq7Owki8VCjY2Nfttfn332\nmeq2Fy9e9JuOQS5fvkyrV6+m22+/nV5//XWnn333u9/12F/tfAIxFyJt8xFtLlowwnl26tQpqqys\npGnTplFPTw89+uijlJubS4sXL3bRv3v3bun/PT09VFFRQXl5efTwww/TuXPnpJ/l5+fTCy+8QH/7\n298CPp9B9DrPiMjD7VY/k5aWxv39/S7xL7/8kqdMmSJt5+TkcG9vL6ekpHB7ezvfcccdnJiYyNeu\nXeP8/HyOjo7mnp4eLi0t5Xnz5nFMTAyXlpZyaWkph4WFcWlpKd9///18xx138GeffcaZmZn8+eef\nc05ODhcWFjIzc0FBAQ8MDPCkSZP40Ucf5XHjxnFJSQmnpKTwlStX+Nq1azx+/Hi+du0a5+Tk8LVr\n1zg3N5f7+vr47rvv5u7ubl6+fDmPHj2a4+LieOzYsZyRkcFPPfUUd3d3S3Pp6enhp556im+77Tb+\n+c9/zszMnZ2dXFFRwYWFhdzd3c1Lly7ljIwM/va3v81/+ctf+OLFi3zhwgVOSEjgixcv8sWLF7m7\nu5uXLVvGubm5nJWVxf/zP//DzMx//OMfedKkSTxq1CiOjY3l1157TRq7ubmZrVYrP/LII9zZ2clz\n5szhmJgYvu2223jy5MkcExPDt956K0+bNo3nzZvHGRkZ0lxSU1N5yZIlbLPZ+IMPPuAPPviAZ8+e\nzUuWLOExY8ZwaWkpP/DAA3z16lXu7Ozk+Ph4ae7ezGfcuHG8YMECPnv2rN/mkpGR4TIXd/Npa2vj\niooKTkhICOhcYmNjubCwkP/pn/6Js7OzPc5npJ1nVquVt2zZwrfddhtnZGTw2rVr+ZNPPuG1a9dy\naWmppP/ChQscEREhbS9btoz/7d/+jauqqvj555/n+fPnS3MJDw/nmJgYTkhI4KKiIn7ppZf44MGD\nQp1nqamp0lw8EdTinpGRwe3t7U6x3NxczsjI4FGjRnFubi7n5uby6NGjOTc3l0eNGsXMzL29vRwT\nE8MrV65ks9nMZrOZme0F+uGHH+YpU6awzWbjxsZGDg8P5wMHDvD+/fu5oKCAmZkffPBBfvXVV9ls\nNvPSpUu5ubmZCwoK+K9//SsXFRUxs/0C8+abb3JsbCzHx8fzpUuXODo6mi9cuMC5ubl86dIlzs7O\n5osXL/Idd9zB9957L9fW1vLUqVP5xo0bzMz829/+lqurq9lisXh9ooaFhXF8fDxHRkayyWRik8nE\nRMQmk4knTZoknaTt7e2clJTE8+fPZ2bme+65h5ubm9lkMvGyZcs4IiJCOlHNZjPX19fz66+/zsnJ\nyfyLX/yC77//fn7yySe5sLCQN27cyM899xx//etf58LCQn7sscekudx00008efJkHjt2LFutVrZa\nrRwVFcVWq5VvueUWZmZ+4YUXeObMmTxr1ixOTk7mmpoar+czdepUfumll3j+/Pl+m8vp06c5LCzM\naS7u5jNmzBiuqanx6dhomcuNGzf4rrvu4smTJ3NnZ6fifEbieRYZGcnnz5/ngoICnjBhglQ/wsLC\neNSoUdJchs8nPz+fb9y4wTk5OczMnJ+fL81lsAYUFhbyO++8w9/5znc4PDyc8/LyuKKiQojzLDw8\nXJqLJ4Ja3A8dOsRTpkzhkpISrqys5MrKSh41ahQnJyfz9u3bub29ndvb2/nOO+/kgwcP8m233Sb1\nnT59Oi9atIjDwsJ4YGCAmZmvX7/OL7zwAsfExPCHH37IzCztjMEdcvr0ae7u7uZFixbxqFGjeMaM\nGRweHs6jRo3iWbNm8fHjx5001tTUsMlk4qlTp3JdXR1nZWXxnXfeyaNHj+bp06fz1KlT+dV
XX+X0\n9HQ+d+4cz5o1S+p70003sdVq5a997Wten6gbNmzgkpISnjp1qpRv8OLGzNJJysycmZnJeXl5zMxs\nsViYmaULWU5OjtOJarVaua6uThpnsN/gBfKOO+7g9PR0HhgYcBo7Ozub//rXv3J6eroUy8zM5IGB\nAU5JSZFi27dv59GjR/PEiROZmb2eT2ZmJvf393N+fr7f5jI4n9TUVKeY3HwmTJjA2dnZPHHixIDO\nZXA+g3NRms9IPM8cj8u//uu/SvENGzZwdHQ0/+lPf5JiN998M2/cuJE3bNjAqampTscmLy/PZS65\nublS34KCAj506BAvXbpUiPPMZDKxWoJa3JntBfkPf/gD7927l/ft28elpaXc2Njo1Kazs5PPnDnD\n5eXlUuzq1at848YNPnLkiFPb8+fP81tvvcULFy7k7373u0470JG+vj4+fvw4t7S08B//+EeXPI60\nt7dLvwadPHmSd+/ezfv27eO9e/fyiRMnmJl5zpw5vG7dOj579qzUb+rUqfzEE09wcXGxFPPmRO3s\n7OTY2FheuXIlX758WfYkZWbesmULR0dH83/913/xM888wytWrOC0tDT+4Q9/yP/wD/8g5SsqKuIX\nXniB77nnHk5JSeE33niD77zzTt6yZQvPmDGD33zzTb7vvvukuUyePFnqu23bNv6Xf/kXp7k8+eST\n/Lvf/Y5/9atfOe2vSZMmcVpaGjOz1/PZsmULz5kzhydNmuS3uZw5c4YffvhhvvPOO510D87njTfe\nkGL5+fl86NAhTktLC+hcmO2FISsri5lZcT4j5TxzPC5PP/00//KXv5TOs0Ha2tp47ty5vHDhQmk+\ncXFx/Oyzz0qvc+fO8ZYtW/juu+/me++9V5rL7NmzXeYyffp0bmho4D179ghxnhmquPuTX//617xm\nzZqAjHXx4kVetWqV5LnFxcXx7bffzsuWLXPyx+QKorsT9eOPP+YHH3yQ33zzTZ4xYwZHRUW5nKTM\n9l//7r33Xv72t7/NBQUFnJuby0lJSfyTn/zE6Z7Ge++9x/fccw+Xl5dzR0cHFxcXc2RkJEdGRnJ0\ndDTPnDlT8ve+973vcUJCgjSXjIwMXrZsGb/xxhvc29sr5WxtbeWamhqn2NNPP8379u1z2Udq57N3\n715OTU3161xWrVrFBw8e5I8++oiZmRsbG3n9+vW8adMmp1hxcTEfOHDAZS5tbW3SXCwWi9u57Nu3\nT9VcrFar01xiYmI4MzOTs7OzecyYMYrzUTrP1BREd8flmWeecTnP5syZ4/fzbNWqVdJcDh8+zBs2\nbODf/va3Un61scFjk5CQ4DTfwbb//u//7jSXb3zjG05zOXz4MK9YsYLz8/N9mktmZiY/8sgjkvXc\n19fHP/jBD3jmzJn8+OOPc09PjxRLS0vjlStXOt03kTvPEhMTXc5Fd4R0cReFn/70py6xV1991atY\nX18f//nPf/Yqn7u2amKtra38xBNP8JUrV5iZefPmzTx16lS+6667eOLEifyrX/1Kio0ZM0aKDZKZ\nmclvv/222wvB4HxaW1v5xRdfdGn34osvSmMPxtauXevxwiLX98iRI/yf//mfzMz8+9//ntevX8/l\n5eVssVi4qKiIV61axRaLhWfNmsUxMTGckpIixZ5//nnOz8/nJ554gpnlLwIHDx7kH/3oR7xp0ybp\nhqNjO7mY3EVleLu33nqLjxw54hJ/++23nY6V4yfNQRYvXuxTrK+vjx988EFVfb0ZW027w4cP84QJ\nE6QCvW3bNjabzZycnMwzZ87kmpoaxdizzz4rxRxzJicnu+Qc3tbd2HLtNmzYwA0NDU5jPPbYY9KF\n9PPPP+enn36a582bx7feeitfuHCBmZkrKyv58ccf59TUVP7BD37ACxYskGI/+clP+KmnnuIFCxa4\nXAS6u7u5r6+Pn3zySS4uLubvf//73NPTI3s8HEFxDwB
y1lAgYr72Hyzat9xyi1S0B1csFRQUSCuW\nkpKSXGKbNm3izZs38+jRo3n+/PmKF4JAxFavXs0Wi4W/9rWvORXsyMhIXrt2Lff19UmrrbKysvjK\nlSuck5MjxVavXs3Tp0936i93EdA79vzzz3NKSgpPnDjR6QKUkZHB8fHxnJWVJa0Iu/nmmzkxMZET\nExP9FouMjOSEhASnmNacN910k7SSbbCYJiUlScV0cHVbQUGBtLpNKcZs978Hb5Yq5Zw+fbrU39t2\ncheW8ePH81133cU1NTVSwT5y5Ii0YoqZpZV5mZmZzGy3YgZjWVlZ0uo/pYvAkSNH+JlnnpFyKoHi\nrhODK3scX6NHj+bRo0czEekaCwsLcxnHsZ27tkqx4eNkZWVxREQEt7e387Rp0zgpKYmZh2469fb2\nclRUlLRiaTB233338a233irdlFK6EAQiJrdkltn+xhp+Y8zx38H/D77p8vLyFC8CeseY7UVgcLnt\nYLygoIAfeughnjRpktOKsDlz5vCmTZtUxV566SVVMce+SUlJnJaWxsXFxfz73//e57GHx2w2G9ts\nNqmY5uXlcWdnJ2dkZEjnWl5eHl+8eJFzcnIUY4Pnp+ONTnc5B/81m81et5O7sMgtr2a2r8wb/OA0\nuDLvwQcf5BdffJGLioqkWGZmprRaT+kiMEh+fr7HmoTirhMJCQn84YcfSit82tvbedy4cfyb3/yG\nx48f77fYYHzcuHG6jZ2ens7t7e3S6qTe3l4eO3YsP/LII06rN+655x7+5je/yWFhYVKsv7+fx4wZ\n4xRzdyEIRExuySwz84wZM6TiPrjaasaMGdzV1cWFhYVSzGw2c3d3NxcWFipeBPSOuWtz/fp13rhx\nI0dHR0srwlJTU3njxo1cXFzst5jJZJLG1ivn4PrzCxcuSEU0NTWVTSYTjxo1SlrdlpqayqmpqR5j\nzPZVOzk5OR5z5uXlcUdHh9PFQW07uQuL3PJqZvvfA9x66608adIkaWXexIkTOSEhgZOTk6VYZGQk\np6en8/HjxxUvAszstGRbCRR3nXj00Uf58OHDsjHHVT56xwbjjqsLtOa0Wq3c0tLiFPvb3/7GCxcu\ndCranZ2dfPr0aZeVRlar1cXDl7sQBCImt2SW2b6SYfinoatXr/L58+elexvM9oL/ySef8J///GfF\ni4DeMWb7J8/Bou4Y7+7u5pycHJcVYadOnfJ7TM+cN998s8syZWbmK1euOF3kmO33Av73f//XY2zi\nxImckpLiMefgxSElJcXrdnIXFrnl1SaTSVpe3dPTI63MO3PmDDOzU+wvf/kL/+M//qPHi4BjTk+g\nuAMXBpeeDkdu6ana/nIXgkDElJbMOhZxd1y9elU2Nry/3jFmeyGU0+jYVm5FWCBi/srJLF+0taI2\np5Z2w5dXy72HPOHpIuBNzqA/OAwAAID+BPXBYQAAAPwDijsAAIQgKO4AABCCoLgDAEAIguIOAAAh\nyP8DvbAmNCmDe9sAAAAASUVORK5CYII=\n", "text": [ "" ] } ], "prompt_number": 47 }, { "cell_type": "code", "collapsed": false, "input": [ "svc = SVC(probability=True)\n", "gammas = [1e-4, 3e-4, 1e-3, 3e-3, 1e-2, 3e-2, 1e-1, 3e-1, 1., 3., 10.]\n", "gs = GridSearchCV(svc, {'gamma': gammas}, scoring = 'accuracy', cv = 10, n_jobs=-1)\n", "gs.fit(train_selected_X, train_y)\n", "print gs.best_params_\n", "print gs.best_score_" ], "language": "python", "metadata": {}, "outputs": [ { 
"output_type": "stream", "stream": "stdout", "text": [ "{'gamma': 0.03}\n", "0.97\n" ] } ], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": [ "svc = SVC(probability=True, **gs.best_params_)\n", "svc.fit(train_selected_X, train_y)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 56, "text": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.03,\n", " kernel='rbf', max_iter=-1, probability=True, random_state=None,\n", " shrinking=True, tol=0.001, verbose=False)" ] } ], "prompt_number": 56 }, { "cell_type": "code", "collapsed": false, "input": [ "test_yhat = svc.predict(test_selected_X)\n", "print test_yhat.shape" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(9000,)\n" ] } ], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [ "test_yhat_df = pd.DataFrame(dict(Id = np.arange(1, test_yhat.shape[0]+1), Solution=test_yhat))\n", "test_yhat_df.to_csv('submission/submission1.csv', header = True, index=False)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 62 }, { "cell_type": "code", "collapsed": false, "input": [ "forest = ExtraTreesClassifier(n_estimators=50)\n", "max_features_choices = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]\n", "gs = GridSearchCV(forest, {'max_features': max_features_choices}, scoring = 'accuracy', cv = 10, n_jobs=-1)\n", "gs.fit(train_selected_X, train_y)\n", "print gs.best_params_\n", "print gs.best_score_" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "{'max_features': 0.1}\n", "0.966\n" ] } ], "prompt_number": 52 }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The cross validation result is very close to the evaluation score in public board, which implies the assumpiton of iid is quite solid, and thus confirms that it is most 
likely to be \"simulated\" data**" ] }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }