{ "cells": [ { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "# Chapter 4 - Classification" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "- [Lab 4.6.1 The Stock Market Data](#lab-4.6.1)\n", "- [Lab 4.6.2 Logistic Regression](#lab-4.6.2)\n", "- [Lab 4.6.3 Linear Discriminant Analysis](#lab-4.6.3)\n", "- [Lab 4.6.4 Quadratic Discriminant Analysis](#lab-4.6.4)\n", "- [Lab 4.6.5 K-Nearest Neighbors](#lab-4.6.5)\n", "- [Lab 4.6.6 An Application to Caravan Insurance Data](#lab-4.6.6)" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Imports and Configurations" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# Standard imports\n", "import warnings\n", "\n", "# Use rpy2 for loading R datasets\n", "from rpy2.robjects.packages import importr\n", "from rpy2.robjects.packages import data as rdata\n", "from rpy2.robjects import pandas2ri\n", "\n", "# Math and data processing\n", "import numpy as np\n", "import scipy as sp\n", "import pandas as pd\n", "\n", "# StatsModels\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "\n", "# scikit-learn\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.preprocessing import scale\n", "from sklearn.metrics import confusion_matrix, classification_report\n", "\n", "# Visualization\n", "from IPython.display import display\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", 
"import seaborn as sns\n", "%matplotlib inline\n", "mpl.style.use('ggplot')\n", "import statsmodels.graphics.api as smg" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Lab 4.6.1 The Stock Market Data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# Import Smarket dataset from R package ISLR\n", "islr = importr('ISLR')\n", "smarket_rdf = rdata(islr).fetch('Smarket')['Smarket']\n", "smarket = pandas2ri.ri2py(smarket_rdf)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YearLag1Lag2Lag3Lag4Lag5VolumeTodayDirection
12001.00.381-0.192-2.624-1.0555.0101.19130.959Up
22001.00.9590.381-0.192-2.624-1.0551.29651.032Up
32001.01.0320.9590.381-0.192-2.6241.4112-0.623Down
42001.0-0.6231.0320.9590.381-0.1921.27600.614Up
52001.00.614-0.6231.0320.9590.3811.20570.213Up
\n", "
" ], "text/plain": [ " Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction\n", "1 2001.0 0.381 -0.192 -2.624 -1.055 5.010 1.1913 0.959 Up\n", "2 2001.0 0.959 0.381 -0.192 -2.624 -1.055 1.2965 1.032 Up\n", "3 2001.0 1.032 0.959 0.381 -0.192 -2.624 1.4112 -0.623 Down\n", "4 2001.0 -0.623 1.032 0.959 0.381 -0.192 1.2760 0.614 Up\n", "5 2001.0 0.614 -0.623 1.032 0.959 0.381 1.2057 0.213 Up" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 1250 entries, 1 to 1250\n", "Data columns (total 9 columns):\n", "Year 1250 non-null float64\n", "Lag1 1250 non-null float64\n", "Lag2 1250 non-null float64\n", "Lag3 1250 non-null float64\n", "Lag4 1250 non-null float64\n", "Lag5 1250 non-null float64\n", "Volume 1250 non-null float64\n", "Today 1250 non-null float64\n", "Direction 1250 non-null object\n", "dtypes: float64(8), object(1)\n", "memory usage: 97.7+ KB\n" ] }, { "data": { "text/plain": [ "None" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YearLag1Lag2Lag3Lag4Lag5VolumeToday
count1250.0000001250.0000001250.0000001250.0000001250.0000001250.000001250.0000001250.000000
mean2003.0160000.0038340.0039190.0017160.0016360.005611.4783050.003138
std1.4090181.1362991.1362801.1387031.1387741.147550.3603571.136334
min2001.000000-4.922000-4.922000-4.922000-4.922000-4.922000.356070-4.922000
25%2002.000000-0.639500-0.639500-0.640000-0.640000-0.640001.257400-0.639500
50%2003.0000000.0390000.0390000.0385000.0385000.038501.4229500.038500
75%2004.0000000.5967500.5967500.5967500.5967500.597001.6416750.596750
max2005.0000005.7330005.7330005.7330005.7330005.733003.1524705.733000
\n", "
" ], "text/plain": [ " Year Lag1 Lag2 Lag3 Lag4 \\\n", "count 1250.000000 1250.000000 1250.000000 1250.000000 1250.000000 \n", "mean 2003.016000 0.003834 0.003919 0.001716 0.001636 \n", "std 1.409018 1.136299 1.136280 1.138703 1.138774 \n", "min 2001.000000 -4.922000 -4.922000 -4.922000 -4.922000 \n", "25% 2002.000000 -0.639500 -0.639500 -0.640000 -0.640000 \n", "50% 2003.000000 0.039000 0.039000 0.038500 0.038500 \n", "75% 2004.000000 0.596750 0.596750 0.596750 0.596750 \n", "max 2005.000000 5.733000 5.733000 5.733000 5.733000 \n", "\n", " Lag5 Volume Today \n", "count 1250.00000 1250.000000 1250.000000 \n", "mean 0.00561 1.478305 0.003138 \n", "std 1.14755 0.360357 1.136334 \n", "min -4.92200 0.356070 -4.922000 \n", "25% -0.64000 1.257400 -0.639500 \n", "50% 0.03850 1.422950 0.038500 \n", "75% 0.59700 1.641675 0.596750 \n", "max 5.73300 3.152470 5.733000 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Display dataset structures and statistics\n", "display(smarket.head())\n", "# DataFrame.info() prints its report to stdout and returns None, so it is called\n", "# directly; wrapping it in display() only adds a stray 'None' output.\n", "smarket.info()\n", "display(smarket.describe())" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YearLag1Lag2Lag3Lag4Lag5VolumeToday
Year1.0000000.0297000.0305960.0331950.0356890.0297880.5390060.030095
Lag10.0297001.000000-0.026294-0.010803-0.002986-0.0056750.040910-0.026155
Lag20.030596-0.0262941.000000-0.025897-0.010854-0.003558-0.043383-0.010250
Lag30.033195-0.010803-0.0258971.000000-0.024051-0.018808-0.041824-0.002448
Lag40.035689-0.002986-0.010854-0.0240511.000000-0.027084-0.048414-0.006900
Lag50.029788-0.005675-0.003558-0.018808-0.0270841.000000-0.022002-0.034860
Volume0.5390060.040910-0.043383-0.041824-0.048414-0.0220021.0000000.014592
Today0.030095-0.026155-0.010250-0.002448-0.006900-0.0348600.0145921.000000
\n", "
" ], "text/plain": [ " Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume \\\n", "Year 1.000000 0.029700 0.030596 0.033195 0.035689 0.029788 0.539006 \n", "Lag1 0.029700 1.000000 -0.026294 -0.010803 -0.002986 -0.005675 0.040910 \n", "Lag2 0.030596 -0.026294 1.000000 -0.025897 -0.010854 -0.003558 -0.043383 \n", "Lag3 0.033195 -0.010803 -0.025897 1.000000 -0.024051 -0.018808 -0.041824 \n", "Lag4 0.035689 -0.002986 -0.010854 -0.024051 1.000000 -0.027084 -0.048414 \n", "Lag5 0.029788 -0.005675 -0.003558 -0.018808 -0.027084 1.000000 -0.022002 \n", "Volume 0.539006 0.040910 -0.043383 -0.041824 -0.048414 -0.022002 1.000000 \n", "Today 0.030095 -0.026155 -0.010250 -0.002448 -0.006900 -0.034860 0.014592 \n", "\n", " Today \n", "Year 0.030095 \n", "Lag1 -0.026155 \n", "Lag2 -0.010250 \n", "Lag3 -0.002448 \n", "Lag4 -0.006900 \n", "Lag5 -0.034860 \n", "Volume 0.014592 \n", "Today 1.000000 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Correlation matrix\n", "display(smarket.corr())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA3YAAAGDCAYAAABnbYIOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X98zfX///H72S/7wZJ3sUzIYqwUM0JD0/JrpCL58VFJ\nrRVSRPIjv0KapIiItx+pd0jLOz8+FSpTSStv8o7mt7f5nfCendlm5/uHr/Mxw36enfM853a9XFzm\nvF7nx+P5fD3P63Xu5/XjWGw2m00AAAAAAGN5ObsAAAAAAEDJEOwAAAAAwHAEOwAAAAAwHMEOAAAA\nAAxHsAMAAAAAwxHsAAAAAMBwBDsAcAE5OTmaM2eO2rZtqzvvvFPNmjXToEGDdODAgXz3nTlzppo3\nb67mzZtr0aJF+eb37t1b4eHh9n/16tVTs2bNNHLkSJ08efKaNcyePTvP48LDw/Xss8/a5x89elTx\n8fFq2LCh7r//fn3++ed5Hl/Q/NJy6NAhhYeHKzU11SHPb7LXXntNkydPzjPtzJkzGjlypKKjo9Wk\nSRP1799fR44csc8/fvx4vuUeHh6uU6dO2e8zY8YMRUdHKzIyUsOHD1dGRkaha0pNTVV4eLgOHTpU\n8gYCAK7Jx9kFAACkqVOnau3atRoxYoTCwsJ06tQpvffee+rVq5dWr16t4OBgSdLatWs1b948ffDB\nBzp+/LhefPFFNWnSRHXr1s3zfD169FC/fv0kSbm5uTpy5IheffVVvfzyy1qwYMFVa9i1a5cee+wx\nDRgwwD6tXLly9v/369dPlStX1tKlS/Xrr79q5MiRCg0NVePGjQs1H471/vvva8mSJXrqqafyTH/1\n1Vd19OhRvffee/L399ebb76p5557TsuXL5e3t7d2796t4OBgrV69Os/jbrzxRknS4sWL9dFHHykx\nMVEVKlTQsGHDNGHCBE2YMKHM2gYAKBjBDgBcwPLlyzV8+HC1atVKklStWjVNmzZNzZs319q1a/XI\nI49Ikv744w9VrVpVkZGRstlsKleunA4ePJgv2AUEBOjmm2+2365SpYr69eunwYMH68yZM7rhhhvy\n1bBnzx5169Ytz+Mu+fnnn7Vz507NnTtXN954o2rXrq1//etf+vDDD9W4ceMC58NxTp06peHDh2vL\nli265ZZb8s1bt26dlixZorvvvluSNHHiRLVs2VKpqamqV6+edu/erdtuu+2qy12S5s+fr+eee07R\n0dGSpLFjx6pPnz565ZVX7F84AACcj0MxAcAFWCwWbdq0STk5OfZpAQEB+vzzz/XAAw/Yp91zzz1K\nTU3V6tWrNXPmTAUFBalp06aFeg0fHx9ZLBb5+vrmm5ebm6t9+/bptttuu+pjt2zZotq1a9v34khS\nVFSUtmzZUqj5l7PZbIqJicl3GGlCQoJGjx4tSTp58qReeeUVNWvWTJGRkRo4cKCOHz9+1drCw8P1\nzTff2G//9NNPCg8P17lz5yRJrVu31rJly9SzZ0/ddddd6tKli/bv36/JkycrKipK0dHRWrJkif3x\n6enpGjVqlJo0aaJ77rlHL7zwgo4dO3bV1750/9dff10tW7bU3Xffrb59+2rv3r2SpGnTpunhhx/O\nc/8vv/xSjRs3VlZWliRp3rx5iomJUcOGDdWjRw/961//st+3d+/eGjNmjNq3b69mzZppz549+V5/\nz5498vHx0eeff65bb701zzx/f3/NmTNHERER+R539uxZSbIHu6s5ceKEDh06pCZNmtinNWzYUDab\nTb/99ttVH3Pq1Cn1799fDRs2VGxsrH7++ec88/fv36+EhARFRUXpzjvvVMeOHe3L74MPPtB9990n\nm81mv/+mTZvUoEEDnTt3Trt371bv3r3VsGFDNW3aVCNGjCjSYaEA4M4IdgDgAp566il99tlnatWq\nlYYNG6akpCSdPHlSNWrUUIUKFez3i4qKUvv27TVo0CB98cUXW
rBgQYF7TWw2m/bs2aPZs2fr3nvv\nVWBgYL77pKWlyWq1asWKFWrdurUeeOABTZ061R4+jh07psqVK+d5zE033aTjx4/LZrMVOP9yFotF\ncXFxWrNmjX3a2bNntXHjRnXq1Ek5OTl68sknlZaWpg8++EALFy7UsWPH1K9fv3zPVVhvvfWWnn76\naS1fvlzp6enq1q2bcnJytHTpUj344IMaP368/Zyy1157Tfv379e8efP04YcfymKx6Omnn84Tui83\ncOBA/fTTT5o6daqWLl2qcuXKqW/fvrJarerUqZN+//33POdKrl69Wg888ID8/Pz0ySefaPHixRoz\nZoySkpLUqlUrPfHEE3nOR/v00081cuRIzZ49W2FhYflev3HjxpoxY0a+vXWSFBgYqFatWsnPz88+\nbcGCBapQoYLq168v6WIwPHbsmLp166bo6Gg9++yz2rdvnyTZA+3ly9bX11cVK1bU0aNHr9kfJ0+e\n1Mcff6xx48Zp7ty59nk2m00JCQkKCgrS0qVLtWLFCtWpU0evvvqqsrKy1KlTJx07dizPFwKrVq3S\n/fffr6CgIA0ePFhVq1bVihUrNHv2bP3444+aM2fOVesAAE9DsAMAFxAfH693331XtWvX1sqVKzVs\n2DC1bNlS48eP14ULFyRd/FCcmJio7777TtWqVZOXl5eqVaumrKysfKHjww8/VMOGDdWwYUPVr19f\nHTt21A033KDXX3/9qq9/aU/QjTfeqFmzZmnQoEFKSkqy399qteY5306SPSxkZWUVOP9KHTt21JYt\nW+zB4euvv9bNN9+sRo0aKTk5Wfv379dbb72lO++8U/Xr19e0adP0+++/64cffihSv17+eq1bt1bt\n2rUVGxsri8WiYcOGqVatWnr66aeVnZ2tAwcO6D//+Y9WrVqlKVOmqH79+qpTp44SExN16NAhbdy4\nMd/zpqamauPGjZo4caKioqIUHh6uKVOmKCMjQ1988YXCwsIUERFhD7EZGRn67rvv1KlTJ0kXL1gz\naNAgtWrVSjVr1lRCQoIaNWqkjz76yP4aTZs21b333qu77rqrWG2/3KpVq7RgwQINGTLEHvD37Nmj\n//73vxo6dKhmzJghi8Wi3r1768yZM8rMzJSkPMHw0u2rLdc9e/Zo8+bNGj9+vOrVq6fmzZvr5Zdf\nts+3Wq3q2rWrRo0apVq1aiksLExPPfWU/vrrL/35558KCQlR48aN7ef7ZWdn66uvvtKDDz4o6eKF\ncypWrKiqVavq7rvv1qxZs+zzAMDTcY4dALiItm3bqm3btjp37pw2b96szz//XIsXL1aVKlUUHx+v\nBQsWaNmyZfrHP/6h8uXLq0uXLho9erQiIiI0d+5cJScny8vr4vd1nTt3Vnx8vCTJ29tbf/vb3xQQ\nEHDN177vvvu0adMm+6GU4eHhslgsGjRokEaMGCF/f3/9+eefeR6TlZUlLy8vlStXrsD5V6pbt65q\n166tNWvW6Mknn9Tq1avVoUMHWSwW7d69W1WrVlWVKlXs9w8JCVFoaKh27dqlGjVqFLlvq1WrZv+/\nv7+/qlatKm9vb0n/d4GYrKws7d69W5LUrl27PI+3Wq3au3ev7rvvvjzTd+/eLV9fX9155532aYGB\ngYqIiNCuXbskSZ06ddKKFSuUkJCgb775RuXLl9c999yjc+fO6fDhwxo5cqRee+21PP12eZC68vDK\n4vrss880cuRI9enTR4899ph9+vr16+Xt7W3vh3feeUetWrXSl19+qXr16tlrulxWVtZVx1Nqaqr8\n/PxUu3Zt+7TLA2lgYKB69eqllStX6rffftP+/fv1+++/S5L9C4zOnTvr7bff1vDhw+1j+t5775Uk\nDRgwQJMnT9Znn32m6OhoPfDAA2rfvn1pdA8AGI9gBwBOtnPnTi1btkyjRo2SJAUFBSkmJkYxMTF6\n6aWXlJycrPj4eP3zn/9Ut
27d7BdKmTJliuLj4/Xtt9+qdevW9lAnScHBwUUOQJefHydJt99+uy5c\nuKATJ04oJCQkz7lf0sXzry4dolfQ/Kvp2LGj/vd//1edO3fWpk2bNGTIEEm6ahCULp4HmJubW2A7\nLgWEy/n45N3cWSyWaz7W19f3qj/VcLULzhSm1ri4OCUmJmrfvn1as2aNOnToIC8vL3udb7zxRr5z\n4Pz9/a/6/+KaP3++3njjDT377LMaNGhQnnlXHppbrlw5VatWTceOHVNMTIyki8uyUqVKki7uRTt9\n+nSe4H2JxWKRzWaTzWaz9/Hl53SeO3dO3bt3l5+fnx544AHFxMQoMDBQjz/+uP0+bdu21bhx45SS\nkqJVq1apQ4cO9uX35JNPql27dlq3bp2Sk5M1dOhQJScna9KkSSXuIwAwHYdiAoCT5ebmavHixdq8\neXO+eeXLl7cHriv3irVo0UIdOnTQ2bNn1aJFixLVMH/+fLVv3z7POWzbt29XYGCgQkJC1KBBA6Wm\npurMmTP2+b/88osaNmwoSQXOv5qOHTtq27Zt+vTTT1WzZk17YA0LC9Phw4fzXLDk6NGjOnLkyFXP\nMfP19bVfKEWS/vOf/xSjBy6qVauWsrOzlZGRoRo1aqhGjRq66aabNGnSJO3fvz/f/cPCwpSdnZ3n\nQiIZGRnauXOnatWqJeniFUkbN26sVatWKTk5WR07dpR0MXzffPPNOnbsmP21atSooYULFyo5ObnY\nbbjSp59+qjfeeEMDBw7MF+qOHTumRo0a5Rl76enp2r9/v8LCwnTzzTcrNDRUKSkp9vlbtmyRl5dX\nnr2Ul9SpU0fZ2dnasWOHfdq///1v+/83btyoffv26eOPP1ZCQoJiYmLsY/rS2CtfvrxiYmL01Vdf\nKTk52X7Y6vnz5zVhwgRlZ2erV69eev/99/Xaa69p1apVpdBLAGA+gh0AOFlERITatGmjgQMHatmy\nZTp48KB27NihefPm6YsvvlCfPn0kSY899pj++c9/asmSJTp48KDmzZunL7/8UnfccYdef/11bd++\nvdg1tGzZUocPH9bEiRN14MABrV27VomJiYqPj5ePj4+ioqJ0++23a8iQIUpNTdXSpUv1xRdfqHfv\n3pJU4PyrCQ0N1d13362ZM2faw44kNW/eXOHh4Ro8eLC2b9+u3377TYMGDVLNmjXVrFmzfM9Tv359\nffzxx9qzZ49+/PFH/f3vfy92P9SqVUutW7fW0KFDlZKSoj179mjIkCHavn37VUNlzZo11aZNG40Y\nMUIpKSn6448/9Morr8jHx0dxcXH2+3Xq1Enz5s1TSEhInkMTn376ac2cOVOrV6/WwYMHNWPGDC1Z\nssQeCkvqxIkTev3119WxY0c9+uijOnHihP1fVlaWqlSpYh8/W7du1c6dOzV48GBVrlxZbdq0kXTx\nypzvvvuuvvvuO23btk2jR4/WI488kueiPpf3X6tWrTRixAht27ZNKSkpSkxMtM+vUqWKsrOztXr1\naqWlpenrr7/WxIkTJeU93LNz585aunSpbrjhBjVo0EDSxT2Jv/76q8aNG6fU1FTt2bNHX3/9tb0/\ns7Ky7O0CAE9EsAMAF/DWW2+pd+/eWrBggTp16qSePXsqOTlZc+fOte/1euihhzRs2DDNnj1bHTp0\n0MqVKzVjxgz94x//UIMGDa55+fnCCAsL05w5c7Rt2zZ17txZ48aNU8+ePZWQkCBJ8vLy0owZM5Sd\nna2uXbtqzpw5mjhxoho1alSo+dfSqVMnZWRk5Al2FotFM2fOVKVKldS7d2/16dNHISEhWrBgQb6L\neEgXr2KZlZWlzp07a/LkyRo8eHCx+0GSJk+erDvvvFP9+vVT165dlZmZab+S5NVMnDhR9evX13PP\nPafu3bvr/PnzWrx4sSpWrGi/T9u2bZWTk5Mn7EnS448/rqeeekqJiYmKi4vTV199pXfffVe
RkZEl\nasMl3377raxWq1auXKno6Og8/77//ntJF3+SISIiQgkJCerRo4e8vb01b968PIc/PvbYYxo6dKj6\n9u2rBg0aaPjw4dd8zalTpyosLExPPPGEBg8enCfcN2jQQC+99JLeeustxcXF6b333tMrr7yiG264\nIc+evRYtWiggIMC+t+6SadOmycvLSz179lTXrl3l5+enKVOmSLq4JzE6OvqqP7EBAJ7AYivutaMB\nAC7j8nOaANOdOXNG0dHRWrlyZbEulgMAnoiLpwCAGyDUwR1YrVZt2LBBK1euVKNGjQh1AFAE7LED\nAAAuISsrS9HR0fbfUyytcw0BwBMQ7AAAAADAcFw8BQAAAAAMR7ADAAAAAMMR7AAAAADAcAQ7AAAA\nADAcwQ4AAAAADEewAwAAAADDEewAAAAAwHAEOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAMBw\nbhXsLBZLqf4ryP/8z//oxx9/zDPt9ddf17Jly/Ld99ChQ+rWrVuptRUAAAAALnGrYFfWHn30Ua1Y\nscJ+OysrS998843i4uKcWBUAAAAAT+Pj7AJM1q5dO7399tuyWq0KCAjQunXrdO+992r//v0aP368\nvL29Va5cOY0fPz7P41q3bq01a9aoXLlymjJlimrVqqXQ0FDNmTNHvr6+Onr0qLp3765NmzZp586d\nevzxx9WzZ09t3rxZb7/9try9vXXrrbdq3Lhx8vX1dVLrAQAAALgK9tiVQLly5RQbG6uvv/5akvTZ\nZ5+pe/fuGjlypF577TUtXrxYPXr00BtvvFGo5zt69KimT5+uMWPGaNasWXrzzTf1wQcfaMmSJbLZ\nbBo1apRmzJihxYsXq0qVKkpKSnJk8wAAAAAYgmBXQpcOxzx27JjOnj2riIgIHT9+XPXq1ZMkNW7c\nWLt27brm4202m/3/tWvXlq+vrypUqKDq1avLz89PN9xwg86fP69Tp07p+PHjevHFF9W7d299//33\nSktLc3j7AAAAALg+DsUsofDwcJ07d06LFi1Sly5dJEmVK1fWzp07VbduXf3888+qWbNmnsf4+fnp\n+PHjqlatmnbu3KmwsDBJuu4FW2688UaFhIRo5syZqlChgtatW6fAwECHtQsAAACAOQh2paBLly5K\nTEzUN998I+nilTHHjx8vm80mb29vTZw4Mc/9n376acXHxys0NFTBwcGFeg0vLy+NGDFC8fHxstls\nCgoK0ptvvlnqbQEAAADckcViyXO0nLux2Ny5dQAAAAAg9w92nGMHAAAAAIYj2AEAAACA4Qh2AAAA\nAGA4gh0AAAAAGI5gBwAAAACGI9gBAAAAgOEIdgAAAABgOIIdAAAAABiOYAcAAAAAhiPYAQAAAIDh\nfBz1xLm5uXr//fd15MgRSdIzzzyj6tWr2+enpKRo+fLl8vLyUkxMjGJjYx1VCgAAAAC4NYftsUtJ\nSZEkjR8/Xt27d9cnn3xin5eTk6OFCxdqxIgRGjt2rNatW6fTp087qhQAAAAAcGsOC3ZNmjTRs88+\nK0k6ceKEAgMD7fPS0tIUEhKi8uXLy8fHR+Hh4dqxY4ejSgEAAAAAt+bQc+y8vb01Y8YMzZ8/Xy1a\ntLBPt1qteYJeQECAMjIyHFkKAAAAALgth188pX///nrnnXc0e/ZsZWZmSroY5C79X7oY9IKCghxd\nCgAAAAC4JYcFuw0bNigpKUmS5OfnJ4vFIi+viy8XGhqqI0eOKD09XTk5OdqxY4fq1KnjqFIAAAAA\nwK1ZbDabzRFPnJmZqZkzZ+rMmTPKycnRQw89pPPnzyszM1OxsbH2q2Lm5uYqJiZG7dq1c0QZAAAA\nACCLxSIHRR+X4LBgBwAAAACuwt2DHT9QDgAAAACGI9gBAAAAgOEIdgAAAABgOIIdAAAAABiOYAcA\nAAAAhiPYAQAAAIDhCHYAAAAAYDiCHQAAAAAYjmAHAAA
AAIYj2AEAAACA4Qh2AAAAAGA4gh0AAAAA\nGI5gBwAAAACGI9gBAAAAgOEIdgAAAABgOIIdAAAAABiOYAcAAAAAhiPYAQAAAIDhCHYAAAAAYDiC\nHQAAAAAYjmAHAAAAAIYj2AEAAACA4Qh2AAAAAGA4gh0AAAAAlIDFYnF2CQQ7AAAAADAdwQ4AAAAA\nDEewAwAAAADDEewAAAAAwHAEOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAMBwBDsAAAAAMBzB\nDgAAAAAMR7ADAAAAAMMR7AAAAADAcAQ7AAAAAHAhFoulyI8h2AEAAACA4Qh2AFxScb6pAgAAKEuu\n9HmFYAcAAAAAhiPYAQAAAPAY19vL5kp74IqKYAcAAAAAhvNxxJPm5ORo1qxZOnHihLKzs9WlSxdF\nRUXZ569cuVLr169XcHCwJCk+Pl5Vq1Z1RCkAAAAA4PYcEuySk5NVoUIFDRgwQOnp6RoyZEieYLd3\n7171799ftWrVcsTLAwAAAIBHcUiwa9asmZo2bSpJstls8vb2zjN/3759SkpK0unTpxUZGamHH37Y\nEWUAAAAAgEdwSLDz9/eXJFmtVk2dOlXdu3fPM7958+Zq27atAgMDlZiYqF9++UWNGjVyRCkAAAAA\n4PYcdvGUkydPauzYsWrRooWio6Pt0202m+Li4hQcHCwfHx9FRkZq3759jioDAAAAANyeQ4Ld6dOn\nNWHCBPXq1UutW7fOM89qtWrw4MHKzMyUzWbT9u3bOdcOAAAAgMtz5Z9DsNhsNltpP+n8+fP1ww8/\nKDQ01D7t/vvv1/nz5xUbG6sNGzZozZo18vHxUf369dWtW7fSLgGA4SwWixywegIAAB7q0meL633G\nKOjzx5XPUZjnLEmtRXqMI4IdAJQUwQ4AAJQmdw92/EA5AAAAABiOYAcAAAAAhiPYATCCK5+sDAAA\n4GwEOwAAAAAwHMEOAAAAQLFxVI1rINgBAAAAgAsoSUgm2AEAAACA4Qh2AAAAAFCGHHH4KsEOAAAA\nAAxHsAMAAADcABcxKX0m9SnBDgAAAAAMR7ADAAAAAMMR7OAUJu3WBgAAAByhND8TE+wAAAAAwHAE\nOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAHBxO8MR7AAAAADAcAQ7AC6Nbw8BAAAKRrADAAAA\nAMMR7AAAAADAcAQ7AAAAADAcwQ4AAAAwBOee41oIdgAAAABwmSsDtAmBmmAHAAAAeDATQgsKRrAD\nAAAAACcqjXBNsAMAAAAAwxHsAAAAAMBwBDsAAADAjXEOnWcg2AEAAABuhCDnmQh2AAAAAGA4gh0A\np+DbRAAACsb20myXll9ZLEeCHQAAAIBSRygtWwQ7AAAAAB7NHUIowQ4AAAAADEewAwAAAOC23GFv\nXGEQ7AAAAADABRUllBLsAAAAAMBwBDsAxvGUQyoAACgJV91eumpdpiPYAQAAAChzBLzS5eOIJ83J\nydGsWbN04sQJZWdnq0uXLoqKirLPT0lJ0fLly+Xl5aWYmBjFxsY6ogwAAAAA8AgOCXbJycmqUKGC\nBgwYoPT0dA0ZMsQe7HJycrRw4UJNmjRJ/v7+GjVqlKKiolSxYkVHlAIAAAAAbs8hh2I2a9ZMjz32\nmCTJZrPJ29vbPi8tLU0hISEqX768fHx8FB4erh07djiiDAAAAADwCA7ZY+fv7y9Jslqtmjp1qrp3\n726fZ7VaFRgYaL8dEBCgjIwMR5QBAAAA4AoWi0U2m83ZZaCUOSTYSdLJkyc1ZcoUtWnTRtHR0fbp\nAQEByszMtN+2Wq0KCgpyVBkAAAAA4PYccijm6dOnNWHCBPXq1UutW7fOMy80NFRHjhxRenq6cnJy\ntGPHDtWpU8cRZQAAAAD4/0y7CmVZ1Gtan1yPxeaA/bDz58/XDz/8oNDQUPu0+++/X+fPn1dsbKz9\nqpi5ubmKiYlRu3b
tSrsEuDgOAUBBY+DS/Cv/FuaxAAC4iyu3edfbBl5r23m9berVHlfSGgs7vbCf\nBUqqMJ8pSvtvQW0t7vNdt52OCHZAQfhgDoIdAAAFI9gVf3tfmPa7U7DjB8oBAAAAOJ07HRbpDAQ7\nACgiNjwAAMDVEOwAAAAAwHAEOwAAAAdg7z5Qtjz9PUewAwAAAADDEewAAAAAuCxP3xNXWAQ7AAAA\nwAOVVWAimJUNgh0AAABgGMISrkSwAwAATsEHUwAoPQQ7ACgmPpQCAFD22P5eHcEOAAAAgMMRyByr\nUMEuKytLBw8elM1mU1ZWlqNrAgDAIfhQAQBwVwUGu9TUVA0YMECTJk3SqVOnlJCQoD/++KMsagMA\nAHBZfFEAT3Wtsc97wrkKDHaLFy/WqFGjVKFCBf3tb39T//79tWDBgjIoDQAAAICzEdjMUGCwO3/+\nvKpVq2a/HRkZqQsXLji0KAAAAABA4RUY7Hx8fJSenm5P6ocPH3Z4UQDMZ/K3eybXDgAAPJNPQXd4\n5JFHNGbMGJ0+fVrTpk3Ttm3bFB8fXxa1AQAAAAAKocBg16hRI4WGhmrbtm3Kzc1V165d8xyaCaDw\nLBaLbDabs8sAAMDtsc2FMzlj/BUY7CTJ399fERER9tuHDh0i3AEAAAAoMUJ46Sgw2C1cuFBffvml\nAgMD7R1usVg0d+5chxcHAAAAAChYgcFu8+bNmj17tipUqFAW9QAAygDfjgJwZayj8jOpT0yq1Z0U\neFXMW265RUFBQWVRCwAAADwQVyMGSq7APXbt27fX6NGjdccdd8jH5//u3rVrV4cWBgCugm8egbLB\new3Xw/gArq/AYLd06VJVrFhRGRkZZVEPAAAA4PYIqtd2rb6hz66vwGCXlZWlV199tSxqAQC3wIaH\nPgCAojJxvWlize6swHPsqlWrpgMHDpRFLQAAAJxvBYcq6fhifMJVFbjH7q+//tKwYcNUuXJl+fr6\n2qdPmTLFoYUBAFBSfJsMT8d7AMXBuDFTgcGuZ8+eZVEHALi8km7o2FCiMBgn/4e+AIDCKzDYVa9e\nvSzqAADAKISOgtFHKA2Mo+ujf3BJgcGub9+++abdeOONev/99x1SENwbKx+YhPHqfCwDlCbGE1A0\nvGfMUmCwW7Jkif3/OTk5+umnn7iYCgCXwoYHAOAsbINcj6cukwKvink5Hx8f3Xvvvdq2bZuj6kEJ\ncJUmlATjB/AsJr3nL9Xq6jW7en2m8ZT+9JR2wvEK3GOXnp5u/7/NZtOePXt07tw5hxYFAJ7IHb5h\ndIc2XIs33b2LAAAVeElEQVQ7tw0ATMR6Oa8in2MXHBysPn36OKwgeDbeoGAMAABMwPYKrqZI59gB\nAACz8WGUPgDgnq4Z7FauXHndB3bs2LHUiwEAeAYTPlibUKO7oK8BoOSuGewOHjxYlnUAgLHc4UOp\nO7QB5mC8eR6WedHRZyiqawa7559/Ps/tEydO6MKFCwoJCXF4UQAAAIAzEazMwbK6qMBz7I4ePao3\n33xTf/31l3JzcxUcHKxhw4YpNDS0LOoD3AIrHDgLY899sWwdh75FcTBu4GwFBrt58+bpwQcf1H33\n3SdJ+uabbzR37lyNHj3a0bUBEBsK4HK8H8zgrsupNNrlrn2DgrnDsneHNrizAn+g/MyZM/ZQJ0kx\nMTE6e/asI2tyOfxwJAAAAABnKUweKXCP3YULF5Senq7y5ctLks6ePVvooLNr1y599NFHGjNmTJ7p\nK1eu1Pr16xUcHCxJio+PV9WqVQv1nADcizt/++fObStrV/ZlafctywqewhPHurPb7OzXh+coMNi1\nb99eI0aMULNmzSRJP/74o+Li4gp84hUrVmjDhg3y9/fPN2/v3r3q37+/atWqVYyS4U5Y2Tkfy8B5\n6HvnMbnvi1q7yW01hcl9bHLtpqLP4SjXPBTzzTff1G+//abY2Fg988wzysnJUXZ2t
vr27as2bdoU\n+MRVqlTRyy+/fNV5+/btU1JSkkaNGqWkpKTiVw84CYfnFh99B0e4NK4YXwBKivUITHXNPXb16tXT\n3//+d0lSmzZt1KVLFwUEBBT6iZs2barjx49fdV7z5s3Vtm1bBQYGKjExUb/88osaNWpUxNIBOBLf\nKAKegfc6TMOYBa7umsGuU6dO6tSpk37//XetXbtWy5cvV9OmTdW2bVvdeuutxX5Bm82muLg4BQYG\nSpIiIyO1b98+gh1cEhsPOJMrjD9H1+AKbQTgPCasA4pbowltg3sp8KqYEREReuGFFzRt2jRVrVpV\nM2bM0NixY4v9glarVYMHD1ZmZqZsNpu2b9/OuXZwOxzGAQDOx7oYgCcp8OIp9jv6+KhcuXIKDAzU\nf//73yK/0MaNG5WZmanY2Fj16NFDY8eOlY+Pj+rXr6/IyMgiPx8A18I3k66HD7WAc7A+LD76Dii+\nAoPdzp07tX79eqWkpOiuu+7So48+qoiIiEI9eeXKlTVhwgRJUnR0tH16y5Yt1bJly2KWDBQeGwjg\n+lztPeIK9bhCDSic0lpWLHOzsfyAi64Z7D7//HN9++23On/+vO6//35NnTpVFStWLMvaros3sWcp\nq+XNZcQ9W0mX57Ueb9I4ManWoijNdrlrH8E5HDWePGGcekIbgaK4ZrDbunWrunfvriZNmsjLq8BT\n8QCUEBsouCNPGNee0MZrcfQPxwOlyZTxaUqdcD3XDHajR48uyzrg5lhJwREYVwCAS9gmwNOxKw4A\nAABlwpUu6uRKtQClgWDngljRFJ879507tw0AXFFprXddYf1dUA2uUCOAkiHYeTBW4gBQehy9TjX9\n+Z3Jndp2rba4ahtdtS7AHRHsSgErLTiCu40rd2uPK6BPXdOVy4XlVPbocwCeiGAHj+Sojb4JHyZM\nqBGlz12Xu7u2yxWYdOjepVpcqSZH86S2FhV9A0/lscGON73ZTF9+16vf9LaZgD4uffSp89D3xUff\nAXAnHhvsAFPxQaTw6Kv86BMUFWMGAMxAsCsCUzZuptQJXI5x+3/oi7JDXwMobaxX4CwEuzLAGxyO\n5Grjy9XqMYEn9JkntNFR6Ds4AuOq6OgzuDqCnQcweUXkTr8hBAAAADgKwQ4ejcAHwFFYv5iHZQbA\nZAQ7AABgFAIYAOTndsGuuCt7NhJmYrnhcowHuBPGMwCgKNwu2LkCNsaexxWX+ZU1FbbGorbFFdvu\nrlypr12pFgCuh3UEUPaMC3asKByPPi5b9DfA+6A0OaovWUYA4NqMC3auzFF7RAAUzbXeY+7y3nOX\ndqB0MS4AwLN5XLDjMDMUBss9P/oEV8O4AEqHqe+lS3WbWj/gTjwu2AFXwwYJAC5ifQiJcQCYyPhg\n58wVj7us9NylHbg6Zy9fZ7++O3BGH7LcAAAwi/HBzlH4UONc9H/R0WcoCyUdZ4xT4CLeCwBKG8EO\nBWLj4zj0LeC53On9705tcTZT+tKUOgFPQrArQ6wEAaBssL4Fiof3DmAugh2AInHHjb47tglA6bly\nHeHK6wxn1+bs1wc8GcEOcCBX3sC5cm0AAAAoGoKdCyvrD96OeD1+tB0AXAPrWQBwbwQ7wFB8SAPc\nC+9pXI4f/gZQVAQ7g7ByBwAAAHA1BDsAAAAUCl8yA66LYHeFslxhldZrsZItPPoKl3PX8eCu7TId\nywUA4EgEO7gFPjABANwZ2zkABfH4YFecFSVXegRQEqwbCkYfAQBQNG4f7Nzhw4E7tKG00BcAUDis\nLwHAs7hNsPPEDZgnthkA4FxsezwLyxswh9sEOwCOwUYdAFwX62gAl3hMsGPFBwDFxzoUAADX5jHB\nDqWPD3oAAACAayDYuTGCl2diuXsulj0AAJ6LYOeB+PAHAAAAuBcfRz75rl279NFHH2nMmDF5pqek\npGj58uXy8vJSTEyMYmNjHVkGAAAAALg1hwW7F
StWaMOGDfL3988zPScnRwsXLtSkSZPk7++vUaNG\nKSoqShUrVnRUKQAAAADg1hx2KGaVKlX08ssv55uelpamkJAQlS9fXj4+PgoPD9eOHTtK/fU53LD0\n0acwFWMXcA28FwHAcRwW7Jo2bSpvb+98061WqwIDA+23AwIClJGR4agyjMCGDgAAAEBJlPnFUwIC\nApSZmWm/bbVaFRQUVNZlwM0RlgEAAOBJyjzYhYaG6siRI0pPT1dOTo527NihOnXqlHUZcADCFAAA\nAOAcDr0q5uU2btyozMxMxcbG6vHHH9eECROUm5urmJgYVapUqazKAADApVgsFtlsNmeXAQAwnEOD\nXeXKlTVhwgRJUnR0tH16VFSUoqKiHPnScKDCfAjhgwoAAABQdviBcgAAAHgsTiWBuyDYAUXAyh8A\ngJJjewqUPoIdAAAAABiOYAcAAACPx15EmI5gh2tiBQcAAACYgWDnQkwNUqbWDQAAALgLgh0AAAAA\nGI5gB6BY2FMLAADgOgh2AAAAAGA4gh0AAAAAGI5gBwAAgOvi8HvA9RHsDMTKFQAAAMDlCHYAAAAA\nYDiCHQAAAAAYjmAHAAAAAIYj2AEAAACA4Qh2QCFwwRoAAAC4MoIdAADXwRc7AAATEOwAAAAAwHAE\nOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAKAMOPKCXAQ7AAAAADAcwQ4AAAAADEewAwAAAADD\nEewAAAAAwHAEOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAMBwBDsAAMqQI3+cFgDguQh2AAAA\nAGA4gp0H4VtiAAAAwD0R7AAAAADAcAQ7AAAAADAcwQ4AAAAADEewAwAAAADDEewAAAAAwHAEOwAA\nAAAwHMEOAAAAAAxHsAMAAAAAw/k46olzc3M1d+5cHThwQL6+vkpISFBISIh9/sqVK7V+/XoFBwdL\nkuLj41W1alVHlQMAAAAAbsthwe7nn39Wdna2JkyYoNTUVC1atEhDhw61z9+7d6/69++vWrVqOaoE\nAADgAiwWi2w2m7PLAAC35rBgt3PnTjVo0ECSVKdOHe3ZsyfP/H379ikpKUmnT59WZGSkHn74YUeV\nAgAAAABuzWHBzmq1KjAw0H7by8tLFy5ckLe3tySpefPmatu2rQIDA5WYmKhffvlFjRo1clQ5AAAA\nAOC2HHbxlICAAFmtVvttm81mD3U2m01xcXEKDg6Wj4+PIiMjtW/fPkeVAgAAAABuzWHBLjw8XFu2\nbJEkpaamqnr16vZ5VqtVgwcPVmZmpmw2m7Zv3865dgAAAABQTA47FLNJkybatm2bRo4cKZvNpuef\nf14bN25UZmamYmNj1aNHD40dO1Y+Pj6qX7++IiMjHVUKAAAAALg1hwU7Ly8vxcfH55kWGhpq/3/L\nli3VsmVLR708AHgMrjgIAAD4gXIAAAAAMBzBDgAAAAAMR7ADAAAAAMMR7AAAAADAcAQ7AAAAADAc\nwQ4AAAAADEewAwAAAADDEewAAAAAwHAEOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAMBwBDsA\nAAAAMBzBDgAAAAAMR7ADAAAAAMMR7AAAAADAcAQ7AAAAADAcwQ4AAAAADEewAwAAAADDEewAAAAA\nwHAEOwAAAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAMBwBDsAAAAAMBzBDgAAAAAMR7ADAAAAAMMR\n7AAAAADAcAQ7AAAAADAcwQ4AAAAADEewAwAAAADDEewAAAAAwHAEOwAAAAAwHMEOAAAAAAxHsAMA\nAAAAwxHsAAAAAMBwBDsAAAAAMBzBDgAAAAAMR7ADAAAAAMMR7AAAAADAcAQ7AAAAADCcj6OeODc3\nV3PnztWBAwfk6+urhIQEhYSE2OenpKRo+fLl8vLyUkxMjGJjYx1VCgAAAAC4NYftsfv555+VnZ2t\nCRMmqGfPn
lq0aJF9Xk5OjhYuXKgRI0Zo7NixWrdunU6fPu2oUgAAAADArTks2O3cuVMNGjSQJNWp\nU0d79uyxz0tLS1NISIjKly8vHx8fhYeHa8eOHY4qBQAAAADcmsMOxbRarQoMDLTf9vLy0oULF+Tt\n7Z1vXkBAgDIyMgr1vDabzSl/nfnapv11hRpM++sKNZjy1xVqMO2vK9Rgyl9XqMG0v65Qgyl/XaEG\n0/66Qg2m/HWFGkz76wo1FKfma3HYHruAgABZrVb7bZvNJm9vb/u8zMxM+zyr1aqgoCBHlQIAAAAA\nbs1hwS48PFxbtmyRJKWmpqp69er2eaGhoTpy5IjS09OVk5OjHTt2qE6dOo4qBQAAAADcmsVWmP16\nxXDpqpgHDx6UzWbT888/r3379ikzM1OxsbH2q2Lm5uYqJiZG7dq1c0QZAAAAAOD2HBbsAAAAAABl\ngx8oBwAAAADDEewAAAAAwHAO+7mDwsrJydGsWbN04sQJZWdnq0uXLqpWrZree+89WSwW3Xrrrerb\nt6+8vLy0du1arV27Vt7e3nrkkUfUqFEj+/Ns3rxZP/74owYOHOjE1sBUJR2HGRkZevfdd2W1WpWT\nk6MnnniCCwKh1JR0fGZmZurdd9/VuXPn5OPjo379+qlSpUrObhbcSGlty9PS0jR8+HB98MEH8vPz\nc2KL4G5KOkZtNpsSEhJ0yy23SLr4G809e/Z0cquAvJwe7JKTk1WhQgUNGDBA6enpGjJkiGrWrKnu\n3bvrjjvu0Jw5c5SSkqI6depozZo1euONN5Sdna1Ro0bprrvukq+vr+bPn6+tW7eqZs2azm4ODFXS\ncbhy5UrVr19fcXFxOnz4sN555x1NnjzZ2c2Cmyjp+Fy3bp1q1aqlrl276ttvv9WKFSvUp08fZzcL\nbqQ0tuUZGRlatGiRfH19nd0cuKGSjtE///xTt912m4YNG+bspgDX5PRg16xZMzVt2lTS//3W3d69\nexURESFJatiwobZu3SovLy+Fh4fL19dXvr6+CgkJ0YEDB3T77bcrPDxcjRs31tq1a53ZFBispOMw\nLi7O/mHkwoULfDBBqSqN8ZmbmytJOnnyJL8bilJX0jEaFhamOXPmqEePHkpMTHRmU+CmSjpGjx8/\nrr/++ktjx46Vn5+fnnjiCVWtWtWZTQLycfo5dv7+/vYfM586daq6d+8uSbJYLJIu/ph5RkaGMjIy\nFBgYaH/cpemS1Lx5c/v9geIo6TgMCgqSn5+fTp8+renTp3N4BkpVaawnvby8NHbsWK1Zs0ZNmjQp\n+0bArZV0jC5btkyRkZEceQOHKekYrVixoh566CGNHj1aDz/8sKZPn+6UdgDX4/RgJ138Bnns2LFq\n0aKFoqOj84Q0q9WqoKAgBQYGKjMzM990oLSUdBwePHhQ48aNU48ePezfAAKlpTTWk6NHj9a4ceP0\n1ltvlWnt8AwlGaPJyclav369xowZo9OnT2vChAnOaALcXEnGaFhYmBo3bixJqlu3rk6dOiV+MQyu\nxunB7tIKvFevXmrdurUkqWbNmvr3v/8tSdqyZYvq1aun22+/XTt27FBWVpYyMjKUlpamW2+91Zml\nw42UdBweOnRIU6dO1QsvvKCGDRs6sylwQyUdn0lJSdqwYYOki99ae3k5fdUPN1PSMTp9+nSNGTNG\nY8aMUcWKFTVixAhnNgduqKRjdNmyZVq1apUkaf/+/brppps4Wgwux+nn2CUlJSk9PV3Lly/X8uXL\nJUlPPvmk5s+fr5ycHIWGhqpp06by8vJS+/btNXr0aOXm5qp79+5cMQulpqTj8OOPP1Z2drYWLFgg\nSQoMDNTQoUOd2CK4k5KOz5iYGL333ntav369cnNz9dxzzzm5RXA3bMvh6ko6Rh966CFNnz5dv/76\nq7y9vfX88887uUVAfhYb+5EBAAAAwGgcjwMAAAAAhiPYAQAAAIDhCHYAAAA
AYDiCHQAAAAAYjmAH\nAAAAAIYj2AEAPNK8efM0YsQI5ebm2qfl5uZq5MiR+uSTT5xYGQAARUewAwB4pN69eyszM1NJSUn2\naUlJSfL29la3bt2cWBkAAEXn9B8oBwDAGfz8/DRw4ECNGjVKjRo1ks1m01dffaVJkybJy8tLmzdv\nVlJSki5cuKBy5crp8ccfV+3atfXXX39pzpw5Onv2rE6fPq3KlSvrpZdeUnBwsBISElS3bl0dOHBA\nvXr1UlRUlLObCQDwEAQ7AIDHql69urp37673339fubm5SkhIUKVKlZSWlqZly5Zp9OjRKl++vA4c\nOKCJEydq+vTp+v7771WvXj09+OCDys3N1cSJE7Vx40Z16NBBklSjRg29+OKLTm4ZAMDTEOwAAB6t\nffv22rRpk6pVq6aGDRtKkrZu3apTp05p7Nixee577NgxdezYUb///rtWrlypI0eOKC0tTREREfb7\n1KtXr0zrBwBAItgBAKDKlSurSpUq9tu5ubm6++679cILL9innTx5UpUqVdKiRYt04MABtWrVSnfc\ncYeysrJks9ns9/P39y/T2gEAkLh4CgAA+dSvX19btmzR4cOHJUkpKSkaOnSosrOztXXrVsXFxall\ny5YKDg7W9u3b81xZEwAAZ2CPHQAAV6hRo4aeeeYZvf3225Ikb29vDR06VOXKlVPXrl01f/58LVmy\nRD4+Pqpbt66OHj3q5IoBAJ7OYrv8+BEAAAAAgHE4FBMAAAAADEewAwAAAADDEewAAAAAwHAEOwAA\nAAAwHMEOAAAAAAxHsAMAAAAAwxHsAAAAAMBwBDsAAAAAMNz/A2ChACTSsVa+AAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot Smarket volumes\n", "ax = smarket.plot('Year', 'Volume', kind='bar', figsize=(15,6), color='k')\n", "\n", "# Remove redundant xtick labels\n", "xticklabels = ['',] * smarket.shape[0]\n", "xtext, xlocs = np.unique(smarket.Year, return_index=True)\n", "for t, i in zip(xtext, xlocs):\n", " xticklabels[i] = str(int(t))\n", "ax.set_xticklabels(xticklabels, rotation=0)\n", " \n", "ax.set_ylabel('Volume')\n", "ax.set_title('S&P 500 volume over 1250 days.')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Lab 4.6.2 Logistic Regression" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "##### StatsModels" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "autoscroll": "json-false", "collapsed": false, 
"ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Deviance Residuals:\n" ] }, { "data": { "text/plain": [ "count 1250.000000\n", "mean -0.012030\n", "std 1.176023\n", "min -1.325832\n", "25% -1.145081\n", "50% -1.065292\n", "75% 1.203130\n", "max 1.446343\n", "dtype: float64" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " Generalized Linear Model Regression Results \n", "================================================================================================\n", "Dep. Variable: ['Direction[Down]', 'Direction[Up]'] No. Observations: 1250\n", "Model: GLM Df Residuals: 1243\n", "Model Family: Binomial Df Model: 6\n", "Link Function: logit Scale: 1.0\n", "Method: IRLS Log-Likelihood: -863.79\n", "Date: Tue, 28 Feb 2017 Deviance: 1727.6\n", "Time: 16:49:35 Pearson chi2: 1.25e+03\n", "No. Iterations: 6 \n", "==============================================================================\n", " coef std err z P>|z| [95.0% Conf. 
Int.]\n", "------------------------------------------------------------------------------\n", "Intercept 0.1260 0.241 0.523 0.601 -0.346 0.598\n", "Lag1 0.0731 0.050 1.457 0.145 -0.025 0.171\n", "Lag2 0.0423 0.050 0.845 0.398 -0.056 0.140\n", "Lag3 -0.0111 0.050 -0.222 0.824 -0.109 0.087\n", "Lag4 -0.0094 0.050 -0.187 0.851 -0.107 0.089\n", "Lag5 -0.0103 0.050 -0.208 0.835 -0.107 0.087\n", "Volume -0.1354 0.158 -0.855 0.392 -0.446 0.175\n", "==============================================================================\n", "\n", " Null deviance: 1731.2 on 1249 degrees of freedom\n", "Residual deviance: 1727.6 on 1243 degrees of freedom\n", "AIC: 1741.58\n" ] } ], "source": [ "# Logistic regression by GLM\n", "formula = 'Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume'\n", "smarket_glm = smf.glm(formula, data=smarket, family=sm.families.Binomial()).fit()\n", "print('Deviance Residuals:')\n", "display(smarket_glm.resid_deviance.describe())\n", "print(smarket_glm.summary())\n", "print('\\n Null deviance: {0:.1f} on {1} degrees of freedom'.format(smarket_glm.null_deviance, smarket_glm.df_model+smarket_glm.df_resid))\n", "print('Residual deviance: {0:.1f} on {1} degrees of freedom'.format(smarket_glm.deviance, smarket_glm.df_resid))\n", "print('AIC: {0:.2f}'.format(smarket_glm.aic))" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "##### scikit-learn LogisticRegression" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
(Intercept)Lag1Lag2Lag3Lag4Lag5Volume
-0.125941-0.073073-0.04230.0110840.0093590.0103120.135402
\n", "
" ], "text/plain": [ " (Intercept) Lag1 Lag2 Lag3 Lag4 Lag5 Volume\n", " -0.125941 -0.073073 -0.0423 0.011084 0.009359 0.010312 0.135402" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Specify features and response\n", "features = ['Lag1', 'Lag2', 'Lag3', 'Lag4', 'Lag5', 'Volume']\n", "response = 'Direction'\n", "\n", "# Fit on the whole dataset\n", "X = smarket[features]\n", "y = smarket[response]\n", "logreg = LogisticRegression(C=1e9) # Use a large C to disable regularization\n", "logreg.fit(X, y)\n", "\n", "# Extract coefficients from fitting results\n", "coef = pd.DataFrame(logreg.coef_, columns=features)\n", "coef.insert(loc=0, column='(Intercept)', value=logreg.intercept_)\n", "coef.index=['']\n", "display(coef)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First ten in-sample prediction probabilities P(Y=1|X): \n" ] }, { "data": { "text/plain": [ "array([ 0.50708676, 0.48147055, 0.48114106, 0.51522477, 0.51078395,\n", " 0.50695844, 0.49265161, 0.50923079, 0.51761641, 0.48884115])" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "In-sample prediction decision results: \n" ] }, { "data": { "text/plain": [ "array(['Up', 'Down', 'Down', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Down'], dtype=object)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# In-sample prediction with type='response', or P(Y=1|X)\n", "print(\"First ten in-sample prediction probabilities P(Y=1|X): \")\n", "display(logreg.predict_proba(X)[0:10, 1])\n", "\n", "# In-sample prediction with decisions\n", "y_pred = logreg.predict(X)\n", "print(\"In-sample prediction decision results: \")\n", "display(y_pred[0:10])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "autoscroll": 
"json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown145457
Up141507
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 145 457\n", " Up 141 507" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.5216\n" ] } ], "source": [ "# Evaluate accruacy by confusion matrix and score\n", "cfmat_cnames = pd.MultiIndex.from_product([['Predict'], logreg.classes_])\n", "cfmat_index = pd.MultiIndex.from_product([['True'], logreg.classes_])\n", "cfmat = confusion_matrix(y, y_pred, labels=logreg.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, columns=cfmat_cnames, index=cfmat_index))\n", "print('\\nScore: ', logreg.score(X, y))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training dataset shape: (998, 9)\n", "Test dataset shape: (252, 9)\n" ] } ], "source": [ "# Manual train-test split\n", "smarket_train = smarket[smarket.Year!=2005]\n", "smarket_test = smarket[smarket.Year==2005]\n", "print(\"Training dataset shape: \", smarket_train.shape)\n", "print(\"Test dataset shape: \", smarket_test.shape)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown7734
Up9744
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 77 34\n", " Up 97 44" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.480158730159\n" ] } ], "source": [ "# Specify features and response\n", "features = ['Lag1', 'Lag2', 'Lag3', 'Lag4', 'Lag5', 'Volume']\n", "response = 'Direction'\n", "\n", "# Fit on training data subset\n", "X = smarket_train[features]\n", "y = smarket_train[response]\n", "logreg = LogisticRegression(C=1e9) # Use a large C to disable regularization\n", "logreg.fit(X, y)\n", "\n", "# Prediction on test data subset\n", "X = smarket_test[features]\n", "y = smarket_test[response]\n", "y_pred = logreg.predict(X)\n", "\n", "# Evaluate accuracy\n", "cfmat_cnames = pd.MultiIndex.from_product([['Predict'], logreg.classes_])\n", "cfmat_index = pd.MultiIndex.from_product([['True'], logreg.classes_])\n", "cfmat = confusion_matrix(y, y_pred, labels=logreg.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, columns=cfmat_cnames, index=cfmat_index))\n", "print('\\nScore: ', logreg.score(X, y))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown3576
Up35106
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 35 76\n", " Up 35 106" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.559523809524\n" ] } ], "source": [ "# Specify features and response\n", "features = ['Lag1', 'Lag2']\n", "response = 'Direction'\n", "\n", "# Improve prediction by removing features with large p-values\n", "X = smarket_train[features]\n", "y = smarket_train[response]\n", "logreg = LogisticRegression(C=1e9) # Use a large C to disable regularization\n", "logreg.fit(X, y)\n", "\n", "# Prediction on test data subset\n", "X = smarket_test[features]\n", "y = smarket_test[response]\n", "y_pred = logreg.predict(X)\n", "\n", "# Evaluate accuracy\n", "cfmat_cnames = pd.MultiIndex.from_product([['Predict'], logreg.classes_])\n", "cfmat_index = pd.MultiIndex.from_product([['True'], logreg.classes_])\n", "cfmat = confusion_matrix(y, y_pred, labels=logreg.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, columns=cfmat_cnames, index=cfmat_index))\n", "print('\\nScore: ', logreg.score(X, y))" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Lab 4.6.3 Linear Discriminant Analysis" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prior probabilities of groups:\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DownUp
0.4919840.508016
\n", "
" ], "text/plain": [ " Down Up\n", " 0.491984 0.508016" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Group means:\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Lag1Lag2
Down0.0427900.033894
Up-0.039546-0.031325
\n", "
" ], "text/plain": [ " Lag1 Lag2\n", "Down 0.042790 0.033894\n", "Up -0.039546 -0.031325" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Coefficients of linear discriminants:\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LD1
Lag1-0.642019
Lag2-0.513529
\n", "
" ], "text/plain": [ " LD1\n", "Lag1 -0.642019\n", "Lag2 -0.513529" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Specify features and response\n", "features = ['Lag1', 'Lag2']\n", "response = 'Direction'\n", "\n", "# Fit on training subset\n", "X = smarket_train[features]\n", "y = smarket_train[response]\n", "lda = LinearDiscriminantAnalysis()\n", "lda.fit(X, y)\n", "\n", "# Priors, group means, and coefficients of linear discriminants\n", "priors = pd.DataFrame(lda.priors_, index=lda.classes_, columns=['']).T\n", "print(\"Prior probabilities of groups:\")\n", "display(priors)\n", "gmeans = pd.DataFrame(lda.means_, index=lda.classes_, columns=features)\n", "print(\"\\nGroup means:\")\n", "display(gmeans)\n", "coef = pd.DataFrame(lda.scalings_, columns=['LD1'], index=features)\n", "print(\"\\nCoefficients of linear discriminants:\")\n", "display(coef)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAsEAAAF1CAYAAAAJAjeKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9wVOW9x/FPdrOEBKSI3mvc1UgJ7DId0IAh1ChQuIHp\njXPHDo5tTQV6i9AYtLejFPkRJkRI/cVAQSs0JFOJXjtBQZ1qvU6rlZIZw02TXKKUuJhKMImdYlvk\nYja4yZ77h3owN5FDSMLZ5Hm//tpznrN7vjk8WT775NnzJFiWZQkAAAAwiMftAgAAAICLjRAMAAAA\n4xCCAQAAYBxCMAAAAIxDCAYAAIBxCMEAAAAwTqLbBQAA+iYUCtmPExISlJycrEmTJmnZsmWaP3++\ni5UBwNBBCAaAIeiRRx5Rdna2YrGYTp06pVdffVX/8R//oc2bNys3N9ft8gAg7hGCAWAIGjNmjP7p\nn/5JknTFFVdo0qRJ+vjjj/XTn/5UOTk5GjFihMsVAkB8Y04wAHxBa2urli5dqoyMDC1YsECVlZX2\n9IOWlhaFQiE98cQTysrK0rJlyyRJhw8f1pIlSzR9+nTddNNNevTRRxWNRiVJ+/bt08yZM7udY/Xq\n1frRj34kSTp48KAyMzO1b98+3XTTTcrMzNS6desUiUT6XHteXp5OnDih2tpaSdInn3yixx9/XPPm\nzdPUqVN1++2369ChQ5Kkn/70p1q6dKn93P/6r/9SKBRSQ0ODve9f//Vf9cILL2jfvn1auHChdu3a\npRtvvFEZGRm677771N7e3ucaASBeEIIB4DOdnZ364Q9/KI/Hoz179mj16tXatm1bj+P+8Ic/aM+e\nPVq1apWOHTumO+64Q+np6Xr22We1ceNGvfjii9qyZct5n7e9vV3l5eXavn27duzYoerqahUXF/e5\n/quvvlrJycl69913JUmbNm3Ss88+q6KiIr3wwguaNGmS/v3f/11//etfNXv2bNXV1dlh/eDBg0pI\nSFBNTY0kqa2tTceOHdOsWbMkSeFwWHV1ddq9e7e2bdum3/3ud9qzZ0+fawSAeEEIBoDPVFdX69ix\nY3r44YcVDAY1b9483XPPPT2OW7x4scaPH69JkyapsrJSV111ldavX6/09HTNnTtXa9eu1VNPPXXe\nI6VdXV0qLi7W9OnTNWPGDK1Zs0YvvfSSTp8+3eefYcyYMTp9+rROnTql5557TmvWrNGcOXOUnp6u\nDRs26Morr9R//ud/KisrS5Zl2SPDBw8e1Jw5c+wQXFVVpSlTpuiyyy6TJEWjUW3cuFETJ07UnDlz\nNGvWLL311lt9rg8A4gUhGAA+88477ygQCGjcuHH2vmnTpvU47qqrrrIfv/vuu7ruuuuUkJBg77v+\n+usVjUbV3Nx8Xuf1er3KyMiwt6dOnapoNKo///nPff4ZTp8+rUsuuUTHjh1TV1dXt/o9Ho+mTZum\no0ePasSIEZo5c6aqq6t14sQJtbS0aMmSJaqrq5NlWaqqqtKcOXPs544aNUqXX365vT169Gh7FBkA\nhiJCMAB8JjExUZZlOR43cuRI+3FSUlKP9lgsJunTEd4vhuPPdXZ2dtv2eDzyeM6+HX/+fK/Xe36F\nf+b48eP6+OOPNXny5C/9YlwsFrNff/bs2Tp48KD++7//W9ddd50yMzPV0dGhI0eOqLq6ulsI9vl8\nfaoFAOIdIRgAPhMMBtXW1qa///3v9j6nP/mnp6fr0KFD3cJzfX29fD6f0tLS5PP5FIlEurW3tLR0\ne41oNKpwOGxvHzp0SElJSfrqV7/ap/orKyuVmpqqadOm6ZprrpHP51NdXZ3dblmW/ud//kcTJkyQ\n9GkIrq+vV1VVlWbMmKERI0bouuuuU3l5uXw+n6ZMmdKn8wPAUEIIBoDPfP3rX9eECRO0Zs0ahcNh\nHThwoNcvxn1RXl6eWlpatGnTJjU1NWn//v166KGH9K1vfUtjx
ozRlClTdObMGe3atUvvv/++SktL\n9ac//anH66xfv16HDx9WdXW1HnnkEd12221KSUn50vOeOnVKJ06c0F//+lcdPXpUP//5z7V7926t\nXbtWXq9XycnJuuOOO/TQQw9p//79ampqUnFxsVpbW/Xtb39b0qdfpAsEAvr1r3+tGTNmSJJmzpyp\nl19+WbNnz+51FBsAhgvuEwwAn0lISNDPf/5zFRYW6tZbb5Xf79dtt92mXbt2felzrrjiCpWVlenR\nRx/VLbfcoksvvVQLFy7UihUrJEnjx4/XmjVrVFZWph07dmjBggVasmRJj/nCN998s5YuXSrLsrRw\n4ULde++956x11apVds3jxo3T5MmTVVpaquzsbPuYe++9VwkJCVqzZo0+/vhjTZ06VRUVFRo/frx9\nzOzZs/WrX/3Knjs8Y8YMWZal2bNn9+naAcBQk2CdzwQ4ADDA3/72N7399tvd5sK+8sor2rx5s157\n7bVBOefBgwe1ePFi1dXVadSoUYNyDgBAT0yHAIDPJCQk6O6779Yvf/lLtbS0qLa2Vo8//jjLEAPA\nMMR0CAD4zLhx47Rt2zZt27ZNW7du1Ve+8hXdcsst9upuAIDhg+kQAAAAMA7TIQAAAGAcQjAAAACM\nQwgGAACAcVz5YlxbW5sbp407fr+fa4Ee6BfoDf0CvaFfoDf0i7P8fv+XtjESDAAAAOMQggEAAGAc\nQjAAAACMQwgGAACAcQjBAAAAMA7LJgOAywLhsPNB53PMBWgNBgfldQEg3jESDAAAAOMQggEAAGAc\nQjAAAACMQwgGAACAcQjBAAAAMA4hGAAAAMYhBAMAAMA4hGAAAAAYhxAMAAAA47BiHADoPFdtAwAM\nG4wEAwAAwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMIBgAAgHEIwQAAADAOIRgAAADGcVws\nIxaLqaysTM3NzfL5fMrPz1dqamqP437xi19o9OjR+t73vjcohQIAAAADxXEkuKamRtFoVCUlJcrL\ny1NFRUWPY37729/q+PHjg1IgAAAAMNAcQ3BjY6MyMjIkScFgUE1NTd3a33nnHR09elTz588fnAoB\nAACAAeY4HSISiSglJcXe9ng86urqktfr1T/+8Q8999xzWrlypd58883zPqnf77+waochrgV6Q79w\nQTjsdgWuoK8Nffwbojf0C2eOITg5OVmRSMTetixLXq9XkvTmm2/q1KlTevDBB3Xy5EmdOXNGgUBA\n3/jGN875mm1tbf2repjw+/1cC/RAv8DFRF8b2ni/QG/oF2ed68OAYwgOhUKqra1Vdna2wuGw0tLS\n7Lbc3Fzl5uZKkt544w21trY6BmAAAADAbY4hOCsrSw0NDSosLJRlWSooKFBVVZU6OjqUk5NzMWoE\nAAAABpRjCPZ4PFq+fHm3fYFAoMdxjAADAABgqGCxDAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMI\nBgAAgHEIwQAAADAOIRgAAADGIQQDAADAOIRgAAAAGIcQDAAAAOMQggEAAGAcQjAAAACMk+h2AQAA\n9wTCYVfP3xoMunp+AOZiJBgAAADGIQQDAADAOIRgAAAAGIcQDAAAAOMQggEAAGAcQjAAAACMQwgG\nAACAcQjBAAAAMA4hGAAAAMZxXDEuFouprKxMzc3N8vl8ys/PV2pqqt1eXV2tF198UZI0a9Ys5ebm\nDl61AAAAwABwHAmuqalRNBpVSUmJ8vLyVFFRYbfFYjE988wzWr9+vUpKSvTqq6/q1KlTg1owAAAA\n0F+OI8GNjY3KyMiQJAWDQTU1NdltHo9HW7duldfr1UcffaRYLKbERMeXlN/v70fJwwvXAr2hX7gg\nHHa7AiPR1/uPa4je0C+cOSbWSCSilJQUe9vj8airq0ter1eS5PV6dfDgQZWXl2v69OkaOXKk40nb\n2tr6UfLw4ff7uRbogX4Bk
9DX+4f3C/SGfnHWuT4MOE6HSE5OViQSsbcty7ID8OdmzpypnTt3qrOz\nU/v37+9HqQAAAMDgcxwJDoVCqq2tVXZ2tsLhsNLS0uy29vZ2PfzwwyosLJTP51NSUpISEhIGtWAA\nw1OA6QgAgIvIMQRnZWWpoaFBhYWFsixLBQUFqqqqUkdHh3JycjRr1iwVFRXJ6/Xqmmuu0ezZsy9G\n3QAAAMAFcwzBHo9Hy5cv77YvEAjYj3NycpSTkzPwlQEAAACDhMUyAAAAYBxCMAAAAIxDCAYAAIBx\nCMEAAAAwDiEYAAAAxiEEAwAAwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMIBgAAgHEIwQAA\nADAOIRgAAADGIQQDAADAOIRgAAAAGIcQDAAAAOMQggEAAGAcQjAAAACMQwgGAACAcQjBAAAAMA4h\nGAAAAMZJdDogFouprKxMzc3N8vl8ys/PV2pqqt1eVVWl3/zmN/J6vbr66qt15513yuMhWwMAACB+\nOabVmpoaRaNRlZSUKC8vTxUVFXbbJ598osrKShUVFWnjxo1qb29XXV3doBYMAAAA9JdjCG5sbFRG\nRoYkKRgMqqmpyW5LTEzUxo0blZSUJOnTUWOfzzdIpQIAAAADw3E6RCQSUUpKir3t8XjU1dUlr9cr\nj8ejsWPHSpJeeeUVdXR06Nprr3U8qd/v70fJwwvXAr0xsl+Ew25XABcY2dcHGNcQvaFfOHMMwcnJ\nyYpEIva2ZVnyer32diwW09NPP60PPvhA9913nxISEhxP2tbWdoHlDi9+v59rgR7oFzAJfb1/eL9A\nb+gXZ53rw4DjdIhQKKT6+npJUjgcVlpaWrf20tJSRaNR/eQnP7GnRQAAAADxzHEkOCsrSw0NDSos\nLJRlWSooKFBVVZU6Ojo0YcIE/f73v9fkyZP1wAMPSJJyc3OVlZU16IUDAAAAF8oxBHs8Hi1fvrzb\nvkAgYD+urKwc+KoAAACAQcQNfQEAAGAcQjAAAACM4zgdAgCAwRJw8dZ4rcGga+cG4D5GggEAAGAc\nQjAAAACMQwgGAACAcQjBAAAAMA4hGAAAAMYhBAMAAMA4hGAAAAAYhxAMAAAA4xCCAQAAYBxCMAAA\nAIxDCAYAAIBxCMEAAAAwDiEYAAAAxiEEAwAAwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMI\nBgAAgHESnQ6IxWIqKytTc3OzfD6f8vPzlZqa2u2YM2fOaNOmTcrPz1cgEBi0YgEAAICB4DgSXFNT\no2g0qpKSEuXl5amioqJbe1NTk4qKivSXv/xl0IoEAAAABpLjSHBjY6MyMjIkScFgUE1NTd3ao9Go\nVq5cqccff3xwKgRw0QTCYbdLAADgonAMwZFIRCkpKfa2x+NRV1eXvF6vJGny5Ml9Pqnf7+/zc4Yr\nrgV641q/IATDIMPl/Xe4/BwYWPQLZ44hODk5WZFIxN62LMsOwBeqra2tX88fLvx+P9cCPdAvgItj\nOPye8X6B3tAvzjrXhwHHOcGhUEj19fWSpHA4rLS0tIGrDAAAAHCB40hwVlaWGhoaVFhYKMuyVFBQ\noKqqKnV0dCgnJ+di1AgAwIBzew58azDo6vkB0zmGYI/Ho+XLl3fb19tt0DZs2DBgRQEAAACDicUy\nAAAAYBxCMAAAAIxDCAYAAIBxCMEAAAAwDiEYAAAAxiEEAwAAwDiEYAAAABiHEAwAAADjOC6WAZjG\n7VWk5Pb5AQAwACPBAAAAMA4hGAAAAMYhBAMAAMA4hGAAAAAYhxAMAAAA4xCCAQAAYBxCMAAAAIzD\nfYIBAHDBgNyT/AJfozUY7P+5gSGOkWAAAAAYh5FgxCXXV20DAAwKt9/fGQXH5xgJBgAAgHEIwQAA\nADAO0yEAADCM21MSgHjgGIJjsZjKysrU3Nwsn8+n/Px8paam2u1//OMftXfvXnk8Hs2dO1c
5OTmD\nWjAAAMBQdNE+fPRyHuZC9+Q4HaKmpkbRaFQlJSXKy8tTRUWF3dbZ2andu3dr3bp1Ki4u1muvvaaT\nJ08OasEAAABAfzmOBDc2NiojI0OSFAwG1dTUZLe1trYqNTVVo0ePliSFQiEdOXJEN9xwwyCVi4uF\nP5UBAIDhzDEERyIRpaSk2Nsej0ddXV3yer092pKTk9Xe3u54Ur/ff4HlDj/xei2sOK0LAIChiv9b\n44vjdIjk5GRFIhF727Iseb1eu62jo8Nui0QiGjVq1CCUCQAAAAwcxxAcCoVUX18vSQqHw0pLS7Pb\nAoGAPvjgA50+fVqdnZ06cuSIgky8BgAAQJxLsCzLOtcBn98d4vjx47IsSwUFBXrvvffU0dGhnJwc\n++4QsVhMc+fO1Te/+c2LVTsAAABwQRxDMAAAADDcsGIcAAAAjEMIBgAAgHFYNjkOtLa2au3atdq1\na5dGjBjhdjlwWXt7u7Zv365IJKLOzk4tWbKEL5wazGnVTpins7NTO3bs0IkTJxSNRnXrrbcqMzPT\n7bIQJz766COtXr1ahYWFCgQCbpcT1xgJdll7e7sqKirk8/ncLgVx4qWXXtLUqVNVXFysFStWqLy8\n3O2S4KJzrdoJMx04cECXXHKJHnjgAa1bt473CNg6OztVWlrKgNp5IgS7yLIslZaW6vbbb1dSUpLb\n5SBO3HzzzZo/f74kqauriw9IhjvXqp0w0w033KDvfOc7krrfux946qmnNH/+fF166aVulzIkMB3i\nInn99df18ssvd9t3+eWX68Ybb9T48ePdKQqu661f3HXXXZo4caJOnjypxx57TN///vfdKQ5x4Vyr\ndsJMI0eOlPRp39iyZYu++93vulwR4sEbb7yhMWPGKCMjQy+88ILb5QwJ3CLNRffcc48uu+wySdLR\no0c1ceJEFRcXu1wV4sHx48f1s5/9TIsWLdK0adPcLgcu2r17tyZNmqTs7GxJUn5+vnbu3OlyVXDb\nhx9+qM2bN2vBggWaN2+e2+UgDhQVFUmSEhISdOzYMV155ZW6//77NXbsWJcri1+MBLvosccesx+v\nWLFC69atc7EaxIuWlhZt2bJFP/7xj/krARQKhVRbW6vs7Oweq3bCTCdPnlRJSYl+8IMfaOrUqW6X\ngzjxxUG0DRs2aNmyZQRgB4RgIM4888wzikajevLJJyVJKSkpWrVqlbtFwTVZWVlqaGhQYWGhvWrn\n+QiFQtq5c6fmzp3bbf/Bgwe1ePFi1dXVadSoUYNRMgbZ888/r9OnT2vv3r3au3evJGnt2rV8GQro\nI6ZDAMAwRAgGgHPj7hAAAAAwDiEYAPqotbVVS5cuVUZGhhYsWKDKykqFQiFJn87pDoVCeuKJJ5SV\nlaVly5ZJkg4fPqwlS5Zo+vTpuummm/Too48qGo1Kkvbt26eZM2d2O8fq1av1ox/9SNKno7eZmZna\nt2+fbrrpJmVmZmrdunWKRCL9/lkWLVqkrVu3aunSpbr22mv1b//2bzpw4EC/XxcA4h0hGAD6oLOz\nUz/84Q/l8Xi0Z88erV69Wtu2betx3B/+8Aft2bNHq1at0rFjx3THHXcoPT1dzz77rDZu3KgXX3xR\nW7ZsOe/ztre3q7y8XNu3b9eOHTtUXV09YHeTKS8v17Rp0/T8889rzpw5uuuuu/Tee+8NyGsDQLwi\nBANAH1RXV+vYsWN6+OGHFQwGNW/ePN1zzz09jlu8eLHGjx+vSZMmqbKyUldddZXWr1+v9PR0zZ07\nV2vXrtVTTz2l9vb28zpvV1eXiouLNX36dM2YMUNr1qzRSy+9pNOnT/f7Z7r++ut19913Kz09XStX\nrlR6erqee+65fr8uAMQzQjAA9ME777yjQCCgcePG2ft6u5fzVVddZT9+9913dd111ykhIcHed/31\n1ysajaq5ufm8zuv1eu2V4yRp6tSpikaj+vOf/9zr8Ym
JiYrFYj32f77viysRZmZmdjvm2muv1dGj\nR8+rLgAYqgjBANAHiYmJOp+b6ny+qpekXpdF/zyMdnV1dQvHn+vs7Oy27fF45PGcfcv+/PlftnLc\nmDFj9L//+7899p86dUpJSUndbqf1/1+jq6ur27kAYDjiXQ4A+iAYDKqtrU1///vf7X1vvfXWOZ+T\nnp6uQ4cOdQvP9fX18vl8SktLk8/nUyQS6dbe0tLS7TWi0ajC4bC9fejQISUlJemrX/1qr+f82te+\npvr6+h776+vr9bWvfa3bvsOHD9uPLcvSW2+9pcmTJ5/zZwKAoY4QDAB98PWvf10TJkzQmjVrFA6H\ndeDAgV6/GPdFeXl5amlp0aZNm9TU1KT9+/froYce0re+9S2NGTNGU6ZM0ZkzZ7Rr1y69//77Ki0t\n1Z/+9Kcer7N+/XodPnxY1dXVeuSRR3TbbbcpJSWl13MuXrxYe/fu1a5du9Tc3KyjR4+qvLxcv/rV\nr3TnnXd2O/b111/X008/rffee0+PPPKIWlpa9O1vf/vCLxIADAEslgEAffT++++rsLBQdXV18vv9\nys3N1a5du/T222+rpaVF//Iv/6Jf//rXCgaD9nNqa2v16KOP6u2339all16qhQsXasWKFfa0hCef\nfFJlZWX6+OOPtWDBAv3zP/+zmpubtX37dnuBizVr1mjnzp2yLEsLFy7Uvffe221u7//3u9/9TmVl\nZQqHw7IsS8FgUMuWLVNOTo59zKJFi3TFFVfoww8/VF1dnUKhkNauXdvrPGcAGE4IwQDQB3/729/0\n9ttva86cOfa+V155RZs3b9Zrr702KOcczFXeFi1apClTpuj+++8f0NcFgHjHdAgA6IOEhATdfffd\n+uUvf6mWlhbV1tbq8ccfV25urtulAQD6INHtAgBgKBk3bpy2bdumbdu2aevWrfrKV76iW265xV7d\nDQAwNDAdAgAAAMZhOgQAAACMQwgGAACAcQjBAAAAMI4rX4xra2tz47Rxx+/3cy3QA/0CvaFfoDf0\nC/SGfnGW3+//0jZGggEAAGAcQjAAAACMQwgGAACAcQjBAAAAMA4hGAAAAMZh2WQAcFEgHD6/A8/3\nuH5oDQYH/RwAEC8YCQYAAIBxCMEAAAAwDiEYAAAAxiEEAwAAwDiEYAAAABiHEAwAAADjEIIBAABg\nHEIwAAAAjEMIBgAAgHEIwQAAADAOIRgAAADGIQQDAADAOIRgAAAAGIcQDAAAAOMkul0AAFxsgXDY\n7RIAAC5jJBgAAADGIQQDAADAOI7TIWKxmMrKytTc3Cyfz6f8/Hylpqb2OO4Xv/iFRo8ere9973uD\nUigAAAAwUBxHgmtqahSNRlVSUqK8vDxVVFT0OOa3v/2tjh8/PigFAgAAAAPNMQQ3NjYqIyNDkhQM\nBtXU1NSt/Z133tHRo0c1f/78wakQAAAAGGCO0yEikYhSUlLsbY/Ho66uLnm9Xv3jH//Qc889p5Ur\nV+rNN98875P6/f4Lq3YY4lqgN/SLQcbdIXpFvxua+HdDb+gXzhxDcHJysiKRiL1tWZa8Xq8k6c03\n39SpU6f04IMP6uTJkzpz5owCgYC+8Y1vnPM129ra+lf1MOH3+7kW6IF+AbfQ74Ye3i/QG/rFWef6\nMOAYgkOhkGpra5Wdna1wOKy0tDS7LTc3V7m5uZKkN954Q62trY4BGAAAAHCbYwjOyspSQ0ODCgsL\nZVmWCgoKVFVVpY6ODuXk5FyMGgEAAIAB5RiCPR6Pli9f3m1fIBDocRwjwAAAABgqWCwDAAAAxiEE\nAwAAwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMIBgAAgHEIwQAAADAOIRgAAADGIQQDAADA\nOIRgAAAAGIcQDAAAAOMQggEAAGAcQjAAAACMQwgGAACAcQjBAAAAMA4hGAAAAMYhBAMAAMA4hGAA\nAAAYhxAMAAAA4xC
CAQAAYBxCMAAAAIyT6HRALBZTWVmZmpub5fP5lJ+fr9TUVLu9urpaL774oiRp\n1qxZys3NHbxqAQAAgAHgOBJcU1OjaDSqkpIS5eXlqaKiwm6LxWJ65plntH79epWUlOjVV1/VqVOn\nBrVgAAAAoL8cR4IbGxuVkZEhSQoGg2pqarLbPB6Ptm7dKq/Xq48++kixWEyJiY4vCQAAALjKMbFG\nIhGlpKTY2x6PR11dXfJ6vZIkr9ergwcPqry8XNOnT9fIkSMdT+r3+/tR8vDCtUBv6BeDLBx2u4K4\nRL8bmvh3Q2/oF84cQ3BycrIikYi9bVmWHYA/N3PmTM2YMUNPPPGE9u/fr7lz557zNdva2i6w3OHF\n7/dzLdAD/QJuod8NPbxfoDf0i7PO9WHAcU5wKBRSfX29JCkcDistLc1ua29vV1FRkaLRqDwej5KS\nkpSQkDAAJQMAAACDx3EkOCsrSw0NDSosLJRlWSooKFBVVZU6OjqUk5OjWbNmqaioSF6vV9dcc41m\nz559MeoGAAywQBxNE2kNBt0uAcAw5xiCPR6Pli9f3m1fIBCwH+fk5CgnJ2fgKwMAAAAGCYtlAAAA\nwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMIBgAAgHEIwQAAADAOIRgAAADGIQQDAADAOIRg\nAAAAGIcQDAAAAOMkul0AADMEwmG3SwAAwMZIMAAAAIxDCAYAAIBxCMEAAAAwDiEYAAAAxiEEAwAA\nwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMIBgAAgHEcl02OxWIqKytTc3OzfD6f8vPzlZqa\nardXVVXpN7/5jbxer66++mrdeeed8njI1gCACxdPy2y3BoNulwBgEDim1ZqaGkWjUZWUlCgvL08V\nFRV22yeffKLKykoVFRVp48aNam9vV11d3aAWDAAAAPSXYwhubGxURkaGJCkYDKqpqcluS0xM1MaN\nG5WUlCTp01Fjn883SKUCAAAAA8NxOkQkElFKSoq97fF41NXVJa/XK4/Ho7Fjx0qSXnnlFXV0dOja\na691PKnf7+9HycML1wK9GZb9Io7+vA30Rbz/PsZ7fXAH/cKZYwhOTk5WJBKxty3LktfrtbdjsZie\nfvppffDBB7rvvvuUkJDgeNK2trYLLHd48fv9XAv0QL8A4ks8/z7yfoHe0C/OOteHAcfpEKFQSPX1\n9ZKkcDistLS0bu2lpaWKRqP6yU9+Yk+LAAAAAOKZ40hwVlaWGhoaVFhYKMuyVFBQoKqqKnV0dGjC\nhAn6/e9H5sApAAAG+klEQVR/r8mTJ+uBBx6QJOXm5iorK2vQCwcAAAAulGMI9ng8Wr58ebd9gUDA\nflxZWTnwVQEAAACDiBv6AgAAwDiEYAAAABiHEAwAAADjEIIBAABgHEIwAAAAjEMIBgAAgHEIwQAA\nADAOIRgAAADGIQQDAADAOIRgAAAAGIcQDAAAAOMQggEAAGAcQjAAAACMQwgGAACAcQjBAAAAMA4h\nGAAAAMYhBAMAAMA4hGAAAAAYhxAMAAAA4xCCAQAAYBxCMAAAAIxDCAYAAIBxEp0OiMViKisrU3Nz\ns3w+n/Lz85WamtrtmDNnzmjTpk3Kz89XIBAYtGIBAACAgeA4ElxTU6NoNKqSkhLl5eWpoqKiW3tT\nU5OKior0l7/8ZdCKBAAAAAaSYwhubGxURkaGJCkYDKqpqalbezQa1cqVKxkBBgAAwJDhOB0iEoko\nJSXF3vZ4POrq6pLX65UkTZ48uc8n9fv9fX7OcMW1QG+GZb8Ih92uALgg8f77GO/1wR30C2eOITg5\nOVmRSMTetizLDsAXqq2trV/PHy78fj/XAj3QL4D4Es+/j7xfoDf0i7PO9WHAcTpEKBRSfX29JCkc\nDistLW3gKgMAAABc4DgSnJWVpYaGBhUWFsqyLBUUFKiqqkodHR3Kycm5GDUCuEABp
iAAANArxxDs\n8Xi0fPnybvt6+xLchg0bBqwoAAAAYDCxWAYAAACMQwgGAACAcRynQwAAYLJ4m1vfGgy6XQIwLDAS\nDAAAAOMQggEAAGAcQjAAAACMQwgGAACAcQjBAAAAMA4hGAAAAMYhBAMAAMA4hGAAAAAYhxAMAAAA\n4xCCAQAAYBxCMAAAAIxDCAYAAIBxCMEAAAAwDiEYAAAAxiEEAwAAwDiJbhcAAADOXyAcPrvxxccu\naA0GXT0/0B+EYGCABfr7n5LL/6kBAGACQjAuWL/DHgBgSIun/wcYlUZfEYIBAMCQRyBHXzmG4Fgs\nprKyMjU3N8vn8yk/P1+pqal2+x//+Eft3btXHo9Hc+fOVU5OzqAWDAAAAPSXYwiuqalRNBpVSUmJ\nwuGwKioqtGrVKklSZ2endu/erQcffFAjR47U+vXrlZmZqbFjxw564RfToH26vIDX5dMlAABA/zmG\n4MbGRmVkZEiSgsGgmpqa7LbW1lalpqZq9OjRkqRQKKQjR47ohhtuGKRyAQAA4pvrUzO+cH4Gz76c\nYwiORCJKSUmxtz0ej7q6uuT1enu0JScnq7293fGkfr//Ast1hzXE6r1YuC4AAGCoclwsIzk5WZFI\nxN62LEter9du6+josNsikYhGjRo1CGUCAAAAA8cxBIdCIdXX10uSwuGw0tLS7LZAIKAPPvhAp0+f\nVmdnp44cOaIgw+4AAACIcwmWZVnnOuDzu0McP35clmWpoKBA7733njo6OpSTk2PfHSIWi2nu3Ln6\n5je/ebFqBwAAAC6IYwgGAAAAhhvH6RAAAADAcEMIBgAAgHFYNjkOtLa2au3atdq1a5dGjBjhdjlw\nWXt7u7Zv365IJKLOzk4tWbKEL5wazGnVTpins7NTO3bs0IkTJxSNRnXrrbcqMzPT7bIQJz766COt\nXr1ahYWFCgQCbpcT1xgJdll7e7sqKirk8/ncLgVx4qWXXtLUqVNVXFysFStWqLy83O2S4KIvrtqZ\nl5eniooKt0uCyw4cOKBLLrlEDzzwgNatW8d7BGydnZ0qLS1lQO08EYJdZFmWSktLdfvttyspKcnt\nchAnbr75Zs2fP1+S1NXVxQckw51r1U6Y6YYbbtB3vvMdSd3v3Q889dRTmj9/vi699FK3SxkSmA5x\nkbz++ut6+eWXu+27/PLLdeONN2r8+PHuFAXX9dYv7rrrLk2cOFEnT57UY489pu9///vuFIe4cK5V\nO2GmkSNHSvq0b2zZskXf/e53Xa4I8eCNN97QmDFjlJGRoRdeeMHtcoYEbpHmonvuuUeXXXaZJOno\n0aOaOHGiiouLXa4K8eD48eP62c9+pkWLFmnatGlulwMX7d69W5MmTVJ2drYkKT8/Xzt37nS5Krjt\nww8/1ObNm7VgwQLNmzfP7XIQB4qKiiRJCQkJOnbsmK688krdf//9Gjt2rMuVxS9Ggl302GOP2Y9X\nrFihdevWuVgN4kVLS4u2bNmiH//4x/yVAAqFQqqtrVV2dnaPVTthppMnT6qkpEQ/+MEPNHXqVLfL\nQZz44iDahg0btGzZMgKwA0IwEGeeeeYZRaNRPfnkk5KklJQUrVq1yt2i4JqsrCw1NDSosLDQXrUT\nZnv++ed1+vRp7d27V3v37pUkrV27li9DAX3EdAgAAAAYh7tDAAAAwDiEYAAAABiHEAwAAADjEIIB\nAABgHEIwAAAAjEMIBgAAgHEIwQAAADAOIRgAAADG+T8XqSetVZ+YdQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot linear discriminants of the LDA training fit\n", "smarket_grouped = 
smarket_train.groupby('Direction')\n", "smarket_down = smarket_grouped.get_group('Down')\n", "smarket_up = smarket_grouped.get_group('Up')\n", "discrim_down = lda.transform(smarket_down[['Lag1', 'Lag2']])\n", "discrim_up = lda.transform(smarket_up[['Lag1', 'Lag2']])\n", "# density=True normalizes each histogram to unit area; it replaces the\n", "# normed=True keyword, which current matplotlib releases no longer accept\n", "plt.figure(figsize=(12,6))\n", "plt.subplot(2, 1, 1)\n", "plt.hist(discrim_down, 16, density=True, color='c')\n", "plt.title('group Down')\n", "plt.xlim(-5, 5)\n", "plt.subplot(2, 1, 2)\n", "plt.hist(discrim_up, 16, density=True, color='c')\n", "plt.title('group Up')\n", "plt.xlim(-5, 5)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# Prediction on test subset\n", "X = smarket_test[['Lag1', 'Lag2']]\n", "y = smarket_test['Direction']\n", "y_pred = lda.predict(X)\n", "# Generate posterior probability matrix\n", "posterior = pd.DataFrame(lda.predict_proba(X), columns=lda.classes_)\n", "# Generate linear discriminants on the test subset\n", "discrim_test = lda.transform(X)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown3576
Up35106
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 35 76\n", " Up 35 106" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.559523809524\n" ] } ], "source": [ "# Evaluate accuracy\n", "cfmat_cnames = pd.MultiIndex.from_product([['Predict'], lda.classes_])\n", "cfmat_index = pd.MultiIndex.from_product([['True'], lda.classes_])\n", "cfmat = confusion_matrix(y, y_pred, labels=lda.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, columns=cfmat_cnames, index=cfmat_index))\n", "print('\\nScore: ', lda.score(X, y))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Posterior probabilities:\n", " Down Up\n", "0 0.490179 0.509821\n", "1 0.479218 0.520782\n", "2 0.466818 0.533182\n", "3 0.474001 0.525999\n", "4 0.492788 0.507212\n", "5 0.493856 0.506144\n", "6 0.495102 0.504898\n", "7 0.487286 0.512714\n", "8 0.490701 0.509299\n", "9 0.484403 0.515597\n", "10 0.490696 0.509304\n", "11 0.511999 0.488001\n", "12 0.489515 0.510485\n", "13 0.470676 0.529324\n", "14 0.474459 0.525541\n", "15 0.479958 0.520042\n", "16 0.493578 0.506422\n", "17 0.503089 0.496911\n", "18 0.497881 0.502119\n", "19 0.488633 0.511367\n", "\n", "First 20 prediction results: \n", " ['Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Down' 'Up' 'Up'\n", " 'Up' 'Up' 'Up' 'Down' 'Up' 'Up']\n", "\n", "Number of Down class with threshold = 0.9: 0\n" ] } ], "source": [ "# First 20 posterior probabilities\n", "print('Posterior probabilities:')\n", "print(posterior.iloc[:20])\n", "# First 20 prediction results\n", "print('\\nFirst 20 prediction results: \\n', y_pred[:20])\n", "# Number of Down class with threshold changed to 0.9\n", "print('\\nNumber of Down class with threshold = 0.9: ', 
sum(posterior['Down'] > 0.9))" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Lab 4.6.4 Quadratic Discriminant Analysis" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prior probabilities of groups:\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DownUp
0.4919840.508016
\n", "
" ], "text/plain": [ " Down Up\n", " 0.491984 0.508016" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Group means:\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Lag1Lag2
Down0.0427900.033894
Up-0.039546-0.031325
\n", "
" ], "text/plain": [ " Lag1 Lag2\n", "Down 0.042790 0.033894\n", "Up -0.039546 -0.031325" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Coefficients of quadratic discriminants:\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
QD1QD2
Lag11.5629451.479273
Lag21.5345511.472723
\n", "
" ], "text/plain": [ " QD1 QD2\n", "Lag1 1.562945 1.479273\n", "Lag2 1.534551 1.472723" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Specify features and response\n", "features = ['Lag1', 'Lag2']\n", "response = 'Direction'\n", "\n", "# Fit on training subset\n", "X = smarket_train[features]\n", "y = smarket_train[response]\n", "qda = QuadraticDiscriminantAnalysis()\n", "qda.fit(X, y)\n", "\n", "# Priors, group means, and coefficients of linear discriminants\n", "priors = pd.DataFrame(qda.priors_, index=qda.classes_, columns=['']).T\n", "print(\"Prior probabilities of groups:\")\n", "display(priors)\n", "gmeans = pd.DataFrame(qda.means_, index=qda.classes_, columns=features)\n", "print(\"\\nGroup means:\")\n", "display(gmeans)\n", "coef = pd.DataFrame(qda.scalings_, columns=['QD1', 'QD2'], index=features)\n", "print(\"\\nCoefficients of quadratic discriminants:\")\n", "display(coef)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown3081
Up20121
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 30 81\n", " Up 20 121" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.599206349206\n" ] } ], "source": [ "# Prediction on test data subset\n", "X = smarket_test[features]\n", "y = smarket_test[response]\n", "y_pred = qda.predict(X)\n", "\n", "# Evaluate accuracy\n", "cfmat_cnames = pd.MultiIndex.from_product([['Predict'], qda.classes_])\n", "cfmat_index = pd.MultiIndex.from_product([['True'], qda.classes_])\n", "cfmat = confusion_matrix(y, y_pred, labels=qda.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, columns=cfmat_cnames, index=cfmat_index))\n", "print('\\nScore: ', qda.score(X, y))" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Lab 4.6.5 K-Nearest Neighbors" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown4368
Up5883
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 43 68\n", " Up 58 83" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.5\n" ] } ], "source": [ "# K-nearest neighbours with K=1, trained on the training subset\n", "features, response = ['Lag1', 'Lag2'], 'Direction'\n", "K = 1\n", "X, y = smarket_train[features], smarket_train[response]\n", "knn = KNeighborsClassifier(n_neighbors=K).fit(X, y)  # fit() returns the estimator\n", "\n", "# Predict on the test data subset\n", "X, y = smarket_test[features], smarket_test[response]\n", "y_pred = knn.predict(X)\n", "\n", "# Confusion matrix: rows are the true classes, columns the predicted ones\n", "cfmat_cnames, cfmat_index = (\n", "    pd.MultiIndex.from_product([[axis], knn.classes_])\n", "    for axis in ('Predict', 'True')\n", ")\n", "cfmat = confusion_matrix(y, y_pred, labels=knn.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, index=cfmat_index, columns=cfmat_cnames))\n", "print('\\nScore: ', knn.score(X, y))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
DownUp
TrueDown4863
Up5586
\n", "
" ], "text/plain": [ " Predict \n", " Down Up\n", "True Down 48 63\n", " Up 55 86" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.531746031746\n" ] } ], "source": [ "# K-nearest neighbours with K=3, trained on the training subset\n", "features, response = ['Lag1', 'Lag2'], 'Direction'\n", "K = 3\n", "X, y = smarket_train[features], smarket_train[response]\n", "knn = KNeighborsClassifier(n_neighbors=K).fit(X, y)  # fit() returns the estimator\n", "\n", "# Predict on the test data subset\n", "X, y = smarket_test[features], smarket_test[response]\n", "y_pred = knn.predict(X)\n", "\n", "# Confusion matrix: rows are the true classes, columns the predicted ones\n", "cfmat_cnames, cfmat_index = (\n", "    pd.MultiIndex.from_product([[axis], knn.classes_])\n", "    for axis in ('Predict', 'True')\n", ")\n", "cfmat = confusion_matrix(y, y_pred, labels=knn.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, index=cfmat_index, columns=cfmat_cnames))\n", "print('\\nScore: ', knn.score(X, y))" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Lab 4.6.6 An Application to Caravan Insurance Data" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# Load the Caravan dataset from the R package ISLR and convert it to pandas\n", "islr = importr('ISLR')\n", "caravan_rdf = rdata(islr).fetch('Caravan')['Caravan']\n", "caravan = pandas2ri.ri2py(caravan_rdf)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MOSTYPEMAANTHUIMGEMOMVMGEMLEEFMOSHOOFDMGODRKMGODPRMGODOVMGODGEMRELGE...APERSONGAGEZONGAWAOREGABRANDAZEILPLAPLEZIERAFIETSAINBOEDABYSTANDPurchase
133.01.03.02.08.00.05.01.03.07.0...0.00.00.01.00.00.00.00.00.0No
237.01.02.02.08.01.04.01.04.06.0...0.00.00.01.00.00.00.00.00.0No
337.01.02.02.08.00.04.02.04.03.0...0.00.00.01.00.00.00.00.00.0No
49.01.03.03.03.02.03.02.04.05.0...0.00.00.01.00.00.00.00.00.0No
540.01.04.02.010.01.04.01.04.07.0...0.00.00.01.00.00.00.00.00.0No
623.01.02.01.05.00.05.00.05.00.0...0.00.00.00.00.00.00.00.00.0No
739.02.03.02.09.02.02.00.05.07.0...0.00.00.00.00.00.00.00.00.0No
833.01.02.03.08.00.07.00.02.07.0...0.00.00.00.00.00.00.00.00.0No
933.01.02.04.08.00.01.03.06.06.0...0.00.00.00.00.00.00.00.00.0No
1011.02.03.03.03.03.05.00.02.07.0...0.00.00.01.00.00.00.00.00.0No
\n", "

10 rows × 86 columns

\n", "
" ], "text/plain": [ " MOSTYPE MAANTHUI MGEMOMV MGEMLEEF MOSHOOFD MGODRK MGODPR MGODOV \\\n", "1 33.0 1.0 3.0 2.0 8.0 0.0 5.0 1.0 \n", "2 37.0 1.0 2.0 2.0 8.0 1.0 4.0 1.0 \n", "3 37.0 1.0 2.0 2.0 8.0 0.0 4.0 2.0 \n", "4 9.0 1.0 3.0 3.0 3.0 2.0 3.0 2.0 \n", "5 40.0 1.0 4.0 2.0 10.0 1.0 4.0 1.0 \n", "6 23.0 1.0 2.0 1.0 5.0 0.0 5.0 0.0 \n", "7 39.0 2.0 3.0 2.0 9.0 2.0 2.0 0.0 \n", "8 33.0 1.0 2.0 3.0 8.0 0.0 7.0 0.0 \n", "9 33.0 1.0 2.0 4.0 8.0 0.0 1.0 3.0 \n", "10 11.0 2.0 3.0 3.0 3.0 3.0 5.0 0.0 \n", "\n", " MGODGE MRELGE ... APERSONG AGEZONG AWAOREG ABRAND AZEILPL \\\n", "1 3.0 7.0 ... 0.0 0.0 0.0 1.0 0.0 \n", "2 4.0 6.0 ... 0.0 0.0 0.0 1.0 0.0 \n", "3 4.0 3.0 ... 0.0 0.0 0.0 1.0 0.0 \n", "4 4.0 5.0 ... 0.0 0.0 0.0 1.0 0.0 \n", "5 4.0 7.0 ... 0.0 0.0 0.0 1.0 0.0 \n", "6 5.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 \n", "7 5.0 7.0 ... 0.0 0.0 0.0 0.0 0.0 \n", "8 2.0 7.0 ... 0.0 0.0 0.0 0.0 0.0 \n", "9 6.0 6.0 ... 0.0 0.0 0.0 0.0 0.0 \n", "10 2.0 7.0 ... 0.0 0.0 0.0 1.0 0.0 \n", "\n", " APLEZIER AFIETS AINBOED ABYSTAND Purchase \n", "1 0.0 0.0 0.0 0.0 No \n", "2 0.0 0.0 0.0 0.0 No \n", "3 0.0 0.0 0.0 0.0 No \n", "4 0.0 0.0 0.0 0.0 No \n", "5 0.0 0.0 0.0 0.0 No \n", "6 0.0 0.0 0.0 0.0 No \n", "7 0.0 0.0 0.0 0.0 No \n", "8 0.0 0.0 0.0 0.0 No \n", "9 0.0 0.0 0.0 0.0 No \n", "10 0.0 0.0 0.0 0.0 No \n", "\n", "[10 rows x 86 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "No 5474\n", "Yes 348\n", "Name: Purchase, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(caravan.head(10))\n", "display(caravan['Purchase'].value_counts())" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training features shape: (4822, 85)\n", "Test features shape: (1000, 85)\n" ] } ], "source": [ "# Scaling, train-test split, and 
building design matrices\n", "features = caravan.columns.drop('Purchase')\n", "response = 'Purchase'\n", "X_scaled = scale(caravan[features])\n", "X_train = X_scaled[1000:]\n", "y_train = caravan[response][1000:]\n", "X_test = X_scaled[:1000]\n", "y_test = caravan[response][:1000]\n", "print(\"Training features shape: \", X_train.shape)\n", "print(\"Test features shape: \", X_test.shape)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "======================\n", "K = 1:\n", "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
NoYes
TrueNo87368
Yes509
\n", "
" ], "text/plain": [ " Predict \n", " No Yes\n", "True No 873 68\n", " Yes 50 9" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.882\n", "\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " No 0.946 0.928 0.937 941\n", " Yes 0.117 0.153 0.132 59\n", "\n", "avg / total 0.897 0.882 0.889 1000\n", "\n", "\n", "======================\n", "K = 3:\n", "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
NoYes
TrueNo92120
Yes545
\n", "
" ], "text/plain": [ " Predict \n", " No Yes\n", "True No 921 20\n", " Yes 54 5" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.926\n", "\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " No 0.945 0.979 0.961 941\n", " Yes 0.200 0.085 0.119 59\n", "\n", "avg / total 0.901 0.926 0.912 1000\n", "\n", "\n", "======================\n", "K = 5:\n", "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
NoYes
TrueNo93011
Yes554
\n", "
" ], "text/plain": [ " Predict \n", " No Yes\n", "True No 930 11\n", " Yes 55 4" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Score: 0.934\n", "\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " No 0.944 0.988 0.966 941\n", " Yes 0.267 0.068 0.108 59\n", "\n", "avg / total 0.904 0.934 0.915 1000\n", "\n" ] } ], "source": [ "# Fit KNN on the training set for K = 1, 3, 5 and evaluate each fit on the test set\n", "for K in (1, 3, 5):\n", "    print(\"\\n======================\\nK = {}:\".format(K))\n", "    knn = KNeighborsClassifier(n_neighbors=K).fit(X_train, y_train)  # fit() returns the estimator\n", "    y_pred = knn.predict(X_test)\n", "\n", "    # Confusion matrix: rows are the true classes, columns the predicted ones\n", "    cfmat_cnames, cfmat_index = (\n", "        pd.MultiIndex.from_product([[axis], knn.classes_])\n", "        for axis in ('Predict', 'True')\n", "    )\n", "    cfmat = confusion_matrix(y_test, y_pred, labels=knn.classes_)\n", "    print(\"\\nConfusion Matrix: \")\n", "    display(pd.DataFrame(cfmat, index=cfmat_index, columns=cfmat_cnames))\n", "    print('\\nScore: ', knn.score(X_test, y_test))\n", "    print('\\nClassification Report:')\n", "    print(classification_report(y_test, y_pred, digits=3))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
NoYes
TrueNo9347
Yes590
\n", "
" ], "text/plain": [ " Predict \n", " No Yes\n", "True No 934 7\n", " Yes 59 0" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " No 0.941 0.993 0.966 941\n", " Yes 0.000 0.000 0.000 59\n", "\n", "avg / total 0.885 0.934 0.909 1000\n", "\n" ] } ], "source": [ "# Logistic regression fitted on the training set; a very large C is used\n", "# to disable regularization\n", "logreg = LogisticRegression(C=1e9).fit(X_train, y_train)  # fit() returns the estimator\n", "y_pred = logreg.predict(X_test)\n", "\n", "# Confusion matrix: rows are the true classes, columns the predicted ones\n", "cfmat_cnames, cfmat_index = (\n", "    pd.MultiIndex.from_product([[axis], logreg.classes_])\n", "    for axis in ('Predict', 'True')\n", ")\n", "cfmat = confusion_matrix(y_test, y_pred, labels=logreg.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, index=cfmat_index, columns=cfmat_cnames))\n", "print('\\nClassification Report:')\n", "print(classification_report(y_test, y_pred, digits=3))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: \n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Predict
NoYes
TrueNo91922
Yes4811
\n", "
" ], "text/plain": [ " Predict \n", " No Yes\n", "True No 919 22\n", " Yes 48 11" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Classification Report:\n", " precision recall f1-score support\n", "\n", " No 0.950 0.977 0.963 941\n", " Yes 0.333 0.186 0.239 59\n", "\n", "avg / total 0.914 0.930 0.921 1000\n", "\n" ] } ], "source": [ "# Generate predictions with a decision threshold of 0.25 on P(Purchase = 'Yes')\n", "THRESHOLD = 0.25\n", "posterior = logreg.predict_proba(X_test)\n", "# Column of predict_proba that corresponds to the 'Yes' class\n", "Yes_idx = np.where(logreg.classes_ == 'Yes')[0][0]\n", "# Vectorized labelling; avoids an in-place bool -> str Series.replace\n", "y_pred = pd.Series(np.where(posterior[:, Yes_idx] > THRESHOLD, 'Yes', 'No'))\n", "\n", "# Evaluate accuracy. The row/column labels are rebuilt here so this cell\n", "# does not depend on variables left over from an earlier cell.\n", "cfmat_cnames = pd.MultiIndex.from_product([['Predict'], logreg.classes_])\n", "cfmat_index = pd.MultiIndex.from_product([['True'], logreg.classes_])\n", "cfmat = confusion_matrix(y_test, y_pred, labels=logreg.classes_)\n", "print(\"\\nConfusion Matrix: \")\n", "display(pd.DataFrame(cfmat, columns=cfmat_cnames, index=cfmat_index))\n", "print('\\nClassification Report:')\n", "print(classification_report(y_test, y_pred, digits=3))\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" }, "name": "Chapter 4 Classification.ipynb" }, "nbformat": 4, "nbformat_minor": 2 }