{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#import\n", "from sklearn import datasets" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "iris = datasets.load_iris()\n", "digits = datasets.load_digits()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'target_names': array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), 'DESCR': \"Optical Recognition of Handwritten Digits Data Set\\n===================================================\\n\\nNotes\\n-----\\nData Set Characteristics:\\n :Number of Instances: 5620\\n :Number of Attributes: 64\\n :Attribute Information: 8x8 image of integer pixels in the range 0..16.\\n :Missing Attribute Values: None\\n :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)\\n :Date: July; 1998\\n\\nThis is a copy of the test set of the UCI ML hand-written digits datasets\\nhttp://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\\n\\nThe data set contains images of hand-written digits: 10 classes where\\neach class refers to a digit.\\n\\nPreprocessing programs made available by NIST were used to extract\\nnormalized bitmaps of handwritten digits from a preprinted form. From a\\ntotal of 43 people, 30 contributed to the training set and different 13\\nto the test set. 32x32 bitmaps are divided into nonoverlapping blocks of\\n4x4 and the number of on pixels are counted in each block. This generates\\nan input matrix of 8x8 where each element is an integer in the range\\n0..16. This reduces dimensionality and gives invariance to small\\ndistortions.\\n\\nFor info on NIST preprocessing routines, see M. D. Garris, J. L. Blue, G.\\nT. Candela, D. L. Dimmick, J. Geist, P. J. Grother, S. A. Janet, and C.\\nL. 
Wilson, NIST Form-Based Handprint Recognition System, NISTIR 5469,\\n1994.\\n\\nReferences\\n----------\\n - C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their\\n Applications to Handwritten Digit Recognition, MSc Thesis, Institute of\\n Graduate Studies in Science and Engineering, Bogazici University.\\n - E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika.\\n - Ken Tang and Ponnuthurai N. Suganthan and Xi Yao and A. Kai Qin.\\n Linear dimensionalityreduction using relevance weighted LDA. School of\\n Electrical and Electronic Engineering Nanyang Technological University.\\n 2005.\\n - Claudio Gentile. A New Approximate Maximal Margin Classification\\n Algorithm. NIPS. 2000.\\n\", 'target': array([0, 1, 2, ..., 8, 9, 8]), 'data': array([[ 0., 0., 5., ..., 0., 0., 0.],\n", " [ 0., 0., 0., ..., 10., 0., 0.],\n", " [ 0., 0., 0., ..., 16., 9., 0.],\n", " ..., \n", " [ 0., 0., 1., ..., 6., 0., 0.],\n", " [ 0., 0., 2., ..., 12., 0., 0.],\n", " [ 0., 0., 10., ..., 12., 1., 0.]]), 'images': array([[[ 0., 0., 5., ..., 1., 0., 0.],\n", " [ 0., 0., 13., ..., 15., 5., 0.],\n", " [ 0., 3., 15., ..., 11., 8., 0.],\n", " ..., \n", " [ 0., 4., 11., ..., 12., 7., 0.],\n", " [ 0., 2., 14., ..., 12., 0., 0.],\n", " [ 0., 0., 6., ..., 0., 0., 0.]],\n", "\n", " [[ 0., 0., 0., ..., 5., 0., 0.],\n", " [ 0., 0., 0., ..., 9., 0., 0.],\n", " [ 0., 0., 3., ..., 6., 0., 0.],\n", " ..., \n", " [ 0., 0., 1., ..., 6., 0., 0.],\n", " [ 0., 0., 1., ..., 6., 0., 0.],\n", " [ 0., 0., 0., ..., 10., 0., 0.]],\n", "\n", " [[ 0., 0., 0., ..., 12., 0., 0.],\n", " [ 0., 0., 3., ..., 14., 0., 0.],\n", " [ 0., 0., 8., ..., 16., 0., 0.],\n", " ..., \n", " [ 0., 9., 16., ..., 0., 0., 0.],\n", " [ 0., 3., 13., ..., 11., 5., 0.],\n", " [ 0., 0., 0., ..., 16., 9., 0.]],\n", "\n", " ..., \n", " [[ 0., 0., 1., ..., 1., 0., 0.],\n", " [ 0., 0., 13., ..., 2., 1., 0.],\n", " [ 0., 0., 16., ..., 16., 5., 0.],\n", " ..., \n", " [ 0., 0., 16., ..., 15., 0., 0.],\n", " [ 0., 0., 15., ..., 
16., 0., 0.],\n", " [ 0., 0., 2., ..., 6., 0., 0.]],\n", "\n", " [[ 0., 0., 2., ..., 0., 0., 0.],\n", " [ 0., 0., 14., ..., 15., 1., 0.],\n", " [ 0., 4., 16., ..., 16., 7., 0.],\n", " ..., \n", " [ 0., 0., 0., ..., 16., 2., 0.],\n", " [ 0., 0., 4., ..., 16., 2., 0.],\n", " [ 0., 0., 5., ..., 12., 0., 0.]],\n", "\n", " [[ 0., 0., 10., ..., 1., 0., 0.],\n", " [ 0., 2., 16., ..., 1., 0., 0.],\n", " [ 0., 0., 15., ..., 15., 0., 0.],\n", " ..., \n", " [ 0., 4., 16., ..., 16., 6., 0.],\n", " [ 0., 8., 16., ..., 16., 8., 0.],\n", " [ 0., 1., 8., ..., 12., 1., 0.]]])}\n" ] } ], "source": [ "print(digits)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] } ], "source": [ "print(type(iris)); \n", "print(type(digits))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 5.1 3.5 1.4 0.2]\n", " [ 4.9 3. 1.4 0.2]\n", " [ 4.7 3.2 1.3 0.2]\n", " [ 4.6 3.1 1.5 0.2]\n", " [ 5. 
3.6 1.4 0.2]\n", " [ 5.4 3.9 1.7 0.4]]\n" ] } ], "source": [ "print(iris.data[:6]) # training data is stored in data member of iris dataset" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n", " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n", " 2 2]\n" ] } ], "source": [ "print(iris.target)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(150, 4)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.data.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(150,)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.target.shape" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n" ] } ], "source": [ "# print feature\n", "print(iris.feature_names)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['setosa' 'versicolor' 'virginica']\n" ] } ], "source": [ "print(iris.target_names)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " \n" ] } ], "source": [ "print(type(iris.data), type(iris.target))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { 
"collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(150, 4)\n", "(150,)\n" ] } ], "source": [ "# load the features and the response into X and y\n", "X = iris.data\n", "y = iris.target\n", "\n", "print(X.shape)\n", "print(y.shape)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Choosing the KNN classifier to predict the Iris data" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.neighbors import KNeighborsClassifier" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=1, n_neighbors=2, p=2,\n", " weights='uniform')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# instantiate the KNN classifier\n", "knn = KNeighborsClassifier(n_neighbors=2)\n", "\n", "# training the model\n", "knn.fit(X, y)" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([1])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# predict the class of the single sample [5, 4, 3, 2]\n", "# predict() expects a 2D array of shape (n_samples, n_features), so the sample\n", "# is wrapped in an outer list; passing a bare 1D list is deprecated since\n", "# scikit-learn 0.17 and raises ValueError from 0.19 on\n", "knn.predict([[5, 4, 3, 2]])" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([1, 1])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn.predict([[5,4,3,2], [1,2,3,5]])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Using another model - Logistic Regression" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "lrm = LogisticRegression()" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lrm.fit(X, y)" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([0, 2])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lrm.predict([[5,4,3,2], [1,2,3,5]])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Testing the model accuracy when the model is trained and tested on all the data" ] },
{ "cell_type": "code", "execution_count": 
21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn import metrics" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.95999999999999996" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test LogisticRegression\n", "\n", "# training my model\n", "lrm = LogisticRegression()\n", "lrm.fit(X, y)\n", "y_pred = lrm.predict(X)\n", "\n", "# testing accuracy\n", "metrics.accuracy_score(y, y_pred)" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1.0" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test KNN when K=1\n", "\n", "knn = KNeighborsClassifier(n_neighbors=1)\n", "knn.fit(X, y)\n", "y_pred = knn.predict(X)\n", "\n", "# testing accuracy\n", "metrics.accuracy_score(y, y_pred)" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.96666666666666667" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test KNN when K=5\n", "\n", "knn = KNeighborsClassifier(n_neighbors=5)\n", "knn.fit(X, y)\n", "y_pred = knn.predict(X)\n", "\n", "# testing accuracy\n", "metrics.accuracy_score(y, y_pred)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Testing the model accuracy with a train/test split" ] },
{ "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# splitting the data\n", "# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;\n", "# sklearn.cross_validation was deprecated then and removed in 0.20, so try the\n", "# current location first and fall back only on old installations\n", "try:\n", "    from sklearn.model_selection import train_test_split\n", "except ImportError:  # scikit-learn < 0.18\n", "    from sklearn.cross_validation import train_test_split\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.94999999999999996" ] }, 
"execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test LogisticRegression\n", "\n", "# training my model\n", "lrm = LogisticRegression()\n", "lrm.fit(X_train, y_train)\n", "y_pred = lrm.predict(X_test)\n", "\n", "# testing accuracy\n", "metrics.accuracy_score(y_test, y_pred)" ] },
{ "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.94999999999999996" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test KNN when K=1\n", "\n", "knn = KNeighborsClassifier(n_neighbors=1)\n", "knn.fit(X_train, y_train)\n", "y_pred = knn.predict(X_test)\n", "\n", "# testing accuracy\n", "metrics.accuracy_score(y_test, y_pred)" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.96666666666666667" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test KNN when K=5\n", "\n", "knn = KNeighborsClassifier(n_neighbors=5)\n", "knn.fit(X_train, y_train)\n", "y_pred = knn.predict(X_test)\n", "\n", "# testing accuracy\n", "metrics.accuracy_score(y_test, y_pred)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Comparing accuracy on the held-out test set, KNN with K = 5 performs best of the three models tried (about 0.967, versus 0.95 for both logistic regression and KNN with K = 1)." ] },
{ "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Loop over K = 1..25 and record the test-set accuracy for each value of K\n", "accuracy = []\n", "K = range(1,26)\n", "\n", "for k in K:\n", "    knn = KNeighborsClassifier(n_neighbors=k)\n", "    knn.fit(X_train, y_train)\n", "    y_pred = knn.predict(X_test)\n", "\n", "    # testing accuracy\n", "    ac = metrics.accuracy_score(y_test, y_pred)\n", "    accuracy.append(ac)\n" ] },
{ "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 30, 
"metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEACAYAAACznAEdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3WuwXHWZ7/HvLzcgAbNjzEVy2QEZI2G4yIyROY5V24IZ\nUqdKsVJzFKw64qUwM2PUmldE3mRr+YJYNdRgoTMwMlTOKRxKrXjAcgqj4lZnxhwyJyFyCYEyYece\nCCRACGCS/ZwXazXpNHunb6vXWt39+1Ttyup1/Xez6Kf//+f/dCsiMDMzm1R0A8zMrBwcEMzMDHBA\nMDOzlAOCmZkBDghmZpZyQDAzM6DBgCBphaSnJT0j6dZxtg9I2iBpm6RNkpZVbfs7SU9I+p2k+yVN\nS9evlbRX0pb0b0V2T8vMzJpVNyBImgTcBVwPXAbcJOl9NbvdBmyNiCuBm4FvpcdeCHwJuDoirgCm\nADdWHXdHRFyd/j3c9rMxM7OWNdJDWA48GxGjEXECeAC4oWafZcAjABGxA1giaU66bTIwQ9IUYDqw\nv+o4tdN4MzPLTiMBYQGwp+rx3nRdtW3ASgBJy4HFwMKI2A/8PbAb2AccjYifVx23WtJjkr4raWaL\nz8HMzDKQVVL5dmCWpC3AF4GtwClJAyS9iUHgQuB8SZ9Kj/kOcHFEXAUcBO7IqC1mZtaCKQ3ss4/k\nE3/FwnTdWyLiVeBzlceSdgI7gRXAzoh4KV2/AfhvwPci4oWqU/wz8OPxLi7JX7ZkZtaCiGhqWL6R\nHsJm4BJJg+kMoRuBh6p3kDRT0tR0+Rbg1xFxjGSo6BpJ50oScC2wPd1vftUpVgJPTNSAiPBfBGvX\nri28DWX582vh18Kvxdn/WlG3hxARpyStBjaSBJB7I2K7pFXJ5rgHuBRYL2kMeBL4fHrso5J+SDKE\ndCL995701N+UdBUwBjwHrGrpGZiZWSYaGTIikimhS2vW3V21vKl2e9W2rwFfG2f9p5tqqZmZdZQr\nlbvI0NBQ0U0oDb8Wp/m1OM2vRXvU6lhTXiRF2dtoZlY2kogOJJXNzKwPOCCYmRnggGBmZqmGZhlZ\ndzp1Cn79azh5suiWWC845xz48IdBOXwD2c6dcPHFnb+OnckBoYf99rfwV38FV19ddEusF/z2t7B1\nK/zRH3X2Ojt3wp/+Kbz0UmevY2/ngNDDdu6EFSvg/vuLbon1guuuS+6pTgeE3/8ejhyBo0dhYKCz\n17IzOYfQw557DpYsKboV1iuWLEnuqU6rXCOPa9mZHBB62HPPwUUXFd0K6xUXXeSA0OscEHqYewiW\npTx7CLNmOSAUwQGhh+3a5YBg2VmyJLmnOm3XLhgayudadiYHhB518iTs3w+LFhXdEusVeQ4ZfeQj\n7iEUwQGhR+3bB3PnJnPHzbIwf34y8+f11zt3jTfegBdfhD/7MweEIjgg9CjnDyxrkybB4sUwOtq5\na+zenfRq3/Oe5B7291rmywGhRzl/YJ3Q6TxC5b6t1B8cOdK5a9nbOSD0KPcQrBM6nUeoTJWW8stZ\n2GkOCD3KNQjWCZ2eelr9QSavaa52mgNCj3IPwTrBAaG3OSD0KOcQrBPyyiHkcS17OweEHnTiBBw4\n4BoEy15eOYQ8rmVv54DQg/buhXe/G6ZOLbol1mvmzYNXX4XXXsv+3K+/ntQ5zJ+fPPaQUf4cEHqQ\n8wfWKRIMDnbmjXp0NKlzmJS+K1Wu41qE/DQUECStkPS0pGck3TrO9gFJGyRtk7RJ0rKqbX8n6QlJ\nv5N0v6Rp6fpZkjZK2iHpp5JmZve0+pvzB9ZJnfrkXnvfDgzAlClJ5bLlo25AkDQJuAu4HrgMuEnS\n+2p2uw3YGhFXAjcD30qPvRD4EnB1RFxB8oM8N6bHrAF+HhFLgUe
Ar7b/dAw85dQ6q1Nj++Pdt84j\n5KuRHsJy4NmIGI2IE8ADwA01+ywjeVMnInYASyTNSbdNBmZImgJMB/al628A1qfL64GPt/ws7Awe\nMrJO6lQPYbz71nmEfDUSEBYAe6oe703XVdsGrASQtBxYDCyMiP3A3wO7SQLB0Yj4RXrM3Ig4BBAR\nB4G5rT4JO5MDgnVSp6aDOiAUL6vfVL4duFPSFuBxYCtwStIASU9gEHgZ+KGkT0XE98Y5x4Spo+Hh\n4beWh4aGGBoayqjZvck5BOukvHIIlWvt2JH9tXrRyMgIIyMjbZ1DUSeFL+kaYDgiVqSP1wAREevO\ncsxO4ApgBXB9RNySrv+fwAcjYrWk7cBQRBySNB/4ZURcOs65ol4b7bQ//AEuuCCZFjglq3BvVuX5\n52HZMjh8ONvzzp0Lv/vd6WmnAD/+MfzTP8FPfpLttfqBJCJCzRzTyJDRZuASSYPpDKEbgYdqLjxT\n0tR0+Rbg1xFxjGSo6BpJ50oScC2wPT3sIeAz6fLNwIPNNNzGt2cPXHihg4F1zpw5cPx4Uo+Qldde\nS843b96Z6z1klK+6ASEiTgGrgY3Ak8ADEbFd0ipJX0h3uxR4Iv3Ufz3wlfTYR4EfkgwhbQME3JMe\nsw74C0k7SALF7Zk9qz7m/IF1mpT9G/XoaFJ3oJrPs65FyFdDnyMj4mFgac26u6uWN9Vur9r2NeBr\n46x/CbiumcZafc4fWB4q00Evvzyb8010377jHXDuufDCC8mQknWWK5V7jGsQLA9Z9xDOdt+6FiE/\nDgg9xkNGlodOBISJ7lvnEfLjgNBjHBAsD1nXIjgglIMDQo9xDsHykPUwztnuW/8uQn4cEHrIm28m\nc8MX1NaRm2XMOYTe5IDQQ3bvhoULYfLkoltivW727KQI8uWX2z/Xq68mdQ1z5oy/3UNG+XFA6CHO\nH1hesqxFGB1NzlVbg1AxOJjs41qEznNA6CHOH1ieshrKqXffnn8+zJgBhw61fy07OweEHuIaBMtT\nVj2ERu5b5xHy4YDQQzxkZHnKavZPI/et8wj5cEDoIQ4IlqcsewgOCOXggNBDnEOwPOWVQwDXIuTF\nAaFHvP46HDmSfPW1WR6cQ+g9Dgg9YvduWLQIJvm/qOVk1iwYG0s+iLTq5ZeTeobZs8++n4eM8uG3\njx7h/IHlLYtahHo1CBWDg8mHnrGx1q9l9Tkg9AjnD6wI7Q7l7NrV2FTp6dNh5kw4eLD1a1l9Dgg9\nwjUIVoR2ewjN9Gw9bNR5Dgg9wkNGVoR2Z/84IJSLA0KP8JCRFaHdIaNmA4KnnnaWA0KPcA/BitDu\np/ZGcwjgqad5cEDoAcePwyuvwPz5RbfE+k0lILT6TaQeMioXB4QeMDoKixe7BsHyNzCQ3HcvvdT8\nsUePJtNIZ81qbH8HhM7zW0gPcP7AitTqUE6ld1CvBqFicBD27IFTp5q/ljWmoYAgaYWkpyU9I+nW\ncbYPSNogaZukTZKWpevfK2mrpC3pvy9L+nK6ba2kvem2LZJWZPvU+ofzB1akVj+5N5M/ADj3XHjn\nO+HAgeavZY2ZUm8HSZOAu4Brgf3AZkkPRsTTVbvdBmyNiJWSlgLfBq6LiGeA91edZy+woeq4OyLi\njmyeSv9yDYIVqdWA0MoHmcq1Fi5s/npWXyM9hOXAsxExGhEngAeAG2r2WQY8AhARO4Alkmp/IfU6\n4PcRsbdqXYOdRTsb9xCsSK1OB20nIFhnNBIQFgB7qh7vTddV2wasBJC0HFgM1MbwTwL/WrNutaTH\nJH1X0syGW21ncA7BitRuDqEZrkXorLpDRg26HbhT0hbgcWAr8FbqR9JU4GPAmqpjvgN8PSJC0jeA\nO4DPj3fy4eHht5aHhoYYGhrKqNm9wT0EK1JeOQRI9t+0qflr9YORkRFGRkbaOoeizgRiSdcAwxGx\nIn28BoiIWHeWY3YBl0fEsfT
xx4C/rZxjnP0HgR9HxBXjbIt6bexnx47B3Lnw2muNz9Ywy9LLLye/\nw3HsWOP3YETyZXW7dydTVxv1s5/B7bfDL37RWlv7iSQioql3hUaGjDYDl0galDQNuBF4qObCM9Ne\nAJJuAX5VCQapm6gZLpJUXUa1EniimYZbYnQ0mY7nYGBFmTkTzjkHDh9u/JgjR5L6hWaCATiH0Gl1\nh4wi4pSk1cBGkgByb0Rsl7Qq2Rz3AJcC6yWNAU9SNfQjaTpJQvkLNaf+pqSrgDHgOWBVBs+n7zh/\nYGVQySPMqZ1KMoFWhzkXL4a9e5NahMmTmz/ezq6hHEJEPAwsrVl3d9XyptrtVduOA2+7TSLi0021\n1Mbl/IGVQeWT+wc+0Nj+reQPIOmJzJkD+/YlwcGy5UrlLucaBCuDZmf/tPNBxsNGneOA0OXcQ7Ay\naPZN2gGhnBwQupxzCFYGzdYitHPfuhahcxwQupx7CFYGrfQQWh3q9O8idI4DQhd75RV4443GZ3aY\ndUozv4sQkew7ONjetSx7DghdbHS0ua8PNuuU88+HGTPg+efr7/viizBtWlK/0AoHhM5xQOhizh9Y\nmTT6Rt3ufbtoEezfDydPtn4OG58DQhdz/sDKpNGA0O5U6WnTYN68pEDNsuWA0MVcg2Bl0ujsnyw+\nyHjYqDMcELqYewhWJo3O/nFAKC8HhC7mHIKVSV45hMq1XIuQPQeELuYegpVJXjkEcC1CpzggdKmj\nR5NZFrNnF90Ss8TgYDIVemxs4n3arUGo8JBRZzggdCnXIFjZzJgB73gHHDo08T4vvADTp8MFF7R3\nLQeEznBA6FLOH1gZ1Xujzuq+XbgQDh6EEyfaP5ed5oDQpZw/sDKql+zNaqr01Knw7nfDnj3tn8tO\nc0DoUq5BsDKq10PI8oOMh42y54DQpdxDsDKqN/vHAaHcHBC6lHMIVkZ55RAq13ItQrYcELpQZeqe\nA4KVTV45BHAtQic4IHSho0eTf2fNKrYdZrUGB5NE73i1CBHJdOl2axAqPGSUPQeELlTpHbgGwcrm\nvPOSDyoHDrx926FDSf3BjBnZXMsBIXsOCF3I+QMrs4neqLO+bxcsSH6Q5803sztnv2soIEhaIelp\nSc9IunWc7QOSNkjaJmmTpGXp+vdK2ippS/rvy5K+nG6bJWmjpB2Sfiqpxd9P6j+ecmplNlEeIev7\ndsqUJCi4FiE7dQOCpEnAXcD1wGXATZLeV7PbbcDWiLgSuBn4FkBEPBMR74+Iq4E/AV4DNqTHrAF+\nHhFLgUeAr2bwfPqCE8pWZhMleztx33rYKFuN9BCWA89GxGhEnAAeAG6o2WcZyZs6EbEDWCKp9qff\nrwN+HxGV3zm6AVifLq8HPt5C+/uSA4KV2URv0g4I5ddIQFgAVHfK9qbrqm0DVgJIWg4sBhbW7PNJ\n4F+rHs+NiEMAEXEQmNt4s/ubcwhWZnnlECrXci1CdqZkdJ7bgTslbQEeB7YCpyobJU0FPkYyTDSR\nmGjD8PDwW8tDQ0MMDQ2119ou5hoEK7u8cgiQnO/hh7M9Z7caGRlhZGSkrXMoYsL34WQH6RpgOCJW\npI/XABER685yzC7g8og4lj7+GPC3lXOk67YDQxFxSNJ84JcRcek454p6bewnL74Il1wCR44U3RKz\n8b35ZvI12MePw+TJybqxseRrr48cSaamZuU3v4E1a+A//iO7c/YKSUREU5PTGxky2gxcImlQ0jTg\nRuChmgvPTHsBSLoF+FUlGKRu4szhItJzfCZdvhl4sJmG9yv3DqzszjkH3vUu2L//9LqDB2FgINtg\nAM4hZK1uQIiIU8BqYCPwJPBARGyXtErSF9LdLgWeSD/1Xw98pXK8pOkkCeUNZ56ZdcBfSNoBXEsy\n7GR1OH9g3aD2jbpT9+2FF8Lhw/DGG9mfux81lEOIiIeBpTXr7q5a3lS7vWrbcaB2xhER8RJJo
LAm\nuAbBukElj/DhDyePO3XfTp4MixbB7t3w3vdmf/5+40rlLuMhI+sGtbUInbxvPWyUHQeELuOAYN2g\n9k3aAaE7OCB0GecQrBvUTj3t5H3rWoTsOCB0EdcgWLcYr4fQqdyXfxchO1kVplkODh+Gc89N5nib\nldnixcm005Mnk69p37MnWdcJHjLKjgNCF3HvwLrFtGkwdy7s25fMBJo9O/kw0wkOCNlxQOgizh9Y\nN6mM7U+e3Nn79t3vTiqgX389+8K3fuMcQhdxDYJ1k8on907ft5MmJcNRo6Odu0a/cEDoIh4ysm5S\nSfbmcd962CgbDghdxAHBukl1D8EBoTs4IHQR5xCsm1RyCHnct65FyIYDQpeISMZIHRCsW1QPGXU6\n9+VahGx4llGXeP55mDEDzj+/6JaYNWbhQjhwIKlDWLSos9fykFE2HBC6hPMH1m2mTk2mhJ46lfxG\nQic5IGTDAaFLOH9g3WjJkiQgdNq8efDKK/Daa0lP2lrjHEKXcA2CdaOLLsrnvp00CQYHXYvQLvcQ\n2rBhA3ziE/lca2wM7rsvn2uZZeXKK/PpIQD88R/D5ZcnOYuyuusu+Ou/LroVE1PZf8BeUpS1jWvX\nJm/Ua9fmc70pDt9mE4rIL/i04h//EZ56Kvk3D5KIiKbCo99i2rBrF3zkI36jNisDqdz/L158Mfzk\nJ0W34uycQ2iDx/XNrFHdUCvhgNAGTwU1s0ZVkt4lHQEHHBBa9oc/wKFDSfGNmVk9M2bABRck7xtl\n5YDQoj17kqKbMo9Zmlm5lP07lxoKCJJWSHpa0jOSbh1n+4CkDZK2SdokaVnVtpmSfiBpu6QnJX0w\nXb9W0l5JW9K/Fdk9rc5z/sDMmlX2PELdz7eSJgF3AdcC+4HNkh6MiKerdrsN2BoRKyUtBb4NXJdu\nuxP4t4j4H5KmANOrjrsjIu7I4onkzfkDM2tW2b9io5EewnLg2YgYjYgTwAPADTX7LAMeAYiIHcAS\nSXMkvQP4cETcl247GRGvVB1X4hKSs3NAMLNm9cKQ0QJgT9Xjvem6atuAlQCSlgOLgYXARcBhSfel\nw0L3SKr+1dPVkh6T9F1JM1t+FgXwdwuZWbPK3kPIKiV6O3CnpC3A48BW4BQwFbga+GJE/JekfwDW\nAGuB7wBfj4iQ9A3gDuDz4518eHj4reWhoSGGhoYyanbrnEMws2Z1MocwMjLCyMhIW+eo+9UVkq4B\nhiNiRfp4DRARse4sx+wCLgdmAL+NiIvT9X8O3BoRH63ZfxD4cURcMc65SvnVFQsXwn/+Z/Lj3mZm\njXj9dZg1C44fT76Qr5Na+eqKRpq0GbhE0qCkacCNwEM1F54paWq6fAvwq4g4FhGHgD2S3pvuei3w\nVLrf/KpTrASeaKbhRXrzTXjhBbjwwqJbYmbd5LzzYGAg+eGgMqo7ZBQRpyStBjaSBJB7I2K7pFXJ\n5rgHuBRYL2kMeJIzh36+DNyfBoydwGfT9d+UdBUwBjwHrMroOXXc7t2wYIFrEMyseZU8woLaTGwJ\nNPSWFhEPA0tr1t1dtbypdnvVtm3AB8ZZ/+mmWloizh+YWasqeYQPfajolrydK5Vb4CmnZtaqMs80\nckBogQOCmbWqzLUIDggtcA2CmbXKPYQe4xyCmbWqzN9n5IDQAg8ZmVmrFi9Ovi25jD/36YDQpDfe\ngBdfTL762sysWeeeC7Nnw/79Rbfk7RwQmjQ6CosWweTJRbfEzLpVWYeNHBCa5PyBmbWrrIllB4Qm\nOX9gZu1yQOgRDghm1q6y1iI4IDTJNQhm1i7nEHqEcwhm1i4PGfUIDxmZWbsWLYJ9++DkyaJbciYH\nhCYcPw5Hj8L8+fX3NTObyDnnwJw5SVAoEweEJoyOJlWGnf6lIzPrfWXMI/itrQnOH5hZVsqYR3BA\naILzB2aWlTJOPXVAaIKnnJpZVtxD6HLuIZhZVpxD6HLOI
ZhZVtxD6HLuIZhZVhYuTL4C+8SJolty\nmgNCg44dg1dfhXnzim6JmfWCadOSmqa9e4tuyWkOCA0aHYXBQZCKbomZ9Yqy5REaCgiSVkh6WtIz\nkm4dZ/uApA2StknaJGlZ1baZkn4gabukJyV9MF0/S9JGSTsk/VTSzOyeVvacPzCzrJUtj1A3IEia\nBNwFXA9cBtwk6X01u90GbI2IK4GbgW9VbbsT+LeIuBS4Etierl8D/DwilgKPAF9t54l0mvMHZpa1\nstUiNNJDWA48GxGjEXECeAC4oWafZSRv6kTEDmCJpDmS3gF8OCLuS7edjIhX0mNuANany+uBj7f3\nVDrLNQhmlrVuHDJaAOyperw3XVdtG7ASQNJyYDGwELgIOCzpPklbJN0j6bz0mLkRcQggIg4Cc1t/\nGp3nHoKZZa1sQ0ZTMjrP7cCdkrYAjwNbgVPAVOBq4IsR8V+S/oFkqGgtUJuejYlOPjw8/Nby0NAQ\nQ0NDGTW7cc4hmFnWsgwIIyMjjIyMtHUORUz4PpzsIF0DDEfEivTxGiAiYt1ZjtkFXA7MAH4bERen\n6/8cuDUiPippOzAUEYckzQd+meYZas8V9dqYh3e9C556CuaWuh9jZt3k5EmYMSOZ0j5tWrbnlkRE\nNDUvspEho83AJZIGJU0DbgQeqrnwTElT0+VbgF9FxLF0SGiPpPemu14LPJUuPwR8Jl2+GXiwmYbn\n6dVXk99CmDOn6JaYWS+ZMgUuvBD27Km/bx7qDhlFxClJq4GNJAHk3ojYLmlVsjnuAS4F1ksaA54E\nPl91ii8D96cBYyfw2XT9OuD7kj4HjAKfyOpJZa2SP3ANgpllrTJs9J73FN2SBnMIEfEwsLRm3d1V\ny5tqt1dt2wZ8YJz1LwHXNdPYojh/YGadUqapp65UboBnGJlZp5RpppEDQgNcg2BmnVKmWgQHhAa4\nh2BmneIeQpdxDsHMOsU5hC7jHoKZdcqCBXD4MLz5ZtEtcUCo6+WX4Q9/gNmzi26JmfWiyZOTH8vZ\nvbvoljgg1OUaBDPrtLLkERwQ6nD+wMw6rSx5BAeEOpw/MLNOcw+hS7gGwcw6rSy1CA4IdbiHYGad\n5h5Cl3AOwcw6rSw5hLq/h1C0on8PYWAAdu6Ed76zsCaYWY8bG4Pp0+HIETjvvPr7N6JTv4fQt44c\nSf5DzZpVdEvMrJdNmgSLFhVfi+CAcBauQTCzvJQhj+CAcBbOH5hZXsqQR3BAOAvPMDKzvJRh6qkD\nwlm4BsHM8uIho5JzD8HM8uIho5JzDsHM8lKGHoLrECYQATNnJtPABgZyv7yZ9ZmxMZgxA158MalJ\naJfrEDL00kvJ3GAHAzPLw6RJsHgxjI4W2IbiLl1uHi4ys7wVnUdoKCBIWiHpaUnPSLp1nO0DkjZI\n2iZpk6RlVdueS9dvlfRo1fq1kvZK2pL+rcjmKWXDCWUzy1vReYQp9XaQNAm4C7gW2A9slvRgRDxd\ntdttwNaIWClpKfBt4Lp02xgwFBFHxjn9HRFxR1vPoEMcEMwsb0XXIjTSQ1gOPBsRoxFxAngAuKFm\nn2XAIwARsQNYImlOuk1nuU5pvxTCNQhmlreiewiNBIQFwJ6qx3vTddW2ASsBJC0HFgML020B/EzS\nZkm31By3WtJjkr4raWbTre8g5xDMLG9F5xDqDhk16HbgTklbgMeBrcCpdNuHIuJA2mP4maTtEfHv\nwHeAr0dESPoGcAfw+fFOPjw8/Nby0NAQQ0NDGTV7Yh4yMrO8tdNDGBkZYWRkpK3r161DkHQNMBwR\nK9LHa4CIiHVnOWYXcHlEHKtZvxZ4tTZvIGkQ+HFEXDHOuXKvQ4iACy6AffuSWgQzszxEJLUIzz8P\n55/f3rk6VYewGbhE0qCkacCNwEM1F54paWq6fAvwq4g4Jmm6pPPT9TOAvwSeSB/PrzrFysr6Mjh8\nGKZNczAws3xJMDhYX
C1C3SGjiDglaTWwkSSA3BsR2yWtSjbHPcClwHpJY8CTnB76mQf8SFKk17o/\nIjam274p6SqSWUjPAasyfF5tcf7AzIpSySNcdln+124ohxARDwNLa9bdXbW8qXZ7un4XcNUE5/x0\nUy3NkfMHZlaUIqeeulJ5HA4IZlaUIqeeOiCMwzUIZlYUB4SScQ7BzIpSZC2CA8I4PGRkZkVxDqFE\nIpL/GIODRbfEzPrRu94Fb7wBr7yS/7UdEGo8/3zy4xQXXFB0S8ysH0nF5REcEGo4f2BmRXNAKAnn\nD8ysaEXlERwQajggmFnR3EMoCdcgmFnRipp66oBQwzkEMyuaewgl4SEjMyuacwglEJF87axrEMys\nSO98J5w8CUeP5ntdB4QqBw8m9QczZhTdEjPrZ0XVIjggVHH+wMzKwgGhYM4fmFlZFJFHcECo4imn\nZlYW7iEUzD0EMyuLImoRHBCqOIdgZmXhIaOCuYdgZmVRGTKKyO+aDgipsTHYvds1CGZWDgMDyb95\n1iI4IKQOHEj+A5x3XtEtMTM7XYuQZx6hoYAgaYWkpyU9I+nWcbYPSNogaZukTZKWVW17Ll2/VdKj\nVetnSdooaYekn0qamc1Tao3zB2ZWNnnnEeoGBEmTgLuA64HLgJskva9mt9uArRFxJXAz8K2qbWPA\nUES8PyKWV61fA/w8IpYCjwBfbf1ptK8b8gcjIyNFN6E0/Fqc5tfitF57LfKeetpID2E58GxEjEbE\nCeAB4IaafZaRvKkTETuAJZLmpNs0wXVuANany+uBjzfZ9kx1Qw1Cr93s7fBrcZpfi9N67bUo45DR\nAmBP1eO96bpq24CVAJKWA4uBhem2AH4mabOkW6qOmRsRhwAi4iAwt/nmZ6cbeghm1l/y7iFMyeg8\ntwN3StoCPA5sBU6l2z4UEQfSHsPPJG2PiH8f5xyZTK7auxf+5m+aP+7RR+ETn8iiBWZm2bj4YvjN\nb+CjH83neoo6k1wlXQMMR8SK9PEaICJi3VmO2QVcHhHHatavBV6NiDskbSfJLRySNB/4ZURcOs65\ncpyFa2bWOyJCzezfSA9hM3CJpEHgAHAjcFP1DukMoeMRcSIdFvpVRByTNB2YlC7PAP4S+Fp62EPA\nZ4B1JInoB7N4QmZm1pq6ASEiTklaDWwkyTncGxHbJa1KNsc9wKXAekljwJPA59PD5wE/Sj/lTwHu\nj4iN6bZ1wPclfQ4YBTxgY2ZWoLpDRmZm1h9KW6lcrxiu30xU4NcPJN0r6ZCk31WtK1VhY14meC3W\nStoraUv6t6LINuZB0kJJj0h6UtLjkr6cru+7+2Kc1+JL6fqm74tS9hDSYrhngGuB/SR5jBsj4ulC\nG1YgSTuxv2KvAAACGElEQVSBP4mII0W3JW+S/hw4BvyviLgiXbcOeDEivpl+YJgVEWuKbGceJngt\n3pqsUWjjcpRORJkfEY9JOh/4fyS1TZ+lz+6Ls7wWn6TJ+6KsPYRGiuH6zUQFfj0vnaZcGwhLVdiY\nlwleC0juj74REQcj4rF0+RiwnaT2qe/uiwlei0qtWFP3RVnfYBophus3ExX49atSFTaWwGpJj0n6\nbj8Mk1STtAS4CtgEzOvn+6Lqtfi/6aqm7ouyBgR7uw9FxNXAfwe+mA4d2GnlG/vMz3eAiyPiKuAg\n0E9DR+cDPwS+kn46rr0P+ua+GOe1aPq+KGtA2Efy9RcVC9N1fSsiDqT/vgD8iGRYrZ8dkjQP3hpD\nfb7g9hQmIl6I08nAfwY+UGR78iJpCskb4P+OiEodU1/eF+O9Fq3cF2UNCG8Vw0maRlIM91DBbSqM\npOlp9KeqwO+JYluVO3HmeGilsBHOUtjYo854LdI3voqV9M+98S/AUxFxZ9W6fr0v3vZatHJflHKW\nESTTToE7OV0Md3vBTSqMpItIegXVBX5983pI+h4wBMwGDgFrgf8D/ABYRFrYGBE5/rZ
UMSZ4LT5C\nMm48BjwHrKqMo/cqSR8Cfk3y3WmR/t0GPAp8nz66L87yWnyKJu+L0gYEMzPLV1mHjMzMLGcOCGZm\nBjggmJlZygHBzMwABwQzM0s5IJiZGeCAYGZmKQcEMzMD4P8DP8BoF3EqV84AAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# now plotting it\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "plt.plot(K, accuracy)" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.97333333333333338" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# from the plot above, the model performs best when K is between 6 and 16,\n", "# so refit KNN with K = 6, this time on the full dataset\n", "\n", "knn = KNeighborsClassifier(n_neighbors=6)\n", "knn.fit(X, y)\n", "y_pred = knn.predict(X)\n", "\n", "# NOTE: the model is scored on the same data it was fitted on, so this is a\n", "# training accuracy, not a held-out estimate\n", "metrics.accuracy_score(y, y_pred)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ],
"metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": 
"text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }