{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### 通过职业，工作事件长短，种族来预测性别" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 从疝气病症预测病马的死亡率" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 手写数字识别" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "导包，使用SVM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "datasets读取数据" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "分割训练和预测数据train_test_split(可以放多个要分割的数据)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "绘制前100个图片" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "创建SVC模型gamma =0.001 \n", "训练数据" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "预测数据，可视化" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 手迹识别" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "导包" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "from pandas import Series,DataFrame\n", "\n", "from sklearn.neighbors import KNeighborsClassifier\n", "\n", "from sklearn.svm import SVC\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "#降维处理，不仅仅可以节省时间，更重要的可以提高准确性\n", "from sklearn.decomposition import PCA" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "

	label	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	1	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	1	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	4	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

\n", "

5 rows × 785 columns

\n", "

" ], "text/plain": [ " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n", "0 1 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 \n", "2 1 0 0 0 0 0 0 0 0 \n", "3 4 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 \n", "\n", " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", "0 0 ... 0 0 0 0 0 \n", "1 0 ... 0 0 0 0 0 \n", "2 0 ... 0 0 0 0 0 \n", "3 0 ... 0 0 0 0 0 \n", "4 0 ... 0 0 0 0 0 \n", "\n", " pixel779 pixel780 pixel781 pixel782 pixel783 \n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 \n", "\n", "[5 rows x 785 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#读取数据\n", "digits = pd.read_csv('./train.csv')\n", "digits.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "(42000, 785)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digits.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJIAAACPCAYAAAARM4LLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAABypJREFUeJzt3UFoFGcYBuD3VduD1kNNiwYN1UMUpJeK1Jb2ILRCVEQv\nlngoHhQRUmig0GorHjyIUKhevAiVeChqsRWlHqSRqBRKMQWxRonaihiJDaZgixcr+XrYv2H+xc1O\ndr/dmUneB0L2m53d+ZHXf/6d7H5LM4NIvWZkPQCZGhQkcaEgiQsFSVwoSOJCQRIXCpK4UJDERV1B\nItlBcpDkHZK7vAYlxcNar2yTnAngFoA1AIYAXAGwxcxuTPAYXUYvnkdm9mq1neqZkd4EcMfM/jCz\npwBOANhYx/NJPt1Ls1M9QVoI4H6iHgrbIiR3kOwn2V/HsSTnZjX6AGZ2BMARQKe2qayeGekBgLZE\nvShsk2moniBdAdBOcgnJFwF0AjjrMywpmppPbWb2jORHAM4DmAngqJkNuI1MCqXml/81HUxrpCL6\n1cxWVttJV7bFhYIkLhQkcaEgiYuGX5CcKjZv3hzVJ06cGL89Y8aMCfc9depU4waWE5qRxIWCJC4U\nJHGhNVJK69evj+rkhdyxsbFmDyd3NCOJCwVJXOjUVkFLS0tUr1q1quK+IyMjUd3X19eQMeWZZiRx\noSCJCwVJXGiNVEFnZ2dUt7e3V9z36dOnUT06OtqQMeWZZiRxoSCJCwVJXGiNFMyaFf9TdHR0RDXJ\nio+9fPlyQ8ZUJJqRxIWCJC4UJHGhNVLQ2toa1WvXro3qiT7/d+7cuYaMqUg0I4kLBUlcKEjiQmuk\noPxva5OR/GjSdKUZSVxUDRLJoyRHSF5PbJtH8keSt8Pvlxs7TMm7NDNSD4COsm27AFwws3YAF0It\n01iq/kgkFwP4wcxeD/UggNVmNkyyFcBFM1uW4nly2x/p8ePHUT1nzpzUjy3/O90U09D+SPPNbDjc\nfghgfo3PI1NE3f+VzMwmmmlI7gCwo97jSL7VOiP9GU5pCL9HKu1oZkfMbGWa6VGKq9YZ6SyArQAO\nhN9n3EaUkblz50Z1tbXjwYMHGzmcwknz8v84gJ8BLCM5RHIbSgFaQ/I2gPdDLdNY1RnJzLZUuOs9\n57FIgenKtriY0hdAJqN8TVRtjdTM/uRFoBlJXChI4kJBEhdaI9Xo5MmTWQ8hVzQjiQsFSVzo1Faj\nhw8fZj2EXNGMJC4UJHGhIIkLrZGCidrW1Gv27NlRXd56ubu7e8LHr1ixIqoPHToU1WfOZP8uHs1I\n4kJBEhcKkrjQGimY7NtIFixYMH57aGgoum/p0qVRvX///qjetGlTVJevz6odu7+/P6q1RpIpQ0ES\nFwqSuNAaqUanT58ev71hw4bovsOHD0f1RF/RlUb5Gqynp6eu52sEzUjiQkESFwqSuNAaqUbJdsqX\nLl2K7ptMS5znGR4ejuq9e/dG9cDAQF3P3wiakcSFgiQuFCRxoTVS8OTJk6iezDpnsi1xyo2NjUX1\nzp07o7oIX1GhGUlcpOmP1Eayj+QNkgMkPw7b1SJZxqWZkZ4B+MTMlgN4C0AXyeVQi2RJSNNoaxjA\ncLj9D8mbABYC2AhgddjtGICLAD5ryCibYN++fVF94EDtTegmu0Yqf39SEdZE5Sa1Rgr9tt8A8AvU\nIlkSUr9qI/kSgO8AdJvZ38l39U3UIlntkaeHVDMSyRdQCtE3ZvZ92JyqRbLaI08PVb9CgqWp5xiA\nv8ysO7H9SwCjZnaA5C4A88zs0yrPldt+eW1tbVF99+7d1I+t9p7r8vcTbd++Pap7e3tTHysDqb5C\nIs2p7R0AHwL4jeTVsO1zlFoifxvaJd8D8EGtI5XiS/Oq7ScAlT6GqhbJAkBXtsWJ/tYWlL8HqKur\nK6r37NkT1cn3I5V/nUT5daBr165F9ejoaM3jzCvNSOJCQRIXCpK4SPVVpG4Hy/F1JKmooV9FKhJR\nkMSFgiQuFCRxoSCJCwVJXChI4kJBEhcKkrhQkMSFgiQuFCRxoSCJCwVJXChI4kJBEhcKkrhQkMRF\nsz+O9AilT+W+Em7nUV7HltW4XkuzU1Pfsz1+ULI/r00l8jq2vI7rfzq1iQsFSVxkFaQjGR03jbyO\nLa/jApDRGkmmHp3axEVTg0Syg+QgyTuhy1tmSB4lOULyemJbLnqHF7G3edOCRHImgMMA1gJYDmBL\n6NedlR4AHWXb8tI7vHi9zc2sKT8A3gZwPlHvBrC7WcevMKbFAK4n6kEAreF2K4DBLMeXGNcZAGvy\nOj4za+qpbSGA+4l6KGzLk9z1Di9Kb3Mttiuw0n/7TF/Slvc2T96Xh/ElNTNIDwAkexAvCtvyJFXv\n8Gaop7d5FpoZpCsA2kkuIfkigE4AZ5t4/DTOAtgabm9FaW3SdKG3+dcAbprZV4m7cjG+52ryonEd\ngFsAfgfwRcYL2OMofVnPvyit17YBaEHp1dBtAL0oNaHPYmzvonTaugbgavhZl5fxPe9HV7bFhRbb\n4kJBEhcKkrhQkMSFgiQuFCRxoSCJCwVJXPwHNrv0b+7ndNwAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "index = np.random.randint(0,42000,size = 1)\n", "\n", "#对DataFrame操作，获取数据是df.loc[?]\n", "#? == 数字那么返回的数据就是Series\n", "#？ == [数字]那么返回的结果是DataFrame\n", "image = digits.loc[index[0]][1:].values.reshape((28,28))\n", "\n", "plt.figure(figsize=(2,2))\n", "plt.imshow(image,cmap = 'gray')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#42000个数据\n", "digits['pixel20'].unique()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "#PCA 提高计算的精度而\n", "\n", "y = digits['label']" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "

	pixel0	pixel1	pixel2	pixel3	pixel4	pixel5	pixel6	pixel7	pixel8	pixel9	...	pixel774	pixel775	pixel776	pixel777	pixel778	pixel779	pixel780	pixel781	pixel782	pixel783
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

\n", "

5 rows × 784 columns

\n", "

" ], "text/plain": [ " pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \\\n", "0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 \n", "\n", " pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", "0 0 ... 0 0 0 0 0 \n", "1 0 ... 0 0 0 0 0 \n", "2 0 ... 0 0 0 0 0 \n", "3 0 ... 0 0 0 0 0 \n", "4 0 ... 0 0 0 0 0 \n", "\n", " pixel779 pixel780 pixel781 pixel782 pixel783 \n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 \n", "\n", "[5 rows x 784 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = digits.drop('label',axis = 1)\n", "x.head()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "X_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.1)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=1, n_neighbors=10, p=2,\n", " weights='uniform')" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn = KNeighborsClassifier(n_neighbors= 10 )\n", "knn.fit(X_train,y_train)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mknn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/base.py\u001b[0m in \u001b[0;36mscore\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 347\u001b[0m \"\"\"\n\u001b[1;32m 348\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 349\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/neighbors/classification.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csr'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m \u001b[0mneigh_dist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mneigh_ind\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkneighbors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0mclasses_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/neighbors/base.py\u001b[0m in \u001b[0;36mkneighbors\u001b[0;34m(self, X, n_neighbors, return_distance)\u001b[0m\n\u001b[1;32m 379\u001b[0m delayed(self._tree.query, check_pickle=False)(\n\u001b[1;32m 380\u001b[0m X[s], n_neighbors, return_distance)\n\u001b[0;32m--> 381\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgen_even_slices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 382\u001b[0m )\n\u001b[1;32m 383\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_distance\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0;31m# was dispatched. In particular this covers the edge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 757\u001b[0m \u001b[0;31m# case of Parallel used with an exhausted iterator.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 758\u001b[0;31m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 759\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 760\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m 606\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 607\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 608\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 609\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 569\u001b[0m \u001b[0mdispatch_timestamp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[0mcb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBatchCompletionCallBack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdispatch_timestamp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 571\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 572\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 573\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 109\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 110\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 326\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 327\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 131\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 131\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "knn.score(x_test,y_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#使用pca进行降维\n", "pca = PCA(n_components=150,whiten=True,svd_solver='randomized')" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,\n", " svd_solver='randomized', tol=0.0, whiten=True)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca.fit(x)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [ "X_train_pca = pca.transform(X_train)\n", "x_test_pca = pca.transform(x_test)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=1, n_neighbors=10, p=2,\n", " weights='uniform')" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn.fit(X_train_pca,y_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "???\n", "进行PCA降维和不进行PCA降维的准确度，是否有差距" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.86952380952380948" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn.score(x_test_pca,y_test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "svc = SVC()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", " decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',\n", " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", " tol=0.001, verbose=False)" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "svc.fit(X_train_pca,y_train)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import time" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1510196966.2637584\n", "不进行数据降维，预测的准确率：0\n", "1510200943.6910596\n" ] } ], "source": [ "print(time.time())\n", "svc_orignal = SVC()\n", "svc_orignal.fit(X_train,y_train)\n", "score = svc_orignal.score(x_test[-420:],y_test[-420:])\n", "print('不进行数据降维，预测的准确率：%d'%(score))\n", "print(time.time())" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.97619047619047616" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "svc.score(x_test_pca[-420:],y_test[-420:])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 人脸识别" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }