{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "- Import the iris dataset from sklearn.datasets" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.datasets import load_iris\n", "from sklearn import svm\n", "iris = load_iris()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- the data array has four features per sample: sepal length, sepal width, petal length, petal width\n", "- the target has three types of iris: 'setosa', 'versicolor', 'virginica'" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 5.1 3.5 1.4 0.2]\n", " [ 4.9 3. 1.4 0.2]\n", " [ 4.7 3.2 1.3 0.2]\n", " [ 4.6 3.1 1.5 0.2]\n", " [ 5. 3.6 1.4 0.2]\n", " [ 5.4 3.9 1.7 0.4]\n", " [ 4.6 3.4 1.4 0.3]\n", " [ 5. 3.4 1.5 0.2]\n", " [ 4.4 2.9 1.4 0.2]\n", " [ 4.9 3.1 1.5 0.1]\n", " [ 5.4 3.7 1.5 0.2]\n", " [ 4.8 3.4 1.6 0.2]\n", " [ 4.8 3. 1.4 0.1]\n", " [ 4.3 3. 1.1 0.1]\n", " [ 5.8 4. 1.2 0.2]\n", " [ 5.7 4.4 1.5 0.4]\n", " [ 5.4 3.9 1.3 0.4]\n", " [ 5.1 3.5 1.4 0.3]\n", " [ 5.7 3.8 1.7 0.3]\n", " [ 5.1 3.8 1.5 0.3]\n", " [ 5.4 3.4 1.7 0.2]\n", " [ 5.1 3.7 1.5 0.4]\n", " [ 4.6 3.6 1. 0.2]\n", " [ 5.1 3.3 1.7 0.5]\n", " [ 4.8 3.4 1.9 0.2]\n", " [ 5. 3. 1.6 0.2]\n", " [ 5. 3.4 1.6 0.4]\n", " [ 5.2 3.5 1.5 0.2]\n", " [ 5.2 3.4 1.4 0.2]\n", " [ 4.7 3.2 1.6 0.2]\n", " [ 4.8 3.1 1.6 0.2]\n", " [ 5.4 3.4 1.5 0.4]\n", " [ 5.2 4.1 1.5 0.1]\n", " [ 5.5 4.2 1.4 0.2]\n", " [ 4.9 3.1 1.5 0.1]\n", " [ 5. 3.2 1.2 0.2]\n", " [ 5.5 3.5 1.3 0.2]\n", " [ 4.9 3.1 1.5 0.1]\n", " [ 4.4 3. 1.3 0.2]\n", " [ 5.1 3.4 1.5 0.2]\n", " [ 5. 3.5 1.3 0.3]\n", " [ 4.5 2.3 1.3 0.3]\n", " [ 4.4 3.2 1.3 0.2]\n", " [ 5. 3.5 1.6 0.6]\n", " [ 5.1 3.8 1.9 0.4]\n", " [ 4.8 3. 1.4 0.3]\n", " [ 5.1 3.8 1.6 0.2]\n", " [ 4.6 3.2 1.4 0.2]\n", " [ 5.3 3.7 1.5 0.2]\n", " [ 5. 3.3 1.4 0.2]\n", " [ 7. 3.2 4.7 1.4]\n", " [ 6.4 3.2 4.5 1.5]\n", " [ 6.9 3.1 4.9 1.5]\n", " [ 5.5 2.3 4. 
1.3]\n", " [ 6.5 2.8 4.6 1.5]\n", " [ 5.7 2.8 4.5 1.3]\n", " [ 6.3 3.3 4.7 1.6]\n", " [ 4.9 2.4 3.3 1. ]\n", " [ 6.6 2.9 4.6 1.3]\n", " [ 5.2 2.7 3.9 1.4]\n", " [ 5. 2. 3.5 1. ]\n", " [ 5.9 3. 4.2 1.5]\n", " [ 6. 2.2 4. 1. ]\n", " [ 6.1 2.9 4.7 1.4]\n", " [ 5.6 2.9 3.6 1.3]\n", " [ 6.7 3.1 4.4 1.4]\n", " [ 5.6 3. 4.5 1.5]\n", " [ 5.8 2.7 4.1 1. ]\n", " [ 6.2 2.2 4.5 1.5]\n", " [ 5.6 2.5 3.9 1.1]\n", " [ 5.9 3.2 4.8 1.8]\n", " [ 6.1 2.8 4. 1.3]\n", " [ 6.3 2.5 4.9 1.5]\n", " [ 6.1 2.8 4.7 1.2]\n", " [ 6.4 2.9 4.3 1.3]\n", " [ 6.6 3. 4.4 1.4]\n", " [ 6.8 2.8 4.8 1.4]\n", " [ 6.7 3. 5. 1.7]\n", " [ 6. 2.9 4.5 1.5]\n", " [ 5.7 2.6 3.5 1. ]\n", " [ 5.5 2.4 3.8 1.1]\n", " [ 5.5 2.4 3.7 1. ]\n", " [ 5.8 2.7 3.9 1.2]\n", " [ 6. 2.7 5.1 1.6]\n", " [ 5.4 3. 4.5 1.5]\n", " [ 6. 3.4 4.5 1.6]\n", " [ 6.7 3.1 4.7 1.5]\n", " [ 6.3 2.3 4.4 1.3]\n", " [ 5.6 3. 4.1 1.3]\n", " [ 5.5 2.5 4. 1.3]\n", " [ 5.5 2.6 4.4 1.2]\n", " [ 6.1 3. 4.6 1.4]\n", " [ 5.8 2.6 4. 1.2]\n", " [ 5. 2.3 3.3 1. ]\n", " [ 5.6 2.7 4.2 1.3]\n", " [ 5.7 3. 4.2 1.2]\n", " [ 5.7 2.9 4.2 1.3]\n", " [ 6.2 2.9 4.3 1.3]\n", " [ 5.1 2.5 3. 1.1]\n", " [ 5.7 2.8 4.1 1.3]\n", " [ 6.3 3.3 6. 2.5]\n", " [ 5.8 2.7 5.1 1.9]\n", " [ 7.1 3. 5.9 2.1]\n", " [ 6.3 2.9 5.6 1.8]\n", " [ 6.5 3. 5.8 2.2]\n", " [ 7.6 3. 6.6 2.1]\n", " [ 4.9 2.5 4.5 1.7]\n", " [ 7.3 2.9 6.3 1.8]\n", " [ 6.7 2.5 5.8 1.8]\n", " [ 7.2 3.6 6.1 2.5]\n", " [ 6.5 3.2 5.1 2. ]\n", " [ 6.4 2.7 5.3 1.9]\n", " [ 6.8 3. 5.5 2.1]\n", " [ 5.7 2.5 5. 2. ]\n", " [ 5.8 2.8 5.1 2.4]\n", " [ 6.4 3.2 5.3 2.3]\n", " [ 6.5 3. 5.5 1.8]\n", " [ 7.7 3.8 6.7 2.2]\n", " [ 7.7 2.6 6.9 2.3]\n", " [ 6. 2.2 5. 1.5]\n", " [ 6.9 3.2 5.7 2.3]\n", " [ 5.6 2.8 4.9 2. ]\n", " [ 7.7 2.8 6.7 2. ]\n", " [ 6.3 2.7 4.9 1.8]\n", " [ 6.7 3.3 5.7 2.1]\n", " [ 7.2 3.2 6. 1.8]\n", " [ 6.2 2.8 4.8 1.8]\n", " [ 6.1 3. 4.9 1.8]\n", " [ 6.4 2.8 5.6 2.1]\n", " [ 7.2 3. 5.8 1.6]\n", " [ 7.4 2.8 6.1 1.9]\n", " [ 7.9 3.8 6.4 2. 
]\n", " [ 6.4 2.8 5.6 2.2]\n", " [ 6.3 2.8 5.1 1.5]\n", " [ 6.1 2.6 5.6 1.4]\n", " [ 7.7 3. 6.1 2.3]\n", " [ 6.3 3.4 5.6 2.4]\n", " [ 6.4 3.1 5.5 1.8]\n", " [ 6. 3. 4.8 1.8]\n", " [ 6.9 3.1 5.4 2.1]\n", " [ 6.7 3.1 5.6 2.4]\n", " [ 6.9 3.1 5.1 2.3]\n", " [ 5.8 2.7 5.1 1.9]\n", " [ 6.8 3.2 5.9 2.3]\n", " [ 6.7 3.3 5.7 2.5]\n", " [ 6.7 3. 5.2 2.3]\n", " [ 6.3 2.5 5. 1.9]\n", " [ 6.5 3. 5.2 2. ]\n", " [ 6.2 3.4 5.4 2.3]\n", " [ 5.9 3. 5.1 1.8]]\n" ] } ], "source": [ "print(iris.data)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "- split the dataset into a training set and a test set" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# sklearn.cross_validation is deprecated (removed in 0.20); model_selection is its replacement.\n", "from sklearn.model_selection import train_test_split\n", "X = iris.data\n", "y = iris.target\n", "# Hold out 20% of the samples for testing; random_state is fixed so the split is reproducible.\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n", " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n", " multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n", " verbose=0)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = svm.LinearSVC()\n", "# Train the model on the training split only.\n", "model.fit(X_train, y_train)" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1, 0,\n", " 0, 2, 0, 0, 1, 1, 0])" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.predict(X_test)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "- we use 20% of the data set to test the accuracy of the model, and the score we got is 1, which means 100% accurate."
] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n" ] } ], "source": [ "# Accuracy of the fitted model on the held-out test split.\n", "test_accuracy = model.score(X_test, y_test)\n", "print(test_accuracy)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 }