{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "This notebook is an example of using a scikit-learn `Pipeline`: feature scaling and a linear SVM classifier are chained into a single estimator and evaluated on the Olivetti faces dataset.\n",
      "\n",
      "### Import the modules"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "from sklearn.datasets import fetch_olivetti_faces\n",
      "from sklearn.svm import SVC\n",
      "try:\n",
      "    # scikit-learn >= 0.18 provides the splitting helpers in model_selection.\n",
      "    from sklearn.model_selection import train_test_split\n",
      "except ImportError:\n",
      "    # Fall back to the legacy module so older installations keep working.\n",
      "    from sklearn.cross_validation import train_test_split\n",
      "from sklearn.preprocessing import StandardScaler\n",
      "from sklearn.pipeline import Pipeline\n",
      "from sklearn.metrics import accuracy_score"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Load the data"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Feature matrix and class labels come straight from the dataset bundle.\n",
      "dataset = fetch_olivetti_faces()\n",
      "X = dataset.data\n",
      "y = dataset.target"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Split the dataset for training and testing"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Hold out 10% of the samples for testing; the fixed seed keeps the split reproducible.\n",
      "X_train, X_test, y_train, y_test = train_test_split(\n",
      "    X, y, test_size=0.1, random_state=42)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Prepare the classifier"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Steps run in order: standardize the features, then fit a linear-kernel SVM.\n",
      "clf = Pipeline([\n",
      "    ('scaler', StandardScaler()),\n",
      "    ('linear_model', SVC(kernel='linear')),\n",
      "])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Fit the classifier"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf.fit(X_train, y_train)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear_model', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,\n",
        " kernel='linear', max_iter=-1, probability=False, random_state=None,\n",
        " shrinking=True, tol=0.001, verbose=False))])"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Perform the prediction"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "y_pred = clf.predict(X_test)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Print the accuracy score"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Single-argument call form prints the same text on Python 2 and Python 3.\n",
      "print('Accuracy score is {}'.format(accuracy_score(y_test, y_pred)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Accuracy score is 1.0\n"
       ]
      }
     ],
     "prompt_number": 7
    }
   ],
   "metadata": {}
  }
 ]
}