{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "This is an example of using pipelining in sklearn.\n",
      "\n",
      "### Import the modules"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "from sklearn.datasets import fetch_olivetti_faces\n",
      "from sklearn.svm import SVC\n",
      "from sklearn.cross_validation import train_test_split\n",
      "from sklearn.preprocessing import StandardScaler\n",
      "from sklearn.pipeline import Pipeline\n",
      "from sklearn.metrics import accuracy_score"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Load the data"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dataset = fetch_olivetti_faces()\n",
      "X = dataset.data\n",
      "y = dataset.target"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Split the dataset for training and testing "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=42)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Prepare the classifier"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf = Pipeline([('scaler', StandardScaler()), ('linear_model', SVC(kernel='linear'))])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Fit the classifier"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "clf.fit(X_train, y_train)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "Pipeline(steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear_model', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,\n",
        "  kernel='linear', max_iter=-1, probability=False, random_state=None,\n",
        "  shrinking=True, tol=0.001, verbose=False))])"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Perform the prediction"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "y_pred = clf.predict(X_test)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Print the accuracy score"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print \"Accuracy score is {}\".format(accuracy_score(y_test, y_pred))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Accuracy score is 1.0\n"
       ]
      }
     ],
     "prompt_number": 7
    }
   ],
   "metadata": {}
  }
 ]
}