{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 9 - Scaling ML Workflows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    }
   ],
   "source": [
    "%pylab inline "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Polynomial features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy (linear):\t0.95 (+/- 0.12)\n",
      "Accuracy (nonlinear):\t0.98 (+/- 0.09)\n"
     ]
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "\n",
    "try:\n",
    "    from sklearn.model_selection import cross_val_score\n",
    "except ImportError:\n",
    "    # scikit-learn older than 0.18 only provides the cross_validation module.\n",
    "    from sklearn.cross_validation import cross_val_score\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "\n",
    "# Baseline: logistic regression on the raw iris features, 10-fold CV.\n",
    "linear_classifier = LogisticRegression()\n",
    "linear_scores = cross_val_score(\n",
    "    linear_classifier, iris.data, iris.target, cv=10)\n",
    "# A single parenthesised argument prints identically on Python 2 and 3.\n",
    "# The reported spread is two standard deviations of the fold scores.\n",
    "print(\"Accuracy (linear):\\t%0.2f (+/- %0.2f)\"\n",
    "      % (linear_scores.mean(), linear_scores.std() * 2))\n",
    "\n",
    "# Same classifier on a degree-2 polynomial expansion of the features.\n",
    "pol = PolynomialFeatures(degree=2)\n",
    "nonlinear_data = pol.fit_transform(iris.data)\n",
    "\n",
    "nonlinear_classifier = LogisticRegression()\n",
    "nonlinear_scores = cross_val_score(\n",
    "    nonlinear_classifier, nonlinear_data, iris.target, cv=10)\n",
    "print(\"Accuracy (nonlinear):\\t%0.2f (+/- %0.2f)\"\n",
    "      % (nonlinear_scores.mean(), nonlinear_scores.std() * 2))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}