{
 "metadata": {
  "name": "01B_sklearn_overview"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "An Overview of Scikit-learn"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "*Adapted from* [*http://scikit-learn.org/stable/tutorial/basic/tutorial.html*](http://scikit-learn.org/stable/tutorial/basic/tutorial.html)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%pylab inline"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Loading an Example Dataset"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn import datasets\n",
      "digits = datasets.load_digits()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "digits.data"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "digits.target"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "digits.images[0]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Learning and Predicting"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn import svm\n",
      "clf = svm.SVC(gamma=0.001, C=100.)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Train on every sample except the last one, which is held out for the prediction below.\n",
      "clf.fit(digits.data[:-1], digits.target[:-1])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# predict() expects a 2-D (n_samples, n_features) array. The slice [-1:] keeps the\n",
      "# held-out sample 2-D, whereas indexing with [-1] would pass a 1-D vector.\n",
      "clf.predict(digits.data[-1:])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Show the held-out image so the prediction can be checked by eye.\n",
      "plt.figure(figsize=(2, 2))\n",
      "plt.imshow(digits.images[-1], interpolation='nearest', cmap=plt.cm.binary)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# True label of the held-out sample, for comparison with the prediction above.\n",
      "# The call form of print works on both Python 2 and Python 3.\n",
      "print(digits.target[-1])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Model Persistence"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn import svm\n",
      "from sklearn import datasets\n",
      "clf = svm.SVC()\n",
      "iris = datasets.load_iris()\n",
      "X, y = iris.data, iris.target\n",
      "clf.fit(X, y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pickle\n",
      "# Round-trip the fitted classifier through an in-memory pickle.\n",
      "# NOTE: only unpickle data you trust; pickle.loads can execute arbitrary code.\n",
      "s = pickle.dumps(clf)\n",
      "clf2 = pickle.loads(s)\n",
      "# X[0:1] keeps the first sample 2-D, as predict() requires.\n",
      "clf2.predict(X[0:1])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "y[0]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# sklearn.externals.joblib was removed in scikit-learn 0.23, so prefer the\n",
      "# standalone joblib package and fall back to the bundled copy on old releases.\n",
      "try:\n",
      "    import joblib\n",
      "except ImportError:\n",
      "    from sklearn.externals import joblib\n",
      "joblib.dump(clf, 'filename.pkl')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}