{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cross-validation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![cross-validation](images/cross_validation.svg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib notebook\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rcParams[\"figure.dpi\"] = 200\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "from sklearn.model_selection import train_test_split\n",
    "digits = load_digits()\n",
    "# Seed the split so that every score below is reproducible on a fresh run.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    digits.data, digits.target, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.neighbors import KNeighborsClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5-fold cross-validation of a k-nearest-neighbors classifier on the training split.\n",
    "cross_val_score(KNeighborsClassifier(),\n",
    "                X_train, y_train, cv=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold, StratifiedKFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pass an explicit splitter instead of an integer: 10 shuffled folds with a fixed seed.\n",
    "cross_val_score(KNeighborsClassifier(),\n",
    "                X_train, y_train,\n",
    "                cv=KFold(n_splits=10, shuffle=True, random_state=42))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Grid Searches"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![grid-search](images/grid_search_cross_validation.svg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Grid search with built-in cross-validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.svm import SVC"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define parameter grid:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Powers of ten: 6 candidate values for C and 5 for gamma.\n",
    "param_grid = {'C': 10. ** np.arange(-3, 3),\n",
    "              'gamma': 10. ** np.arange(-5, 0)}\n",
    "\n",
    "np.set_printoptions(suppress=True)\n",
    "print(param_grid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search = GridSearchCV(SVC(), param_grid, verbose=3, cv=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A GridSearchCV object behaves just like a normal classifier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "grid_search.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "grid_search.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We extract just the scores\n",
    "c_values = param_grid['C']\n",
    "gamma_values = param_grid['gamma']\n",
    "\n",
    "scores = grid_search.cv_results_['mean_test_score']\n",
    "# The grid is enumerated with keys sorted ('C' before 'gamma') and the last key\n",
    "# varying fastest, so the flat score vector reshapes to rows = C, columns = gamma.\n",
    "# Deriving the shape from param_grid keeps the plot correct if the grid is edited.\n",
    "scores = np.array(scores).reshape(len(c_values), len(gamma_values))\n",
    "\n",
    "plt.matshow(scores)\n",
    "plt.xlabel('gamma')\n",
    "plt.ylabel('C')\n",
    "plt.colorbar()\n",
    "plt.xticks(np.arange(len(gamma_values)), gamma_values)\n",
    "plt.yticks(np.arange(len(c_values)), c_values);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercises\n",
    "Use `GridSearchCV` to tune `n_neighbors` of `KNeighborsClassifier`."
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}