{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Two-class AdaBoost\n\nThis example fits an AdaBoosted decision stump on a non-linearly separable\nclassification dataset composed of two \"Gaussian quantiles\" clusters\n(see :func:`sklearn.datasets.make_gaussian_quantiles`) and plots the decision\nboundary and decision scores. The distributions of decision scores are shown\nseparately for samples of class A and B. The predicted class label for each\nsample is determined by the sign of the decision score. Samples with decision\nscores greater than zero are classified as B, and are otherwise classified\nas A. The magnitude of a decision score determines the degree of likeness with\nthe predicted class label. Additionally, a new dataset could be constructed\ncontaining a desired purity of class B, for example, by only selecting samples\nwith a decision score above some value.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import make_gaussian_quantiles\nfrom sklearn.ensemble import AdaBoostClassifier\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.tree import DecisionTreeClassifier\n\n# Construct dataset\nX1, y1 = make_gaussian_quantiles(\n cov=2.0, n_samples=200, n_features=2, n_classes=2, random_state=1\n)\nX2, y2 = make_gaussian_quantiles(\n mean=(3, 3), cov=1.5, n_samples=300, n_features=2, n_classes=2, random_state=1\n)\nX = np.concatenate((X1, X2))\ny = np.concatenate((y1, -y2 + 1))\n\n# Create and fit an AdaBoosted decision tree\nbdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200)\nbdt.fit(X, y)\n\nplot_colors = \"br\"\nplot_step = 0.02\nclass_names = \"AB\"\n\nplt.figure(figsize=(10, 5))\n\n# Plot the decision boundaries\nax = plt.subplot(121)\ndisp = DecisionBoundaryDisplay.from_estimator(\n bdt,\n X,\n cmap=plt.cm.Paired,\n response_method=\"predict\",\n ax=ax,\n xlabel=\"x\",\n ylabel=\"y\",\n)\nx_min, x_max = disp.xx0.min(), disp.xx0.max()\ny_min, y_max = disp.xx1.min(), disp.xx1.max()\nplt.axis(\"tight\")\n\n# Plot the training points\nfor i, n, c in zip(range(2), class_names, plot_colors):\n idx = np.where(y == i)\n plt.scatter(\n X[idx, 0],\n X[idx, 1],\n c=c,\n s=20,\n edgecolor=\"k\",\n label=\"Class %s\" % n,\n )\nplt.xlim(x_min, x_max)\nplt.ylim(y_min, y_max)\nplt.legend(loc=\"upper right\")\n\nplt.title(\"Decision Boundary\")\n\n# Plot the two-class decision scores\ntwoclass_output = bdt.decision_function(X)\nplot_range = (twoclass_output.min(), twoclass_output.max())\nplt.subplot(122)\nfor i, n, c in zip(range(2), class_names, plot_colors):\n plt.hist(\n twoclass_output[y == i],\n bins=10,\n range=plot_range,\n facecolor=c,\n label=\"Class %s\" % n,\n alpha=0.5,\n edgecolor=\"k\",\n )\nx1, x2, y1, y2 = plt.axis()\nplt.axis((x1, x2, y1, y2 * 1.2))\nplt.legend(loc=\"upper right\")\nplt.ylabel(\"Samples\")\nplt.xlabel(\"Score\")\nplt.title(\"Decision Scores\")\n\nplt.tight_layout()\nplt.subplots_adjust(wspace=0.35)\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.21" } }, "nbformat": 4, "nbformat_minor": 0 }