{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Decision boundary of semi-supervised classifiers versus SVM on the Iris dataset\n\nA comparison for the decision boundaries generated on the iris dataset\nby Label Spreading, Self-training and SVM.\n\nThis example demonstrates that Label Spreading and Self-training can learn\ngood boundaries even when small amounts of labeled data are available.\n\nNote that Self-training with 100% of the data is omitted as it is functionally\nidentical to training the SVC on 100% of the data.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn import datasets\nfrom sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier\nfrom sklearn.svm import SVC\n\niris = datasets.load_iris()\n\nX = iris.data[:, :2]\ny = iris.target\n\n# step size in the mesh\nh = 0.02\n\nrng = np.random.RandomState(0)\ny_rand = rng.rand(y.shape[0])\ny_30 = np.copy(y)\ny_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\ny_50 = np.copy(y)\ny_50[y_rand < 0.5] = -1\n# we create an instance of SVM and fit out data. We do not scale our\n# data since we want to plot the support vectors\nls30 = (LabelSpreading().fit(X, y_30), y_30, \"Label Spreading 30% data\")\nls50 = (LabelSpreading().fit(X, y_50), y_50, \"Label Spreading 50% data\")\nls100 = (LabelSpreading().fit(X, y), y, \"Label Spreading 100% data\")\n\n# the base classifier for self-training is identical to the SVC\nbase_classifier = SVC(kernel=\"rbf\", gamma=0.5, probability=True)\nst30 = (\n SelfTrainingClassifier(base_classifier).fit(X, y_30),\n y_30,\n \"Self-training 30% data\",\n)\nst50 = (\n SelfTrainingClassifier(base_classifier).fit(X, y_50),\n y_50,\n \"Self-training 50% data\",\n)\n\nrbf_svc = (SVC(kernel=\"rbf\", gamma=0.5).fit(X, y), y, \"SVC with rbf kernel\")\n\n# create a mesh to plot in\nx_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\ny_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n\ncolor_map = {-1: (1, 1, 1), 0: (0, 0, 0.9), 1: (1, 0, 0), 2: (0.8, 0.6, 0)}\n\nclassifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\nfor i, (clf, y_train, title) in enumerate(classifiers):\n # Plot the decision boundary. For that, we will assign a color to each\n # point in the mesh [x_min, x_max]x[y_min, y_max].\n plt.subplot(3, 2, i + 1)\n Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n\n # Put the result into a color plot\n Z = Z.reshape(xx.shape)\n plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n plt.axis(\"off\")\n\n # Plot also the training points\n colors = [color_map[y] for y in y_train]\n plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors=\"black\")\n\n plt.title(title)\n\nplt.suptitle(\"Unlabeled points are colored white\", y=0.1)\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.21" } }, "nbformat": 4, "nbformat_minor": 0 }