{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Decision boundary of semi-supervised classifiers versus SVM on the Iris dataset\n\nThis example compares decision boundaries learned by two semi-supervised\nmethods, namely :class:`~sklearn.semi_supervised.LabelSpreading` and\n:class:`~sklearn.semi_supervised.SelfTrainingClassifier`, while varying the\nproportion of labeled training data from small fractions up to the full dataset.\n\nBoth methods rely on RBF kernels: :class:`~sklearn.semi_supervised.LabelSpreading` uses\nit by default, and :class:`~sklearn.semi_supervised.SelfTrainingClassifier` is paired\nhere with :class:`~sklearn.svm.SVC` as base estimator (also RBF-based by default) to\nallow a fair comparison. With 100% labeled data,\n:class:`~sklearn.semi_supervised.SelfTrainingClassifier` reduces to a fully supervised\n:class:`~sklearn.svm.SVC`, since there are no unlabeled points left to pseudo-label.\n\nIn a second section, we explain how `predict_proba` is computed in\n:class:`~sklearn.semi_supervised.LabelSpreading` and\n:class:`~sklearn.semi_supervised.SelfTrainingClassifier`.\n\nSee\n`sphx_glr_auto_examples_semi_supervised_plot_semi_supervised_newsgroups.py`\nfor a comparison of `LabelSpreading` and `SelfTrainingClassifier` in terms of\nperformance.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import matplotlib.patches as mpatches\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.calibration import CalibratedClassifierCV\nfrom sklearn.datasets import load_iris\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier\nfrom sklearn.svm import SVC\n\niris = load_iris()\nX = iris.data[:, :2]\ny = iris.target\n\nrng = np.random.RandomState(42)\ny_rand = rng.rand(y.shape[0])\ny_10 = np.copy(y)\ny_10[y_rand > 0.1] = -1 # set random samples to be unlabeled\ny_30 = np.copy(y)\ny_30[y_rand > 0.3] = -1\n\nls10 = (LabelSpreading().fit(X, y_10), y_10, \"LabelSpreading with 10% labeled data\")\nls30 = (LabelSpreading().fit(X, y_30), y_30, \"LabelSpreading with 30% labeled data\")\nls100 = (LabelSpreading().fit(X, y), y, \"LabelSpreading with 100% labeled data\")\n\nbase_classifier = CalibratedClassifierCV(SVC(gamma=0.5, random_state=42))\nst10 = (\n SelfTrainingClassifier(base_classifier).fit(X, y_10),\n y_10,\n \"Self-training with 10% labeled data\",\n)\nst30 = (\n SelfTrainingClassifier(base_classifier).fit(X, y_30),\n y_30,\n \"Self-training with 30% labeled data\",\n)\nrbf_svc = (\n base_classifier.fit(X, y),\n y,\n \"SVC with rbf kernel\\n(equivalent to Self-training with 100% labeled data)\",\n)\n\ntab10 = plt.get_cmap(\"tab10\")\ncolor_map = {cls: tab10(cls) for cls in np.unique(y)}\ncolor_map[-1] = (1, 1, 1)\nclassifiers = (ls10, st10, ls30, st30, ls100, rbf_svc)\n\nfig, axes = plt.subplots(nrows=3, ncols=2, sharex=\"col\", sharey=\"row\", figsize=(10, 12))\naxes = axes.ravel()\n\nhandles = [\n mpatches.Patch(facecolor=tab10(i), edgecolor=\"black\", label=iris.target_names[i])\n for i in np.unique(y)\n]\nhandles.append(mpatches.Patch(facecolor=\"white\", edgecolor=\"black\", label=\"Unlabeled\"))\n\nfor ax, (clf, y_train, title) in zip(axes, classifiers):\n DecisionBoundaryDisplay.from_estimator(\n 
clf,\n X,\n response_method=\"predict_proba\",\n plot_method=\"contourf\",\n ax=ax,\n )\n colors = [color_map[label] for label in y_train]\n ax.scatter(X[:, 0], X[:, 1], c=colors, edgecolor=\"black\")\n ax.set_title(title)\nfig.suptitle(\n \"Semi-supervised decision boundaries with varying fractions of labeled data\", y=1\n)\nfig.legend(\n handles=handles, loc=\"lower center\", ncol=len(handles), bbox_to_anchor=(0.5, 0.0)\n)\nfig.tight_layout(rect=[0, 0.03, 1, 1])\nplt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We observe that, even when only a small subset of the labels is available, the\ndecision boundaries are already quite similar to those obtained by training on\nthe fully labeled dataset.\n\n## Interpretation of `predict_proba`\n\n### `predict_proba` in `LabelSpreading`\n\n:class:`~sklearn.semi_supervised.LabelSpreading` constructs a similarity graph\nfrom the data, by default using an RBF kernel. This means each sample is\nconnected to every other sample with a weight that decays with their squared\nEuclidean distance, scaled by a parameter `gamma`.\n\nOnce we have that weighted graph, labels are propagated along the graph\nedges. Each sample gradually takes on a soft label distribution that reflects\na weighted average of the labels of its neighbors until the process converges.\nThese per-sample distributions are stored in `label_distributions_`.\n\n`predict_proba` computes the class probabilities for a new point by taking a\nweighted average of the rows in `label_distributions_`, where the weights come\nfrom the RBF kernel similarities between the new point and the training\nsamples. The averaged values are then renormalized so that they sum to one.\n\nKeep in mind that these \"probabilities\" are graph-based scores rather than\ncalibrated posteriors, so their absolute values should not be over-interpreted.\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.metrics.pairwise import rbf_kernel\n\nls = ls100[0] # fitted LabelSpreading instance\nx_query = np.array([[3.5, 1.5]]) # point in the soft blue region\n\n# Step 1: similarities between query and all training samples\nW = rbf_kernel(x_query, X, gamma=ls.gamma) # `gamma=20` by default\n\n# Step 2: weighted average of label distributions\nprobs = np.dot(W, ls.label_distributions_)\n\n# Step 3: normalize to sum to 1\nprobs /= probs.sum(axis=1, keepdims=True)\n\nprint(\"Manual:\", probs)\nprint(\"API :\", ls.predict_proba(x_query))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `predict_proba` in `SelfTrainingClassifier`\n\n:class:`~sklearn.semi_supervised.SelfTrainingClassifier` works by repeatedly\nfitting its base estimator on the currently labeled data (the original labels\nplus any pseudo-labels accepted so far), then adding pseudo-labels for\nunlabeled points whose predicted probabilities exceed a confidence threshold.\nThis process repeats until no new points can be labeled or the maximum number\nof iterations is reached, at which point the final fitted base estimator is\nstored in the attribute `estimator_`.\n" ] },
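{ "cell_type": "markdown", "metadata": {}, "source": [ "The cell below is a minimal sketch of this loop, assuming the default\n`threshold=0.75` and `max_iter=10` and ignoring implementation details such as\nthe alternative `k_best` selection criterion and the `labeled_iter_`\nbookkeeping, so its pseudo-labels are not guaranteed to match\n:class:`~sklearn.semi_supervised.SelfTrainingClassifier` exactly.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from sklearn.base import clone\n\n# Work on a copy of the 30% labeled target; -1 marks unlabeled samples.\ny_sketch = np.copy(y_30)\nthreshold, max_iter = 0.75, 10 # defaults of SelfTrainingClassifier\n\nfor _ in range(max_iter):\n    labeled = y_sketch != -1\n    if labeled.all():\n        break\n    est = clone(base_classifier).fit(X[labeled], y_sketch[labeled])\n    proba = est.predict_proba(X[~labeled])\n    confident = proba.max(axis=1) > threshold\n    if not confident.any():\n        break # no remaining unlabeled point is predicted confidently enough\n    new_idx = np.flatnonzero(~labeled)[confident]\n    y_sketch[new_idx] = est.classes_[proba.argmax(axis=1)][confident]\n\n# Refit on everything labeled by the end (true labels plus pseudo-labels).\nlabeled = y_sketch != -1\nsketch_estimator = clone(base_classifier).fit(X[labeled], y_sketch[labeled])\n\nprint(\"Sketch :\", sketch_estimator.predict_proba(x_query))\nprint(\"SelfTrainingClassifier:\", st30[0].predict_proba(x_query))" ] },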
{ "cell_type": "markdown", "metadata": {}, "source": [ "When you call `predict_proba` on the `SelfTrainingClassifier`, it simply\ndelegates to the final fitted estimator stored in `estimator_`.\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "st = st10[0]\nprint(\"Manual:\", st.estimator_.predict_proba(x_query))\nprint(\"API :\", st.predict_proba(x_query))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In both methods, semi-supervised learning can be understood as constructing a\ncategorical distribution over classes for each sample.\n:class:`~sklearn.semi_supervised.LabelSpreading` keeps these distributions soft and\nupdates them through graph-based propagation.\nPredictions (including `predict_proba`) remain tied to the training set, which\nmust be stored for inference.\n\n:class:`~sklearn.semi_supervised.SelfTrainingClassifier` instead uses these\ndistributions internally to decide which unlabeled points receive pseudo-labels\nduring training, but at prediction time the returned probabilities come directly from\nthe final fitted estimator, and therefore the decision rule does not require storing\nthe training data.\n\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.14" } }, "nbformat": 4, "nbformat_minor": 0 }