{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Plot class probabilities calculated by the VotingClassifier\n\n.. currentmodule:: sklearn\n\nPlot the class probabilities of the first sample in a toy dataset predicted by\nthree different classifiers and averaged by the\n:class:`~ensemble.VotingClassifier`.\n\nFirst, three exemplary classifiers are initialized\n(:class:`~linear_model.LogisticRegression`, :class:`~naive_bayes.GaussianNB`,\nand :class:`~ensemble.RandomForestClassifier`) and used to initialize a\nsoft-voting :class:`~ensemble.VotingClassifier` with weights `[1, 1, 5]`, which\nmeans that the predicted probabilities of the\n:class:`~ensemble.RandomForestClassifier` count 5 times as much as the weights\nof the other classifiers when the averaged probability is calculated.\n\nTo visualize the probability weighting, we fit each classifier on the training\nset and plot the predicted class probabilities for the first sample in this\nexample dataset.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.ensemble import RandomForestClassifier, VotingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.naive_bayes import GaussianNB\n\nclf1 = LogisticRegression(max_iter=1000, random_state=123)\nclf2 = RandomForestClassifier(n_estimators=100, random_state=123)\nclf3 = GaussianNB()\nX = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])\ny = np.array([1, 1, 2, 2])\n\neclf = VotingClassifier(\n estimators=[(\"lr\", clf1), (\"rf\", clf2), (\"gnb\", clf3)],\n voting=\"soft\",\n weights=[1, 1, 5],\n)\n\n# predict class probabilities for all classifiers\nprobas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)]\n\n# get class probabilities for the first sample in the dataset\nclass1_1 = [pr[0, 0] for pr in probas]\nclass2_1 = [pr[0, 1] for pr in probas]\n\n\n# plotting\n\nN = 4 # number of groups\nind = np.arange(N) # group positions\nwidth = 0.35 # bar width\n\nfig, ax = plt.subplots()\n\n# bars for classifier 1-3\np1 = ax.bar(ind, np.hstack(([class1_1[:-1], [0]])), width, color=\"green\", edgecolor=\"k\")\np2 = ax.bar(\n ind + width,\n np.hstack(([class2_1[:-1], [0]])),\n width,\n color=\"lightgreen\",\n edgecolor=\"k\",\n)\n\n# bars for VotingClassifier\np3 = ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color=\"blue\", edgecolor=\"k\")\np4 = ax.bar(\n ind + width, [0, 0, 0, class2_1[-1]], width, color=\"steelblue\", edgecolor=\"k\"\n)\n\n# plot annotations\nplt.axvline(2.8, color=\"k\", linestyle=\"dashed\")\nax.set_xticks(ind + width)\nax.set_xticklabels(\n [\n \"LogisticRegression\\nweight 1\",\n \"GaussianNB\\nweight 1\",\n \"RandomForestClassifier\\nweight 5\",\n \"VotingClassifier\\n(average probabilities)\",\n ],\n rotation=40,\n ha=\"right\",\n)\nplt.ylim([0, 1])\nplt.title(\"Class probabilities for sample 1 by different classifiers\")\nplt.legend([p1[0], p2[0]], [\"class 1\", \"class 2\"], loc=\"upper left\")\nplt.tight_layout()\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.21" } }, "nbformat": 4, "nbformat_minor": 0 }