{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# Comparison of LDA and PCA 2D projection of Iris dataset\n",
    "\n",
    "The Iris dataset represents 3 kinds of Iris flowers (Setosa, Versicolour\n",
    "and Virginica) with 4 attributes: sepal length, sepal width, petal length\n",
    "and petal width.\n",
    "\n",
    "Principal Component Analysis (PCA) applied to this data identifies the\n",
    "combination of attributes (principal components, or directions in the\n",
    "feature space) that account for the most variance in the data. Here we\n",
    "plot the different samples on the first 2 principal components.\n",
    "\n",
    "Linear Discriminant Analysis (LDA) tries to identify attributes that\n",
    "account for the most variance *between classes*. In particular,\n",
    "LDA, in contrast to PCA, is a supervised method, using known class labels.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Authors: The scikit-learn developers\n",
    "# SPDX-License-Identifier: BSD-3-Clause\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn import datasets\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "target_names = iris.target_names\n",
    "\n",
    "# PCA is fitted on the features only; the labels are not passed to it.\n",
    "pca = PCA(n_components=2)\n",
    "X_r = pca.fit_transform(X)\n",
    "\n",
    "# LDA is fitted on the features together with the class labels.\n",
    "lda = LinearDiscriminantAnalysis(n_components=2)\n",
    "X_r2 = lda.fit_transform(X, y)\n",
    "\n",
    "# Percentage of variance explained by each component\n",
    "print(\n",
    "    \"explained variance ratio (first two components): %s\"\n",
    "    % str(pca.explained_variance_ratio_)\n",
    ")\n",
    "\n",
    "# Shared styling so that the two figures are directly comparable.\n",
    "colors = [\"navy\", \"turquoise\", \"darkorange\"]\n",
    "lw = 2\n",
    "\n",
    "\n",
    "def plot_projection(X_2d, title):\n",
    "    \"\"\"Scatter-plot a 2D projection of the samples, one color per class.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X_2d : array-like with at least 2 columns\n",
    "        Projected samples, row-aligned with the labels ``y``. Column 0 is\n",
    "        drawn on the x-axis and column 1 on the y-axis.\n",
    "    title : str\n",
    "        Title of the figure.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ax : matplotlib.axes.Axes\n",
    "        The axes the projection was drawn on.\n",
    "    \"\"\"\n",
    "    _, ax = plt.subplots()\n",
    "    # The position of each class in ``target_names`` is compared against\n",
    "    # the integer labels in ``y`` to select the samples of that class.\n",
    "    for i, (color, target_name) in enumerate(zip(colors, target_names)):\n",
    "        mask = y == i\n",
    "        ax.scatter(\n",
    "            X_2d[mask, 0],\n",
    "            X_2d[mask, 1],\n",
    "            color=color,\n",
    "            alpha=0.8,\n",
    "            lw=lw,\n",
    "            label=target_name,\n",
    "        )\n",
    "    ax.legend(loc=\"best\", shadow=False, scatterpoints=1)\n",
    "    ax.set_title(title)\n",
    "    return ax\n",
    "\n",
    "\n",
    "plot_projection(X_r, \"PCA of IRIS dataset\")\n",
    "plot_projection(X_r2, \"LDA of IRIS dataset\")\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}