{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Incremental PCA\n",
    "\n",
    "Incremental principal component analysis (IPCA) is typically used as a\n",
    "replacement for principal component analysis (PCA) when the dataset to be\n",
    "decomposed is too large to fit in memory. IPCA builds a low-rank approximation\n",
    "for the input data using an amount of memory which is independent of the\n",
    "number of input data samples. It is still dependent on the input data features,\n",
    "but changing the batch size allows for control of memory usage.\n",
    "\n",
    "This example serves as a visual check that IPCA is able to find a similar\n",
    "projection of the data to PCA (up to a sign flip), while only processing a\n",
    "few samples at a time. This can be considered a \"toy example\", as IPCA is\n",
    "intended for large datasets which do not fit in main memory, requiring\n",
    "incremental approaches."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Authors: The scikit-learn developers\n",
    "# SPDX-License-Identifier: BSD-3-Clause\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.decomposition import PCA, IncrementalPCA"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit both decompositions\n",
    "\n",
    "Project the iris data onto `n_components` components twice: once with\n",
    "`IncrementalPCA`, which only looks at `batch_size` samples at a time, and once\n",
    "with ordinary `PCA` as the reference."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "iris = load_iris()\n",
    "X = iris.data\n",
    "y = iris.target\n",
    "\n",
    "n_components = 2\n",
    "# Number of samples IncrementalPCA processes per batch; this is the knob that\n",
    "# controls memory usage.\n",
    "batch_size = 10\n",
    "\n",
    "ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)\n",
    "X_ipca = ipca.fit_transform(X)\n",
    "\n",
    "pca = PCA(n_components=n_components)\n",
    "X_pca = pca.fit_transform(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare the projections\n",
    "\n",
    "Each projection is drawn in its own figure with identical axis limits. The\n",
    "Incremental PCA title reports the mean absolute difference between the two\n",
    "projections after taking absolute values, so a sign flip of a component does\n",
    "not count as an error."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "colors = [\"navy\", \"turquoise\", \"darkorange\"]\n",
    "\n",
    "# Compare magnitudes only: the sign of each component is arbitrary, so the two\n",
    "# estimators may return mirrored axes for an otherwise matching projection.\n",
    "err = np.abs(np.abs(X_pca) - np.abs(X_ipca)).mean()\n",
    "ipca_title = f\"Incremental PCA of iris dataset\\nMean absolute unsigned error {err:.6f}\"\n",
    "\n",
    "for X_transformed, title in [(X_ipca, ipca_title), (X_pca, \"PCA of iris dataset\")]:\n",
    "    fig, ax = plt.subplots(figsize=(8, 8))\n",
    "    # Class labels are the positions of their names in iris.target_names.\n",
    "    for label, (color, target_name) in enumerate(zip(colors, iris.target_names)):\n",
    "        in_class = y == label\n",
    "        ax.scatter(\n",
    "            X_transformed[in_class, 0],\n",
    "            X_transformed[in_class, 1],\n",
    "            color=color,\n",
    "            lw=2,\n",
    "            label=target_name,\n",
    "        )\n",
    "\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel(\"First principal component\")\n",
    "    ax.set_ylabel(\"Second principal component\")\n",
    "    ax.legend(loc=\"best\", shadow=False, scatterpoints=1)\n",
    "    # Same limits on both figures so the projections can be compared by eye.\n",
    "    ax.axis([-4, 4, -1.5, 1.5])\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}