{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# Multi-dimensional scaling\n",
    "\n",
    "An illustration of metric and non-metric MDS on generated noisy data.\n",
    "\n",
    "The reconstructed points using the metric MDS and non-metric MDS are slightly\n",
    "shifted to avoid overlapping.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Authors: The scikit-learn developers\n",
    "# SPDX-License-Identifier: BSD-3-Clause\n",
    "\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib.collections import LineCollection\n",
    "\n",
    "from sklearn import manifold\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.metrics import euclidean_distances\n",
    "\n",
    "# Added to the denominator below so the zero diagonal never divides by zero.\n",
    "EPSILON = np.finfo(np.float32).eps\n",
    "n_samples = 20\n",
    "# Single seeded generator shared by every random draw in this example.\n",
    "seed = np.random.RandomState(seed=3)\n",
    "X_true = seed.randint(0, 20, 2 * n_samples).astype(float)\n",
    "X_true = X_true.reshape((n_samples, 2))\n",
    "# Center the data: subtract each coordinate's own mean. A scalar\n",
    "# `X_true.mean()` would subtract the same value from both columns and leave\n",
    "# neither of them centered.\n",
    "X_true -= X_true.mean(axis=0)\n",
    "\n",
    "similarities = euclidean_distances(X_true)\n",
    "\n",
    "# Add symmetric noise to the similarities. It is drawn from the seeded\n",
    "# generator (not the global `np.random` state) so that re-running the\n",
    "# notebook reproduces the same figure.\n",
    "noise = seed.rand(n_samples, n_samples)\n",
    "noise = noise + noise.T\n",
    "np.fill_diagonal(noise, 0)\n",
    "similarities += noise\n",
    "\n",
    "# Metric MDS on the precomputed, noisy dissimilarity matrix.\n",
    "mds = manifold.MDS(\n",
    "    n_components=2,\n",
    "    max_iter=3000,\n",
    "    eps=1e-9,\n",
    "    random_state=seed,\n",
    "    dissimilarity=\"precomputed\",\n",
    "    n_jobs=1,\n",
    ")\n",
    "pos = mds.fit(similarities).embedding_\n",
    "\n",
    "# Non-metric MDS, initialised from the metric solution (hence n_init=1).\n",
    "nmds = manifold.MDS(\n",
    "    n_components=2,\n",
    "    metric=False,\n",
    "    max_iter=3000,\n",
    "    eps=1e-12,\n",
    "    dissimilarity=\"precomputed\",\n",
    "    random_state=seed,\n",
    "    n_jobs=1,\n",
    "    n_init=1,\n",
    ")\n",
    "npos = nmds.fit_transform(similarities, init=pos)\n",
    "\n",
    "# Rescale the data: give both embeddings the same Frobenius norm as X_true.\n",
    "pos *= np.sqrt((X_true**2).sum()) / np.sqrt((pos**2).sum())\n",
    "npos *= np.sqrt((X_true**2).sum()) / np.sqrt((npos**2).sum())\n",
    "\n",
    "# Rotate the data: express each point set in its own principal axes.\n",
    "clf = PCA(n_components=2)\n",
    "X_true = clf.fit_transform(X_true)\n",
    "\n",
    "pos = clf.fit_transform(pos)\n",
    "\n",
    "npos = clf.fit_transform(npos)\n",
    "\n",
    "fig = plt.figure(1)\n",
    "ax = plt.axes([0.0, 0.0, 1.0, 1.0])\n",
    "\n",
    "s = 100\n",
    "plt.scatter(X_true[:, 0], X_true[:, 1], color=\"navy\", s=s, lw=0, label=\"True Position\")\n",
    "plt.scatter(pos[:, 0], pos[:, 1], color=\"turquoise\", s=s, lw=0, label=\"MDS\")\n",
    "plt.scatter(npos[:, 0], npos[:, 1], color=\"darkorange\", s=s, lw=0, label=\"NMDS\")\n",
    "plt.legend(scatterpoints=1, loc=\"best\", shadow=False)\n",
    "\n",
    "# Invert the dissimilarities so that closer pairs get larger values; the\n",
    "# diagonal (a point paired with itself) is reset to 0.\n",
    "edge_weights = similarities.max() / (similarities + EPSILON) * 100\n",
    "np.fill_diagonal(edge_weights, 0)\n",
    "# Plot the edges\n",
    "# a sequence of (*line0*, *line1*, *line2*), where::\n",
    "#            linen = (x0, y0), (x1, y1), ... (xm, ym)\n",
    "# One segment per ordered pair (i, j), in the same row-major order as\n",
    "# `edge_weights.flatten()` below.\n",
    "segments = [\n",
    "    [X_true[i, :], X_true[j, :]] for i in range(len(pos)) for j in range(len(pos))\n",
    "]\n",
    "values = np.abs(edge_weights)\n",
    "lc = LineCollection(\n",
    "    segments, zorder=0, cmap=plt.cm.Blues, norm=plt.Normalize(0, values.max())\n",
    ")\n",
    "lc.set_array(edge_weights.flatten())\n",
    "lc.set_linewidths(np.full(len(segments), 0.5))\n",
    "ax.add_collection(lc)\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}