{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force === true) {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " if (window.Bokeh !== undefined) {\n", " var el = document.getElementById(\"f981697c-83d6-4fdb-91b7-84e9dde6b213\");\n", " el.textContent = \"BokehJS \" + Bokeh.version + \" successfully loaded.\";\n", " } else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"f981697c-83d6-4fdb-91b7-84e9dde6b213\");\n", " if (element == null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'f981697c-83d6-4fdb-91b7-84e9dde6b213' but no matching script tag was found. \")\n", " return false;\n", " }\n", "\n", " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.5.min.js\"];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " },\n", " \n", " function(Bokeh) {\n", " \n", " document.getElementById(\"f981697c-83d6-4fdb-91b7-84e9dde6b213\").textContent = \"BokehJS is loading...\";\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.5.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.5.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((window.Bokeh !== undefined) || (force === true)) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!window._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " window._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", " var cell = $(document.getElementById(\"f981697c-83d6-4fdb-91b7-84e9dde6b213\")).parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "from bokeh.plotting import figure, show, output_notebook\n", "\n", "output_notebook()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## algorithm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def PCA(X, n_components):\n", " # normalize to zero mean\n", " mu = X.mean(axis=0)\n", " X = X - mu\n", " \n", " # eigenvectors of covariance matrix\n", " sigma = X.T @ X\n", " eigvals, eigvecs = np.linalg.eig(sigma)\n", " \n", " # principal components\n", " order = np.argsort(eigvals)[::-1]\n", " components = eigvecs[:, order[:n_components]]\n", " \n", " # projection\n", " Z = X @ components\n", " \n", " # result\n", " return Z, components" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2D data & principal components" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# generate points\n", "x = np.linspace(0, 13, num=100)\n", "y = x + np.sin(x) - np.cos(x)\n", "\n", "# 2D data\n", "X = np.c_[x, y]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.72299657, -0.69085162],\n", " [ 0.69085162, 0.72299657]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# PCA\n", "projection, components = PCA(X, n_components=2)\n", "\n", "# principal components\n", "components" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 2705.081, 0. ],\n", " [ 0. , 47.716]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# convariance matrix of projected data\n", "(projection.T @ projection).round(3)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# prepare plot data\n", "mean = np.mean(X, axis=0)\n", "extent = projection.min(), projection.max()\n", "angle = np.arctan(components[1] / components[0]) + np.pi * (components[0] < 0)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plot original data & principal components\n", "plot = figure()\n", "\n", "plot.scatter(x, y)\n", "plot.ray(*mean, length=0, angle=angle[0], line_width=2, line_color='red')\n", "plot.ray(*mean, length=0, angle=angle[1], line_width=2, line_color='green')\n", "\n", "show(plot)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plot projected data\n", "plot = figure(x_range=extent, y_range=extent)\n", "plot.scatter(projection[:, 0], projection[:, 1])\n", "show(plot)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## binary vectors & dimensionality reduction" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# generate binary vectors\n", "X = np.random.rand(90, 30)\n", "X[:30, :] = X[:30, :] < ([.4] * 10 + [.1] * 10 + [.1] * 10)\n", "X[30:60, :] = X[30:60, :] < ([.1] * 10 + [.4] * 10 + [.1] * 10)\n", "X[60:, :] = X[60:, :] < ([.1] * 10 + [.1] * 10 + [.4] * 10)\n", "\n", "# define 3 classes\n", "Y = ['red'] * 30 + ['green'] * 30 + ['blue'] * 30" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# PCA\n", "projection, _ = PCA(X, n_components=2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plot projected data: 30D -> 2D\n", "plot = figure()\n", "plot.scatter(projection[:, 0], projection[:, 1], color=Y)\n", "show(plot)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }