{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Demo: Reactive plot with multiple regressions" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import bqplot.pyplot as plt\n", "from ipywidgets import VBox, FloatSlider" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Non-parametric regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let us define the random variables $X$ and $Y$ by\n", "$$\n", "f(x) = x \\frac{1 + x}{1 + x^2}, \\qquad X \\sim \\mathcal{N}(0, 1), \\quad Y = f(X) + \\varepsilon,\n", "$$\n", "where $\\varepsilon \\sim \\mathcal{N}(0, 1/4)$ is independent of X" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def f(x):\n", " return (x + x ** 2) / (1 + x ** 2)\n", "\n", "n = 1000\n", "sigma = 0.5\n", "X = np.random.randn(n)\n", "Y = f(X) + sigma * np.random.randn(n)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5b354a70e0c84fbcb6018b7668e27cbf", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure(title='Joint sample, and Conditional expecation for (X, Y)')\n", "plt.scatter(X, Y, alpha=0.3)\n", "mesh = np.linspace(-4, 4, 201)\n", "plt.plot(mesh, f(mesh), linewidth=3)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Non-parametric regression" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def reg_non_param(x, bdwidth, x_sample, y_sample):\n", " \"\"\"Values of the non-parametric regression of Y wrt X using a Gaussian kernel.\"\"\"\n", " def kern(u, x):\n", " \"\"\"Gaussian kernel function\"\"\"\n", " return np.exp(-(u[:, np.newaxis] - x) ** 2 / (2 * bdwidth ** 2))\n", "\n", " return np.sum(kern(x_sample, x) * y_sample[:, np.newaxis], axis=0) \\\n", " / np.sum(kern(x_sample, x), axis=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plotting non-parametric regressions of $Y$ with respect to $X$ with different values of the bandwidth:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a39c75bfa5ba467cb8c6e93507f81ed9", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure(legend_location='bottom-right')\n", "plt.scatter(X, Y, alpha=0.3)\n", "plt.plot(mesh, reg_non_param(mesh, 0.1, X, Y), 'b', linewidth=3, labels=['bandwidth 0.1'], display_legend=True)\n", "plt.plot(mesh, reg_non_param(mesh, 0.2, X, Y), 'r', linewidth=3, labels=['bandwidth 0.2'], display_legend=True)\n", "plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'g', linewidth=3, labels=['bandwidth 0.5'], display_legend=True)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4bc3671aa6064f5985f52bb9c37b1a61", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "figure = plt.figure(title='Non-parametric regression')\n", "scatter = plt.scatter(X, Y, alpha=0.3, enable_move=True)\n", "regression = plt.plot(mesh, np.zeros(mesh.shape), 'b', linewidth=3)\n", "slider = FloatSlider(min=0.05, max=2.0, value=1.0, step=0.05, description='bandwidth')\n", "\n", "def update(change=None):\n", " regression.y = reg_non_param(regression.x, slider.value, scatter.x, scatter.y)\n", "\n", "slider.observe(update, names=['value'])\n", "scatter.observe(update, names=['x', 'y'])\n", "update()\n", "VBox([figure, slider])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Multiple regression" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def basis(knots, x):\n", " \"\"\"Values of order-1 B-spline basis functions.\"\"\"\n", " nb_knots = len(knots)\n", " diag = np.identity(nb_knots)\n", " res = np.empty((len(x), nb_knots))\n", " for i in range(nb_knots):\n", " res[:, i] = np.interp(x, knots, diag[i])\n", " return res" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "373379e64b134ed1a337de08068e9f7d", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "basis_len = 10\n", "knots = np.linspace(-3.5, 3.5, basis_len)\n", "\n", "plt.figure(title='Order-0 B-splines')\n", "plt.plot(mesh, basis(knots, mesh).T, linewidth=2)\n", "plt.ylim(0.0, 2.0)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def reg_param_coeffs(knots, x_sample, y_sample):\n", " \"\"\"Computes the coefficients of the P-L regression of y_sample wrt. x_sample.\"\"\"\n", " bis = basis(knots, x_sample)\n", " var = bis.T.dot(bis)\n", " covar = y_sample.dot(bis)\n", " return np.linalg.lstsq(var, covar.T)[0]\n", "\n", "def eval_piecewise_linear(x, knots, coeffs):\n", " \"\"\"Eveluates the piecewise linear function at the specified x for the knots and coeffs.\n", " \"\"\"\n", " return np.interp(x, knots, coeffs)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4c4131b9da5b4af6a2343dac51b04072", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure()\n", "plt.scatter(X, Y, alpha=0.3)\n", "\n", "knots1 = np.linspace(-3.0, 3.0, 10)\n", "plt.plot(mesh, eval_piecewise_linear(mesh, knots1, reg_param_coeffs(knots1, X, Y)), 'b', linewidth=3)\n", "\n", "knots2 = np.linspace(-3.0, 3.0, 20)\n", "plt.plot(mesh, np.interp(mesh, knots2, reg_param_coeffs(knots2, X, Y)), 'r', linewidth=3)\n", "\n", "plt.title('Different collections of knots')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Penalized multiple regression" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def second_derivative_on_dirac_basis(knots):\n", " \"\"\"\n", " Computes the coefficients of the second derivative of the basis functions\n", " on the Dirac comb.\n", " \"\"\"\n", " nb_knots = len(knots)\n", " res = np.zeros((nb_knots, nb_knots))\n", " if nb_knots > 1:\n", " res[0, 0] = -1.0 / (knots[1] - knots[0])\n", " res[0, 1] = 1.0 / (knots[1] - knots[0])\n", " for i in range(1, nb_knots - 1):\n", " res[i, i - 1] = (1.0 / (knots[i] - knots[i - 1]))\n", " res[i, i] = -(1.0 / (knots[i] - knots[i - 1]) + 1.0 / (knots[i + 1] - knots[i]))\n", " res[i, i + 1] = 1.0 / (knots[i + 1] - knots[i])\n", " if nb_knots > 1:\n", " res[nb_knots - 1, nb_knots - 2] = 1.0 / (knots[nb_knots - 1] - knots[nb_knots - 2])\n", " res[nb_knots - 1, nb_knots - 1] = -1.0 / (knots[nb_knots - 1] - knots[nb_knots - 2])\n", " return res\n", "\n", "def dirac_inner_product(knots, coeffs1, coeffs2):\n", " \"\"\"\n", " Equivalent to the finite-difference approximation for the second derivative.\n", " \"\"\"\n", " nb_knots = len(knots) \n", " res = 0.0\n", " for i in range(nb_knots):\n", " res += 0.5 * (coeffs1[i] * coeffs2[i] + coeffs1[i - 1] * coeffs2[i - 1]) / (knots[i] - knots[i - 1])\n", " return res\n", "\n", "def tikhonov_matrix(knots):\n", " \"\"\"Computes the second-order Tikhonov matrix of the B-splines corresponding to specified knots.\n", " \n", " Note\n", " ----\n", " The specified array of knots must be non-empty and increasingly sorted.\n", " \"\"\"\n", " basis_len = len(knots)\n", " res = np.zeros((basis_len, basis_len))\n", " coeffs_on_dirac_basis = second_derivative_on_dirac_basis(knots)\n", " influence_order = 2\n", " for i in range(basis_len):\n", " min_j = max(0, i - influence_order)\n", " max_j = min(basis_len, i + influence_order + 1)\n", " for j in range(min_j, max_j):\n", " res[i, j] = dirac_inner_product(knots, coeffs_on_dirac_basis[i], coeffs_on_dirac_basis[j])\n", " return res" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def penalized_pl_regression(knots, x_sample, y_sample, tikhonov_factor):\n", " \"\"\"Compute the second-order penalized P-L regression of y_sample wrt. x_sample.\n", " \"\"\"\n", " bis = basis(knots, x_sample)\n", " var = (bis.T).dot(bis) / len(x_sample)\n", " covar = y_sample.dot(bis) / len(x_sample)\n", " tikho = tikhonov_matrix(knots)\n", " \n", " return np.linalg.lstsq(var + tikhonov_factor * tikho, covar.T)[0]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3a6413975f143159f6832471ff305fd", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure(title='Testing multiple values for Tikhonov factor')\n", "plt.scatter(X, Y, alpha=0.3)\n", "knots = np.linspace(-3.0, 3.0, 25)\n", "plt.plot(mesh, eval_piecewise_linear(mesh, knots, penalized_pl_regression(knots, X, Y, 0.01)), 'r', linewidth=3)\n", "plt.plot(mesh, eval_piecewise_linear(mesh, knots, penalized_pl_regression(knots, X, Y, 0.1)), 'g', linewidth=3)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "77af689f25e3412483e980467750b099", "version_major": "2", "version_minor": "0" }, "text/plain": [ "A Jupyter Widget" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "figure = plt.figure(title='Non-parametric regression')\n", "scatter_s = plt.scatter(X, Y, alpha=0.3, enable_move=True)\n", "spline = plt.plot(mesh, np.zeros(mesh.shape), 'b', linewidth=3)\n", "tikhonov = FloatSlider(min=0.02, max=1.0, value=0.5, step=0.01, description='Tikhonov')\n", "\n", "def update_spline(change=None):\n", " spline.y = eval_piecewise_linear(spline.x, knots, \n", " penalized_pl_regression(knots, scatter_s.x, scatter_s.y, tikhonov.value))\n", "\n", "tikhonov.observe(update_spline, names=['value'])\n", "scatter_s.observe(update_spline, names=['x', 'y'])\n", "update_spline()\n", "VBox([figure, tikhonov])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "widgets-tutorial", "language": "python", "name": "widgets-tutorial" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "name": "Regression and American Options.ipynb" }, "nbformat": 4, "nbformat_minor": 1 }