{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HLMA 408: Quantiles, densités et fonctions de répartition \n",
    "\n",
    "***\n",
    "> __Auteur__: Joseph Salmon \n",
    "\n",
    "On va illustrer la notion de quantile sur des lois classiques en affichant la densité, la fonction de répartition et la fonction quantile associée.\n",
    "On observera ainsi l'impact du centrage (en: *centering*) et de la mise à l'échelle (en: *scaling*).\n",
    "\n",
    "La liste exhaustive des lois disponibles sous `scipy` se trouve ici: https://docs.scipy.org/doc/scipy/reference/stats.html\n",
    "\n",
    "Ce notebook est inspiré des posts suivants:\n",
    "- https://medium.com/kapernikov/ipywidgets-with-matplotlib-93646718eb84\n",
    "- https://medium.com/@jdchipox/how-to-interact-with-jupyter-33a98686f24e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats\n",
    "import ipywidgets  # ipywidgets>=7.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib widget"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "## Visualisation, cas continu: densités, fonctions de répartition et quantiles "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_box_layout():\n",
    "    \"\"\"Return the bordered ipywidgets layout shared by the widget boxes.\"\"\"\n",
    "    return ipywidgets.Layout(\n",
    "        border='solid 1px black',\n",
    "        margin='0px 10px 10px 0px',\n",
    "        padding='5px 5px 5px 5px',\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def keep_no_param_distribution():\n",
    "    \"\"\"Return the continuous scipy.stats laws that take no shape parameter.\n",
    "\n",
    "    Such laws are fully specified by `loc` and `scale`, which is what the\n",
    "    widget below exposes through its two sliders.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Maps each distribution name (in alphabetical order) to the\n",
    "        matching distribution object of `scipy.stats`.\n",
    "    \"\"\"\n",
    "    # Only the public namespace of scipy.stats is inspected, so no private\n",
    "    # scipy module is needed.\n",
    "    return {\n",
    "        name: law\n",
    "        for name, law in sorted(vars(stats).items())\n",
    "        if not name.startswith('_')\n",
    "        and isinstance(law, stats.rv_continuous)\n",
    "        and not law.shapes\n",
    "    }\n",
    "\n",
    "# inspired by: https://stackoverflow.com/questions/30453097/getting-the-parameter-names-of-scipy-stats-distributions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "distributions_0_dict = keep_no_param_distribution()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "solution": "hidden",
    "solution_first": true
   },
   "outputs": [],
   "source": [
    "class RandomWidgetQuantiles(ipywidgets.HBox):\n",
    "    \"\"\"Interactive view of a density, its CDF and its quantile function.\n",
    "\n",
    "    The left box stacks the controls (location, scale, distribution,\n",
    "    colour, zooms and quantile level) on top of the density plot. The\n",
    "    right box shows a 2x2 grid: quantile function (top left), text\n",
    "    read-out (top right), identity line (bottom left) and cumulative\n",
    "    distribution function (bottom right).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        \"\"\"Build both figures, the control widgets and wire the callbacks.\"\"\"\n",
    "        super().__init__()\n",
    "        output = ipywidgets.Output()\n",
    "        output0 = ipywidgets.Output()\n",
    "        self.xranges = (-6, 6)  # displayed range of values\n",
    "        self.yranges = (0, 0.5)  # initial value of the density y-zoom slider\n",
    "\n",
    "        self.x = np.linspace(self.xranges[0], self.xranges[1], num=400)\n",
    "        self.quantiles_value = 0.5  # probability level of the quantile\n",
    "\n",
    "        self.mu, self.sigma = 0, 1\n",
    "        self.distribution = distributions_0_dict['norm']\n",
    "        self.size = 5\n",
    "        self.initial_color = '#1a60e1'\n",
    "        self.params = dict(\n",
    "            color=self.initial_color, alpha=0.50, linewidth=0.2,\n",
    "            edgecolor='black')\n",
    "        self.fontsize = 4\n",
    "\n",
    "        with output:\n",
    "            self.fig, self.ax = plt.subplots(\n",
    "                2, 2, sharey='row', sharex='col',\n",
    "                num='Quantiles et fonctions de répartition',\n",
    "                constrained_layout=True, figsize=(4, 4))\n",
    "        with output0:\n",
    "            self.fig0, self.ax0 = plt.subplots(\n",
    "                1, 1, num='Densité',\n",
    "                constrained_layout=True, figsize=(3, 1.5))\n",
    "\n",
    "        self.fig0.canvas.toolbar_visible = False\n",
    "        self.fig.canvas.toolbar_visible = False\n",
    "        self.fig.canvas.toolbar_position = 'bottom'\n",
    "\n",
    "        law_kwargs = dict(loc=self.mu, scale=self.sigma)\n",
    "        self.quantiles_range = self.distribution.cdf(self.x, **law_kwargs)\n",
    "        self.ticks = np.arange(\n",
    "            self.xranges[0], self.xranges[1] + 0.1, step=2.)\n",
    "        proba_ticks = [0, 0.25, 0.5, 0.75, 1]\n",
    "\n",
    "        # Density figure. Markers and guide lines are created empty here\n",
    "        # and positioned once by _update_artists() below.\n",
    "        self.pdf, = self.ax0.plot(\n",
    "            self.x, self.distribution.pdf(self.x, **law_kwargs),\n",
    "            color=self.initial_color)\n",
    "        self.ax0.set_xlim(self.xranges)\n",
    "        self.area = None\n",
    "        self.quantile_bar = self.ax0.axvline(x=0, c='k', ls='--', lw=2)\n",
    "\n",
    "        # Top right: text read-out of the current quantile.\n",
    "        self.mytext = self.ax[0, 1].text(\n",
    "            self.xranges[0], np.mean(self.xranges), '')\n",
    "        self.ax[0, 1].axis('off')\n",
    "\n",
    "        # Bottom left: identity line linking the two probability axes.\n",
    "        self.diago, = self.ax[1, 0].plot(\n",
    "            [-0.1, 1.1], [-0.1, 1.1], color=self.initial_color)\n",
    "        self.ax[1, 0].set_xlim((-0.1, 1.1))\n",
    "        self.ax[1, 0].set_ylim((-0.1, 1.1))\n",
    "        self.ax[1, 0].set_xticks(proba_ticks)\n",
    "        self.ax[1, 0].set_yticks(proba_ticks)\n",
    "        self.points_q10, = self.ax[1, 0].plot([], [], '--', color='k')\n",
    "\n",
    "        # Bottom right: cumulative distribution function.\n",
    "        self.cdf, = self.ax[1, 1].plot(\n",
    "            self.x, self.quantiles_range, color=self.initial_color)\n",
    "        self.ax[1, 1].set_xlabel(r'Fonction de répartition')\n",
    "        self.ax[1, 1].set_xlim(self.xranges)\n",
    "        self.ax[1, 1].set_ylim((-0.1, 1.1))\n",
    "        self.ax[1, 1].set_xticks(self.ticks)\n",
    "        self.points_q11, = self.ax[1, 1].plot([], [], '--', color='k')\n",
    "        self.points_cdf, = self.ax[1, 1].plot(\n",
    "            [], [], 'o', color=self.initial_color)\n",
    "\n",
    "        # Top left: quantile function, i.e. the CDF with swapped axes.\n",
    "        self.ppf, = self.ax[0, 0].plot(\n",
    "            self.quantiles_range, self.x, color=self.initial_color)\n",
    "        self.ax[0, 0].set_xlabel(r'Quantiles')\n",
    "        self.ax[0, 0].set_ylim(self.xranges)\n",
    "        self.ax[0, 0].set_xticks(proba_ticks)\n",
    "        self.ax[0, 0].set_yticks(self.ticks)\n",
    "        self.points_q00, = self.ax[0, 0].plot([], [], '--', color='k')\n",
    "        self.points, = self.ax[0, 0].plot(\n",
    "            [], [], 'o', color=self.initial_color)\n",
    "\n",
    "        # Place the shaded area, the bar, the markers and the guide lines\n",
    "        # at the quantile itself (not at its probability level).\n",
    "        self._update_artists()\n",
    "\n",
    "        # Control widgets.\n",
    "        style = {'description_width': '100px'}\n",
    "        layout = {'width': '300px'}\n",
    "\n",
    "        mu_slider = ipywidgets.FloatSlider(\n",
    "            value=0, min=-2, max=2, step=0.1, description=r'$\\mu$',\n",
    "            style=style, layout=layout)\n",
    "        sigma_slider = ipywidgets.FloatSlider(\n",
    "            value=1, min=0.5, max=3, step=0.1, description=r'$\\sigma$',\n",
    "            style=style, layout=layout)\n",
    "        color_picker = ipywidgets.ColorPicker(\n",
    "            value=self.initial_color,\n",
    "            description='Couleur', style=style, layout=layout)\n",
    "        quantile_slider = ipywidgets.FloatSlider(\n",
    "            value=self.quantiles_value, min=0.001, max=0.999, step=0.001,\n",
    "            description=\"Quantile d'ordre \", style=style, layout=layout)\n",
    "        text_distribution = ipywidgets.Dropdown(\n",
    "            options=list(distributions_0_dict),\n",
    "            value='norm',\n",
    "            description='Distribution', style=style, layout=layout)\n",
    "        # The slider bounds equal the extent of self.x, so the initial\n",
    "        # window is a valid slider value and every zoom stays inside the\n",
    "        # grid on which the curves are evaluated.\n",
    "        int_xrange_slider = ipywidgets.FloatRangeSlider(\n",
    "            value=self.xranges,\n",
    "            min=self.xranges[0], max=self.xranges[1], step=0.1,\n",
    "            description='Zoom en x', style=style, layout=layout)\n",
    "        int_yrange_slider_pdf = ipywidgets.FloatRangeSlider(\n",
    "            value=self.yranges,\n",
    "            min=-1, max=3, step=0.1,\n",
    "            description='Zoom en y', style=style, layout=layout)\n",
    "\n",
    "        controls = ipywidgets.VBox([\n",
    "            mu_slider,\n",
    "            sigma_slider,\n",
    "            text_distribution,\n",
    "            color_picker,\n",
    "            int_xrange_slider,\n",
    "            int_yrange_slider_pdf,\n",
    "            quantile_slider,\n",
    "            output0\n",
    "        ])\n",
    "\n",
    "        controls.layout = make_box_layout()\n",
    "        output.layout = make_box_layout()\n",
    "\n",
    "        # Callbacks run whenever the value of a widget changes.\n",
    "        mu_slider.observe(self.update_mu, 'value')\n",
    "        sigma_slider.observe(self.update_sigma, 'value')\n",
    "        color_picker.observe(self.line_color, 'value')\n",
    "        text_distribution.observe(self.update_text_distribution, 'value')\n",
    "        int_xrange_slider.observe(self.update_xrange_slider, 'value')\n",
    "        int_yrange_slider_pdf.observe(self.update_yrange_slider_pdf, 'value')\n",
    "        quantile_slider.observe(self.update_quantile_slider, 'value')\n",
    "\n",
    "        self.children = [controls, output]\n",
    "\n",
    "    def _update_artists(self):\n",
    "        \"\"\"Recompute every curve, marker and guide line from the state.\n",
    "\n",
    "        Reads mu, sigma, distribution, quantiles_value, xranges and\n",
    "        initial_color, and stores the CDF values in quantiles_range.\n",
    "        Does not redraw; see _redraw().\n",
    "        \"\"\"\n",
    "        law_kwargs = dict(loc=self.mu, scale=self.sigma)\n",
    "        level = self.quantiles_value\n",
    "        quantile = float(self.distribution.ppf(level, **law_kwargs))\n",
    "        density = self.distribution.pdf(self.x, **law_kwargs)\n",
    "        self.quantiles_range = self.distribution.cdf(self.x, **law_kwargs)\n",
    "        x_left = self.xranges[0]\n",
    "\n",
    "        # Density: curve, shaded mass left of the quantile, vertical bar.\n",
    "        self.pdf.set_ydata(density)\n",
    "        if self.area is not None:\n",
    "            # The filled region is rebuilt from scratch on every update.\n",
    "            self.area.remove()\n",
    "        self.area = self.ax0.fill_between(\n",
    "            self.x, 0, density, where=self.x <= quantile,\n",
    "            alpha=0.25, color=self.initial_color)\n",
    "        self.quantile_bar.set_xdata([quantile, quantile])\n",
    "\n",
    "        # Line2D setters are always given sequences, never bare scalars.\n",
    "        # CDF panel: curve, marker and horizontal guide line.\n",
    "        self.cdf.set_ydata(self.quantiles_range)\n",
    "        self.points_cdf.set_data([quantile], [level])\n",
    "        self.points_q11.set_data([x_left, quantile], [level, level])\n",
    "\n",
    "        # Quantile panel: curve, marker and vertical guide line.\n",
    "        self.ppf.set_xdata(self.quantiles_range)\n",
    "        self.points.set_data([level], [quantile])\n",
    "        self.points_q00.set_data([level, level], [x_left, quantile])\n",
    "\n",
    "        # Identity panel: elbow joining the two probability axes.\n",
    "        self.points_q10.set_data([level, level, 1.1], [1.1, level, level])\n",
    "\n",
    "        self.mytext.set_text(\n",
    "            \"Quantile d'ordre {:.2f} : \\n {:.2f}\".format(level, quantile))\n",
    "\n",
    "    def _redraw(self):\n",
    "        \"\"\"Redraw both the 2x2 figure and the density figure.\"\"\"\n",
    "        self.fig.canvas.draw()\n",
    "        self.fig0.canvas.draw()\n",
    "\n",
    "    def update_mu(self, change):\n",
    "        \"\"\"Evolution with the mu parameter.\"\"\"\n",
    "        self.mu = change.new\n",
    "        self._update_artists()\n",
    "        self._redraw()\n",
    "\n",
    "    def update_sigma(self, change):\n",
    "        \"\"\"Evolution with the sigma parameter.\"\"\"\n",
    "        self.sigma = change.new\n",
    "        self._update_artists()\n",
    "        self._redraw()\n",
    "\n",
    "    def update_quantile_slider(self, change):\n",
    "        \"\"\"Evolution with the quantile parameter.\"\"\"\n",
    "        self.quantiles_value = change.new\n",
    "        self._update_artists()\n",
    "        self._redraw()\n",
    "\n",
    "    def line_color(self, change):\n",
    "        \"\"\"Apply the picked colour to every coloured artist.\"\"\"\n",
    "        self.initial_color = change.new\n",
    "        self.params['color'] = self.initial_color\n",
    "        colored_artists = (\n",
    "            self.pdf, self.cdf, self.ppf, self.area,\n",
    "            self.points_cdf, self.points, self.diago)\n",
    "        for artist in colored_artists:\n",
    "            artist.set_color(self.initial_color)\n",
    "        self._redraw()\n",
    "\n",
    "    def update_xrange_slider(self, change):\n",
    "        \"\"\"Zoom on the value axis of the three value-based panels.\"\"\"\n",
    "        self.xranges = change.new\n",
    "        self.ax[1, 1].set_xlim(self.xranges)\n",
    "        self.ax[0, 0].set_ylim(self.xranges)\n",
    "        self.ax0.set_xlim(self.xranges)\n",
    "\n",
    "        self.mytext.set_position((self.xranges[0], np.mean(self.xranges)))\n",
    "        # The guide lines start at the left edge of the window, so they\n",
    "        # have to follow it.\n",
    "        self._update_artists()\n",
    "        self._redraw()\n",
    "\n",
    "    def update_yrange_slider_pdf(self, change):\n",
    "        \"\"\"Zoom on the vertical axis of the density figure.\"\"\"\n",
    "        self.yranges_pdf = change.new\n",
    "        self.ax0.set_ylim(self.yranges_pdf)\n",
    "        self._redraw()\n",
    "\n",
    "    def update_text_distribution(self, change):\n",
    "        \"\"\"Switch to the distribution selected in the dropdown.\"\"\"\n",
    "        self.distribution = distributions_0_dict[change.new]\n",
    "        self._update_artists()\n",
    "        self._redraw()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "442f8158cf824febbdd940545f3db4ee",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "RandomWidgetQuantiles(children=(VBox(children=(FloatSlider(value=0.0, description='$\\\\mu$', layout=Layout(widt…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "RandomWidgetQuantiles()"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}