{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:07.148300Z", "iopub.status.busy": "2024-04-26T11:44:07.148300Z", "iopub.status.idle": "2024-04-26T11:44:08.125477Z", "shell.execute_reply": "2024-04-26T11:44:08.125477Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pandas import DataFrame\n", "import numpy as np\n", "from lets_plot import *\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.158737Z", "iopub.status.busy": "2024-04-26T11:44:08.158737Z", "iopub.status.idle": "2024-04-26T11:44:08.173220Z", "shell.execute_reply": "2024-04-26T11:44:08.173220Z" } }, "outputs": [], "source": [ "# This example was found at: www.cookbook-r.com/Graphs/Plotting_distributions_(ggplot2)\n", "np.random.seed(123)\n", "data = DataFrame(dict(\n", " cond=np.repeat(['A','B'], 200),\n", " rating=np.concatenate((np.random.normal(0, 1, 200), np.random.normal(.8, 1, 200)))\n", "))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.173220Z", "iopub.status.busy": "2024-04-26T11:44:08.173220Z", "iopub.status.idle": "2024-04-26T11:44:08.300809Z", "shell.execute_reply": "2024-04-26T11:44:08.299776Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Basic histogram of \"rating\"\n", "p = ggplot(data, aes(x='rating')) + ggsize(500, 250)\n", "p + geom_histogram(binwidth=.5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.300809Z", "iopub.status.busy": "2024-04-26T11:44:08.300809Z", "iopub.status.idle": "2024-04-26T11:44:08.363546Z", "shell.execute_reply": "2024-04-26T11:44:08.363546Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Histogram overlaid with kernel density curve\n", "# - histogram with density instead of count on y-axis\n", "# - overlay with transparent density plot\n", "(p \n", " + geom_histogram(aes(y='..density..'), binwidth=.5, colour=\"black\", fill=\"white\") \n", " + geom_density(alpha=.2, color=\"#de2d26\", fill=\"#ff6666\")\n", ") " ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.363546Z", "iopub.status.busy": "2024-04-26T11:44:08.363546Z", "iopub.status.idle": "2024-04-26T11:44:08.379183Z", "shell.execute_reply": "2024-04-26T11:44:08.379183Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Histogram with a dashed red vertical line at the mean of all ratings\n", "(p \n", " + geom_histogram(binwidth=.5, colour=\"black\", fill=\"white\")\n", " + geom_vline(\n", " xintercept=np.mean(data['rating']), \n", " color=\"red\", linetype=\"dashed\", size=1)\n", ") " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Histogram and density plots with multiple groups" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.379183Z", "iopub.status.busy": "2024-04-26T11:44:08.379183Z", "iopub.status.idle": "2024-04-26T11:44:08.394804Z", "shell.execute_reply": "2024-04-26T11:44:08.394804Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p1 = ggplot(data, aes(x='rating', fill='cond')) + ggsize(500, 250)\n", "\n", "# Default histogram (stacked)\n", "p1 + geom_histogram(binwidth=.5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.394804Z", "iopub.status.busy": "2024-04-26T11:44:08.394804Z", "iopub.status.idle": "2024-04-26T11:44:08.410531Z", "shell.execute_reply": "2024-04-26T11:44:08.410531Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Overlaid histograms\n", "p1 + geom_histogram(binwidth=.5, alpha=.7, position=\"identity\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.410531Z", "iopub.status.busy": "2024-04-26T11:44:08.410531Z", "iopub.status.idle": "2024-04-26T11:44:08.426754Z", "shell.execute_reply": "2024-04-26T11:44:08.426754Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Interleaved histograms\n", "p1 + geom_histogram(binwidth=.5, position=\"dodge\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.426754Z", "iopub.status.busy": "2024-04-26T11:44:08.426754Z", "iopub.status.idle": "2024-04-26T11:44:08.505369Z", "shell.execute_reply": "2024-04-26T11:44:08.505369Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Density plot\n", "p2 = ggplot(data, aes(x='rating', color='cond')) + ggsize(500, 250)\n", "p2 + geom_density()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.505369Z", "iopub.status.busy": "2024-04-26T11:44:08.505369Z", "iopub.status.idle": "2024-04-26T11:44:08.599778Z", "shell.execute_reply": "2024-04-26T11:44:08.599778Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Density plot with semi-transparent fill\n", "p2 + geom_density(aes(fill='cond'), alpha=.7)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.599778Z", "iopub.status.busy": "2024-04-26T11:44:08.599778Z", "iopub.status.idle": "2024-04-26T11:44:08.647011Z", "shell.execute_reply": "2024-04-26T11:44:08.647011Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
condrating
0A0.003787
1B0.685638
\n", "
" ], "text/plain": [ " cond rating\n", "0 A 0.003787\n", "1 B 0.685638" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Find the mean of each group\n", "cdat = data.groupby(['cond'], as_index=False).mean()\n", "cdat" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.647011Z", "iopub.status.busy": "2024-04-26T11:44:08.647011Z", "iopub.status.idle": "2024-04-26T11:44:08.678486Z", "shell.execute_reply": "2024-04-26T11:44:08.678486Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Overlaid histograms with means\n", "(p2 \n", " + geom_histogram(aes(fill='cond'), alpha=.5, position=\"identity\", size=0)\n", " + geom_vline(data=cdat, \n", " mapping=aes(xintercept='rating', color='cond'), \n", " linetype=\"dashed\", size=1)\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.678486Z", "iopub.status.busy": "2024-04-26T11:44:08.678486Z", "iopub.status.idle": "2024-04-26T11:44:08.709920Z", "shell.execute_reply": "2024-04-26T11:44:08.709920Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use freqpoly instead of histogram; lines are colored by 'cond' via p2's mapping\n", "(p2 \n", " + geom_freqpoly()\n", " + geom_vline(data=cdat, \n", " mapping=aes(xintercept='rating', color='cond'), \n", " linetype=\"dashed\", size=1)\n", ")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.709920Z", "iopub.status.busy": "2024-04-26T11:44:08.709920Z", "iopub.status.idle": "2024-04-26T11:44:08.788985Z", "shell.execute_reply": "2024-04-26T11:44:08.788985Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Density plots with means\n", "(p2 \n", " + geom_density()\n", " + geom_vline(data=cdat, \n", " mapping=aes(xintercept='rating', color='cond'), \n", " linetype=\"dashed\", size=1)\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using facets" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.788985Z", "iopub.status.busy": "2024-04-26T11:44:08.788985Z", "iopub.status.idle": "2024-04-26T11:44:08.804611Z", "shell.execute_reply": "2024-04-26T11:44:08.804611Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(ggplot(data, aes(x='rating')) \n", " + geom_histogram(binwidth=.5, colour=\"black\", fill=\"white\")\n", " + facet_grid('cond')\n", ")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.804611Z", "iopub.status.busy": "2024-04-26T11:44:08.804611Z", "iopub.status.idle": "2024-04-26T11:44:08.820188Z", "shell.execute_reply": "2024-04-26T11:44:08.820188Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# With mean lines, using 'cdat' computed earlier.\n", "(ggplot(data, aes(x='rating')) \n", " + geom_histogram(binwidth=.5, colour=\"black\", fill=\"white\")\n", " + geom_vline(data=cdat, \n", " mapping=aes(xintercept='rating'), \n", " linetype=\"dashed\", size=1, colour=\"red\")\n", " + facet_grid(None, 'cond') \n", " + ggsize(500, 250)\n", ") \n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Box plots" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.820188Z", "iopub.status.busy": "2024-04-26T11:44:08.820188Z", "iopub.status.idle": "2024-04-26T11:44:08.836478Z", "shell.execute_reply": "2024-04-26T11:44:08.836478Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A basic box plot\n", "p3 = ggplot(data, aes(x='cond', y='rating')) + ggsize(300, 200)\n", "p3 + geom_boxplot()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.836478Z", "iopub.status.busy": "2024-04-26T11:44:08.836478Z", "iopub.status.idle": "2024-04-26T11:44:08.852310Z", "shell.execute_reply": "2024-04-26T11:44:08.852310Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A basic box plot with the conditions colored\n", "p3 + geom_boxplot(aes(fill='cond'))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:44:08.852310Z", "iopub.status.busy": "2024-04-26T11:44:08.852310Z", "iopub.status.idle": "2024-04-26T11:44:08.867863Z", "shell.execute_reply": "2024-04-26T11:44:08.867863Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Style outliers\n", "p3 + geom_boxplot(outlier_color='red', outlier_shape=8, outlier_size=1.5)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 2 }