{ "cells": [ { "cell_type": "markdown", "id": "558d28c0", "metadata": {}, "source": [ "# Handling overplotting on a scatter plot: `geom_count()`/`stat_sum()`\n", "\n", "`geom_count()` counts the number of observations at each location.\n", "\n", "Computed variables:\n", "\n", "- `..n..` - number of observations at location\n", "- `..prop..` - value in range 0..1 : share of observations at location\n", "- `..proppct..` - value in range 0..100 : % of observations at location" ] }, { "cell_type": "code", "execution_count": 1, "id": "da3d73f3", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:35.796089Z", "iopub.status.busy": "2024-08-23T10:36:35.795993Z", "iopub.status.idle": "2024-08-23T10:36:36.124869Z", "shell.execute_reply": "2024-08-23T10:36:36.124552Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "08103843", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.126468Z", "iopub.status.busy": "2024-08-23T10:36:36.126327Z", "iopub.status.idle": "2024-08-23T10:36:36.128405Z", "shell.execute_reply": "2024-08-23T10:36:36.128236Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html() " ] }, { "cell_type": "code", "execution_count": 3, "id": "99409355", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.129439Z", "iopub.status.busy": "2024-08-23T10:36:36.129369Z", "iopub.status.idle": "2024-08-23T10:36:36.276573Z", "shell.execute_reply": "2024-08-23T10:36:36.276225Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0manufacturermodeldisplyearcyltransdrvctyhwyflclass
01audia41.819994auto(l5)f1829pcompact
12audia41.819994manual(m5)f2129pcompact
23audia42.020084manual(m6)f2031pcompact
34audia42.020084auto(av)f2130pcompact
45audia42.819996auto(l5)f1626pcompact
\n", "
" ], "text/plain": [ " Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy \\\n", "0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 \n", "1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 \n", "2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 \n", "3 4 audi a4 2.0 2008 4 auto(av) f 21 30 \n", "4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 \n", "\n", " fl class \n", "0 p compact \n", "1 p compact \n", "2 p compact \n", "3 p compact \n", "4 p compact " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mpg_df = pd.read_csv (\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")\n", "mpg_df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "78db0a6b", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.277957Z", "iopub.status.busy": "2024-08-23T10:36:36.277876Z", "iopub.status.idle": "2024-08-23T10:36:36.279661Z", "shell.execute_reply": "2024-08-23T10:36:36.279484Z" } }, "outputs": [], "source": [ "p = ggplot(mpg_df, aes(x=as_discrete('class', order=1), y=as_discrete('drv', order=1)))" ] }, { "cell_type": "markdown", "id": "fafaa792", "metadata": {}, "source": [ "#### 1. Plot an Observation Count by Location" ] }, { "cell_type": "code", "execution_count": 5, "id": "9bf746ef", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.280805Z", "iopub.status.busy": "2024-08-23T10:36:36.280734Z", "iopub.status.idle": "2024-08-23T10:36:36.312684Z", "shell.execute_reply": "2024-08-23T10:36:36.312489Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p + geom_count()" ] }, { "cell_type": "code", "execution_count": 6, "id": "903b4131-0ca4-487c-b3ef-f50b5242add1", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.314014Z", "iopub.status.busy": "2024-08-23T10:36:36.313934Z", "iopub.status.idle": "2024-08-23T10:36:36.318107Z", "shell.execute_reply": "2024-08-23T10:36:36.317933Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p + stat_sum()" ] }, { "cell_type": "markdown", "id": "68d94a2c", "metadata": {}, "source": [ "#### 2. Plot the Share of Observations by Location" ] }, { "cell_type": "code", "execution_count": 7, "id": "feb247d0", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.319240Z", "iopub.status.busy": "2024-08-23T10:36:36.319166Z", "iopub.status.idle": "2024-08-23T10:36:36.323420Z", "shell.execute_reply": "2024-08-23T10:36:36.323251Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p + geom_count(aes(size='..prop..'))" ] }, { "cell_type": "markdown", "id": "caf89a4b", "metadata": {}, "source": [ "#### 3. Plot the Share of Observations by Drivetrain Type within Each Vehicle \"class\"\n", "\n", "Note: the `group='class'` aesthetic groups observations by \"class\", so `..prop..` is computed within each class." ] }, { "cell_type": "code", "execution_count": 8, "id": "e513cb6c", "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:36:36.324521Z", "iopub.status.busy": "2024-08-23T10:36:36.324445Z", "iopub.status.idle": "2024-08-23T10:36:36.329123Z", "shell.execute_reply": "2024-08-23T10:36:36.328948Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p + geom_count(aes(size='..prop..', group='class'))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }