{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:18.962598Z", "iopub.status.busy": "2025-11-05T13:47:18.962511Z", "iopub.status.idle": "2025-11-05T13:47:18.966074Z", "shell.execute_reply": "2025-11-05T13:47:18.965897Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import random\n", "from lets_plot import *\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:18.979651Z", "iopub.status.busy": "2025-11-05T13:47:18.979574Z", "iopub.status.idle": "2025-11-05T13:47:18.981205Z", "shell.execute_reply": "2025-11-05T13:47:18.981027Z" } }, "outputs": [], "source": [ "# Source of this example: www.cookbook-r.com/Graphs/Scatterplots_(ggplot2)\n", "# Two groups (`A`, `B`) of 10 rows each; `xvar` and `yvar` are the row index\n", "# plus Gaussian noise (mu = 0, sigma = 3). The seed keeps the data reproducible.\n", "random.seed(123)\n", "data = {\n", "    'cond': np.repeat(['A', 'B'], 10),\n", "    'xvar': [idx + random.normalvariate(0, 3) for idx in range(20)],\n", "    'yvar': [idx + random.normalvariate(0, 3) for idx in range(20)],\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Basic scatter-plot" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:18.982248Z", "iopub.status.busy": "2025-11-05T13:47:18.982180Z", "iopub.status.idle": "2025-11-05T13:47:19.011937Z", "shell.execute_reply": "2025-11-05T13:47:19.011624Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Base plot (`xvar` vs `yvar`) that the following cells add layers to.\n", "p = (\n", "    ggplot(data, aes(x='xvar', y='yvar'))\n", "    + ggsize(300, 250)\n", ")\n", "\n", "# shape=1 draws hollow circles.\n", "p + geom_point(shape=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Add regression line" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.013138Z", "iopub.status.busy": "2025-11-05T13:47:19.013066Z", "iopub.status.idle": "2025-11-05T13:47:19.016255Z", "shell.execute_reply": "2025-11-05T13:47:19.016075Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Points plus a smoothing line with default `geom_smooth` settings and a fixed seed.\n", "(\n", "    p\n", "    + geom_point(shape=1)\n", "    + geom_smooth(seed=42)\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.017226Z", "iopub.status.busy": "2025-11-05T13:47:19.017160Z", "iopub.status.idle": "2025-11-05T13:47:19.020104Z", "shell.execute_reply": "2025-11-05T13:47:19.019938Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same layers, but `se=False` drops the standard error band.\n", "(\n", "    p\n", "    + geom_point(shape=1)\n", "    + geom_smooth(seed=42, se=False)\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Split dataset by the `cond` variable" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.020974Z", "iopub.status.busy": "2025-11-05T13:47:19.020904Z", "iopub.status.idle": "2025-11-05T13:47:19.024734Z", "shell.execute_reply": "2025-11-05T13:47:19.024566Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add `color='cond'` to the mapping so the plot is split by `cond`.\n", "p1 = (\n", "    ggplot(data, aes(x='xvar', y='yvar', color='cond'))\n", "    + ggsize(500, 250)\n", ")\n", "\n", "p1 + geom_point(shape=1) + geom_smooth(seed=42, se=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.025749Z", "iopub.status.busy": "2025-11-05T13:47:19.025679Z", "iopub.status.idle": "2025-11-05T13:47:19.028097Z", "shell.execute_reply": "2025-11-05T13:47:19.027926Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Additionally map the point `shape` to the `cond` variable.\n", "p2 = p1 + geom_point(\n", "    aes(shape='cond'),\n", "    size=5,\n", ")\n", "p2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.028922Z", "iopub.status.busy": "2025-11-05T13:47:19.028853Z", "iopub.status.idle": "2025-11-05T13:47:19.031227Z", "shell.execute_reply": "2025-11-05T13:47:19.031060Z" }, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pick the shapes explicitly with `scale_shape_manual`:\n", "#   1 - hollow circle\n", "#   2 - hollow triangle\n", "p2 + scale_shape_manual(values=[1, 2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Handling overplotting" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.032066Z", "iopub.status.busy": "2025-11-05T13:47:19.031997Z", "iopub.status.idle": "2025-11-05T13:47:19.033433Z", "shell.execute_reply": "2025-11-05T13:47:19.033251Z" } }, "outputs": [], "source": [ "# Round both coordinates to a multiple of 5 so that the data contains overlapping points.\n", "data.update(\n", "    xrnd=[5 * round(x / 5) for x in data['xvar']],\n", "    yrnd=[5 * round(y / 5) for y in data['yvar']],\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.034153Z", "iopub.status.busy": "2025-11-05T13:47:19.034082Z", "iopub.status.idle": "2025-11-05T13:47:19.036726Z", "shell.execute_reply": "2025-11-05T13:47:19.036558Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Base plot over the rounded columns `xrnd` / `yrnd`.\n", "p3 = (\n", "    ggplot(data, aes(x='xrnd', y='yrnd'))\n", "    + ggsize(500, 250)\n", ")\n", "\n", "# Semi-transparent points (`alpha`) make the overplotting visible.\n", "p3 + geom_point(size=5, alpha=.3)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2025-11-05T13:47:19.037469Z", "iopub.status.busy": "2025-11-05T13:47:19.037398Z", "iopub.status.idle": "2025-11-05T13:47:19.039894Z", "shell.execute_reply": "2025-11-05T13:47:19.039724Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Alternative: jitter the points to show the overplotting in another way.\n", "p3 + geom_point(\n", "    shape=1,\n", "    position=position_jitter(width=.2, height=.5, seed=42),\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }