{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:19.985737Z", "iopub.status.busy": "2024-04-17T07:33:19.985331Z", "iopub.status.idle": "2024-04-17T07:33:20.300691Z", "shell.execute_reply": "2024-04-17T07:33:20.300454Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import random\n", "from lets_plot import *\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.313779Z", "iopub.status.busy": "2024-04-17T07:33:20.313635Z", "iopub.status.idle": "2024-04-17T07:33:20.315674Z", "shell.execute_reply": "2024-04-17T07:33:20.315488Z" } }, "outputs": [], "source": [ "# Synthetic data adapted from the example found at:\n", "# www.cookbook-r.com/Graphs/Scatterplots_(ggplot2)\n", "N_PER_GROUP = 10  # rows in each `cond` group ('A', then 'B')\n", "N_POINTS = 2 * N_PER_GROUP\n", "NOISE_SD = 3  # sigma passed to random.normalvariate\n", "\n", "random.seed(123)  # seed the generator before any noise is drawn\n", "data = dict(\n", "    cond=np.repeat(['A', 'B'], N_PER_GROUP),\n", "    xvar=[i + random.normalvariate(0, NOISE_SD) for i in range(N_POINTS)],\n", "    yvar=[i + random.normalvariate(0, NOISE_SD) for i in range(N_POINTS)],\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Basic scatter-plot" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.316776Z", "iopub.status.busy": "2024-04-17T07:33:20.316604Z", "iopub.status.idle": "2024-04-17T07:33:20.348245Z", "shell.execute_reply": "2024-04-17T07:33:20.348044Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Base plot: `xvar` on the x axis, `yvar` on the y axis.\n", "xy_mapping = aes(x='xvar', y='yvar')\n", "p = ggplot(data, xy_mapping) + ggsize(300, 250)\n", "\n", "# Add a point layer drawn with shape code 1.\n", "p + geom_point(shape=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Add regression line" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.349898Z", "iopub.status.busy": "2024-04-17T07:33:20.349787Z", "iopub.status.idle": "2024-04-17T07:33:20.352932Z", "shell.execute_reply": "2024-04-17T07:33:20.352691Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Points plus a `geom_smooth` layer called with its default arguments.\n", "(\n", "    p\n", "    + geom_point(shape=1)\n", "    + geom_smooth()\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.354207Z", "iopub.status.busy": "2024-04-17T07:33:20.354052Z", "iopub.status.idle": "2024-04-17T07:33:20.356862Z", "shell.execute_reply": "2024-04-17T07:33:20.356661Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same layers, but `se=False` leaves out the standard error band.\n", "(\n", "    p\n", "    + geom_point(shape=1)\n", "    + geom_smooth(se=False)\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Split dataset by the `cond` variable" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.357930Z", "iopub.status.busy": "2024-04-17T07:33:20.357772Z", "iopub.status.idle": "2024-04-17T07:33:20.361344Z", "shell.execute_reply": "2024-04-17T07:33:20.361126Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Map `color` to `cond` at the plot level.\n", "cond_mapping = aes(x='xvar', y='yvar', color='cond')\n", "p1 = ggplot(data, cond_mapping) + ggsize(500, 250)\n", "\n", "(\n", "    p1\n", "    + geom_point(shape=1)\n", "    + geom_smooth(se=False)\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.362249Z", "iopub.status.busy": "2024-04-17T07:33:20.362171Z", "iopub.status.idle": "2024-04-17T07:33:20.364694Z", "shell.execute_reply": "2024-04-17T07:33:20.364517Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Point layer whose `shape` aesthetic is mapped to the `cond` variable.\n", "shape_by_cond = geom_point(aes(shape='cond'), size=5)\n", "p2 = p1 + shape_by_cond\n", "p2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.365499Z", "iopub.status.busy": "2024-04-17T07:33:20.365423Z", "iopub.status.idle": "2024-04-17T07:33:20.367710Z", "shell.execute_reply": "2024-04-17T07:33:20.367541Z" }, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pick the shapes explicitly with `scale_shape_manual`:\n", "#   1 - hollow circle\n", "#   2 - hollow triangle\n", "HOLLOW_CIRCLE, HOLLOW_TRIANGLE = 1, 2\n", "\n", "p2 + scale_shape_manual(values=[HOLLOW_CIRCLE, HOLLOW_TRIANGLE])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Handling overplotting" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.368535Z", "iopub.status.busy": "2024-04-17T07:33:20.368463Z", "iopub.status.idle": "2024-04-17T07:33:20.369987Z", "shell.execute_reply": "2024-04-17T07:33:20.369810Z" } }, "outputs": [], "source": [ "# Create overlapping points by rounding both coordinates to multiples of GRID_STEP.\n", "GRID_STEP = 5\n", "for src, dst in (('xvar', 'xrnd'), ('yvar', 'yrnd')):\n", "    data[dst] = [round(v / GRID_STEP) * GRID_STEP for v in data[src]]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.370761Z", "iopub.status.busy": "2024-04-17T07:33:20.370685Z", "iopub.status.idle": "2024-04-17T07:33:20.373156Z", "shell.execute_reply": "2024-04-17T07:33:20.372982Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Base plot over the rounded columns `xrnd` / `yrnd`.\n", "rounded_mapping = aes(x='xrnd', y='yrnd')\n", "p3 = ggplot(data, rounded_mapping) + ggsize(500, 250)\n", "\n", "# Show overplotting by drawing the points with a low `alpha`.\n", "p3 + geom_point(alpha=0.3, size=5)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-04-17T07:33:20.373957Z", "iopub.status.busy": "2024-04-17T07:33:20.373885Z", "iopub.status.idle": "2024-04-17T07:33:20.376198Z", "shell.execute_reply": "2024-04-17T07:33:20.376028Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Another way to show overplotting: give the point layer a `position_jitter` position.\n", "jitter_position = position_jitter(width=0.2, height=0.5)\n", "p3 + geom_point(shape=1, position=jitter_position)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }