{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Input: Dictionaries, Pandas and Polars DataFrames\n", "\n", "A few examples of using Lets-Plot with dictionaries, Pandas DataFrames and Polars DataFrames." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Table of Contents\n", "\n", "1. [Python Dictionaries](#dict)\n", "\n", "2. [Pandas DataFrame](#pandas)\n", "\n", " 2.1. [From Dictionary](#pandas-from-dict)\n", "\n", " 2.2. [From CSV](#pandas-from-csv)\n", "\n", "3. [Polars DataFrame](#polars)\n", "\n", " 3.1. [From Dictionary](#polars-from-dict)\n", "\n", " 3.2. [From CSV](#polars-from-csv)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.596517Z", "iopub.status.busy": "2024-08-23T10:35:16.596363Z", "iopub.status.idle": "2024-08-23T10:35:16.928024Z", "shell.execute_reply": "2024-08-23T10:35:16.927593Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import polars as pl\n", "\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.929840Z", "iopub.status.busy": "2024-08-23T10:35:16.929715Z", "iopub.status.idle": "2024-08-23T10:35:16.932251Z", "shell.execute_reply": "2024-08-23T10:35:16.931986Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"dict\"></a>\n", "\n", "### 1. Python Dictionaries" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.945620Z", "iopub.status.busy": "2024-08-23T10:35:16.945499Z", "iopub.status.idle": "2024-08-23T10:35:16.947332Z", "shell.execute_reply": "2024-08-23T10:35:16.947084Z" } }, "outputs": [], "source": [ "x = np.linspace(-2 * np.pi, 2 * np.pi, 100)\n", "y = np.sin(x)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.948523Z", "iopub.status.busy": "2024-08-23T10:35:16.948450Z", "iopub.status.idle": "2024-08-23T10:35:16.976829Z", "shell.execute_reply": "2024-08-23T10:35:16.976413Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot({'x': x, 'y': y}, aes('x', 'y')) + geom_point()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"pandas\"></a>\n", "\n", "### 2. Pandas DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"pandas-from-dict\"></a>\n", "\n", "#### 2.1. From Dictionary" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.978102Z", "iopub.status.busy": "2024-08-23T10:35:16.978025Z", "iopub.status.idle": "2024-08-23T10:35:16.979990Z", "shell.execute_reply": "2024-08-23T10:35:16.979740Z" } }, "outputs": [], "source": [ "def get_data_dict():\n", " np.random.seed(42)\n", " n = 100\n", " x = np.random.uniform(-1, 1, size=n)\n", " y = 25 * x ** 2 + np.random.normal(size=n)\n", " return {'x': x, 'y': y}" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.981093Z", "iopub.status.busy": "2024-08-23T10:35:16.981014Z", "iopub.status.idle": "2024-08-23T10:35:16.985257Z", "shell.execute_reply": "2024-08-23T10:35:16.985018Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(100, 2)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
0-0.2509201.661065
10.90142920.015331
20.4639885.473880
30.197317-1.014219
4-0.68796311.612646
\n", "
" ], "text/plain": [ " x y\n", "0 -0.250920 1.661065\n", "1 0.901429 20.015331\n", "2 0.463988 5.473880\n", "3 0.197317 -1.014219\n", "4 -0.687963 11.612646" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pandas_df = pd.DataFrame(get_data_dict())\n", "print(pandas_df.shape)\n", "pandas_df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.986301Z", "iopub.status.busy": "2024-08-23T10:35:16.986230Z", "iopub.status.idle": "2024-08-23T10:35:16.989435Z", "shell.execute_reply": "2024-08-23T10:35:16.989196Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(pandas_df) + \\\n", " geom_point(aes('x', 'y', fill='y'), shape=21, size=5, color='white')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"pandas-from-csv\"></a>\n", "\n", "#### 2.2. From CSV" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:16.990477Z", "iopub.status.busy": "2024-08-23T10:35:16.990404Z", "iopub.status.idle": "2024-08-23T10:35:17.127940Z", "shell.execute_reply": "2024-08-23T10:35:17.127467Z" } }, "outputs": [], "source": [ "# Load mpg dataset with pandas\n", "\n", "mpg_pandas_df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:17.129299Z", "iopub.status.busy": "2024-08-23T10:35:17.129203Z", "iopub.status.idle": "2024-08-23T10:35:17.134860Z", "shell.execute_reply": "2024-08-23T10:35:17.134621Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_pandas_df, aes('displ', 'cty', fill='drv', size='hwy')) + \\\n", " geom_point(shape=21) + \\\n", " scale_size(range=[5, 15], breaks=[15, 40]) + \\\n", " ggsize(600, 350)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"polars\"></a>\n", "\n", "### 3. Polars DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"polars-from-dict\"></a>\n", "\n", "#### 3.1. From Dictionary" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:17.136218Z", "iopub.status.busy": "2024-08-23T10:35:17.136150Z", "iopub.status.idle": "2024-08-23T10:35:17.153137Z", "shell.execute_reply": "2024-08-23T10:35:17.152896Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(100, 2)\n" ] }, { "data": { "text/html": [ "
\n", "shape: (5, 2)
xy
f64f64
-0.250921.661065
0.90142920.015331
0.4639885.47388
0.197317-1.014219
-0.68796311.612646
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────────┬───────────┐\n", "│ x ┆ y │\n", "│ --- ┆ --- │\n", "│ f64 ┆ f64 │\n", "╞═══════════╪═══════════╡\n", "│ -0.25092 ┆ 1.661065 │\n", "│ 0.901429 ┆ 20.015331 │\n", "│ 0.463988 ┆ 5.47388 │\n", "│ 0.197317 ┆ -1.014219 │\n", "│ -0.687963 ┆ 11.612646 │\n", "└───────────┴───────────┘" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "polars_df = pl.DataFrame(get_data_dict())\n", "print(polars_df.shape)\n", "polars_df.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:17.154418Z", "iopub.status.busy": "2024-08-23T10:35:17.154351Z", "iopub.status.idle": "2024-08-23T10:35:17.157415Z", "shell.execute_reply": "2024-08-23T10:35:17.157187Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(polars_df) + \\\n", " geom_point(aes('x', 'y', fill='y'), shape=21, size=5, color='white')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<a id=\"polars-from-csv\"></a>\n", "\n", "#### 3.2. From CSV" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:17.158661Z", "iopub.status.busy": "2024-08-23T10:35:17.158593Z", "iopub.status.idle": "2024-08-23T10:35:17.313159Z", "shell.execute_reply": "2024-08-23T10:35:17.312882Z" } }, "outputs": [], "source": [ "# Load mpg dataset with polars\n", "\n", "mpg_polars_df = pl.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2024-08-23T10:35:17.314668Z", "iopub.status.busy": "2024-08-23T10:35:17.314597Z", "iopub.status.idle": "2024-08-23T10:35:17.319460Z", "shell.execute_reply": "2024-08-23T10:35:17.319220Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mpg_polars_df, aes('displ', 'cty', fill='drv', size='hwy')) + \\\n", " geom_point(shape=21) + \\\n", " scale_size(range=[5, 15], breaks=[15, 40]) + \\\n", " ggsize(600, 350)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }