{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Plotting Data from Dictionaries, Pandas and Polars DataFrames\n",
"\n",
"A few examples of using Lets-Plot with dictionaries, Pandas DataFrames and Polars DataFrames."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Table of Contents\n",
"\n",
"1. [Python Dictionaries](#dict)\n",
"\n",
"2. [Pandas DataFrame](#pandas)\n",
"\n",
" 2.1. [From Dictionary](#pandas-from-dict)\n",
"\n",
" 2.2. [From CSV](#pandas-from-csv)\n",
"\n",
"3. [Polars DataFrame](#polars)\n",
"\n",
" 3.1. [From Dictionary](#polars-from-dict)\n",
"\n",
" 3.2. [From CSV](#polars-from-csv)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.596517Z",
"iopub.status.busy": "2024-08-23T10:35:16.596363Z",
"iopub.status.idle": "2024-08-23T10:35:16.928024Z",
"shell.execute_reply": "2024-08-23T10:35:16.927593Z"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import polars as pl\n",
"\n",
"from lets_plot import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.929840Z",
"iopub.status.busy": "2024-08-23T10:35:16.929715Z",
"iopub.status.idle": "2024-08-23T10:35:16.932251Z",
"shell.execute_reply": "2024-08-23T10:35:16.931986Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Set up Lets-Plot's HTML output for this notebook (the call emits an HTML display output).\n",
"# NOTE(review): presumably this must run before any plot cell - confirm against the Lets-Plot docs.\n",
"LetsPlot.setup_html()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"dict\"></a>\n",
"\n",
"### 1. Python Dictionaries"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.945620Z",
"iopub.status.busy": "2024-08-23T10:35:16.945499Z",
"iopub.status.idle": "2024-08-23T10:35:16.947332Z",
"shell.execute_reply": "2024-08-23T10:35:16.947084Z"
}
},
"outputs": [],
"source": [
"# Sample sin(x) at 100 evenly spaced points across [-2*pi, 2*pi].\n",
"x = np.linspace(start=-2 * np.pi, stop=2 * np.pi, num=100)\n",
"y = np.sin(x)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.948523Z",
"iopub.status.busy": "2024-08-23T10:35:16.948450Z",
"iopub.status.idle": "2024-08-23T10:35:16.976829Z",
"shell.execute_reply": "2024-08-23T10:35:16.976413Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A plain dict of equal-length arrays is passed to ggplot() as the data.\n",
"sine_data = {'x': x, 'y': y}\n",
"ggplot(sine_data, aes(x='x', y='y')) + geom_point()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"pandas\"></a>\n",
"\n",
"### 2. Pandas DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"pandas-from-dict\"></a>\n",
"\n",
"#### 2.1. From Dictionary"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.978102Z",
"iopub.status.busy": "2024-08-23T10:35:16.978025Z",
"iopub.status.idle": "2024-08-23T10:35:16.979990Z",
"shell.execute_reply": "2024-08-23T10:35:16.979740Z"
}
},
"outputs": [],
"source": [
"def get_data_dict():\n",
"    \"\"\"Return a reproducible noisy-parabola sample as a dict of two arrays.\n",
"\n",
"    The global NumPy RNG is re-seeded with 42 on every call, so repeated\n",
"    calls return identical data (and reset the global random state).\n",
"\n",
"    Returns:\n",
"        dict: ``{'x': ..., 'y': ...}`` where ``x`` holds 100 uniform draws\n",
"        from [-1, 1) and ``y`` is ``25 * x**2`` plus standard-normal noise.\n",
"    \"\"\"\n",
"    np.random.seed(42)\n",
"    sample_size = 100\n",
"    # Draw order matters for reproducibility: uniform first, then normal.\n",
"    xs = np.random.uniform(low=-1, high=1, size=sample_size)\n",
"    noise = np.random.normal(size=sample_size)\n",
"    ys = 25 * xs ** 2 + noise\n",
"    return {'x': xs, 'y': ys}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.981093Z",
"iopub.status.busy": "2024-08-23T10:35:16.981014Z",
"iopub.status.idle": "2024-08-23T10:35:16.985257Z",
"shell.execute_reply": "2024-08-23T10:35:16.985018Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100, 2)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
" y | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -0.250920 | \n",
" 1.661065 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.901429 | \n",
" 20.015331 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.463988 | \n",
" 5.473880 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.197317 | \n",
" -1.014219 | \n",
"
\n",
" \n",
" 4 | \n",
" -0.687963 | \n",
" 11.612646 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x y\n",
"0 -0.250920 1.661065\n",
"1 0.901429 20.015331\n",
"2 0.463988 5.473880\n",
"3 0.197317 -1.014219\n",
"4 -0.687963 11.612646"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the pandas frame directly from the dict returned by get_data_dict().\n",
"pandas_df = pd.DataFrame(data=get_data_dict())\n",
"print(pandas_df.shape)\n",
"pandas_df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.986301Z",
"iopub.status.busy": "2024-08-23T10:35:16.986230Z",
"iopub.status.idle": "2024-08-23T10:35:16.989435Z",
"shell.execute_reply": "2024-08-23T10:35:16.989196Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# shape=21 with fill mapped to 'y'; the point outline color is fixed to white.\n",
"(\n",
"    ggplot(pandas_df)\n",
"    + geom_point(aes('x', 'y', fill='y'), shape=21, size=5, color='white')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"pandas-from-csv\"></a>\n",
"\n",
"#### 2.2. From CSV"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:16.990477Z",
"iopub.status.busy": "2024-08-23T10:35:16.990404Z",
"iopub.status.idle": "2024-08-23T10:35:17.127940Z",
"shell.execute_reply": "2024-08-23T10:35:17.127467Z"
}
},
"outputs": [],
"source": [
"# Read the mpg dataset straight from its URL into a pandas DataFrame.\n",
"\n",
"mpg_csv_url = \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\"\n",
"mpg_pandas_df = pd.read_csv(mpg_csv_url)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:17.129299Z",
"iopub.status.busy": "2024-08-23T10:35:17.129203Z",
"iopub.status.idle": "2024-08-23T10:35:17.134860Z",
"shell.execute_reply": "2024-08-23T10:35:17.134621Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill is mapped to 'drv' and point size to 'hwy'; the size scale and plot size are set explicitly.\n",
"(\n",
"    ggplot(mpg_pandas_df, aes('displ', 'cty', fill='drv', size='hwy'))\n",
"    + geom_point(shape=21)\n",
"    + scale_size(range=[5, 15], breaks=[15, 40])\n",
"    + ggsize(600, 350)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"polars\"></a>\n",
"\n",
"### 3. Polars DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"polars-from-dict\"></a>\n",
"\n",
"#### 3.1. From Dictionary"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:17.136218Z",
"iopub.status.busy": "2024-08-23T10:35:17.136150Z",
"iopub.status.idle": "2024-08-23T10:35:17.153137Z",
"shell.execute_reply": "2024-08-23T10:35:17.152896Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100, 2)\n"
]
},
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)x | y |
---|
f64 | f64 |
-0.25092 | 1.661065 |
0.901429 | 20.015331 |
0.463988 | 5.47388 |
0.197317 | -1.014219 |
-0.687963 | 11.612646 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────────┬───────────┐\n",
"│ x ┆ y │\n",
"│ --- ┆ --- │\n",
"│ f64 ┆ f64 │\n",
"╞═══════════╪═══════════╡\n",
"│ -0.25092 ┆ 1.661065 │\n",
"│ 0.901429 ┆ 20.015331 │\n",
"│ 0.463988 ┆ 5.47388 │\n",
"│ 0.197317 ┆ -1.014219 │\n",
"│ -0.687963 ┆ 11.612646 │\n",
"└───────────┴───────────┘"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the Polars frame directly from the dict returned by get_data_dict().\n",
"polars_df = pl.DataFrame(data=get_data_dict())\n",
"print(polars_df.shape)\n",
"polars_df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:17.154418Z",
"iopub.status.busy": "2024-08-23T10:35:17.154351Z",
"iopub.status.idle": "2024-08-23T10:35:17.157415Z",
"shell.execute_reply": "2024-08-23T10:35:17.157187Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The Polars frame is passed to ggplot() directly, with no conversion step.\n",
"(\n",
"    ggplot(polars_df)\n",
"    + geom_point(aes('x', 'y', fill='y'), shape=21, size=5, color='white')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id=\"polars-from-csv\"></a>\n",
"\n",
"#### 3.2. From CSV"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:17.158661Z",
"iopub.status.busy": "2024-08-23T10:35:17.158593Z",
"iopub.status.idle": "2024-08-23T10:35:17.313159Z",
"shell.execute_reply": "2024-08-23T10:35:17.312882Z"
}
},
"outputs": [],
"source": [
"# Read the mpg dataset straight from its URL into a Polars DataFrame.\n",
"\n",
"mpg_csv_url = \"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv\"\n",
"mpg_polars_df = pl.read_csv(mpg_csv_url)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"execution": {
"iopub.execute_input": "2024-08-23T10:35:17.314668Z",
"iopub.status.busy": "2024-08-23T10:35:17.314597Z",
"iopub.status.idle": "2024-08-23T10:35:17.319460Z",
"shell.execute_reply": "2024-08-23T10:35:17.319220Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill is mapped to 'drv' and point size to 'hwy'; the size scale and plot size are set explicitly.\n",
"(\n",
"    ggplot(mpg_polars_df, aes('displ', 'cty', fill='drv', size='hwy'))\n",
"    + geom_point(shape=21)\n",
"    + scale_size(range=[5, 15], breaks=[15, 40])\n",
"    + ggsize(600, 350)\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}