from lets_plot import *
from lets_plot.bistro import *
import pandas as pd

# Enable HTML rendering of lets-plot figures in the notebook.
LetsPlot.setup_html()

# Toy input for geom_area_ridges: two ridges (y = 0 and y = 1) sampled at
# x = 0..7 in interleaved order, with a few non-zero heights and one missing
# height (None).
df = pd.DataFrame({
    'x': [0.0, 4.0, 1.0, 5.0, 2.0, 6.0, 3.0, 7.0] * 2,
    'y': [0.0] * 8 + [1.0] * 8,
    'h': [0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
          0.0, 0.0, 2.0, 2.0, 0.0, 0.0, None, 0.0],
})

# Draw the ridges straight from the precomputed heights (stat='identity').
ridges_layer = geom_area_ridges(aes("x", "y", height="h"), stat='identity')
ggplot(df) + ridges_layer
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
def dataset_array_to_dataframe(dataset_array):
    """Flatten a 2-D grid of values into a long-format DataFrame.

    Every cell of ``dataset_array`` becomes one record with columns:

    * ``x`` - index of the cell inside its row (column index),
    * ``y`` - index of the row,
    * ``h`` - the cell value.

    Records are emitted row by row, left to right.
    """
    records = [
        (col_idx, row_idx, value)
        for row_idx, row in enumerate(dataset_array)
        for col_idx, value in enumerate(row)
    ]
    return pd.DataFrame.from_records(records, columns=["x", "y", "h"])


def process_rows(df, *, dist=1, step_y=1):
    """Split every ``y`` row into runs of positive heights and pad each run with tails.

    For each ``y`` in ``range(df['y'].min(), df['y'].max() + 1, step_y)`` the
    integer ``x`` positions of that row are scanned from the smallest to the
    largest. Positions with ``h > 0`` are collected into a run; a run is closed
    as soon as the scan is at least ``dist`` positions past the last positive
    one. Each run is then surrounded by four extra points: ``h = 0`` directly
    before and after it, and ``h = -1`` one step further out (a marker value the
    caller can replace, e.g. with ``None``). When padded runs overlap on the
    same ``x``, the record with the largest ``h`` wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with integer-valued ``x`` and ``y`` columns and a
        numeric ``h`` column.
    dist : int, keyword-only
        Gap (in ``x`` units) after the last positive height at which the
        current run is closed.
    step_y : int, keyword-only
        Stride used when iterating over ``y`` values.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y``, ``h`` sorted by ``y`` then ``x`` with a fresh
        index. Rows of the ``y`` range that have no records or no positive
        height are skipped; if nothing remains, an empty frame with the three
        columns is returned.
    """
    columns = ['x', 'y', 'h']
    if df.empty:
        return pd.DataFrame(columns=columns)

    def add_tails_to_row(subdf, y):
        """Return the padded runs of row ``y`` or ``None`` if it has no positive height."""
        if subdf.empty:
            # No records for this y: min()/max() would be NaN and range() would fail.
            return None
        subdf = subdf.sort_values(by='x')

        # Build the x -> h lookup once instead of masking the frame for every x.
        # setdefault keeps the first height seen for a duplicated x.
        height_at = {}
        for x, h in zip(subdf['x'], subdf['h']):
            height_at.setdefault(x, h)

        x_min, x_max = subdf['x'].min(), subdf['x'].max()
        runs = []
        run = []
        last_pick = x_min
        for x in range(x_min, x_max + 1):
            # Positions that are absent from the row count as height 0.
            if height_at.get(x, 0) > 0:
                run.append(x)
                last_pick = x
            elif x - last_pick >= dist and run:
                runs.append(run)
                run = []
        if run:
            runs.append(run)
        if not runs:
            # Nothing positive in this row: pd.concat([]) would raise.
            return None

        pieces = []
        for run in runs:
            # Runs are built in ascending x order, so the ends are min and max.
            first, last = run[0], run[-1]
            pieces.append(pd.DataFrame({
                'x': run,
                'y': [y] * len(run),
                'h': [height_at[x] for x in run],
            }))
            pieces.append(pd.DataFrame({
                'x': [first - 2, first - 1, last + 1, last + 2],
                'y': [y] * 4,
                'h': [-1, 0, 0, -1],
            }))

        # Sorting h in descending order makes drop_duplicates keep the highest
        # record when tails of neighbouring runs land on the same x.
        return (
            pd.concat(pieces)
            .sort_values(by=['x', 'h'], ascending=[True, False])
            .drop_duplicates(subset=['x'], keep='first')
            .reset_index(drop=True)
        )

    padded_rows = []
    for y in range(df['y'].min(), df['y'].max() + 1, step_y):
        padded = add_tails_to_row(df[df['y'] == y], y)
        if padded is not None:
            padded_rows.append(padded)

    if not padded_rows:
        return pd.DataFrame(columns=columns)
    return pd.concat(padded_rows).sort_values(by=['y', 'x']).reset_index(drop=True)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xyh
04316None
143260.0
24336114.518776
34356180.14386
443660.0
\n", "
# Read the headerless CSV as a plain 2-D array and flatten it so that
# x = column index, y = row index, h = cell value.
raw_data_array = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/new_zealand.csv",
    header=None,
).to_numpy()
df = dataset_array_to_dataframe(raw_data_array)

# Smallest strictly positive height in the raw grid.
min_h = df.loc[df['h'] > 0, 'h'].min()

# Keep only positive heights, then split every second row into runs that are
# closed after a gap of 25 positions and padded with tails.
df = process_rows(df.loc[df["h"] > 0], dist=25, step_y=2)

# Bounding box of the processed points; read when positioning the text label.
bbox = {
    'xmin': df['x'].min(),
    'ymin': df['y'].min(),
    'xmax': df['x'].max(),
    'ymax': df['y'].max(),
}

# process_rows marks the outermost tail points with -1; turn them into missing values.
df['h'] = df['h'].replace({-1.0: None})
print(df.shape)
df.head()
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
# Ridge-line plot built from the precomputed heights. The whole expression is
# wrapped in parentheses so that no backslash line continuations are needed.
(
    ggplot(df)
    + geom_area_ridges(
        aes("x", "y", height="h"),
        stat='identity', min_height=0, scale=.0025,
        color="#08519c", fill="#bdd7e7",
        # Sample size equals the number of rows in df.
        sampling=sampling_pick(df.shape[0]),
        tooltips=layer_tooltips().line("height|@h").format("@h", ',.1~f'),
        show_legend=False,
    )
    # Label placed at 70% of the x extent and 90% of the y extent of the data.
    + geom_text(
        x=bbox['xmin'] + .7 * (bbox['xmax'] - bbox['xmin']),
        y=bbox['ymin'] + .9 * (bbox['ymax'] - bbox['ymin']),
        label="New Zealand", size=25, family="Cinzel",
    )
    + scale_y_continuous(trans='reverse')
    + ggsize(600, 600)
    + theme_minimal()
    + theme(
        axis='blank', panel_grid='blank',
        plot_background=element_rect(color='black', fill='#e6e6e6', size=1),
    )
)