{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "afd12c08-eb91-43b8-9b35-cede73a823d1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lets_plot import *\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "LetsPlot.setup_html() " ] }, { "cell_type": "code", "execution_count": 2, "id": "8cfaa9a4-3e48-4729-b2df-504de42f123b", "metadata": {}, "outputs": [], "source": [ "mtcars = pd.read_csv(\"https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "190c32be-58b0-4a93-9050-a73191d91556", "metadata": {}, "outputs": [], "source": [ "dat = mtcars[mtcars['wt'] > 2.75]\n", "dat = dat[dat['wt'] < 3.45]" ] }, { "cell_type": "code", "execution_count": 4, "id": "9285ecdb-f63c-4a37-b870-3e6b39bb8069", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot = ggplot(dat, aes('wt', 'mpg', label = 'model')) + geom_point(color = \"red\")\n", "gggrid([\n", " plot + geom_text() + ggtitle('geom_text()'), \n", " plot + geom_text_repel() + ggtitle('geom_text_repel()'),\n", "])" ] }, { "cell_type": "markdown", "id": "65934f5f-7e5d-42ba-9d79-8c144fc8f69b", "metadata": {}, "source": [ "### Hide some of the labels\n", "Set labels to the empty string \"\" to hide them. All data points repel the non-empty labels." ] }, { "cell_type": "code", "execution_count": 5, "id": "a38a42fc-c102-42b2-82cd-af5af57a46d7", "metadata": {}, "outputs": [], "source": [ "dat2 = mtcars[(mtcars['wt'] > 3) & (mtcars['wt'] < 4)].copy()\n", "\n", "# Hide all labels\n", "dat2['car'] = \"\"\n", "\n", "# Label only the rows at these positions\n", "ix_label = [1, 2, 13]\n", "dat2.loc[dat2.index[ix_label], 'car'] = dat2.loc[dat2.index[ix_label], 'model']\n", "dat2['color'] = np.where(dat2['car'] == \"\", \"grey\", \"red\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "79f5b728-2edc-4f2b-9ebe-92d8ddea7279", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(dat2, aes('wt', 'mpg', label = 'car')) + geom_text_repel(seed=6) + geom_point(aes(color = 'color')) + scale_color_identity()" ] }, { "cell_type": "markdown", "id": "34ffa1e0-f01b-4951-83e2-94c2a14ec5ff", "metadata": {}, "source": [ "We can quickly repel a few text labels from 4,000 data points in the example below.\n", "\n", "We use max_overlaps = -1 (standing in for ggrepel's max.overlaps = Inf) to ensure that no text labels are discarded, even if a text label overlaps lots of other things (e.g. other text labels or other data points)." ] }, { "cell_type": "code", "execution_count": 7, "id": "560fa0a0-2933-4580-bb52-3b56a181cdff", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wtmpgcarmodelcyldisphpdratqsecvsamgearcarbcolor
40003.44018.7Hornet SportaboutHornet Sportabout8.0360.0175.03.1517.020.00.03.02.0red
40013.46018.1ValiantValiant6.0225.0105.02.7620.221.00.03.01.0red
40023.84519.2Pontiac FirebirdPontiac Firebird8.0400.0175.03.0817.050.00.03.02.0red
\n", "
" ], "text/plain": [ " wt mpg car model cyl disp hp \\\n", "4000 3.440 18.7 Hornet Sportabout Hornet Sportabout 8.0 360.0 175.0 \n", "4001 3.460 18.1 Valiant Valiant 6.0 225.0 105.0 \n", "4002 3.845 19.2 Pontiac Firebird Pontiac Firebird 8.0 400.0 175.0 \n", "\n", " drat qsec vs am gear carb color \n", "4000 3.15 17.02 0.0 0.0 3.0 2.0 red \n", "4001 2.76 20.22 1.0 0.0 3.0 1.0 red \n", "4002 3.08 17.05 0.0 0.0 3.0 2.0 red " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dat3_noise = pd.DataFrame({\n", " 'wt': np.random.normal(loc=3, scale=1, size=4000),\n", " 'mpg': np.random.normal(loc=19, scale=1, size=4000),\n", " 'car': \"\"\n", "})\n", "dat3 = pd.concat([dat3_noise, dat2[dat2['car'] != '']], ignore_index=True)\n", "red3 = dat3[dat3['car'] != '']\n", "red3" ] }, { "cell_type": "code", "execution_count": 8, "id": "59864e5b-09af-4b4a-bc53-9d5c9901cb2a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot() + \\\n", " geom_point(aes('wt', 'mpg'), data = dat3, color = 'grey') + \\\n", " geom_text_repel(aes('wt', 'mpg', label='car'), data = dat3, max_time=-1, max_overlaps = -1) + \\\n", " geom_point(aes('wt', 'mpg'), data = red3, color = 'red')" ] }, { "cell_type": "markdown", "id": "8798c3bd-60a3-406e-b753-90ea89c04c06", "metadata": {}, "source": [ "### Always show all labels, even when they have too many overlaps\n", "Some text labels will be discarded if they overlap too many other things (default limit is 10). So, if a text label overlaps 10 other text labels or data points, then it will be discarded.\n", "\n", "We can expect to see a warning if some data points could not be labeled due to too many overlaps.\n", "\n", "Set max_overlaps = -1 (standing in for ggrepel's max.overlaps = Inf) to override this behavior and always show all labels, regardless of whether or not a text label overlaps too many other things.\n", "\n", "In R's ggrepel, options(ggrepel.max.overlaps = Inf) sets this globally for the entire session, and the global option can be overridden by providing the max.overlaps argument to geom_text_repel(). The Python cells in this notebook pass max_overlaps explicitly to geom_text_repel() instead." ] }, { "cell_type": "code", "execution_count": 9, "id": "f3ca837f-5014-47d1-a358-6fcbc83c8e62", "metadata": {}, "outputs": [], "source": [ "n = 15\n", "dat4 = pd.DataFrame({\n", " 'x': [1] * n,\n", " 'y': [1] * n,\n", " 'label': list('abcdefghijklmno') # 15 letters\n", "})" ] }, { "cell_type": "code", "execution_count": 10, "id": "967b4986-7d29-4834-9375-20eb09986fdf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "plot = ggplot(dat4, aes('x', 'y', label='label')) + geom_point()\n", "\n", "gggrid([\n", " plot + geom_text_repel(box_padding=20, max_overlaps=10) + ggtitle(\"max_overlaps = 10 (default)\"), \n", " plot + geom_text_repel(box_padding=20, max_overlaps=-1) + ggtitle(\"max_overlaps = -1\"),\n", "])" ] }, { "cell_type": "markdown", "id": "6525b3e3-ab08-49c8-a6c3-b4b4d8fe6499", "metadata": {}, "source": [ "### Do not repel labels from data points\n", "Set point_size = 0.0 (point.size in ggrepel) to prevent label repulsion away from data points.\n", "\n", "Labels will still move away from each other and away from the edges of the plot." ] }, { "cell_type": "code", "execution_count": 11, "id": "ced5a49a-d190-462e-99e6-b14f0b5793b5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(dat, aes('wt', 'mpg', label = 'model')) + \\\n", " geom_point(color = \"red\") + \\\n", " geom_text_repel(point_size = 0.0)" ] }, { "cell_type": "markdown", "id": "abac4a59-558d-47b1-92fb-38353930a85a", "metadata": {}, "source": [ "### Do not repel labels from plot (panel) edges\n", "Set xlim or ylim to Inf or -Inf to disable repulsion away from the edges of the panel. Use NA to indicate the edge of the panel." ] }, { "cell_type": "code", "execution_count": 12, "id": "0c578356-ed3a-4e2c-9b95-28d5ecef8c58", "metadata": {}, "outputs": [], "source": [ "# ggplot(dat, aes('wt', 'mpg', label = 'model')) +\n", "# geom_point(color = \"red\") +\n", "# geom_text_repel(\n", "# # Repel away from the left edge, not from the right.\n", "# xlim = c(NA, Inf),\n", "# # Do not repel from top or bottom edges.\n", "# ylim = c(-Inf, Inf)\n", "# )" ] }, { "cell_type": "markdown", "id": "aa38e044-efde-43df-aa22-906b9c79d974", "metadata": {}, "source": [ "We can also disable clipping to allow the labels to go beyond the edges of the panel." ] }, { "cell_type": "code", "execution_count": 13, "id": "419e1a4d-51b0-4d2c-888c-5983c7962604", "metadata": {}, "outputs": [], "source": [ "# ggplot(dat, aes(wt, mpg, label = car)) +\n", "# geom_point(color = \"red\") +\n", "# coord_cartesian(clip = \"off\") +\n", "# geom_label_repel(fill = \"white\", xlim = c(-Inf, Inf), ylim = c(-Inf, Inf))" ] }, { "cell_type": "markdown", "id": "119692f9-e405-4d55-a59b-2a983286b52e", "metadata": {}, "source": [ "### Repel labels from data points with different sizes\n", "We can use the continuous_scale() function from ggplot2. 
It allows us to specify a single scale that applies to multiple aesthetics.\n", "\n", "For ggrepel, we want to apply a single size scale to two aesthetics:\n", "\n", "- size, which tells ggplot2 the size of the points to draw on the plot\n", "- point.size (point_size in the code below), which tells ggrepel the point size, so it can position the text labels away from them\n", "\n", "In the example below, there is a third size in the call to geom_text_repel() to specify the font size for the text labels." ] }, { "cell_type": "code", "execution_count": 14, "id": "23eca09e-1065-47a9-98da-42e0db8ce82b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(dat, aes('wt', 'mpg', label='model')) + \\\n", " geom_point(aes(size='cyl'), alpha=0.6) + \\\n", " geom_text_repel(\n", " aes(point_size='cyl'),\n", " size=5,\n", " box_padding=0.3,\n", " max_iterations=2000,\n", " min_segment_length=0.0\n", " )" ] }, { "cell_type": "code", "execution_count": 15, "id": "6cb742ec-2afa-4538-89dc-b8ed3228fd8b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars) + geom_point(aes(x='wt', label = 'model'), y = 1, color='red') + xlim(1, 6) + ylim(1, 1.3) + \\\n", " geom_text_repel(\n", " aes(x='wt', label = 'model'), \n", " y = 1, \n", " nudge_y = 0.05, \n", " direction = 'x', \n", " box_padding = 1,\n", " angle = 90,\n", " hjust = 0.0,\n", " seed = 2\n", " ) " ] }, { "cell_type": "code", "execution_count": 16, "id": "db9e097c-aad1-4530-a4c7-3749c3dd520c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(mtcars, aes('wt', 'mpg', label = 'model', color='wt')) + geom_point() + geom_text_repel(max_time=-1) + flavor_darcula()" ] }, { "cell_type": "code", "execution_count": 18, "id": "b348cac2-c2b3-47a2-9bee-2ff4a89de60c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(dat, aes('wt', 'mpg', label='model')) + \\\n", " geom_point(size=0.1, size_unit='x', alpha=0.6) + \\\n", " geom_text_repel(\n", " point_size = 0.1,\n", " size_unit='x',\n", " size=0.05,\n", " box_padding=0.3,\n", " max_iterations=2000,\n", " min_segment_length=0.0\n", " )" ] }, { "cell_type": "code", "execution_count": 12, "id": "dd1b16c7-73a3-4d45-a2a8-259ec56318b6", "metadata": {}, "outputs": [], "source": [ "x = [0, 1]\n", "y = [1, -1]\n", "shape = [16, 21]\n", "size = [10, 20]\n", "stroke = [1, 0]\n", "label = ['one', 'two']\n", "data = {\n", " 'x': x,\n", " 'y': y,\n", " 'shape': shape,\n", " 'size': size,\n", " 'stroke': stroke,\n", " 'label': label\n", "}" ] }, { "cell_type": "code", "execution_count": 15, "id": "679fe1c2-1772-4766-9c12-67b994793b9e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", " \n", "
\n", " \n", "
\n", "
\n", " \n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(data, aes('x', 'y', label='label' )) + scale_shape_identity() + xlim(-2, 3) + ylim(-8,8) \\\n", " + scale_size(range=[10,20], guide='none') \\\n", " + scale_stroke(range=[5,10], guide='none') \\\n", " + geom_point(aes(size='size', stroke='stroke', shape='shape'), color = 'red') \\\n", " + geom_text_repel(aes(point_size='size', point_stroke='stroke', shape='shape'))" ] }, { "cell_type": "code", "execution_count": null, "id": "5377c53c-e664-4d1a-aa19-022e0ed69305", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.20" } }, "nbformat": 4, "nbformat_minor": 5 }