{ "cells": [ { "cell_type": "markdown", "id": "70b4719e", "metadata": {}, "source": [ "# BBC Visual and Data Journalism Cookbook for Lets-Plot\n", "\n", "This notebook is based on the [BBC Visual and Data Journalism cookbook for R graphics](https://bbc.github.io/rcookbook/index.html).\n", "\n", "The data was extracted from the [gapminder](https://cran.r-project.org/web/packages/gapminder/readme/README.html) R package and is loaded below from a CSV copy hosted in the lets-plot-docs repository." ] }, { "cell_type": "code", "execution_count": 1, "id": "5e5cf787", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:11.283364Z", "iopub.status.busy": "2024-04-26T11:41:11.283364Z", "iopub.status.idle": "2024-04-26T11:41:12.372389Z", "shell.execute_reply": "2024-04-26T11:41:12.372389Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete" ] }, { "cell_type": "code", "execution_count": 2, "id": "8052d660-75a7-40d0-ab26-4031161a5ec1", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.372389Z", "iopub.status.busy": "2024-04-26T11:41:12.372389Z", "iopub.status.idle": "2024-04-26T11:41:12.387903Z", "shell.execute_reply": "2024-04-26T11:41:12.387903Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "id": "68f44f0c", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.387903Z", "iopub.status.busy": "2024-04-26T11:41:12.387903Z", "iopub.status.idle": "2024-04-26T11:41:12.712871Z", "shell.execute_reply": "2024-04-26T11:41:12.712871Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countrycontinentyearlifeExppopgdpPercap
0AfghanistanAsia195228.8018425333779.445314
1AfghanistanAsia195730.3329240934820.853030
2AfghanistanAsia196231.99710267083853.100710
3AfghanistanAsia196734.02011537966836.197138
4AfghanistanAsia197236.08813079460739.981106
\n", "
" ], "text/plain": [ " country continent year lifeExp pop gdpPercap\n", "0 Afghanistan Asia 1952 28.801 8425333 779.445314\n", "1 Afghanistan Asia 1957 30.332 9240934 820.853030\n", "2 Afghanistan Asia 1962 31.997 10267083 853.100710\n", "3 Afghanistan Asia 1967 34.020 11537966 836.197138\n", "4 Afghanistan Asia 1972 36.088 13079460 739.981106" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/gapminder.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "e6a07a28", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.712871Z", "iopub.status.busy": "2024-04-26T11:41:12.712871Z", "iopub.status.idle": "2024-04-26T11:41:12.728548Z", "shell.execute_reply": "2024-04-26T11:41:12.728548Z" } }, "outputs": [], "source": [ "line_size = 1.4\n", "\n", "def bbc_theme(show_x_axis=True):\n", " def get_element_text(title=False, subtitle=False, size=21):\n", " face = None\n", " margin = None\n", " if title:\n", " size = 33\n", " face = \"bold\"\n", " margin = [11, 0, 0, 0]\n", " if subtitle:\n", " size = 26\n", " margin = [9, 0, 0, 0]\n", " return element_text(family=\"Helvetica\", face=face, size=size, margin=margin)\n", " result = theme(\n", " plot_title=get_element_text(title=True),\n", " plot_subtitle=get_element_text(subtitle=True),\n", " legend_position='top',\n", " legend_background='blank',\n", " legend_title='blank',\n", " legend_text=get_element_text(),\n", " axis_title='blank',\n", " axis_text=get_element_text(),\n", " axis_text_x=element_text(margin=[20, 20]),\n", " axis_text_y=element_text(margin=[10, 5]),\n", " axis_ticks='blank',\n", " axis_line=element_line(size=2*line_size) if show_x_axis else 'blank',\n", " axis_ontop_x=True,\n", " panel_grid_minor='blank',\n", " panel_grid_major_y=element_line(size=line_size*6/5, color='#CBCBCB'),\n", " panel_grid_major_x='blank',\n", " 
panel_background='blank',\n", " strip_text=element_text(size=26, hjust=0),\n", " )\n", " if show_x_axis:\n", " result += coord_cartesian(ylim=[0, None]) + scale_y_continuous(expand=[.15, 0])\n", " return result" ] }, { "cell_type": "markdown", "id": "901f50cc", "metadata": {}, "source": [ "## Make a line chart" ] }, { "cell_type": "code", "execution_count": 5, "id": "e5451129", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.728548Z", "iopub.status.busy": "2024-04-26T11:41:12.728548Z", "iopub.status.idle": "2024-04-26T11:41:12.839621Z", "shell.execute_reply": "2024-04-26T11:41:12.839621Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line_df = df[df.country == \"Malawi\"]\n", "\n", "ggplot(line_df, aes('year', 'lifeExp')) + \\\n", " geom_line(color='#1380A1', size=line_size, \\\n", " tooltips=layer_tooltips().format(\"@year\", \"d\")) + \\\n", " scale_x_continuous(format='d') + \\\n", " bbc_theme() + \\\n", " ggsize(600, 450) + \\\n", " labs(title=\"Living longer\", subtitle=\"Life expectancy in Malawi 1952-2007\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "53679cb7", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.842445Z", "iopub.status.busy": "2024-04-26T11:41:12.842445Z", "iopub.status.idle": "2024-04-26T11:41:12.855588Z", "shell.execute_reply": "2024-04-26T11:41:12.855588Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line_df = df[df.country == \"China\"]\n", "\n", "ggplot(line_df, aes('year', 'lifeExp')) + \\\n", " geom_line(color='#1380A1', size=line_size, \\\n", " tooltips=layer_tooltips().format(\"@year\", \"d\")) + \\\n", " scale_x_continuous(format='d') + \\\n", " bbc_theme() + \\\n", " ggsize(600, 450) + \\\n", " labs(title=\"Living longer\", subtitle=\"Life expectancy in China 1952-2007\")" ] }, { "cell_type": "markdown", "id": "a6553da5", "metadata": {}, "source": [ "## Make a multiple line chart" ] }, { "cell_type": "code", "execution_count": 7, "id": "0578594a", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.855588Z", "iopub.status.busy": "2024-04-26T11:41:12.855588Z", "iopub.status.idle": "2024-04-26T11:41:12.871273Z", "shell.execute_reply": "2024-04-26T11:41:12.871273Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_df = df[df.country.isin([\"China\", \"United States\"])]\n", "\n", "multiple_line_plot = ggplot(multiple_line_df, aes('year', 'lifeExp', color='country')) + \\\n", " geom_line(size=line_size, tooltips=layer_tooltips().format(\"@year\", \"d\")) + \\\n", " scale_x_continuous(format='d') + \\\n", " scale_color_manual(values=['#FAAB18', '#1380A1']) + \\\n", " bbc_theme() + \\\n", " ggsize(600, 450) + \\\n", " labs(title=\"Living longer\", subtitle=\"Life expectancy in China and the US\")\n", "multiple_line_plot" ] }, { "cell_type": "markdown", "id": "a3fbd67f-339c-4abf-92de-9b6ebbdcfcff", "metadata": {}, "source": [ "## Add a color scheme (flavor)" ] }, { "cell_type": "code", "execution_count": 8, "id": "a697b6ef-54ba-4908-9d79-b9e95499bb9b", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.872250Z", "iopub.status.busy": "2024-04-26T11:41:12.872250Z", "iopub.status.idle": "2024-04-26T11:41:12.888253Z", "shell.execute_reply": "2024-04-26T11:41:12.887284Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " theme(plot_margin=[20, 30]) + flavor_high_contrast_dark() + \\\n", " ggsize(700, 500)" ] }, { "cell_type": "markdown", "id": "42dcb6ed", "metadata": {}, "source": [ "## Make a bar chart" ] }, { "cell_type": "code", "execution_count": 9, "id": "d6e8a5f8", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.888253Z", "iopub.status.busy": "2024-04-26T11:41:12.888253Z", "iopub.status.idle": "2024-04-26T11:41:12.904044Z", "shell.execute_reply": "2024-04-26T11:41:12.903259Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bar_df = df[(df.year == 2007)&(df.continent == \"Africa\")]\\\n", " .sort_values(by=['lifeExp'], ascending=False).head(5)\n", "\n", "bars_plot = ggplot(bar_df, aes(as_discrete('country', order=1), 'lifeExp')) + \\\n", " geom_bar(stat='identity', position='identity', fill='#1380A1') + \\\n", " bbc_theme() + \\\n", " ggsize(640, 480) + \\\n", " labs(title=\"Reunion is highest\", subtitle=\"Highest African life expectancy, 2007\")\n", "bars_plot" ] }, { "cell_type": "markdown", "id": "f8981a78", "metadata": {}, "source": [ "## Make a stacked bar chart" ] }, { "cell_type": "code", "execution_count": 10, "id": "5fdbdd8c", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.904044Z", "iopub.status.busy": "2024-04-26T11:41:12.904044Z", "iopub.status.idle": "2024-04-26T11:41:12.952690Z", "shell.execute_reply": "2024-04-26T11:41:12.951363Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked_df = df[df.year == 2007].assign(\n", " lifeExpGrouped=lambda df: pd.cut(\n", " df.lifeExp, bins=pd.IntervalIndex.from_tuples([(0, 50), (50, 65), (65, 80), (80, 90)])\n", " ).cat.rename_categories([\"Under 50\", \"50-65\", \"65-80\", \"80+\"])\n", ").rename(\n", " columns={'pop': 'continentPop'}\n", ").groupby(['continent', 'lifeExpGrouped'], observed=False).continentPop.sum().reset_index().query('continentPop > 0').assign(\n", " continentPopPercentage=lambda df: 100 * df.groupby('continent', group_keys=True).continentPop.apply(\n", " lambda x: x / float(x.sum())\n", " ).values\n", ")\n", "\n", "ggplot(stacked_df, aes('continent', 'continentPopPercentage', fill='lifeExpGrouped')) + \\\n", " geom_bar(stat='identity', size=0, tooltips=layer_tooltips().line('@continentPop')\\\n", " .format('@continentPop', ',d')) + \\\n", " scale_y_continuous(breaks=list(range(0, 101, 25)), format='{d}%') + \\\n", " scale_fill_viridis() + \\\n", " bbc_theme() + \\\n", " theme(legend_justification=[0, 1], legend_position=[-.02, 1.02], legend_direction='horizontal') + \\\n", " ggsize(640, 480) + \\\n", " labs(title=\"How life expectancy varies\", subtitle=\"% of population by life expectancy band, 2007\")" ] }, { "cell_type": "markdown", "id": "cc92d098", "metadata": {}, "source": [ "## Make a grouped bar chart" ] }, { "cell_type": "code", "execution_count": 11, "id": "670f7764", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.956139Z", "iopub.status.busy": "2024-04-26T11:41:12.956139Z", "iopub.status.idle": "2024-04-26T11:41:12.986317Z", "shell.execute_reply": "2024-04-26T11:41:12.985560Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped_bar_df = pd.melt(\n", " df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']].pivot(\n", " index='country', columns='year', values='lifeExp'\n", " ).assign(\n", " gap=lambda df: df[2007] - df[1967]\n", " ).sort_values(\n", " by='gap', ascending=False\n", " ).head(5).reset_index(),\n", " id_vars=['country', 'gap'],\n", " value_vars=[1967, 2007],\n", " value_name='lifeExp'\n", ")\n", "\n", "ggplot(grouped_bar_df, aes(as_discrete('country', order=1), 'lifeExp', \\\n", " group='year', fill=as_discrete('year'))) + \\\n", " geom_bar(stat='identity', position='dodge') + \\\n", " bbc_theme() + \\\n", " scale_fill_manual(values=['#1380A1', '#FAAB18']) + \\\n", " ggsize(720, 480) + \\\n", " labs(title=\"We're living longer\", subtitle=\"Biggest life expectancy rise, 1967-2007\")" ] }, { "cell_type": "markdown", "id": "b8006abe", "metadata": {}, "source": [ "## Make a dumbbell chart" ] }, { "cell_type": "code", "execution_count": 12, "id": "0416ee54", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:12.988523Z", "iopub.status.busy": "2024-04-26T11:41:12.988523Z", "iopub.status.idle": "2024-04-26T11:41:13.020594Z", "shell.execute_reply": "2024-04-26T11:41:13.019345Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dumbbell_df = df[df.year.isin([1967, 2007])][['country', 'year', 'lifeExp']].pivot(\n", " index='country', columns='year', values='lifeExp'\n", ").assign(\n", " gap=lambda df: df[2007] - df[1967]\n", ").sort_values(\n", " by='gap'\n", ").tail(10).reset_index()\n", "dumbbell_df.columns = dumbbell_df.columns.map(str)\n", "\n", "ggplot(dumbbell_df) + \\\n", " geom_segment(aes(x='1967', xend='2007', y='country', yend='country'), \\\n", " color='#DDDDDD', size=3) + \\\n", " geom_point(aes(x='1967', y='country'), color='#FAAB18', size=5) + \\\n", " geom_point(aes(x='2007', y='country'), color='#1380A1', size=5) + \\\n", " bbc_theme(show_x_axis=False) + \\\n", " ggsize(640, 400) + \\\n", " labs(title=\"We're living longer\", subtitle=\"Biggest life expectancy rise, 1967-2007\")" ] }, { "cell_type": "markdown", "id": "e82e5c79", "metadata": {}, "source": [ "## Make a histogram" ] }, { "cell_type": "code", "execution_count": 13, "id": "a6aafe43", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.022044Z", "iopub.status.busy": "2024-04-26T11:41:13.022044Z", "iopub.status.idle": "2024-04-26T11:41:13.038571Z", "shell.execute_reply": "2024-04-26T11:41:13.038571Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hist_df = df[df.year == 2007]\n", "\n", "breaks = list(range(40, 91, 10))\n", "labels = ['{0} years'.format(y) if y == 90 else str(y) for y in breaks]\n", "\n", "ggplot(hist_df, aes('lifeExp')) + \\\n", " geom_histogram(binwidth=5, color='white', fill='#1380A1') + \\\n", " scale_x_continuous(limits=[35, 95], breaks=breaks, labels=labels) + \\\n", " bbc_theme() + \\\n", " ggsize(600, 450) + \\\n", " labs(title = \"How life expectancy varies\", subtitle=\"Distribution of life expectancy in 2007\")" ] }, { "cell_type": "markdown", "id": "803cf5de", "metadata": {}, "source": [ "## Make changes to the legend" ] }, { "cell_type": "markdown", "id": "d74c82fe", "metadata": {}, "source": [ "### Remove the legend" ] }, { "cell_type": "code", "execution_count": 14, "id": "a2130a1f", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.038571Z", "iopub.status.busy": "2024-04-26T11:41:13.038571Z", "iopub.status.idle": "2024-04-26T11:41:13.052714Z", "shell.execute_reply": "2024-04-26T11:41:13.052440Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + scale_color_manual(values=['#FAAB18', '#1380A1'], guide='none')" ] }, { "cell_type": "code", "execution_count": 15, "id": "8b0cd450", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.056236Z", "iopub.status.busy": "2024-04-26T11:41:13.056236Z", "iopub.status.idle": "2024-04-26T11:41:13.068833Z", "shell.execute_reply": "2024-04-26T11:41:13.068158Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + theme(legend_position='none')" ] }, { "cell_type": "markdown", "id": "2dc3d43f", "metadata": {}, "source": [ "### Change the position of the legend" ] }, { "cell_type": "code", "execution_count": 16, "id": "e28cac8f", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.072226Z", "iopub.status.busy": "2024-04-26T11:41:13.072226Z", "iopub.status.idle": "2024-04-26T11:41:13.083996Z", "shell.execute_reply": "2024-04-26T11:41:13.083996Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + theme(legend_position='right')" ] }, { "cell_type": "markdown", "id": "7511eafa", "metadata": {}, "source": [ "## Make changes to the axes" ] }, { "cell_type": "markdown", "id": "44145a11", "metadata": {}, "source": [ "### Flip the coordinates of a plot" ] }, { "cell_type": "code", "execution_count": 17, "id": "0f57c8e3", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.088242Z", "iopub.status.busy": "2024-04-26T11:41:13.088068Z", "iopub.status.idle": "2024-04-26T11:41:13.095912Z", "shell.execute_reply": "2024-04-26T11:41:13.095912Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bars_plot = bars_plot + coord_flip()\n", "bars_plot" ] }, { "cell_type": "markdown", "id": "6d05b53d", "metadata": {}, "source": [ "### Change the plot limits" ] }, { "cell_type": "code", "execution_count": 18, "id": "fbca645d", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.098964Z", "iopub.status.busy": "2024-04-26T11:41:13.098964Z", "iopub.status.idle": "2024-04-26T11:41:13.108191Z", "shell.execute_reply": "2024-04-26T11:41:13.108136Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bars_plot + ylim(0, 500)" ] }, { "cell_type": "markdown", "id": "a01a020e", "metadata": {}, "source": [ "### Change the axis text manually" ] }, { "cell_type": "code", "execution_count": 19, "id": "f159c466", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.111973Z", "iopub.status.busy": "2024-04-26T11:41:13.111196Z", "iopub.status.idle": "2024-04-26T11:41:13.120177Z", "shell.execute_reply": "2024-04-26T11:41:13.119440Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "breaks = list(range(0, 81, 20))\n", "labels = ['{0} years'.format(y) if y == 80 else str(y) for y in breaks]\n", "\n", "bars_plot = bars_plot + \\\n", " scale_y_continuous(limits=[0, 85], breaks=breaks, labels=labels)\n", "bars_plot" ] }, { "cell_type": "markdown", "id": "c26e6158", "metadata": {}, "source": [ "### Add axis ticks" ] }, { "cell_type": "code", "execution_count": 20, "id": "d11d9a91", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.123751Z", "iopub.status.busy": "2024-04-26T11:41:13.122751Z", "iopub.status.idle": "2024-04-26T11:41:13.136473Z", "shell.execute_reply": "2024-04-26T11:41:13.135321Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " theme(axis_ticks_x=element_line(color='#333333'), axis_ticks_length_x=10)" ] }, { "cell_type": "markdown", "id": "ae18241d", "metadata": {}, "source": [ "## Add annotations" ] }, { "cell_type": "markdown", "id": "bbf3f138", "metadata": {}, "source": [ "### Add an annotation" ] }, { "cell_type": "code", "execution_count": 21, "id": "ddbce135", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.138787Z", "iopub.status.busy": "2024-04-26T11:41:13.138787Z", "iopub.status.idle": "2024-04-26T11:41:13.152620Z", "shell.execute_reply": "2024-04-26T11:41:13.152043Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " geom_text(x=1980, y=45, label=\"I'm an annotation!\", \\\n", " hjust=0, vjust=0.5, color='#555555', fill='white', \\\n", " family=\"Helvetica\", size=10)" ] }, { "cell_type": "code", "execution_count": 22, "id": "d633f5c5-88f8-4230-80ef-c48f680dc29c", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.154404Z", "iopub.status.busy": "2024-04-26T11:41:13.154404Z", "iopub.status.idle": "2024-04-26T11:41:13.170304Z", "shell.execute_reply": "2024-04-26T11:41:13.169263Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot = multiple_line_plot + \\\n", " geom_label(x=1980, y=45, label=\"I'm quite a long\\nannotation over\\nthree rows\", \\\n", " hjust=0, vjust=0.5, color='#555555', fill='white', \\\n", " family=\"Helvetica\", size=10, label_size=0) + \\\n", " theme(legend_position='none') + \\\n", " xlim(1950, 2011) + \\\n", " geom_label(x=2007, y=79, label=\"US\", \\\n", " hjust=0, vjust=0.5, color='#1380A1', fill='white', \\\n", " family=\"Helvetica\", size=10, label_size=0) + \\\n", " geom_label(x=2007, y=72, label=\"China\", \\\n", " hjust=0, vjust=0.5, color='#FAAB18', fill='white', \\\n", " family=\"Helvetica\", size=10, label_size=0)\n", "multiple_line_plot" ] }, { "cell_type": "markdown", "id": "5185b167", "metadata": {}, "source": [ "### Add labels based on your data" ] }, { "cell_type": "code", "execution_count": 23, "id": "9112f9f2", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.174034Z", "iopub.status.busy": "2024-04-26T11:41:13.173056Z", "iopub.status.idle": "2024-04-26T11:41:13.184352Z", "shell.execute_reply": "2024-04-26T11:41:13.184271Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bars_plot + \\\n", " geom_text(aes('country', 'lifeExp', label='lifeExp'), \\\n", " label_format='d', hjust=1, nudge_y=-1.5, color='white', \\\n", " family=\"Helvetica\", size=10)" ] }, { "cell_type": "markdown", "id": "b6456221", "metadata": {}, "source": [ "### Add left-aligned labels to bar charts" ] }, { "cell_type": "code", "execution_count": 24, "id": "6483a393", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.188082Z", "iopub.status.busy": "2024-04-26T11:41:13.188082Z", "iopub.status.idle": "2024-04-26T11:41:13.203888Z", "shell.execute_reply": "2024-04-26T11:41:13.203214Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bars_plot + \\\n", " geom_text(aes(x='country', label='lifeExp'), y=4, \\\n", " label_format='d', hjust=0, color='white', \\\n", " family=\"Helvetica\", size=10)" ] }, { "cell_type": "markdown", "id": "6f015e06", "metadata": {}, "source": [ "### Add a line" ] }, { "cell_type": "code", "execution_count": 25, "id": "a443476a", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.208181Z", "iopub.status.busy": "2024-04-26T11:41:13.208096Z", "iopub.status.idle": "2024-04-26T11:41:13.221661Z", "shell.execute_reply": "2024-04-26T11:41:13.220659Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " geom_segment(x=1979, y=45, xend=1965, yend=43, color='#555555', size=line_size*3/4)" ] }, { "cell_type": "markdown", "id": "a715ab13-29fa-4470-8a85-3f1c33efe569", "metadata": {}, "source": [ "### Add a curved line" ] }, { "cell_type": "code", "execution_count": 26, "id": "81f5875c-18ec-4597-a7d9-fa055b6dd18c", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.224754Z", "iopub.status.busy": "2024-04-26T11:41:13.224754Z", "iopub.status.idle": "2024-04-26T11:41:13.236447Z", "shell.execute_reply": "2024-04-26T11:41:13.235640Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " geom_curve(x=1979, y=45, xend=1965, yend=43, color='#555555', \\\n", " curvature = -0.2, size=line_size*3/4)" ] }, { "cell_type": "markdown", "id": "f1bf832e", "metadata": {}, "source": [ "### Add an arrow" ] }, { "cell_type": "code", "execution_count": 27, "id": "cddac1b3", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.240266Z", "iopub.status.busy": "2024-04-26T11:41:13.240266Z", "iopub.status.idle": "2024-04-26T11:41:13.261781Z", "shell.execute_reply": "2024-04-26T11:41:13.260762Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " geom_curve(x=1979, y=45, xend=1965, yend=43, color='#555555', \\\n", " curvature = -0.2, size=line_size*3/4, arrow=arrow())" ] }, { "cell_type": "markdown", "id": "9aa9300b", "metadata": {}, "source": [ "### Add a line across the whole plot" ] }, { "cell_type": "code", "execution_count": 28, "id": "3826cc9e", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.265783Z", "iopub.status.busy": "2024-04-26T11:41:13.265783Z", "iopub.status.idle": "2024-04-26T11:41:13.285136Z", "shell.execute_reply": "2024-04-26T11:41:13.284128Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multiple_line_plot + \\\n", " geom_hline(yintercept=10, size=line_size, color='red', linetype='dashed')" ] }, { "cell_type": "markdown", "id": "1da40158", "metadata": {}, "source": [ "## Work with small multiples" ] }, { "cell_type": "markdown", "id": "69ff2b6d-0505-4f3f-a44b-d5d360e14d9b", "metadata": {}, "source": [ "### Facets" ] }, { "cell_type": "code", "execution_count": 29, "id": "6a69b1a2", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.286658Z", "iopub.status.busy": "2024-04-26T11:41:13.286658Z", "iopub.status.idle": "2024-04-26T11:41:13.318239Z", "shell.execute_reply": "2024-04-26T11:41:13.318239Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "facet_df = df[df.continent != \"Americas\"].groupby(['continent', 'year']).pop.sum().to_frame().reset_index()\n", "\n", "ggplot() + \\\n", " geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0) + \\\n", " scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300']) + \\\n", " facet_wrap('continent', ncol=5) + \\\n", " scale_y_continuous(breaks=[0, 2000000000, 4000000000], \\\n", " labels=['0', '2bn', '4bn'], \\\n", " limits=[0, 4000000010]) + \\\n", " bbc_theme() + \\\n", " theme(legend_position='none', axis_text_x=element_blank()) + \\\n", " ggsize(600, 420) + \\\n", " labs(title=\"Asia's rapid growth\", subtitle=\"Population growth by continent, 1952-2007\")" ] }, { "cell_type": "markdown", "id": "da71c8b3-4860-4534-84b3-b5470be79a45", "metadata": {}, "source": [ "### Free scales" ] }, { "cell_type": "code", "execution_count": 30, "id": "df2237f7-8c47-43e3-9303-dec702f1275a", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.318239Z", "iopub.status.busy": "2024-04-26T11:41:13.318239Z", "iopub.status.idle": "2024-04-26T11:41:13.349648Z", "shell.execute_reply": "2024-04-26T11:41:13.349648Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot() + \\\n", " geom_area(aes('year', 'pop', fill='continent'), data=facet_df, size=0) + \\\n", " scale_fill_manual(values=['#FAAB18', '#1380A1', '#990000', '#588300']) + \\\n", " facet_wrap('continent', scales='free') + \\\n", " bbc_theme() + \\\n", " theme(legend_position='none', axis_text_x=element_blank(), axis_text_y=element_blank()) + \\\n", " ggsize(600, 400) + \\\n", " labs(title=\"It's all relative\", subtitle=\"Relative population growth by continent, 1952-2007\")" ] }, { "cell_type": "markdown", "id": "7d7a2873-23c1-4bcb-bdb8-171c238c54f4", "metadata": {}, "source": [ "## Do something else entirely" ] }, { "cell_type": "markdown", "id": "e025c29f-8829-4b01-bcd0-b8f0269426ee", "metadata": {}, "source": [ "### Increase or decrease margins" ] }, { "cell_type": "code", "execution_count": 31, "id": "ff22c7ee-f30e-403c-999d-95bb3c12d118", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.351664Z", "iopub.status.busy": "2024-04-26T11:41:13.351664Z", "iopub.status.idle": "2024-04-26T11:41:13.365797Z", "shell.execute_reply": "2024-04-26T11:41:13.365797Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bars_plot + theme(plot_subtitle=element_text(margin=[0, 0, 75, 0]))" ] }, { "cell_type": "markdown", "id": "0af8eef2", "metadata": {}, "source": [ "### Reorder bars by size" ] }, { "cell_type": "code", "execution_count": 32, "id": "6e6a220c", "metadata": { "execution": { "iopub.execute_input": "2024-04-26T11:41:13.365797Z", "iopub.status.busy": "2024-04-26T11:41:13.365797Z", "iopub.status.idle": "2024-04-26T11:41:13.381555Z", "shell.execute_reply": "2024-04-26T11:41:13.381555Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot(bar_df, aes(as_discrete('country', order_by='lifeExp', order=1), 'lifeExp')) + \\\n", " geom_bar(stat='identity', position='identity', fill='#1380A1') + \\\n", " bbc_theme() + \\\n", " ggsize(600, 450) + \\\n", " coord_flip() + \\\n", " labs(title=\"Reunion is highest\", subtitle=\"Highest African life expectancy, 2007\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }