{ "cells": [ { "cell_type": "markdown", "id": "d669132e-d986-4d19-8fc6-7c26b95054bc", "metadata": {}, "source": [ "# Missing Values: Line, Path, Area, and Ribbon\n", "\n", "Handling of missing values (`NaN` or `None`) in `geom_line()`, `geom_path()`, `geom_area()`, and `geom_ribbon()`.\n", "\n", "Note: `geom_path()` handles missing values slightly differently than the others." ] }, { "cell_type": "code", "execution_count": 1, "id": "309d1f24-3ee2-42cc-901a-cfe5fbccdfa9", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from datetime import datetime\n", "from lets_plot import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "b712edbc-eacb-4323-a439-667cb5d958dd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 3, "id": "35a0decd-c392-48b0-a075-abee8289845b", "metadata": {}, "outputs": [], "source": [ "economics_url = 'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/economics.csv'\n", "economics = pd.read_csv(economics_url)\n", "economics['date'] = pd.to_datetime(economics['date'])\n", "start = datetime(2006, 1, 1)\n", "economics = economics.loc[economics['date'] >= start]" ] }, { "cell_type": "code", "execution_count": 4, "id": "13f427d4-0940-4c33-b39d-70ea141257da", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0datepcepoppsavertuempmedunemploy
4624632006-01-019059.8297647.04.28.67064
4634642006-02-019090.1297854.04.29.17184
4644652006-03-019122.1298060.04.28.77072
4654662006-04-019174.8298281.04.08.47120
4664672006-05-019215.1298496.03.88.56980
\n", "
" ], "text/plain": [ " Unnamed: 0 date pce pop psavert uempmed unemploy\n", "462 463 2006-01-01 9059.8 297647.0 4.2 8.6 7064\n", "463 464 2006-02-01 9090.1 297854.0 4.2 9.1 7184\n", "464 465 2006-03-01 9122.1 298060.0 4.2 8.7 7072\n", "465 466 2006-04-01 9174.8 298281.0 4.0 8.4 7120\n", "466 467 2006-05-01 9215.1 298496.0 3.8 8.5 6980" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "economics.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "40448cec-67a1-42a5-a864-611c2ff52046", "metadata": {}, "outputs": [], "source": [ "# Make some gaps in the data\n", "economics_with_gaps = economics.copy()\n", "economics_with_gaps.loc[\n", " (economics_with_gaps['date'] >= '2008-01-01') & \n", " (economics_with_gaps['date'] <= '2009-06-01'), \n", " 'date'\n", "] = None\n", "economics_with_gaps.loc[\n", " (economics_with_gaps['date'] >= '2012-01-01') & \n", " (economics_with_gaps['date'] <= '2012-04-01'), \n", " 'unemploy'\n", "] = np.nan" ] }, { "cell_type": "markdown", "id": "cd0122b1-b2e2-4a17-9fea-64cb04ed5b55", "metadata": {}, "source": [ "#### 1. Line" ] }, { "cell_type": "code", "execution_count": 6, "id": "fcbffaf8-9510-48d8-b6e7-5966e506d159", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "( \n", " ggplot(mapping=aes('date', 'unemploy')) + \n", " geom_line(data=economics, size=10, alpha=0.1, tooltips='none') + \n", " geom_line(data=economics_with_gaps, color='teal') + \n", " geom_label(label='Missing dates', x=datetime(2008, 8, 15), y=11000, nudge_x=-70, nudge_unit='px') +\n", " geom_label(label='Missing unemployment\\nfigures', x=datetime(2012, 2, 15), y=13000, nudge_x=80, nudge_y=40, nudge_unit='px') +\n", " theme_classic() + \n", " ggsize(800, 300)\n", ") \n" ] }, { "cell_type": "markdown", "id": "119e93f7-e2ed-4e25-8559-873be055158b", "metadata": {}, "source": [ "#### 2. Path" ] }, { "cell_type": "code", "execution_count": 8, "id": "1b7f0260-b949-4505-bb5e-707149ddeee6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "( \n", " ggplot(mapping=aes('date', 'unemploy')) + \n", " geom_line(data=economics, size=10, alpha=0.1, tooltips='none') + \n", " geom_path(data=economics_with_gaps, color='teal') + \n", " geom_label(label='Missing dates', x=datetime(2008, 8, 15), y=11000, nudge_x=-70, nudge_unit='px') +\n", " geom_label(label='Missing unemployment\\nfigures', x=datetime(2012, 2, 15), y=13000, nudge_x=80, nudge_y=40, nudge_unit='px') +\n", " theme_classic() + \n", " ggsize(800, 300)\n", ") \n" ] }, { "cell_type": "markdown", "id": "182477ef-fcc3-43db-b52c-ea9b6d60d6fa", "metadata": {}, "source": [ "#### 3. Area " ] }, { "cell_type": "code", "execution_count": 9, "id": "c61d4587-ab6d-4995-b118-8b78e59995a4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "( \n", " ggplot(mapping=aes('date', 'unemploy')) + \n", " geom_line(data=economics, size=10, alpha=0.1, tooltips='none') + \n", " geom_area(data=economics_with_gaps, color='teal', fill='teal', alpha=0.2) + \n", " geom_label(label='Missing dates', x=datetime(2008, 8, 15), y=12000, nudge_x=-70, nudge_unit='px') +\n", " geom_label(label='Missing unemployment\\nfigures', x=datetime(2012, 2, 15), y=13000, nudge_x=80, nudge_y=40, nudge_unit='px') +\n", " theme_classic() + \n", " coord_cartesian(ylim=[None, 20000]) +\n", " ggsize(800, 300)\n", ") \n" ] }, { "cell_type": "markdown", "id": "ebb199b7-a549-42af-be7e-5b1734746f12", "metadata": {}, "source": [ "#### 4. Ribbon" ] }, { "cell_type": "code", "execution_count": 10, "id": "137f291c-a077-4008-a617-c9acab6f98e7", "metadata": {}, "outputs": [], "source": [ "# Make some gaps in the data\n", "economics_with_gaps2 = economics.copy()\n", "economics_with_gaps2.loc[\n", " (economics_with_gaps2['date'] >= '2008-01-01') & \n", " (economics_with_gaps2['date'] <= '2009-06-01'), \n", " 'date'\n", "] = None\n", "economics_with_gaps2.loc[\n", " (economics_with_gaps2['date'] >= '2011-08-01') & \n", " (economics_with_gaps2['date'] <= '2011-11-01'), \n", " 'uempmed'\n", "] = np.nan\n", "economics_with_gaps2.loc[\n", " (economics_with_gaps2['date'] >= '2013-05-01') & \n", " (economics_with_gaps2['date'] <= '2013-08-01'), \n", " 'psavert'\n", "] = np.nan" ] }, { "cell_type": "code", "execution_count": 11, "id": "adf5b012-4d78-4119-8909-11e0ff46cc69", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "( \n", " ggplot(mapping=aes('date', ymin='psavert', ymax='uempmed')) + \n", " geom_line(aes(y='psavert'), data=economics, size=10, alpha=0.1, tooltips='none') + \n", " geom_line(aes(y='uempmed'), data=economics, size=10, alpha=0.1, tooltips='none') + \n", " geom_ribbon(data=economics_with_gaps2, color='teal', fill='teal', alpha=0.2) + \n", " geom_label(label='Missing dates', x=datetime(2008, 8, 15), y=13, nudge_x=-70, nudge_unit='px') +\n", " geom_label(label='Missing unemployment figures', x=datetime(2011, 8, 15), y=24, nudge_x=80, nudge_unit='px') +\n", " geom_label(label='Missing savings rate figures', x=datetime(2013, 5, 15), y=3.5, nudge_x=50, nudge_unit='px') +\n", " theme_classic() + \n", " ggsize(800, 300)\n", ") \n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.23" } }, "nbformat": 4, "nbformat_minor": 5 }