{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Time Series Visualizations\n", "\n", "This notebook demonstrates how to use Lets-Plot to investigate time series.\n", "\n", "The data is provided by [Kaggle](https://www.kaggle.com/sumanthvrao/daily-climate-time-series-data)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:12.579573Z", "iopub.status.busy": "2024-03-26T14:41:12.579494Z", "iopub.status.idle": "2024-03-26T14:41:12.902600Z", "shell.execute_reply": "2024-03-26T14:41:12.902131Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "from lets_plot import *\n", "from lets_plot.mapping import as_discrete" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:12.904155Z", "iopub.status.busy": "2024-03-26T14:41:12.904024Z", "iopub.status.idle": "2024-03-26T14:41:12.906430Z", "shell.execute_reply": "2024-03-26T14:41:12.906241Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "LetsPlot.setup_html()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Preparation" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:12.919658Z", "iopub.status.busy": "2024-03-26T14:41:12.919548Z", "iopub.status.idle": "2024-03-26T14:41:13.319405Z", "shell.execute_reply": "2024-03-26T14:41:13.319183Z" } }, "outputs": [], "source": [ "df = pd.read_csv(\"https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/delhi_climate.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.320633Z", "iopub.status.busy": "2024-03-26T14:41:13.320515Z", "iopub.status.idle": "2024-03-26T14:41:13.325017Z", "shell.execute_reply": "2024-03-26T14:41:13.324813Z" } }, "outputs": [], "source": [ "# Rename the columns used below, parse the dates, derive the calendar parts\n", "# from them and keep only the rows dated before 2017.\n", "df = (\n", "    df.rename(columns={\"meantemp\": \"mean temperature\", \"wind_speed\": \"wind speed\"})\n", "    .assign(date=lambda frame: pd.to_datetime(frame[\"date\"]))\n", "    .assign(\n", "        day=lambda frame: frame[\"date\"].dt.day,\n", "        month=lambda frame: frame[\"date\"].dt.month,\n", "        year=lambda frame: frame[\"date\"].dt.year,\n", "    )\n", "    .loc[lambda frame: frame[\"year\"] < 2017]\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### General Information" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.326320Z", "iopub.status.busy": "2024-03-26T14:41:13.326154Z", "iopub.status.idle": "2024-03-26T14:41:13.372372Z", "shell.execute_reply": "2024-03-26T14:41:13.372135Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Axis breaks: every date in the data whose day of month is 1.\n", "month_starts = df[df.date.dt.day == 1].date\n", "\n", "(\n", "    ggplot(df, aes(\"date\", \"mean temperature\"))\n", "    + geom_line(aes(group=\"year\", color=as_discrete(\"year\")), size=1)\n", "    + scale_x_datetime(breaks=month_starts, format=\"%b %Y\")\n", "    + facet_grid(x=\"year\", scales='free')\n", "    + ggtitle(\"Mean Temperature Along Period Under Review\")\n", "    + ggsize(1000, 500)\n", "    + theme(legend_position='bottom')\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.373637Z", "iopub.status.busy": "2024-03-26T14:41:13.373529Z", "iopub.status.idle": "2024-03-26T14:41:13.439784Z", "shell.execute_reply": "2024-03-26T14:41:13.439560Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# One box per year.\n", "p1 = (\n", "    ggplot()\n", "    + geom_boxplot(\n", "        aes(x=\"year\", y=\"mean temperature\", fill=as_discrete(\"year\")),\n", "        data=df, size=2, alpha=.5,\n", "    )\n", "    + scale_x_discrete(name=\"year\")\n", "    + ggtitle(\"Mean Temperature Aggregated\")\n", "    + theme(legend_position='bottom', panel_grid='blank')\n", ")\n", "\n", "# One box per month, faceted by year.\n", "p2 = (\n", "    ggplot()\n", "    + geom_boxplot(\n", "        aes(x=\"month\", y=\"mean temperature\", fill=as_discrete(\"year\")),\n", "        data=df, size=.75, alpha=.5,\n", "    )\n", "    + scale_x_continuous(breaks=list(range(1, 13)))\n", "    + facet_grid(x=\"year\")\n", "    + ggtitle(\"Mean Temperature by Month\")\n", "    + theme(legend_position='none', panel_grid='blank')\n", ")\n", "\n", "# Put both plots into one bunch: p1 at (0, 0), p2 at (0, h), each w x h in size.\n", "w, h = 1000, 300\n", "bunch = GGBunch()\n", "for row, plot in enumerate((p1, p2)):\n", "    bunch.add_plot(plot, 0, row * h, w, h)\n", "bunch.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Year-to-Year Temperature Comparison" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.441177Z", "iopub.status.busy": "2024-03-26T14:41:13.441061Z", "iopub.status.idle": "2024-03-26T14:41:13.457533Z", "shell.execute_reply": "2024-03-26T14:41:13.457245Z" }, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Tooltip: the year as title, then the temperature (two decimals) and the date.\n", "daily_tooltips = (\n", "    layer_tooltips()\n", "    .title(\"@year\")\n", "    .format(\"@{mean temperature}\", \".2f\")\n", "    .line(\"@|@{mean temperature}\")\n", "    .line(\"date|@month/@day/@year\")\n", ")\n", "\n", "# One line per year over the day of month, one facet row per month;\n", "# the y axis title is set to the facet variable.\n", "(\n", "    ggplot(df, aes(\"day\", \"mean temperature\"))\n", "    + geom_line(aes(group=\"year\", color=as_discrete(\"year\")), size=2, tooltips=daily_tooltips)\n", "    + scale_x_continuous(breaks=list(range(1, 32)))\n", "    + facet_grid(y=\"month\", scales='free')\n", "    + ylab(\"month\")\n", "    + ggtitle(\"Mean Temperature for Each Month\")\n", "    + theme(legend_position='bottom')\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Most Common Temperature Values" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.458786Z", "iopub.status.busy": "2024-03-26T14:41:13.458622Z", "iopub.status.idle": "2024-03-26T14:41:13.476371Z", "shell.execute_reply": "2024-03-26T14:41:13.476143Z" }, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Tooltip lines: bin count, then the temperature (two decimals), month and year.\n", "histogram_tooltips = (\n", "    layer_tooltips()\n", "    .line(\"count|@..count..\")\n", "    .format(\"@{mean temperature}\", \".2f\")\n", "    .line(\"@|@{mean temperature}\")\n", "    .line(\"@|@month\")\n", "    .line(\"@|@year\")\n", ")\n", "\n", "# The axis titles name the facet variables rather than the plotted aesthetic.\n", "(\n", "    ggplot(df, aes(x=\"mean temperature\"))\n", "    + geom_histogram(\n", "        aes(group=\"year\", fill=as_discrete(\"year\")),\n", "        color='black', bins=15, size=.5, alpha=.5,\n", "        tooltips=histogram_tooltips,\n", "    )\n", "    + facet_grid(x=\"month\", y=\"year\")\n", "    + xlab(\"month\")\n", "    + ylab(\"year\")\n", "    + ggtitle(\"Most Common Temperature\")\n", "    + ggsize(1000, 500)\n", "    + theme_classic()\n", "    + theme(legend_position='bottom')\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.477539Z", "iopub.status.busy": "2024-03-26T14:41:13.477410Z", "iopub.status.idle": "2024-03-26T14:41:13.487119Z", "shell.execute_reply": "2024-03-26T14:41:13.486920Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Three-column copy of the data whose temperatures are cast to int.\n", "int_mean_temp_df = df[[\"mean temperature\", \"month\", \"year\"]].astype({\"mean temperature\": int})\n", "\n", "# Tooltip: the year as title, then the temperature and the month.\n", "heatmap_tooltips = (\n", "    layer_tooltips()\n", "    .format(\"@{mean temperature}\", \".2f\")\n", "    .line(\"@|@{mean temperature}\")\n", "    .format(\"@month\", \"d\")\n", "    .line(\"@|@month\")\n", "    .title(\"@year\")\n", ")\n", "\n", "(\n", "    ggplot(int_mean_temp_df, aes(\"month\", \"mean temperature\", fill=\"mean temperature\"))\n", "    + geom_bin2d(stat='identity', size=.5, color='white', alpha=.2, tooltips=heatmap_tooltips)\n", "    + scale_x_continuous(breaks=list(range(1, 13)))\n", "    + scale_fill_gradient(low='#abd9e9', high='#d7191c')\n", "    + facet_grid(x=\"year\")\n", "    + coord_fixed(ratio=.5)\n", "    + xlab(\"\")\n", "    + ggtitle(\"Heatmap of Temperatures by Year\")\n", "    + ggsize(1000, 500)\n", "    + theme_classic()\n", "    + theme(legend_position='bottom')\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Observing Mean Temperature and Wind Speed Correlation" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.488379Z", "iopub.status.busy": "2024-03-26T14:41:13.488205Z", "iopub.status.idle": "2024-03-26T14:41:13.502686Z", "shell.execute_reply": "2024-03-26T14:41:13.502474Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Outline and fill of the points use the same gradient, keyed to the temperature.\n", "temperature_gradient = dict(low='#abd9e9', high='#d7191c')\n", "\n", "(\n", "    ggplot(df, aes(\"wind speed\", y=\"mean temperature\"))\n", "    + geom_point(\n", "        aes(color=\"mean temperature\", fill=\"mean temperature\"),\n", "        shape=21, size=3, alpha=.2,\n", "    )\n", "    + scale_color_gradient(**temperature_gradient)\n", "    + scale_fill_gradient(**temperature_gradient)\n", "    + facet_grid(x=\"year\")\n", "    + ggtitle(\"Relation Between Mean Temperature and Wind Speed\")\n", "    + ggsize(1000, 500)\n", "    + theme_classic()\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Observing Mean Temperature and Humidity Correlation" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.503935Z", "iopub.status.busy": "2024-03-26T14:41:13.503759Z", "iopub.status.idle": "2024-03-26T14:41:13.518321Z", "shell.execute_reply": "2024-03-26T14:41:13.518117Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Outline and fill of the points use the same gradient, keyed to the humidity.\n", "humidity_gradient = dict(low='#fdae61', high='#2c7bb6')\n", "\n", "(\n", "    ggplot(df, aes(\"humidity\", \"mean temperature\"))\n", "    + geom_point(\n", "        aes(color=\"humidity\", fill=\"humidity\"),\n", "        shape=21, size=3, alpha=.2,\n", "    )\n", "    + scale_color_gradient(**humidity_gradient)\n", "    + scale_fill_gradient(**humidity_gradient)\n", "    + facet_grid(x=\"year\")\n", "    + ggtitle(\"Relation Between Mean Temperature and Humidity\")\n", "    + ggsize(1000, 500)\n", "    + theme_classic()\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### In Search of Correlation on Lag Scatter Plots" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.519621Z", "iopub.status.busy": "2024-03-26T14:41:13.519440Z", "iopub.status.idle": "2024-03-26T14:41:13.544105Z", "shell.execute_reply": "2024-03-26T14:41:13.543862Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def lag_scatter_plot(data, lag, title, drop_last=0):\n", "    \"\"\"Pair each mean temperature with the value `lag` rows later and plot the pairs.\n", "\n", "    Parameters\n", "    ----------\n", "    data : pandas.DataFrame\n", "        Frame with \"mean temperature\" and \"year\" columns.\n", "    lag : int\n", "        Number of rows to look ahead; the temperature column is shifted by `-lag`.\n", "    title : str\n", "        Title of the plot.\n", "    drop_last : int, default 0\n", "        Number of trailing rows to discard once the unpaired rows are dropped.\n", "\n", "    Returns\n", "    -------\n", "    tuple\n", "        The frame of paired values and the scatter plot built from it.\n", "    \"\"\"\n", "    pairs_df = data[[\"mean temperature\", \"year\"]].copy()\n", "    pairs_df[\"shifted mean temperature\"] = data[\"mean temperature\"].shift(-lag)\n", "    # Rows left without a partner by the shift hold NaN and are removed here.\n", "    pairs_df = pairs_df.dropna()\n", "    if drop_last:\n", "        pairs_df = pairs_df[:-drop_last]\n", "\n", "    plot = (\n", "        ggplot(pairs_df, aes(\"mean temperature\", \"shifted mean temperature\"))\n", "        + geom_point(\n", "            aes(color=\"mean temperature\", fill=\"mean temperature\"),\n", "            shape=21, size=3, alpha=.2,\n", "        )\n", "        + scale_color_gradient(low='#abd9e9', high='#d7191c')\n", "        + scale_fill_gradient(low='#abd9e9', high='#d7191c')\n", "        + facet_grid(x=\"year\")\n", "        + coord_fixed(ratio=1)\n", "        + ggtitle(title)\n", "        + theme_classic()\n", "    )\n", "    return pairs_df, plot\n", "\n", "\n", "df_shifted_by_day, p1 = lag_scatter_plot(df, 1, \"One Day Lag Scatter Plot\")\n", "df_shifted_by_month, p2 = lag_scatter_plot(df, 30, \"One Month Lag Scatter Plot\")\n", "# The yearly lag also discards the final paired row (presumably so that a lone\n", "# trailing row does not get a facet of its own - TODO confirm against the data).\n", "df_shifted_by_year, p3 = lag_scatter_plot(df, 365, \"One Year Lag Scatter Plot\", drop_last=1)\n", "\n", "# Put the three plots into one bunch at vertical offsets 0, h and 2 * h.\n", "w, h = 1000, 300\n", "bunch = GGBunch()\n", "for row, plot in enumerate((p1, p2, p3)):\n", "    bunch.add_plot(plot, 0, row * h, w, h)\n", "bunch.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Annual Path of Mean Temperature and Humidity" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.545401Z", "iopub.status.busy": "2024-03-26T14:41:13.545221Z", "iopub.status.idle": "2024-03-26T14:41:13.551192Z", "shell.execute_reply": "2024-03-26T14:41:13.551003Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Average temperature and humidity for every (year, month) pair.\n", "monthly_means = df.groupby([\"year\", \"month\"])[[\"mean temperature\", \"humidity\"]].mean()\n", "mean_df = monthly_means.reset_index()\n", "\n", "# Tooltip: the year as title, then month, humidity and temperature (two decimals).\n", "path_tooltips = (\n", "    layer_tooltips()\n", "    .title(\"@year\")\n", "    .line(\"month|@month\")\n", "    .format(\"@humidity\", \".2f\")\n", "    .line(\"@|@humidity\")\n", "    .format(\"@{mean temperature}\", \".2f\")\n", "    .line(\"mean temperature|@{mean temperature}\")\n", ")\n", "\n", "(\n", "    ggplot(mean_df, aes(\"humidity\", \"mean temperature\"))\n", "    + geom_path(color='#99d8c9', size=1)\n", "    + geom_point(aes(fill=\"month\"), shape=21, size=3, color='#00441b', tooltips=path_tooltips)\n", "    + scale_fill_gradient(name=\"\", low='#e5f5f9', high='#2ca25f')\n", "    + facet_grid(x=\"year\")\n", "    + ylab(\"mean temperature\")\n", "    + ggtitle(\"Annual Path of Mean Temperature and Humidity\")\n", "    + ggsize(1000, 500)\n", "    + theme_classic()\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Autocorrelation Plots for Mean Temperature, Wind Speed and Humidity" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "execution": { "iopub.execute_input": "2024-03-26T14:41:13.552371Z", "iopub.status.busy": "2024-03-26T14:41:13.552243Z", "iopub.status.idle": "2024-03-26T14:41:13.787129Z", "shell.execute_reply": "2024-03-26T14:41:13.786810Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Series whose autocorrelation is evaluated, keyed by the output column name.\n", "acf_sources = {\n", "    \"mean temperature acf\": df[\"mean temperature\"],\n", "    \"wind speed acf\": df[\"wind speed\"],\n", "    \"humidity acf\": df.humidity,\n", "}\n", "lags = list(range(365 * 3))\n", "\n", "# Wide table: one row per lag, one column per series.\n", "acf_wide_df = pd.DataFrame({\"lag\": lags})\n", "for acf_name, series in acf_sources.items():\n", "    acf_wide_df[acf_name] = [series.autocorr(lag=lag) for lag in lags]\n", "\n", "# Long table: one row per (lag, series) pair, as used for the facets below.\n", "acf_df = acf_wide_df.melt(\n", "    id_vars=[\"lag\"],\n", "    value_vars=list(acf_sources),\n", "    var_name=\"acf_type\",\n", "    value_name=\"acf_value\",\n", ")\n", "\n", "(\n", "    ggplot(acf_df, aes(\"lag\", \"acf_value\"))\n", "    + geom_point(aes(color=\"acf_value\"), size=3)\n", "    + scale_color_gradient(low='#fc8d59', high='#91cf60')\n", "    + facet_grid(y=\"acf_type\")\n", "    + ylab(\"ACF value\")\n", "    + ggtitle(\"Autocorrelation Functions\")\n", "    + ggsize(1000, 600)\n", "    + theme(legend_position='none')\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }