{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Holt-Winters Method" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[](https://nbviewer.org/github/gautamnaik1994/SalesForecasting_ML_CaseStudy/blob/main/notebooks/modelling/01.HoltWinterMethod.ipynb?flush_cache=true)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import plotly.io as pio\n", "pio.renderers.default = \"colab+notebook_connected+vscode\"\n", "import pandas as pd\n", "import numpy as np\n", "import duckdb as db\n", "import matplotlib.pyplot as plt\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "import optuna\n", "import warnings\n", "\n", "warnings.filterwarnings('ignore')\n", "\n", "# exponential smoothing\n", "from statsmodels.tsa.holtwinters import ExponentialSmoothing\n", "from statsmodels.tsa.exponential_smoothing.ets import ETSModel\n", "from IPython.display import display, Markdown\n", "# mape\n", "from sklearn.metrics import mean_absolute_percentage_error\n", "optuna.logging.set_verbosity(optuna.logging.ERROR)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "df = pd.read_parquet(\"../../data/processed/train_enhanced.parquet\")\n", "train_agg = pd.read_parquet(\"../../data/processed/train_agg.parquet\")\n", "train_region_code_agg = pd.read_parquet(\"../../data/processed/train_region_code_agg.parquet\")\n", "holiday_df= pd.read_csv(\"../../data/processed/holidays.csv\")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "def split_train_test(df, test_size=0.2):\n", "    \"\"\"Split `df` by position into a leading train part and a trailing test part.\n", "\n", "    Rows are not shuffled: the first `split_idx` rows form the train split and\n", "    the remaining rows form the test split.\n", "\n", "    Args:\n", "        df: Object supporting `len()` and `.iloc` slicing (e.g. a DataFrame).\n", "        test_size: Fraction of rows, between 0 and 1, assigned to the test split.\n", "\n", "    Returns:\n", "        Tuple `(train, test)` of `.iloc` slices of `df`.\n", "\n", "    Raises:\n", "        ValueError: If `test_size` is outside [0, 1].\n", "    \"\"\"\n", "    if not 0 <= test_size <= 1:\n", "        raise ValueError(f'test_size must be between 0 and 1, got {test_size}')\n", "    # Round before truncating: 1 - 0.9 evaluates to 0.09999999999999998, so a\n", "    # bare int() would floor 10 * (1 - 0.9) to 0 instead of 1.\n", "    split_idx = int(round(len(df) * (1 - test_size), 6))\n", "    return df.iloc[:split_idx], df.iloc[split_idx:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Region 1" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Region_Code | \n", "Total_Sales | \n", "Avg_Sales | \n", "Total_Orders | \n", "Avg_Orders | \n", "Num_Stores | \n", "Holiday | \n", "Total_Discounts | \n", "
|---|---|---|---|---|---|---|---|---|
| Date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 2018-01-01 | \n", "R1 | \n", "5094374 | \n", "41084 | \n", "6509 | \n", "52 | \n", "124 | \n", "1 | \n", "124 | \n", "
| 2018-01-02 | \n", "R1 | \n", "7050675 | \n", "56860 | \n", "9738 | \n", "79 | \n", "124 | \n", "0 | \n", "124 | \n", "
| 2018-01-03 | \n", "R1 | \n", "6851526 | \n", "55254 | \n", "9473 | \n", "76 | \n", "124 | \n", "0 | \n", "124 | \n", "
| 2018-01-04 | \n", "R1 | \n", "7362648 | \n", "59376 | \n", "10132 | \n", "82 | \n", "124 | \n", "0 | \n", "124 | \n", "
| 2018-01-05 | \n", "R1 | \n", "8153604 | \n", "65755 | \n", "10883 | \n", "88 | \n", "124 | \n", "0 | \n", "124 | \n", "
| \n", " | Date | \n", "Total_Sales | \n", "Avg_Sales | \n", "Total_Orders | \n", "Avg_Orders | \n", "Holiday | \n", "Total_Discounts | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "2018-01-01 | \n", "15345484 | \n", "42042 | \n", "19666 | \n", "54 | \n", "1 | \n", "365 | \n", "
| 1 | \n", "2018-01-02 | \n", "19592415 | \n", "53678 | \n", "25326 | \n", "69 | \n", "0 | \n", "365 | \n", "
| 2 | \n", "2018-01-03 | \n", "18652527 | \n", "51103 | \n", "24047 | \n", "66 | \n", "0 | \n", "365 | \n", "
| 3 | \n", "2018-01-04 | \n", "19956267 | \n", "54675 | \n", "25584 | \n", "70 | \n", "0 | \n", "364 | \n", "
| 4 | \n", "2018-01-05 | \n", "22902651 | \n", "62747 | \n", "28436 | \n", "78 | \n", "0 | \n", "364 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 511 | \n", "2019-05-27 | \n", "17197023 | \n", "47115 | \n", "25447 | \n", "70 | \n", "0 | \n", "321 | \n", "
| 512 | \n", "2019-05-28 | \n", "18652065 | \n", "51102 | \n", "27184 | \n", "74 | \n", "0 | \n", "319 | \n", "
| 513 | \n", "2019-05-29 | \n", "16213497 | \n", "44421 | \n", "24047 | \n", "66 | \n", "0 | \n", "193 | \n", "
| 514 | \n", "2019-05-30 | \n", "16082139 | \n", "44061 | \n", "24318 | \n", "67 | \n", "0 | \n", "76 | \n", "
| 515 | \n", "2019-05-31 | \n", "15601825 | \n", "42745 | \n", "23602 | \n", "65 | \n", "1 | \n", "39 | \n", "
516 rows × 7 columns
\n", "