{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "(**Click the icon below to open this notebook in Colab**)\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/xiangshiyin/machine-learning-for-actuarial-science/blob/main/2025-spring/week07/notebook/demo.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# `pandas` series data type" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Date | \n", "Births | \n", "
|---|---|---|
| 0 | \n", "1959-01-01 | \n", "35 | \n", "
| 1 | \n", "1959-01-02 | \n", "32 | \n", "
| 2 | \n", "1959-01-03 | \n", "30 | \n", "
| \n", " | Births | \n", "
|---|---|
| Date | \n", "\n", " |
| 1959-01-01 | \n", "35 | \n", "
| 1959-01-02 | \n", "32 | \n", "
| 1959-01-03 | \n", "30 | \n", "
| Price | \n", "Close | \n", "High | \n", "Low | \n", "Open | \n", "Volume | \n", "
|---|---|---|---|---|---|
| Ticker | \n", "^GSPC | \n", "^GSPC | \n", "^GSPC | \n", "^GSPC | \n", "^GSPC | \n", "
| Date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
| 2020-01-02 | \n", "3257.850098 | \n", "3258.139893 | \n", "3235.530029 | \n", "3244.669922 | \n", "3459930000 | \n", "
| 2020-01-03 | \n", "3234.850098 | \n", "3246.149902 | \n", "3222.340088 | \n", "3226.360107 | \n", "3484700000 | \n", "
| 2020-01-06 | \n", "3246.280029 | \n", "3246.840088 | \n", "3214.639893 | \n", "3217.550049 | \n", "3702460000 | \n", "
| 2020-01-07 | \n", "3237.179932 | \n", "3244.909912 | \n", "3232.429932 | \n", "3241.860107 | \n", "3435910000 | \n", "
| 2020-01-08 | \n", "3253.050049 | \n", "3267.070068 | \n", "3236.669922 | \n", "3238.590088 | \n", "3726840000 | \n", "
| 2020-01-09 | \n", "3274.699951 | \n", "3275.580078 | \n", "3263.669922 | \n", "3266.030029 | \n", "3641230000 | \n", "
| 2020-01-10 | \n", "3265.350098 | \n", "3282.989990 | \n", "3260.860107 | \n", "3281.810059 | \n", "3214580000 | \n", "
| 2020-01-13 | \n", "3288.129883 | \n", "3288.129883 | \n", "3268.429932 | \n", "3271.129883 | \n", "3459390000 | \n", "
| 2020-01-14 | \n", "3283.149902 | \n", "3294.250000 | \n", "3277.189941 | \n", "3285.350098 | \n", "3687620000 | \n", "
| 2020-01-15 | \n", "3289.290039 | \n", "3298.659912 | \n", "3280.689941 | \n", "3282.270020 | \n", "3721490000 | \n", "
| Ticker | \n", "^GSPC | \n", "
|---|---|
| Date | \n", "\n", " |
| 2020-01-02 | \n", "3257.850098 | \n", "
| 2020-01-03 | \n", "3234.850098 | \n", "
| 2020-01-06 | \n", "3246.280029 | \n", "
| \n", " | GSPC | \n", "Year | \n", "DayOfYear | \n", "
|---|---|---|---|
| Date | \n", "\n", " | \n", " | \n", " |
| 2020-01-02 | \n", "3257.850098 | \n", "2020 | \n", "2 | \n", "
| 2020-01-03 | \n", "3234.850098 | \n", "2020 | \n", "3 | \n", "
| 2020-01-06 | \n", "3246.280029 | \n", "2020 | \n", "6 | \n", "
| Year | \n", "2020 | \n", "2021 | \n", "2022 | \n", "2023 | \n", "2024 | \n", "
|---|---|---|---|---|---|
| DayOfYear | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
| 2 | \n", "3257.850098 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4742.830078 | \n", "
| 3 | \n", "3234.850098 | \n", "NaN | \n", "4796.560059 | \n", "3824.139893 | \n", "4704.810059 | \n", "
| 4 | \n", "NaN | \n", "3700.649902 | \n", "4793.540039 | \n", "3852.969971 | \n", "4688.680176 | \n", "
| Ticker | \n", "GSPC | \n", "Year | \n", "Week | \n", "
|---|---|---|---|
| Date | \n", "\n", " | \n", " | \n", " |
| 2020-01-02 | \n", "3257.850098 | \n", "2020 | \n", "1 | \n", "
| 2020-01-03 | \n", "3234.850098 | \n", "2020 | \n", "1 | \n", "
| 2020-01-06 | \n", "3246.280029 | \n", "2020 | \n", "2 | \n", "
| \n", " | Year | \n", "Week | \n", "GSPC | \n", "
|---|---|---|---|
| 0 | \n", "2020 | \n", "1 | \n", "3246.350098 | \n", "
| 1 | \n", "2020 | \n", "2 | \n", "3255.312012 | \n", "
| 2 | \n", "2020 | \n", "3 | \n", "3301.400000 | \n", "
| Year | \n", "2020 | \n", "2021 | \n", "2022 | \n", "2023 | \n", "2024 | \n", "
|---|---|---|---|---|---|
| Week | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "3246.350098 | \n", "3760.823975 | \n", "4732.751953 | \n", "3845.072510 | \n", "4948.100098 | \n", "
| 2 | \n", "3255.312012 | \n", "3794.886035 | \n", "4686.317969 | \n", "3952.642041 | \n", "4773.512109 | \n", "
| 3 | \n", "3301.400000 | \n", "3836.325012 | \n", "4497.634888 | \n", "3947.822571 | \n", "4781.484985 | \n", "
| Ticker | \n", "AAPL | \n", "
|---|---|
| Date | \n", "\n", " |
| 2020-01-02 | \n", "72.716080 | \n", "
| 2020-01-03 | \n", "72.009132 | \n", "
| 2020-01-06 | \n", "72.582916 | \n", "
| 2020-01-07 | \n", "72.241562 | \n", "
| 2020-01-08 | \n", "73.403656 | \n", "
| 2020-01-09 | \n", "74.962791 | \n", "
| 2020-01-10 | \n", "75.132256 | \n", "
| 2020-01-13 | \n", "76.737411 | \n", "
| 2020-01-14 | \n", "75.701210 | \n", "
| 2020-01-15 | \n", "75.376793 | \n", "
| Ticker | \n", "AAPL | \n", "Year | \n", "DayOfYear | \n", "
|---|---|---|---|
| Date | \n", "\n", " | \n", " | \n", " |
| 2020-01-02 | \n", "72.716080 | \n", "2020 | \n", "2 | \n", "
| 2020-01-03 | \n", "72.009132 | \n", "2020 | \n", "3 | \n", "
| 2020-01-06 | \n", "72.582916 | \n", "2020 | \n", "6 | \n", "
| Ticker | \n", "AAPL | \n", "Year | \n", "DayOfYear | \n", "
|---|---|---|---|
| Date | \n", "\n", " | \n", " | \n", " |
| 2020-01-02 | \n", "72.716080 | \n", "2020 | \n", "2 | \n", "
| 2020-01-03 | \n", "72.009132 | \n", "2020 | \n", "3 | \n", "
| 2020-01-06 | \n", "72.582916 | \n", "2020 | \n", "6 | \n", "
| Ticker | \n", "AAPL | \n", "MSFT | \n", "
|---|---|---|
| Date | \n", "\n", " | \n", " |
| 2019-01-02 | \n", "37.667175 | \n", "95.310539 | \n", "
| 2019-01-03 | \n", "33.915249 | \n", "91.804260 | \n", "
| 2019-01-04 | \n", "35.363075 | \n", "96.074005 | \n", "
| \n", " | ds | \n", "y | \n", "
|---|---|---|
| 0 | \n", "2019-01-02 | \n", "37.667175 | \n", "
| 1 | \n", "2019-01-03 | \n", "33.915249 | \n", "
| 2 | \n", "2019-01-04 | \n", "35.363075 | \n", "
| 3 | \n", "2019-01-07 | \n", "35.284363 | \n", "
| 4 | \n", "2019-01-08 | \n", "35.956993 | \n", "
| \n", " | ds | \n", "trend | \n", "yhat_lower | \n", "yhat_upper | \n", "trend_lower | \n", "trend_upper | \n", "additive_terms | \n", "additive_terms_lower | \n", "additive_terms_upper | \n", "weekly | \n", "weekly_lower | \n", "weekly_upper | \n", "multiplicative_terms | \n", "multiplicative_terms_lower | \n", "multiplicative_terms_upper | \n", "yhat | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "2019-01-02 | \n", "37.337318 | \n", "31.871734 | \n", "42.970259 | \n", "37.337318 | \n", "37.337318 | \n", "0.232282 | \n", "0.232282 | \n", "0.232282 | \n", "0.232282 | \n", "0.232282 | \n", "0.232282 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "37.569599 | \n", "
| 1 | \n", "2019-01-03 | \n", "37.415841 | \n", "31.566130 | \n", "43.473250 | \n", "37.415841 | \n", "37.415841 | \n", "0.010063 | \n", "0.010063 | \n", "0.010063 | \n", "0.010063 | \n", "0.010063 | \n", "0.010063 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "37.425904 | \n", "
| 2 | \n", "2019-01-04 | \n", "37.494365 | \n", "31.318919 | \n", "43.213433 | \n", "37.494365 | \n", "37.494365 | \n", "-0.072305 | \n", "-0.072305 | \n", "-0.072305 | \n", "-0.072305 | \n", "-0.072305 | \n", "-0.072305 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "37.422060 | \n", "
| 3 | \n", "2019-01-07 | \n", "37.729937 | \n", "31.603461 | \n", "43.231721 | \n", "37.729937 | \n", "37.729937 | \n", "-0.254685 | \n", "-0.254685 | \n", "-0.254685 | \n", "-0.254685 | \n", "-0.254685 | \n", "-0.254685 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "37.475251 | \n", "
| 4 | \n", "2019-01-08 | \n", "37.808461 | \n", "32.066786 | \n", "43.754179 | \n", "37.808461 | \n", "37.808461 | \n", "-0.098488 | \n", "-0.098488 | \n", "-0.098488 | \n", "-0.098488 | \n", "-0.098488 | \n", "-0.098488 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "37.709973 | \n", "
| \n", " | y | \n", "
|---|---|
| ds | \n", "\n", " |
| 2019-01-02 | \n", "37.667175 | \n", "
| 2019-01-03 | \n", "33.915249 | \n", "
| 2019-01-04 | \n", "35.363075 | \n", "
| \n", " | y | \n", "lag_6 | \n", "lag_7 | \n", "lag_8 | \n", "lag_9 | \n", "lag_10 | \n", "lag_11 | \n", "lag_12 | \n", "lag_13 | \n", "lag_14 | \n", "... | \n", "lag_16 | \n", "lag_17 | \n", "lag_18 | \n", "lag_19 | \n", "lag_20 | \n", "lag_21 | \n", "lag_22 | \n", "lag_23 | \n", "lag_24 | \n", "weekday | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ds | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 2019-02-06 | \n", "41.559830 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "... | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "33.915249 | \n", "37.667175 | \n", "2 | \n", "
| 2019-02-07 | \n", "40.772720 | \n", "39.415550 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "... | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "33.915249 | \n", "3 | \n", "
| 2019-02-08 | \n", "40.820629 | \n", "39.699375 | \n", "39.415550 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "... | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "4 | \n", "
3 rows × 21 columns
\n", "| \n", " | y | \n", "lag_6 | \n", "lag_7 | \n", "lag_8 | \n", "lag_9 | \n", "lag_10 | \n", "lag_11 | \n", "lag_12 | \n", "lag_13 | \n", "lag_14 | \n", "lag_15 | \n", "lag_16 | \n", "lag_17 | \n", "lag_18 | \n", "lag_19 | \n", "lag_20 | \n", "lag_21 | \n", "lag_22 | \n", "lag_23 | \n", "lag_24 | \n", "weekday | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ds | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 2019-02-06 | \n", "41.559830 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "33.915249 | \n", "37.667175 | \n", "2 | \n", "
| 2019-02-07 | \n", "40.772720 | \n", "39.415550 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "33.915249 | \n", "3 | \n", "
| 2019-02-08 | \n", "40.820629 | \n", "39.699375 | \n", "39.415550 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "4 | \n", "
| \n", " | lag_6 | \n", "lag_7 | \n", "lag_8 | \n", "lag_9 | \n", "lag_10 | \n", "lag_11 | \n", "lag_12 | \n", "lag_13 | \n", "lag_14 | \n", "lag_15 | \n", "lag_16 | \n", "lag_17 | \n", "lag_18 | \n", "lag_19 | \n", "lag_20 | \n", "lag_21 | \n", "lag_22 | \n", "lag_23 | \n", "lag_24 | \n", "weekday | \n", "rolling_mean | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ds | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 2019-02-06 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "33.915249 | \n", "37.667175 | \n", "2 | \n", "36.478596 | \n", "
| 2019-02-07 | \n", "39.415550 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "33.915249 | \n", "3 | \n", "36.570615 | \n", "
| 2019-02-08 | \n", "39.699375 | \n", "39.415550 | \n", "36.894379 | \n", "37.280788 | \n", "37.629005 | \n", "36.422104 | \n", "36.713108 | \n", "36.565220 | \n", "37.404808 | \n", "37.175827 | \n", "36.956387 | \n", "36.510357 | \n", "35.778103 | \n", "36.324303 | \n", "36.684467 | \n", "36.567604 | \n", "35.956993 | \n", "35.284363 | \n", "35.363075 | \n", "4 | \n", "36.875043 | \n", "
| \n", " | Feature | \n", "Importance | \n", "
|---|---|---|
| 0 | \n", "lag_6 | \n", "0.544092 | \n", "
| 1 | \n", "lag_7 | \n", "0.149403 | \n", "
| 2 | \n", "lag_8 | \n", "0.083345 | \n", "
| 3 | \n", "lag_9 | \n", "0.063612 | \n", "
| 20 | \n", "rolling_mean | \n", "0.060657 | \n", "
| 13 | \n", "lag_19 | \n", "0.030119 | \n", "
| 4 | \n", "lag_10 | \n", "0.029848 | \n", "
| 12 | \n", "lag_18 | \n", "0.005417 | \n", "
| 7 | \n", "lag_13 | \n", "0.004471 | \n", "
| 9 | \n", "lag_15 | \n", "0.004211 | \n", "
| 14 | \n", "lag_20 | \n", "0.004018 | \n", "
| 11 | \n", "lag_17 | \n", "0.003907 | \n", "
| 18 | \n", "lag_24 | \n", "0.003002 | \n", "
| 16 | \n", "lag_22 | \n", "0.002943 | \n", "
| 15 | \n", "lag_21 | \n", "0.002833 | \n", "
| 17 | \n", "lag_23 | \n", "0.002666 | \n", "
| 10 | \n", "lag_16 | \n", "0.001828 | \n", "
| 8 | \n", "lag_14 | \n", "0.001305 | \n", "
| 5 | \n", "lag_11 | \n", "0.001291 | \n", "
| 6 | \n", "lag_12 | \n", "0.000918 | \n", "
| 19 | \n", "weekday | \n", "0.000116 | \n", "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" gamma=None, grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" multi_strategy=None, n_estimator=20, n_estimators=None,\n",
" n_jobs=None, num_parallel_tree=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" gamma=None, grow_policy=None, importance_type=None,\n",
" interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
" max_delta_step=None, max_depth=None, max_leaves=None,\n",
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
" multi_strategy=None, n_estimator=20, n_estimators=None,\n",
" n_jobs=None, num_parallel_tree=None, ...)| \n", " | Feature | \n", "Importance | \n", "
|---|---|---|
| 0 | \n", "lag_6 | \n", "0.526295 | \n", "
| 2 | \n", "lag_8 | \n", "0.098961 | \n", "
| 14 | \n", "lag_20 | \n", "0.086200 | \n", "
| 3 | \n", "lag_9 | \n", "0.082883 | \n", "
| 9 | \n", "lag_15 | \n", "0.058548 | \n", "
| 1 | \n", "lag_7 | \n", "0.040848 | \n", "
| 13 | \n", "lag_19 | \n", "0.024806 | \n", "
| 20 | \n", "rolling_mean | \n", "0.021671 | \n", "
| 12 | \n", "lag_18 | \n", "0.014230 | \n", "
| 18 | \n", "lag_24 | \n", "0.009405 | \n", "
| 15 | \n", "lag_21 | \n", "0.009387 | \n", "
| 17 | \n", "lag_23 | \n", "0.008607 | \n", "
| 16 | \n", "lag_22 | \n", "0.005319 | \n", "
| 4 | \n", "lag_10 | \n", "0.002455 | \n", "
| 5 | \n", "lag_11 | \n", "0.002202 | \n", "
| 6 | \n", "lag_12 | \n", "0.002072 | \n", "
| 7 | \n", "lag_13 | \n", "0.002032 | \n", "
| 8 | \n", "lag_14 | \n", "0.001546 | \n", "
| 10 | \n", "lag_16 | \n", "0.001244 | \n", "
| 11 | \n", "lag_17 | \n", "0.001218 | \n", "
| 19 | \n", "weekday | \n", "0.000071 | \n", "