{ "cells": [ { "cell_type": "markdown", "id": "ee702cec", "metadata": {}, "source": [ "# Sequential Ensemble Model (Voting, Stacking generalization) of Precipitation Downscaling" ] }, { "cell_type": "markdown", "id": "9299c596", "metadata": {}, "source": [ "### This notebook investigates the model performance of individual estimators and of an ensemble approach. Precipitation data (processed) from DWD and a climate dataset (processed and standardized) from ERA5 are used. \n", "\n", "### 1. Recursive feature elimination with cross-validation (RFECV) is used to tune the predictors and select the optimal variables. \n", "\n", "### 2. Different estimators are established using sklearn, and tensorflow for the dense models. (I used the scikit-learn wrapper in `tf.keras.wrappers.scikit_learn` to make both kinds of model compatible) ---level 0\n", "\n", "### 3. Ensemble models (voting regressor and stacking regressor) with different final estimators are tested to evaluate their performance on precipitation data --level 1\n", "\n", "### 4. Prediction and visualization \n", "\n", "### 5. 
Model performance evaluation\n", "\n", "### Note: This example uses climate data (precipitation); therefore the approach is tailored towards regression using advanced models.\n", "\n", "#### However, it can be adapted for classification problems or even improved with more complex networks like CNNs\n", "\n", "### Next: Use future projection data to feed the model for future local predictions" ] }, { "cell_type": "code", "execution_count": 55, "id": "03714017", "metadata": {}, "outputs": [], "source": [ "#@dboateng (13.01.2022)\n", "\n", "#importing models\n", "import numpy as np\n", "import pandas as pd \n", "import matplotlib.pyplot as plt \n", "from sklearn.feature_selection import RFECV\n", "from sklearn.model_selection import TimeSeriesSplit\n", "\n", "from sklearn.linear_model import LassoCV, RidgeCV, BayesianRidge, ARDRegression, GammaRegressor, LassoLarsCV, PoissonRegressor\n", "from sklearn.ensemble import BaggingRegressor, VotingRegressor, GradientBoostingRegressor, StackingRegressor\n", "from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor, RandomForestRegressor, HistGradientBoostingRegressor\n", "from sklearn.svm import SVR\n", "from sklearn.neural_network import MLPRegressor, BernoulliRBM\n", "from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor \n", "\n", "from sklearn.metrics import mean_squared_error, accuracy_score, r2_score, explained_variance_score, max_error\n", "from sklearn.metrics import mean_absolute_error, mean_squared_log_error, mean_absolute_percentage_error\n", "\n", "import tensorflow as tf\n", "import tensorflow.keras as keras \n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense, Dropout\n", "from keras.metrics import RootMeanSquaredError\n", "\n", "from xgboost import XGBRegressor\n", "\n", "from sklearn import set_config" ] }, { "cell_type": "code", "execution_count": 20, "id": "4d4df0ca", "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "(744, 22) (744,)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
t2mmslv10u10z500z850tpq500q850t500...vo500vo850pv500pv850u500u850v500v850d2mPrecipitation
time
1958-01-01-0.599361-349.7826360.0574580.030403-341.345388-287.1864760.000917-0.0001090.000011-0.024671...0.000002-0.0000041.244580e-07-6.846847e-08-0.2189040.2982430.3199751.885623-0.73918082.3
1958-02-011.573796-493.1679690.6591331.702194-131.800592-299.6899100.0031270.0003990.0001091.271700...-0.000016-0.000014-2.016244e-07-5.969440e-087.09185711.9157900.032295-0.8347101.693005179.4
1958-03-01-4.403323-461.871850-0.570981-0.675173-1185.306200-586.329590-0.000708-0.000709-0.000149-3.920263...0.0000080.0000243.843178e-081.189981e-07-2.7671580.286086-1.069146-3.006988-3.90746727.6
1958-04-01-2.77158327.638861-0.502666-0.029506-529.931263-140.0201770.000243-0.000413-0.000057-3.140417...0.0000050.000009-7.566521e-094.726872e-08-0.809763-1.252618-1.183645-1.840023-1.81798162.5
1958-05-011.45293575.0496470.4930830.678390470.619645135.861848-0.0001610.0004970.0001421.807232...-0.000014-0.0000175.571874e-08-2.804966e-083.1677796.6122311.1087563.5154721.91789177.2
\n", "

5 rows × 23 columns

\n", "
" ], "text/plain": [ " t2m msl v10 u10 z500 z850 \\\n", "time \n", "1958-01-01 -0.599361 -349.782636 0.057458 0.030403 -341.345388 -287.186476 \n", "1958-02-01 1.573796 -493.167969 0.659133 1.702194 -131.800592 -299.689910 \n", "1958-03-01 -4.403323 -461.871850 -0.570981 -0.675173 -1185.306200 -586.329590 \n", "1958-04-01 -2.771583 27.638861 -0.502666 -0.029506 -529.931263 -140.020177 \n", "1958-05-01 1.452935 75.049647 0.493083 0.678390 470.619645 135.861848 \n", "\n", " tp q500 q850 t500 ... vo500 vo850 \\\n", "time ... \n", "1958-01-01 0.000917 -0.000109 0.000011 -0.024671 ... 0.000002 -0.000004 \n", "1958-02-01 0.003127 0.000399 0.000109 1.271700 ... -0.000016 -0.000014 \n", "1958-03-01 -0.000708 -0.000709 -0.000149 -3.920263 ... 0.000008 0.000024 \n", "1958-04-01 0.000243 -0.000413 -0.000057 -3.140417 ... 0.000005 0.000009 \n", "1958-05-01 -0.000161 0.000497 0.000142 1.807232 ... -0.000014 -0.000017 \n", "\n", " pv500 pv850 u500 u850 v500 \\\n", "time \n", "1958-01-01 1.244580e-07 -6.846847e-08 -0.218904 0.298243 0.319975 \n", "1958-02-01 -2.016244e-07 -5.969440e-08 7.091857 11.915790 0.032295 \n", "1958-03-01 3.843178e-08 1.189981e-07 -2.767158 0.286086 -1.069146 \n", "1958-04-01 -7.566521e-09 4.726872e-08 -0.809763 -1.252618 -1.183645 \n", "1958-05-01 5.571874e-08 -2.804966e-08 3.167779 6.612231 1.108756 \n", "\n", " v850 d2m Precipitation \n", "time \n", "1958-01-01 1.885623 -0.739180 82.3 \n", "1958-02-01 -0.834710 1.693005 179.4 \n", "1958-03-01 -3.006988 -3.907467 27.6 \n", "1958-04-01 -1.840023 -1.817981 62.5 \n", "1958-05-01 3.515472 1.917891 77.2 \n", "\n", "[5 rows x 23 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# reading data\n", "data= pd.read_csv(\"sample_data.csv\", index_col=[\"time\"], parse_dates=[0])\n", "y = data[\"Precipitation\"]\n", "X = data.drop([\"Precipitation\"], axis=1)\n", "print(X.shape, y.shape)\n", "data.head()" ] }, { "cell_type": "code", "execution_count": null, 
"id": "87c31590", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 21, "id": "9616380e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(516,) (228,)\n" ] } ], "source": [ "# defining training and testing time \n", "train_period = pd.date_range(start=\"1958-01-01\", end=\"2000-12-31\", freq=\"MS\")\n", "test_period = pd.date_range(start=\"2001-01-01\", end=\"2019-12-31\", freq=\"MS\")\n", "full_time = pd.date_range(start=\"1958-01-01\", end=\"2019-12-31\", freq=\"MS\")\n", "print(train_period.shape, test_period.shape)" ] }, { "cell_type": "code", "execution_count": 22, "id": "7aac4a3b", "metadata": {}, "outputs": [], "source": [ "# dropping the months that have no precipitation observation\n", "has_target = y.notna()\n", "X = X.loc[has_target]\n", "y = y.loc[has_target]\n", "\n", "# Split with boolean date masks instead of X.loc[train_period]: label-based selection raises a\n", "# KeyError as soon as one month of the period was removed by the NaN filter above.\n", "in_train = X.index.isin(train_period)\n", "in_test = X.index.isin(test_period)\n", "X_train, X_test = X.loc[in_train], X.loc[in_test]\n", "y_train, y_test = y.loc[in_train], y.loc[in_test]" ] }, { "cell_type": "code", "execution_count": 23, "id": "acb29616", "metadata": {}, "outputs": [], "source": [ "# applying feature selection \n", "estimator = ARDRegression()\n", "scoring = \"r2\"\n", "n_jobs = -1 \n", "min_features = 5\n", "cv = TimeSeriesSplit(n_splits=5)\n", "rfcv = RFECV(estimator=estimator, scoring=scoring, cv=cv, n_jobs=n_jobs, min_features_to_select=min_features)" ] }, { "cell_type": "code", "execution_count": 24, "id": "b922cba4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(516, 13)\n", "Optimal number of predictors: 13\n", "Selected predictors: Index(['t2m', 'v10', 'u10', 'tp', 'q500', 'q850', 't500', 't850', 'r', 'u500',\n", " 'u850', 'v500', 'd2m'],\n", " dtype='object')\n", "Feature selection score: 0.6790316432914921\n" ] } ], "source": [ "#transforming data after feature selection \n", "rfcv= rfcv.fit(X_train, y_train)\n", "X_train_new = rfcv.transform(X_train)\n", "X_test_new = rfcv.transform(X_test)\n", "print(X_train_new.shape)\n", "print(\"Optimal number of predictors:\", rfcv.n_features_)\n", 
"print(\"Selected predictors:\", X_train.columns[rfcv.support_])\n", "# report the cross-validated score of the selected subset (the best mean score), not the\n", "# average over every candidate number of features\n", "print(\"Feature selection score:\", rfcv.cv_results_[\"mean_test_score\"].max())" ] }, { "cell_type": "code", "execution_count": 58, "id": "ec08d173", "metadata": {}, "outputs": [], "source": [ "import warnings \n", "warnings.filterwarnings(\"ignore\")\n", "# defining the level 0 (base) models: BayesianRidge, ARDRegression, GammaRegressor, MultiTaskElasticNetCV, QuantileRegressor\n", "#lasso (random coordinate selection is seeded so that re-runs give the same fit)\n", "lassoCV = LassoCV(cv=cv, selection=\"random\", random_state=42)\n", "lassoLarsCV = LassoLarsCV(cv=cv)\n", "bayesianRidge = BayesianRidge(n_iter=1000) \n", "poissonRegressor = PoissonRegressor()\n", "ARD = ARDRegression(n_iter=1000)\n", "gamma = GammaRegressor() #gamma regressor do not accept standardized values (negative anomalies)\n", "MLP = MLPRegressor(random_state=42, max_iter=1000, early_stopping=True)\n", "bernoulli = BernoulliRBM()\n", "randomforest = RandomForestRegressor(n_estimators=100, random_state=42)\n", "decisiontree = DecisionTreeRegressor(random_state=42)\n", "extratree = ExtraTreeRegressor(random_state=42)\n", "#svr = SVR(gamma=\"scale\", kernel=\"linear\")\n", "\n", "#defining keras model from tensorflow !!!! 
This must be improved with detailed design\n", "def build_nn(plot_network=False, input_dim=13):\n", "    \"\"\"Build and compile the dense regression network used as a level-0 estimator.\n", "\n", "    Parameters\n", "    ----------\n", "    plot_network : bool, default False\n", "        If True, draw the architecture with ann_visualizer before compiling.\n", "    input_dim : int, default 13\n", "        Number of input predictors expected by the first Dense layer.\n", "\n", "    Returns\n", "    -------\n", "    The compiled Sequential model (adam optimizer, mean-squared-error loss).\n", "    \"\"\"\n", "    model = Sequential()\n", "    model.add(Dense(512, activation=\"relu\", input_dim=input_dim))\n", "    model.add(Dense(256, activation=\"relu\"))\n", "    model.add(Dropout(0.2))\n", "    model.add(Dense(64, activation=\"relu\"))\n", "    model.add(Dense(1))\n", "\n", "    if plot_network:\n", "        # imported lazily: ann_visualizer is only needed when a plot is requested, so building\n", "        # the model must not fail when the package is not installed\n", "        from ann_visualizer.visualize import ann_viz\n", "        ann_viz(model, title=\"Dense Model\")\n", "\n", "    model.compile(optimizer=\"adam\", loss=\"mean_squared_error\", metrics=[\"RootMeanSquaredError\"])\n", "    return model\n", "\n", "#++++trick for keras and sklearn compatibility +++(might be deprecated---use scikeras model...in future)\n", "# input_dim is handed to build_nn by the wrapper, so the first layer follows the number of\n", "# predictors kept by the feature selection instead of a hard-coded 13\n", "keras_reg = tf.keras.wrappers.scikit_learn.KerasRegressor(build_nn, input_dim=X_train_new.shape[1], epochs=1000, verbose=False)\n", "keras_reg._estimator_type = \"regressor\"\n", "\n" ] }, { "cell_type": "code", "execution_count": 43, "id": "c4830131", "metadata": {}, "outputs": [], "source": [ "#plot network\n", "#build_nn(plot_network=True)\n", "#isinstance(gamma, GammaRegressor)" ] }, { "cell_type": "code", "execution_count": 59, "id": "329fe4ea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LassoCV 31.1341673611636\n", "R² score: 0.39\n", "ARDRegression 22.456580847098238\n", "R² score: 0.68\n", "GammaRegressor 31.722368930455\n", "R² score: 0.37\n", "MLPRegressor 35.57190810127707\n", "R² score: 0.21\n", "RandomForestRegressor 23.79390147281958\n", "R² score: 0.64\n", "LassoLarsCV 22.547571826766596\n", "R² score: 0.68\n", "BayesianRidge 30.999929781461233\n", "R² score: 0.40\n", "PoissonRegressor 30.241438827301796\n", "R² score: 0.43\n", "DecisionTreeRegressor 33.76214921029533\n", "R² score: 0.28\n", "ExtraTreeRegressor 32.74383094435165\n", "R² score: 0.33\n" ] } ], "source": [ "# checking the performance of the individual models\n", "estimators = [(\"lassoCV\", lassoCV), (\"ARD\", ARD), (\"Gamma\", gamma), (\"MLP\", MLP), \n", " (\"RandomForest\", 
randomforest), \n", " (\"Dense\", keras_reg), (\"Lars\",lassoLarsCV)]\n", "\n", "\n", "# fit every level 0 model on its own and report RMSE and R² on the test period\n", "for reg in (lassoCV, ARD, gamma, MLP, randomforest, lassoLarsCV, bayesianRidge, poissonRegressor, \n", "            decisiontree, extratree):\n", "    reg.fit(X_train_new, y_train)\n", "    yhat = reg.predict(X_test_new)\n", "    print(reg.__class__.__name__, mean_squared_error(y_test, yhat, squared=False))\n", "    print(\"R² score: {:.2f}\".format(r2_score(y_test, yhat)))" ] }, { "cell_type": "code", "execution_count": 30, "id": "299d87d9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ExtraTree 25.097043545741442\n", "R² score: 0.60\n", "GradientBoosting 22.793677285840143\n", "R² score: 0.67\n", "HistGradientBoosting 24.67749187348157\n", "R² score: 0.62\n", "XGBoosting 25.87956878150102\n", "R² score: 0.58\n", "AdaBoost 23.789250532827243\n", "R² score: 0.64\n", "RandomForest 24.216042350714105\n", "R² score: 0.63\n" ] } ], "source": [ "#combining the level 0 outputs in level 1: GradientBoostingRegressor, StackingRegressor\n", "# AdaBoostRegressor, ExtraTreesRegressor, RandomForestRegressor, VotingRegressor, XGBoost\n", "\n", "#set display of models \n", "set_config(display=\"diagram\")\n", "\n", "# ensemble types (based on boosting) voting or stacking generalization\n", "# different types of final estimator \n", "# tree-based \n", "extra_tree = ExtraTreesRegressor(random_state=42, criterion=\"squared_error\")\n", "#gradient boosting \n", "gradient_boost = GradientBoostingRegressor(n_estimators=25, subsample=0.5, min_samples_leaf=25, random_state=42)\n", "#HistGradientBoosting\n", "histgradient_boost = HistGradientBoostingRegressor(random_state=42)\n", "#XGBoosting\n", "xgboost_reg = XGBRegressor(random_state=42)\n", "#adaboost\n", "adaboost_reg = AdaBoostRegressor(random_state=42, n_estimators=100)\n", "\n", "#voting regressor\n", "#voting_reg = VotingRegressor(estimators=estimators) # default estimator: RidgeCV\n", "\n", "#Random Forest\n", "randomforest_reg = RandomForestRegressor(random_state=42)\n", "\n", "# candidate final (level 1) estimators, keyed by the label printed in the report;\n", "# the insertion order is the evaluation order\n", "final_estimators = {\n", "    \"ExtraTree\": extra_tree,\n", "    \"GradientBoosting\": gradient_boost,\n", "    \"HistGradientBoosting\": histgradient_boost,\n", "    \"XGBoosting\": xgboost_reg,\n", "    \"AdaBoost\": adaboost_reg,\n", "    \"RandomForest\": randomforest_reg,\n", "}\n", "\n", "# test-period predictions of every stacked model, kept so that results can be compared later\n", "stacking_predictions = {}\n", "for name, final_estimator in final_estimators.items():\n", "    stacked_regressor = StackingRegressor(estimators=estimators, final_estimator=final_estimator)\n", "    stacked_regressor.fit(X_train_new, y_train)\n", "    yhat = stacked_regressor.predict(X_test_new)\n", "    stacking_predictions[name] = yhat\n", "    print(name, mean_squared_error(y_test, yhat, squared=False))\n", "    print(\"R² score: {:.2f}\".format(r2_score(y_test, yhat)))\n", "\n", "# NOTE: after the loop, stacked_regressor and yhat belong to the last entry (RandomForest as\n", "# final estimator); the plotting and evaluation cells below use these two names." ] }, { "cell_type": "code", "execution_count": 32, "id": "441a898d", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
StackingRegressor(estimators=[('lassoCV',\n",
       "                               LassoCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),\n",
       "                                       selection='random')),\n",
       "                              ('ARD', ARDRegression()),\n",
       "                              ('Gamma', GammaRegressor()),\n",
       "                              ('MLP',\n",
       "                               MLPRegressor(max_iter=1000, random_state=42)),\n",
       "                              ('RandomForest',\n",
       "                               RandomForestRegressor(random_state=42)),\n",
       "                              ('Dense',\n",
       "                               <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000026486D4B2B0>),\n",
       "                              ('Lars',\n",
       "                               LassoLarsCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)))],\n",
       "                  final_estimator=RandomForestRegressor(random_state=42))
Please rerun this cell to show the HTML repr or trust the notebook.
" ], "text/plain": [ "StackingRegressor(estimators=[('lassoCV',\n", " LassoCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),\n", " selection='random')),\n", " ('ARD', ARDRegression()),\n", " ('Gamma', GammaRegressor()),\n", " ('MLP',\n", " MLPRegressor(max_iter=1000, random_state=42)),\n", " ('RandomForest',\n", " RandomForestRegressor(random_state=42)),\n", " ('Dense',\n", " ),\n", " ('Lars',\n", " LassoLarsCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)))],\n", " final_estimator=RandomForestRegressor(random_state=42))" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked_regressor" ] }, { "cell_type": "code", "execution_count": 33, "id": "862c77ba", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#prediction and visualization\n", "plt.style.use(\"ggplot\")\n", "plt.rcParams.update({\"font.size\":22, \"font.weight\":\"bold\"})\n", "from scipy import stats\n", "from sklearn.linear_model import LinearRegression\n", "\n", "#model = LinearRegression(fit_intercept=False)\n", "#model.fit(yhat, y_test)\n", "#reg_line = model.predict(yhat)\n", "#score = model.score()\n", "# least-squares fit of the observations (y) against the predictions (x)\n", "reg_stat = stats.linregress(yhat, y_test)\n", "# the fitted line needs the intercept as well as the slope\n", "reg_line = reg_stat.intercept + reg_stat.slope*yhat\n", "\n", "fig,ax = plt.subplots(figsize=(12,8))\n", "ax.scatter(yhat, y_test, edgecolor='k', facecolor='grey', alpha=0.7,)\n", "#ax.plot(yhat, reg_line, color=\"red\", label=\"R²= {:.2f}\".format(r2_score(y_test, yhat)))\n", "# rvalue is the correlation coefficient r, so it is squared to match the R² label\n", "ax.plot(yhat, reg_line, color=\"red\", label=\"R²= {:.2f}\".format(reg_stat.rvalue**2))\n", "# x holds the model output (yhat) and y the station observations (y_test)\n", "ax.set_xlabel(\"predicted values\")\n", "ax.set_ylabel(\"observed values\")\n", "ax.legend(frameon=True);" ] }, { "cell_type": "code", "execution_count": 34, "id": "7655edb8", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#residuals plot (To check for overfitting)--including linear models deviates the residuals form normal distribution (which justifies that\n", "# the relation between the large scale and local precipitation is non-linear). Therefore, models like tree-based algorithms, bayesian regression\n", "# and deep learning models are feasible for modeling the transfer function\n", "plt.style.use(\"ggplot\")\n", "plt.rcParams.update({\"font.size\":22, \"font.weight\":\"bold\"})\n", "\n", "from yellowbrick.regressor import ResidualsPlot\n", "model = stacked_regressor\n", "visualizer=ResidualsPlot(model, hist=True, qqplot=False)\n", "visualizer.fit(X_train_new, y_train)\n", "visualizer.score(X_test_new, y_test)\n", "visualizer.show()" ] }, { "cell_type": "code", "execution_count": 35, "id": "f3db5ca5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Explained Variance: 0.64\n", "R² (Coefficient of determinaiton): 0.63\n", "MSE): 24.22\n", "MAE): 17.66\n", "RMSE: 24.216042\n", "NSE: 0.63\n" ] } ], "source": [ "# model evaluation (regression metrics) mean_squared_error, r2_score, explained_variance_score, \n", "# max_error, mean_absolute_error, mean_squared_log_error, mean_absolute_percentage_error\n", "# (yhat holds the test-period predictions of the last stacked model fitted above)\n", "\n", "#explained_variance \n", "score = explained_variance_score(y_test, yhat)\n", "print(\"Explained Variance: {:.2f}\".format(score))\n", "\n", "#r2_score \n", "score = r2_score(y_test, yhat)\n", "print(\"R² (Coefficient of determinaiton): {:.2f}\".format(score))\n", "\n", "#mean_squared_error (squared error, NOT its root: the root is reported separately as RMSE below)\n", "score = mean_squared_error(y_test, yhat)\n", "print(\"MSE: {:.2f}\".format(score))\n", "\n", "#mean_absolute_error \n", "score = mean_absolute_error(y_test, yhat)\n", "print(\"MAE: {:.2f}\".format(score))\n", "\n", "#RMSE\n", "error = y_test -yhat\n", 
"score = np.sqrt(np.mean(error **2))\n", "print(\"RMSE: {:.2f}\".format(score))\n", "\n", "# Nash-Sutcliffe Efficiency (NSE)\n", "score = (1-(np.sum((yhat-y_test)**2))/np.sum((y_test - np.mean(y_test))**2))\n", "print(\"NSE: {:.2f}\".format(score))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }