{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Search for Optimal Parameters of Regression Models\n", "\n", "Hyperparameter Tuning of *4.2 Decision Tree and 4.3 Random Forest* in [Main Notebook](https://github.com/czarinagluna/regression-analysis-for-estimating-prices/blob/main/main.ipynb)\n", "\n", "*** \n", "**By [Czarina Luna](https://www.linkedin.com/in/czarinagluna/)**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%store -r X_train" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%store -r y_train" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "def grid_search(regressor, params):\n", " '''Performs grid search on given regression model, returns best cross validation scores and parameters'''\n", " gridsearch = GridSearchCV(estimator=regressor, param_grid=params, cv=5)\n", " gridsearch.fit(X_train, y_train)\n", " \n", " print(f'Best cv score: {gridsearch.best_score_ :.2%}')\n", " print(f'Best parameters: {gridsearch.best_params_}')\n", " \n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Decision Tree**" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "%store -r dt_pipe" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 73.24%\n", "Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_split': 5}\n" ] } ], "source": [ "grid = {'decision_tree__criterion': ['squared_error', 'friedman_mse', 'absolute_error'], \n", " 'decision_tree__max_depth': [None, 1, 2], \n", " 'decision_tree__min_samples_split': [2, 3, 5]}\n", "\n", "grid_search(dt_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 76.86%\n", "Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 5}\n" ] } ], "source": [ "grid = {'decision_tree__criterion': ['friedman_mse'], \n", " 'decision_tree__max_depth': [None], \n", " 'decision_tree__min_samples_split': [5, 10], \n", " 'decision_tree__min_samples_leaf': [1, 2, 5]}\n", "\n", "grid_search(dt_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 78.44%\n", "Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 20}\n" ] } ], "source": [ "grid = {'decision_tree__criterion': ['friedman_mse'], \n", " 'decision_tree__max_depth': [None], \n", " 'decision_tree__min_samples_split': [10, 15, 20], \n", " 'decision_tree__min_samples_leaf': [5, 10]}\n", "\n", "grid_search(dt_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 78.95%\n", "Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 30}\n" ] } ], "source": [ "grid = {'decision_tree__criterion': ['friedman_mse'], \n", " 'decision_tree__max_depth': [None], \n", " 'decision_tree__min_samples_split': [20, 30, 50], \n", " 'decision_tree__min_samples_leaf': [5]}\n", "\n", "grid_search(dt_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 79.00%\n", "Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 45}\n" ] } ], "source": [ "grid = {'decision_tree__criterion': ['friedman_mse'], \n", " 'decision_tree__max_depth': [None], \n", " 'decision_tree__min_samples_split': [25, 30, 45], \n", " 'decision_tree__min_samples_leaf': [5]}\n", "\n", "grid_search(dt_pipe, grid)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Random Forest**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "%store -r rf_pipe" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 86.22%\n", "Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__min_samples_split': 2}\n" ] } ], "source": [ "grid = {'random_forest__criterion': ['squared_error', 'friedman_mse', 'absolute_error'], \n", " 'random_forest__max_depth': [None, 1, 2], \n", " 'random_forest__min_samples_split': [2, 3, 5]}\n", "\n", "grid_search(rf_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 86.14%\n", "Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 3}\n" ] } ], "source": [ "grid = {'random_forest__criterion': ['friedman_mse'], \n", " 'random_forest__max_depth': [None], \n", " 'random_forest__min_samples_split': [2, 3, 10], \n", " 'random_forest__min_samples_leaf': [1, 2, 5]}\n", "\n", "grid_search(rf_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 86.25%\n", "Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 2, 'random_forest__n_estimators': 150}\n" ] } ], "source": [ "grid = {'random_forest__criterion': ['friedman_mse'], \n", " 'random_forest__max_depth': [None], \n", " 'random_forest__min_samples_split': [1,2,3], \n", " 'random_forest__min_samples_leaf': [1], \n", " 'random_forest__n_estimators': [100, 150, 200]}\n", "\n", "grid_search(rf_pipe, grid)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best cv score: 86.29%\n", "Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__max_features': 'sqrt', 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 2, 'random_forest__n_estimators': 150, 'random_forest__n_jobs': 2}\n" ] } ], "source": [ "grid = {'random_forest__criterion': ['friedman_mse'], \n", " 'random_forest__max_depth': [None], \n", " 'random_forest__min_samples_split': [2], \n", " 'random_forest__min_samples_leaf': [1], \n", " 'random_forest__n_estimators': [150], \n", " 'random_forest__max_features': ['auto', 'sqrt', 'log2'], \n", " 'random_forest__n_jobs': [1, 2, 3]}\n", "\n", "grid_search(rf_pipe, grid)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Contact\n", "\n", "Feel free to contact me for any questions and connect with me on [Linkedin](https://www.linkedin.com/in/czarinagluna/)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }