{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Päivitetty 2022-10-14 15:36:59.178318\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "print(f'Päivitetty {datetime.now()}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h1>Esimerkki sopivien hyperparametrien etsimisestä</h1>\n",
    "\n",
    "Mallille annettavia parametreja on tapana kutsua hyperparametreiksi.\n",
    "\n",
    "Testidatan käyttö sopivien hyperparametrien valitsemisessa ei ole suotavaa, koska tällöin testidata olisi osittain osallisena mallin opettamiseen. Testidatanhan on tarkoitus olla data, jota malli ei ole \"nähnyt\" opetuksen yhteydessä.\n",
    "\n",
    "Hyperparametrien valitsemisessa voidaan käyttää **GridSearchCV**-toimintoa, joka jakaa opetusdatan opetusdataan ja validointidataan sekä kokeilee vaihtoehtoisia hyperparametrien arvoja. Lue lisää:\n",
    "\n",
    "https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = sns.load_dataset('iris')\n",
    "\n",
    "X = df.drop('species', axis=1)\n",
    "y = df['species']\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ccp_alpha': 0.0,\n",
       " 'class_weight': None,\n",
       " 'criterion': 'gini',\n",
       " 'max_depth': None,\n",
       " 'max_features': None,\n",
       " 'max_leaf_nodes': None,\n",
       " 'min_impurity_decrease': 0.0,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 2,\n",
       " 'min_weight_fraction_leaf': 0.0,\n",
       " 'random_state': 2,\n",
       " 'splitter': 'best'}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Määritän käytettävän mallin\n",
    "dtc = DecisionTreeClassifier(random_state=2)\n",
    "\n",
    "# Katson mallin oletushyperparametrit\n",
    "dtc.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 3 candidates, totalling 15 fits\n",
      "[CV 1/5] END .......................max_depth=1;, score=0.652 total time=   0.0s\n",
      "[CV 2/5] END .......................max_depth=1;, score=0.652 total time=   0.0s\n",
      "[CV 3/5] END .......................max_depth=1;, score=0.636 total time=   0.0s\n",
      "[CV 4/5] END .......................max_depth=1;, score=0.636 total time=   0.0s\n",
      "[CV 5/5] END .......................max_depth=1;, score=0.636 total time=   0.0s\n",
      "[CV 1/5] END .......................max_depth=2;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END .......................max_depth=2;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END .......................max_depth=2;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END .......................max_depth=2;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END .......................max_depth=2;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END .......................max_depth=3;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END .......................max_depth=3;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END .......................max_depth=3;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END .......................max_depth=3;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END .......................max_depth=3;, score=1.000 total time=   0.0s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(estimator=DecisionTreeClassifier(random_state=2),\n",
       "             param_grid={'max_depth': [1, 2, 3]}, verbose=3)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Kokeilen erilaisia päätöspuun enimmäissyvyyksiä (max_depth)\n",
    "parameters = {'max_depth':[1, 2, 3]}\n",
    "\n",
    "# GridSearchCV kokeilee kaikki vaihtoehdot ja valitsee parhaan\n",
    "dtc_grid = GridSearchCV(dtc, parameters, verbose=3)\n",
    "dtc_grid.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best score:  0.9644268774703558\n",
      "best parameters:  {'max_depth': 2}\n"
     ]
    }
   ],
   "source": [
    "# Katsotaan paras tulos\n",
    "print('best score: ', dtc_grid.best_score_)\n",
    "print('best parameters: ', dtc_grid.best_params_)\n",
    "\n",
    "# Käytetään parhaiksi todettuja hyperparametrien arvoja\n",
    "dtc = dtc_grid.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Opetusdata 0.964\n",
      "Testidata 0.947\n"
     ]
    }
   ],
   "source": [
    "print(f'Opetusdata {dtc.score(X_train, y_train):.3f}')\n",
    "print(f'Testidata {dtc.score(X_test, y_test):.3f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Satunnaismetsä"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'bootstrap': True,\n",
       " 'ccp_alpha': 0.0,\n",
       " 'class_weight': None,\n",
       " 'criterion': 'gini',\n",
       " 'max_depth': None,\n",
       " 'max_features': 'auto',\n",
       " 'max_leaf_nodes': None,\n",
       " 'max_samples': None,\n",
       " 'min_impurity_decrease': 0.0,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 2,\n",
       " 'min_weight_fraction_leaf': 0.0,\n",
       " 'n_estimators': 100,\n",
       " 'n_jobs': None,\n",
       " 'oob_score': False,\n",
       " 'random_state': 2,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Määritän käytettävän mallin\n",
    "rfc = RandomForestClassifier(random_state=2)\n",
    "\n",
    "# Katson mallin oletushyperparametrit\n",
    "rfc.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n",
      "[CV 1/5] END ..max_features=2, n_estimators=100;, score=0.913 total time=   0.0s\n",
      "[CV 2/5] END ..max_features=2, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END ..max_features=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END ..max_features=2, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END ..max_features=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END ..max_features=2, n_estimators=200;, score=0.913 total time=   0.1s\n",
      "[CV 2/5] END ..max_features=2, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END ..max_features=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END ..max_features=2, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END ..max_features=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END ..max_features=2, n_estimators=300;, score=0.913 total time=   0.2s\n",
      "[CV 2/5] END ..max_features=2, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END ..max_features=2, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END ..max_features=2, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END ..max_features=2, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 1/5] END ..max_features=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END ..max_features=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END ..max_features=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END ..max_features=3, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END ..max_features=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END ..max_features=3, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 2/5] END ..max_features=3, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END ..max_features=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END ..max_features=3, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END ..max_features=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END ..max_features=3, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END ..max_features=3, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END ..max_features=3, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END ..max_features=3, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END ..max_features=3, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 1/5] END ..max_features=4, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END ..max_features=4, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END ..max_features=4, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END ..max_features=4, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END ..max_features=4, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END ..max_features=4, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 2/5] END ..max_features=4, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END ..max_features=4, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END ..max_features=4, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END ..max_features=4, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END ..max_features=4, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END ..max_features=4, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END ..max_features=4, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END ..max_features=4, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END ..max_features=4, n_estimators=300;, score=1.000 total time=   0.2s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(estimator=RandomForestClassifier(random_state=2),\n",
       "             param_grid={'max_features': [2, 3, 4],\n",
       "                         'n_estimators': [100, 200, 300]},\n",
       "             verbose=3)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keskeisimmät parametrit ovat laskettavien päätöspuiden lukumäärä\n",
    "# ja kussakin haarautumisessa huomioitavien selittävien muuttujien lukumäärä\n",
    "parameters = {'n_estimators':[100, 200, 300],\n",
    "             'max_features':[2, 3, 4]}\n",
    "\n",
    "# Kokeilen kaikki vaihtoehdot\n",
    "rfc_grid = GridSearchCV(rfc, parameters, verbose=3)\n",
    "rfc_grid.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best score:  0.9644268774703558\n",
      "best parameters:  {'max_features': 3, 'n_estimators': 100}\n"
     ]
    }
   ],
   "source": [
    "# Katsotaan paras tulos\n",
    "print('best score: ', rfc_grid.best_score_)\n",
    "print('best parameters: ', rfc_grid.best_params_)\n",
    "\n",
    "# Käytetään parhaiksi todettuja hyperparametrien arvoja\n",
    "rfc = rfc_grid.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Opetusdata 1.000\n",
      "Testidata 0.974\n"
     ]
    }
   ],
   "source": [
    "print(f'Opetusdata {rfc.score(X_train, y_train):.3f}')\n",
    "print(f'Testidata {rfc.score(X_test, y_test):.3f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradienttitehostaminen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ccp_alpha': 0.0,\n",
       " 'criterion': 'friedman_mse',\n",
       " 'init': None,\n",
       " 'learning_rate': 0.1,\n",
       " 'loss': 'deviance',\n",
       " 'max_depth': 3,\n",
       " 'max_features': None,\n",
       " 'max_leaf_nodes': None,\n",
       " 'min_impurity_decrease': 0.0,\n",
       " 'min_samples_leaf': 1,\n",
       " 'min_samples_split': 2,\n",
       " 'min_weight_fraction_leaf': 0.0,\n",
       " 'n_estimators': 100,\n",
       " 'n_iter_no_change': None,\n",
       " 'random_state': 2,\n",
       " 'subsample': 1.0,\n",
       " 'tol': 0.0001,\n",
       " 'validation_fraction': 0.1,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gbc = GradientBoostingClassifier(random_state=2)\n",
    "gbc.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=1, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=1, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=1, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=1, n_estimators=100;, score=0.818 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=1, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=1, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=1, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=1, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=1, n_estimators=200;, score=0.864 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=1, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=1, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=1, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=1, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=1, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=1, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=1, n_estimators=400;, score=0.909 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=100;, score=0.826 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=100;, score=0.864 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=200;, score=0.955 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=300;, score=0.955 total time=   0.2s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=2, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=2, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=2, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=2, n_estimators=400;, score=0.909 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=2, n_estimators=400;, score=0.955 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=3, n_estimators=100;, score=0.864 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=3, n_estimators=100;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=3, n_estimators=200;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=3, n_estimators=200;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=3, n_estimators=200;, score=0.864 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=3, n_estimators=300;, score=0.957 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=3, n_estimators=300;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=3, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=3, n_estimators=300;, score=0.864 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=3, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.001, max_depth=3, n_estimators=400;, score=0.957 total time=   0.4s\n",
      "[CV 2/5] END learning_rate=0.001, max_depth=3, n_estimators=400;, score=0.957 total time=   0.4s\n",
      "[CV 3/5] END learning_rate=0.001, max_depth=3, n_estimators=400;, score=1.000 total time=   0.4s\n",
      "[CV 4/5] END learning_rate=0.001, max_depth=3, n_estimators=400;, score=0.864 total time=   0.4s\n",
      "[CV 5/5] END learning_rate=0.001, max_depth=3, n_estimators=400;, score=1.000 total time=   0.4s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=1, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=1, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=1, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=1, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=1, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=1, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=1, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=1, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=1, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=1, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=1, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=1, n_estimators=400;, score=0.909 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=2, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=2, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV 1/5] END learning_rate=0.01, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=2, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=2, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=2, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=2, n_estimators=300;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=2, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=2, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=2, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=2, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=2, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=2, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=2, n_estimators=400;, score=0.909 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=2, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.864 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.864 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=300;, score=0.957 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=300;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=300;, score=0.864 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=400;, score=1.000 total time=   0.4s\n",
      "[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=400;, score=0.957 total time=   0.4s\n",
      "[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=400;, score=1.000 total time=   0.4s\n",
      "[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=400;, score=0.864 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=400;, score=1.000 total time=   0.4s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=1, n_estimators=100;, score=0.913 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=1, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=1, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=1, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=1, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=1, n_estimators=200;, score=0.913 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=1, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=1, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=1, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=1, n_estimators=200;, score=0.955 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=1, n_estimators=300;, score=0.913 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=1, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=1, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=1, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=1, n_estimators=300;, score=0.955 total time=   0.2s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=1, n_estimators=400;, score=0.913 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=1, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=1, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=1, n_estimators=400;, score=0.909 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=1, n_estimators=400;, score=0.955 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=2, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=2, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=2, n_estimators=100;, score=0.909 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=2, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=2, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=2, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=2, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=2, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=2, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=2, n_estimators=300;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=2, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=2, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=2, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=2, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=2, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=2, n_estimators=400;, score=0.909 total time=   0.3s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=2, n_estimators=400;, score=1.000 total time=   0.4s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.957 total time=   0.0s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=0.864 total time=   0.0s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=3, n_estimators=100;, score=1.000 total time=   0.0s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=200;, score=0.957 total time=   0.1s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=200;, score=0.909 total time=   0.1s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=3, n_estimators=200;, score=1.000 total time=   0.1s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=3, n_estimators=300;, score=1.000 total time=   0.2s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=300;, score=0.957 total time=   0.2s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=300;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=3, n_estimators=300;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END learning_rate=0.1, max_depth=3, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=400;, score=0.957 total time=   0.3s\n",
      "[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=400;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=400;, score=0.909 total time=   0.2s\n",
      "[CV 5/5] END learning_rate=0.1, max_depth=3, n_estimators=400;, score=1.000 total time=   0.3s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(estimator=GradientBoostingClassifier(random_state=2),\n",
       "             param_grid={'learning_rate': [0.001, 0.01, 0.1],\n",
       "                         'max_depth': [1, 2, 3],\n",
       "                         'n_estimators': [100, 200, 300, 400]},\n",
       "             verbose=3)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For gradient boosting, learning_rate is a key hyperparameter as well:\n",
    "# too small a value makes learning slow, while too large a value\n",
    "# can prevent the optimal solution from being found.\n",
    "parameters = {\n",
    "    'max_depth': [1, 2, 3],\n",
    "    'n_estimators': [100, 200, 300, 400],\n",
    "    'learning_rate': [0.001, 0.01, 0.1],\n",
    "}\n",
    "\n",
    "# Search every combination in the grid; verbose=3 logs each individual fit.\n",
    "gbc_grid = GridSearchCV(estimator=gbc, param_grid=parameters, verbose=3)\n",
    "gbc_grid.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best score:  0.9818181818181818\n",
      "best parameters:  {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 300}\n"
     ]
    }
   ],
   "source": [
    "# From here on, use the estimator with the hyperparameter values found to be best\n",
    "gbc = gbc_grid.best_estimator_\n",
    "\n",
    "# Show the best score and the best parameter values reported by the grid search\n",
    "print('best score: ', gbc_grid.best_score_)\n",
    "print('best parameters: ', gbc_grid.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Opetusdata 0.982\n",
      "Testidata 0.974\n"
     ]
    }
   ],
   "source": [
    "# Score the selected model on the training data and on the held-out test data\n",
    "train_score = gbc.score(X_train, y_train)\n",
    "test_score = gbc.score(X_test, y_test)\n",
    "\n",
    "print(f'Opetusdata {train_score:.3f}')\n",
    "print(f'Testidata {test_score:.3f}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}