{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## BayesSearchCV\n",
    "- Provided by `skopt` (the scikit-optimize package)\n",
    "- Install with `pip3 install scikit-optimize`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "BayesSearchCV implements a \"fit\" and a \"score\" method. It also implements \"predict\", \"predict_proba\", \"decision_function\", \"transform\" and \"inverse_transform\" if they are implemented in the estimator used.\n",
    "\n",
    "The parameters of the estimator used to apply these methods are optimized by cross-validated search over parameter settings.\n",
    "\n",
    "In contrast to `GridSearchCV`, not all parameter values are tried out; instead, a fixed number of parameter settings is sampled from the specified distributions. The number of parameter settings that are tried is given by **`n_iter`**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import xgboost as xgb\n",
    "import lightgbm as lgb\n",
    "from skopt import BayesSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold, KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%config InlineBackend.figure_format = 'retina'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run-size knobs (trailing comments presumably record the full-run values)\n",
    "ITERATIONS = 10 # 1000\n",
    "TRAINING_SIZE = 100000 # 20000000\n",
    "TEST_SIZE = 25000\n",
    "\n",
    "# Read the first TRAINING_SIZE rows, parsing click_time as a datetime\n",
    "X = pd.read_csv(\n",
    "    './data/train_sample.csv',\n",
    "    nrows=TRAINING_SIZE,\n",
    "    parse_dates=['click_time'],\n",
    ")\n",
    "\n",
    "# Take the target out of the frame, then discard the two timestamp columns\n",
    "y = X.pop('is_attributed')\n",
    "X = X.drop(['click_time', 'attributed_time'], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## XGBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bayesian hyperparameter search over an XGBoost classifier, scored by\n",
    "# cross-validated ROC-AUC on 3 shuffled, stratified folds.\n",
    "bayes_cv_tuner = BayesSearchCV(\n",
    "    estimator = xgb.XGBClassifier(\n",
    "        n_jobs = 1,  # one thread per model; the search itself runs with n_jobs = 3\n",
    "        objective = 'binary:logistic',\n",
    "        eval_metric = 'auc',\n",
    "        silent=1,\n",
    "        tree_method='approx'\n",
    "    ),\n",
    "    # Tuples are (low, high[, prior]); every parameter name appears exactly once\n",
    "    search_spaces = {\n",
    "        'learning_rate': (0.01, 1.0, 'log-uniform'),\n",
    "        'min_child_weight': (0, 5),\n",
    "        'max_depth': (0, 50),\n",
    "        'max_delta_step': (0, 20),\n",
    "        'subsample': (0.01, 1.0, 'uniform'),\n",
    "        'colsample_bytree': (0.01, 1.0, 'uniform'),\n",
    "        'colsample_bylevel': (0.01, 1.0, 'uniform'),\n",
    "        'reg_lambda': (1e-9, 1000, 'log-uniform'),\n",
    "        'reg_alpha': (1e-9, 1.0, 'log-uniform'),\n",
    "        'gamma': (1e-9, 0.5, 'log-uniform'),\n",
    "        'n_estimators': (50, 100),\n",
    "        'scale_pos_weight': (1e-6, 500, 'log-uniform')\n",
    "    },\n",
    "    scoring = 'roc_auc',\n",
    "    cv = StratifiedKFold(\n",
    "        n_splits=3,\n",
    "        shuffle=True,\n",
    "        random_state=42\n",
    "    ),\n",
    "    n_jobs = 3,\n",
    "    n_iter = ITERATIONS,\n",
    "    verbose = 0,\n",
    "    refit = True,\n",
    "    random_state = 42\n",
    ")\n",
    "\n",
    "def status_print(optim_result):\n",
    "    \"\"\"Status callback during Bayesian hyperparameter search.\n",
    "\n",
    "    Passed as `callback` to the search's `fit`. Prints the number of\n",
    "    configurations evaluated so far together with the best ROC-AUC and the\n",
    "    best parameters, then writes all CV results to\n",
    "    '<EstimatorClassName>_cv_results.csv' in the working directory.\n",
    "\n",
    "    `optim_result` is supplied by the search and is not used here. State is\n",
    "    read from the module-level `bayes_cv_tuner`, looked up at call time, so\n",
    "    the callback reports on whichever search object is bound to that name\n",
    "    when it runs.\n",
    "    \"\"\"\n",
    "    # All configurations evaluated so far, as a DataFrame\n",
    "    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)\n",
    "\n",
    "    # Report progress: configurations tried, best score and best parameters\n",
    "    print('Model #{}\\nBest ROC-AUC: {}\\nBest params: {}\\n'.format(\n",
    "        len(all_models),\n",
    "        np.round(bayes_cv_tuner.best_score_, 4),\n",
    "        bayes_cv_tuner.best_params_\n",
    "    ))\n",
    "\n",
    "    # Persist the full results table; the file is rewritten on every call\n",
    "    clf_name = bayes_cv_tuner.estimator.__class__.__name__\n",
    "    all_models.to_csv(clf_name + \"_cv_results.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model #1\n",
      "Best ROC-AUC: 0.5\n",
      "Best params: {'colsample_bylevel': 0.4160029192647807, 'colsample_bytree': 0.7304484857455519, 'gamma': 0.13031389926541354, 'learning_rate': 0.042815319280763466, 'max_delta_step': 13, 'max_depth': 21, 'min_child_weight': 2, 'n_estimators': 87, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216}\n",
      "\n",
      "Model #2\n",
      "Best ROC-AUC: 0.9279\n",
      "Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}\n",
      "\n",
      "Model #3\n",
      "Best ROC-AUC: 0.9279\n",
      "Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}\n",
      "\n",
      "Model #4\n",
      "Best ROC-AUC: 0.9279\n",
      "Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}\n",
      "\n",
      "Model #5\n",
      "Best ROC-AUC: 0.9279\n",
      "Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}\n",
      "\n",
      "Model #6\n",
      "Best ROC-AUC: 0.9438\n",
      "Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}\n",
      "\n",
      "Model #7\n",
      "Best ROC-AUC: 0.9438\n",
      "Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}\n",
      "\n",
      "Model #8\n",
      "Best ROC-AUC: 0.9438\n",
      "Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}\n",
      "\n",
      "Model #9\n",
      "Best ROC-AUC: 0.9438\n",
      "Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}\n",
      "\n",
      "Model #10\n",
      "Best ROC-AUC: 0.9438\n",
      "Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "xgb_result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9437820084781707"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_result.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'colsample_bylevel': 0.7366877378057127,\n",
       " 'colsample_bytree': 0.9399760402267441,\n",
       " 'gamma': 2.6498051478267012e-08,\n",
       " 'learning_rate': 0.0238149998729586,\n",
       " 'max_delta_step': 16,\n",
       " 'max_depth': 19,\n",
       " 'min_child_weight': 2,\n",
       " 'n_estimators': 77,\n",
       " 'reg_alpha': 0.011683028450342707,\n",
       " 'reg_lambda': 0.0048879464985534336,\n",
       " 'scale_pos_weight': 0.13267482411031659,\n",
       " 'subsample': 0.5689543694097536}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_result.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree',\n",
       "       colsample_bylevel=0.7366877378057127,\n",
       "       colsample_bytree=0.9399760402267441, eval_metric='auc',\n",
       "       gamma=2.6498051478267012e-08, learning_rate=0.0238149998729586,\n",
       "       max_delta_step=16, max_depth=19, min_child_weight=2, missing=None,\n",
       "       n_estimators=77, n_jobs=1, nthread=None,\n",
       "       objective='binary:logistic', random_state=0,\n",
       "       reg_alpha=0.011683028450342707, reg_lambda=0.0048879464985534336,\n",
       "       scale_pos_weight=0.13267482411031659, seed=None, silent=1,\n",
       "       subsample=0.5689543694097536, tree_method='approx')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_result.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_model = xgb_result.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "xgb.plot_importance(new_model);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(list,\n",
       "            {'split0_test_score': [0.5,\n",
       "              0.942736769309165,\n",
       "              0.9036215267557312,\n",
       "              0.7664768429281755,\n",
       "              0.9149290554548015,\n",
       "              0.9508199056182765,\n",
       "              0.970266156777475,\n",
       "              0.5,\n",
       "              0.9619782814423754,\n",
       "              0.8688657418397158],\n",
       "             'split1_test_score': [0.5,\n",
       "              0.9204035989757906,\n",
       "              0.9202405990169361,\n",
       "              0.8314109624593686,\n",
       "              0.8862962532164799,\n",
       "              0.9419943677975383,\n",
       "              0.9252757943478577,\n",
       "              0.5,\n",
       "              0.9345857822890258,\n",
       "              0.9155486135508354],\n",
       "             'split2_test_score': [0.5,\n",
       "              0.9205941606278379,\n",
       "              0.8919333273195618,\n",
       "              0.7698788225035331,\n",
       "              0.8939457357348328,\n",
       "              0.9385314369907087,\n",
       "              0.933384249932345,\n",
       "              0.5,\n",
       "              0.9272590231630433,\n",
       "              0.8850760240951778],\n",
       "             'mean_test_score': [0.5,\n",
       "              0.9279116559845781,\n",
       "              0.9052654176672172,\n",
       "              0.7892559301647617,\n",
       "              0.8983904370276193,\n",
       "              0.9437820084781707,\n",
       "              0.9429755921755676,\n",
       "              0.5,\n",
       "              0.9412746426049309,\n",
       "              0.889830221577291],\n",
       "             'std_test_score': [0.0,\n",
       "              0.0104833840703361,\n",
       "              0.011614653047009727,\n",
       "              0.02984089344163704,\n",
       "              0.012104519222291919,\n",
       "              0.005173525816300345,\n",
       "              0.019579488305032023,\n",
       "              0.0,\n",
       "              0.014942341537875773,\n",
       "              0.01935259332214665],\n",
       "             'rank_test_score': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
       "             'mean_fit_time': [0.34290297826131183,\n",
       "              0.754174550374349,\n",
       "              0.6671676635742188,\n",
       "              0.40001877148946124,\n",
       "              0.37077172597249347,\n",
       "              1.405093749364217,\n",
       "              1.6076428890228271,\n",
       "              0.26987171173095703,\n",
       "              1.0809760093688965,\n",
       "              0.30988844235738117],\n",
       "             'std_fit_time': [0.0034053772387067456,\n",
       "              0.00894768554778718,\n",
       "              0.010761718229036845,\n",
       "              0.003188315515353434,\n",
       "              0.003918948230922813,\n",
       "              0.01365679822911968,\n",
       "              0.03164965717137351,\n",
       "              0.004066227056245113,\n",
       "              0.05171909487097653,\n",
       "              0.0179053241838573],\n",
       "             'mean_score_time': [0.013190587361653646,\n",
       "              0.03159968058268229,\n",
       "              0.020743608474731445,\n",
       "              0.015934228897094727,\n",
       "              0.014749368031819662,\n",
       "              0.0346372922261556,\n",
       "              0.13779433568318686,\n",
       "              0.009719451268513998,\n",
       "              0.03555425008138021,\n",
       "              0.01338354746500651],\n",
       "             'std_score_time': [0.0013884540400461664,\n",
       "              0.0005411997531292687,\n",
       "              0.0013822142703546138,\n",
       "              0.0005896041167790662,\n",
       "              0.0003052665613508982,\n",
       "              0.001916110975315102,\n",
       "              0.003969088228005105,\n",
       "              0.00019924950654634242,\n",
       "              0.003008662813512281,\n",
       "              0.0006047749486508725],\n",
       "             'param_colsample_bylevel': [0.4160029192647807,\n",
       "              0.8390144719977516,\n",
       "              0.4503841871781403,\n",
       "              0.8142720284737898,\n",
       "              0.8015579071911014,\n",
       "              0.7366877378057127,\n",
       "              0.6209085649172932,\n",
       "              0.5479690370134094,\n",
       "              0.955923206446829,\n",
       "              0.013594004182195795],\n",
       "             'param_colsample_bytree': [0.7304484857455519,\n",
       "              0.8844821246070537,\n",
       "              0.9195352964526833,\n",
       "              0.1801528457825951,\n",
       "              0.44364889457651413,\n",
       "              0.9399760402267441,\n",
       "              0.7776107350396038,\n",
       "              0.9208091341729433,\n",
       "              0.7036152301751524,\n",
       "              0.819651719467114],\n",
       "             'param_gamma': [0.13031389926541354,\n",
       "              4.358684608480795e-07,\n",
       "              8.168958221061441e-09,\n",
       "              0.00015936523535755285,\n",
       "              3.811128976537413e-05,\n",
       "              2.6498051478267012e-08,\n",
       "              1.3277909848852635e-06,\n",
       "              2.083286323303108e-05,\n",
       "              0.03823613443879595,\n",
       "              0.002807995180059625],\n",
       "             'param_learning_rate': [0.042815319280763466,\n",
       "              0.7988179462781242,\n",
       "              0.07356404539935663,\n",
       "              0.4032083917998946,\n",
       "              0.2700390206185342,\n",
       "              0.0238149998729586,\n",
       "              0.5605967693796124,\n",
       "              0.4734922490673386,\n",
       "              0.06786442521779147,\n",
       "              0.03229300915669146],\n",
       "             'param_max_delta_step': [13, 17, 4, 10, 18, 16, 12, 6, 8, 12],\n",
       "             'param_max_depth': [21, 3, 23, 5, 36, 19, 30, 3, 11, 14],\n",
       "             'param_min_child_weight': [2, 1, 1, 4, 2, 2, 3, 3, 0, 2],\n",
       "             'param_n_estimators': [87, 68, 88, 94, 83, 77, 71, 51, 69, 58],\n",
       "             'param_reg_alpha': [5.497557739289786e-07,\n",
       "              0.0005266983003701547,\n",
       "              0.00010376808625045426,\n",
       "              0.1611980387486336,\n",
       "              1.5057560255472018e-06,\n",
       "              0.011683028450342707,\n",
       "              0.004026635957416632,\n",
       "              2.9618722230360503e-06,\n",
       "              0.00022356829889037284,\n",
       "              0.11080071157037095],\n",
       "             'param_reg_lambda': [0.05936070635912049,\n",
       "              276.5424475574225,\n",
       "              476.96194787286544,\n",
       "              4.3806965488564525e-05,\n",
       "              0.08186810622382998,\n",
       "              0.0048879464985534336,\n",
       "              0.040887904512512056,\n",
       "              8.153638964242,\n",
       "              1.2908532337409298e-07,\n",
       "              5.745523087821567],\n",
       "             'param_scale_pos_weight': [0.060830282487222144,\n",
       "              0.3016410771843142,\n",
       "              1.3165669602830552,\n",
       "              0.0009365503147654213,\n",
       "              0.029004593634154585,\n",
       "              0.13267482411031659,\n",
       "              109.72255122430063,\n",
       "              0.0015718563651880596,\n",
       "              4.73588486119117,\n",
       "              3.573713830065675],\n",
       "             'param_subsample': [0.13556548021189216,\n",
       "              0.9923710598637134,\n",
       "              0.387658500562527,\n",
       "              0.8391548832503206,\n",
       "              0.8835665823899177,\n",
       "              0.5689543694097536,\n",
       "              0.6612742297240571,\n",
       "              0.577028860872224,\n",
       "              0.4499578015509351,\n",
       "              0.029649078936835577],\n",
       "             'params': [{'colsample_bylevel': 0.4160029192647807,\n",
       "               'colsample_bytree': 0.7304484857455519,\n",
       "               'gamma': 0.13031389926541354,\n",
       "               'learning_rate': 0.042815319280763466,\n",
       "               'max_delta_step': 13,\n",
       "               'max_depth': 21,\n",
       "               'min_child_weight': 2,\n",
       "               'n_estimators': 87,\n",
       "               'reg_alpha': 5.497557739289786e-07,\n",
       "               'reg_lambda': 0.05936070635912049,\n",
       "               'scale_pos_weight': 0.060830282487222144,\n",
       "               'subsample': 0.13556548021189216},\n",
       "              {'colsample_bylevel': 0.8390144719977516,\n",
       "               'colsample_bytree': 0.8844821246070537,\n",
       "               'gamma': 4.358684608480795e-07,\n",
       "               'learning_rate': 0.7988179462781242,\n",
       "               'max_delta_step': 17,\n",
       "               'max_depth': 3,\n",
       "               'min_child_weight': 1,\n",
       "               'n_estimators': 68,\n",
       "               'reg_alpha': 0.0005266983003701547,\n",
       "               'reg_lambda': 276.5424475574225,\n",
       "               'scale_pos_weight': 0.3016410771843142,\n",
       "               'subsample': 0.9923710598637134},\n",
       "              {'colsample_bylevel': 0.4503841871781403,\n",
       "               'colsample_bytree': 0.9195352964526833,\n",
       "               'gamma': 8.168958221061441e-09,\n",
       "               'learning_rate': 0.07356404539935663,\n",
       "               'max_delta_step': 4,\n",
       "               'max_depth': 23,\n",
       "               'min_child_weight': 1,\n",
       "               'n_estimators': 88,\n",
       "               'reg_alpha': 0.00010376808625045426,\n",
       "               'reg_lambda': 476.96194787286544,\n",
       "               'scale_pos_weight': 1.3165669602830552,\n",
       "               'subsample': 0.387658500562527},\n",
       "              {'colsample_bylevel': 0.8142720284737898,\n",
       "               'colsample_bytree': 0.1801528457825951,\n",
       "               'gamma': 0.00015936523535755285,\n",
       "               'learning_rate': 0.4032083917998946,\n",
       "               'max_delta_step': 10,\n",
       "               'max_depth': 5,\n",
       "               'min_child_weight': 4,\n",
       "               'n_estimators': 94,\n",
       "               'reg_alpha': 0.1611980387486336,\n",
       "               'reg_lambda': 4.3806965488564525e-05,\n",
       "               'scale_pos_weight': 0.0009365503147654213,\n",
       "               'subsample': 0.8391548832503206},\n",
       "              {'colsample_bylevel': 0.8015579071911014,\n",
       "               'colsample_bytree': 0.44364889457651413,\n",
       "               'gamma': 3.811128976537413e-05,\n",
       "               'learning_rate': 0.2700390206185342,\n",
       "               'max_delta_step': 18,\n",
       "               'max_depth': 36,\n",
       "               'min_child_weight': 2,\n",
       "               'n_estimators': 83,\n",
       "               'reg_alpha': 1.5057560255472018e-06,\n",
       "               'reg_lambda': 0.08186810622382998,\n",
       "               'scale_pos_weight': 0.029004593634154585,\n",
       "               'subsample': 0.8835665823899177},\n",
       "              {'colsample_bylevel': 0.7366877378057127,\n",
       "               'colsample_bytree': 0.9399760402267441,\n",
       "               'gamma': 2.6498051478267012e-08,\n",
       "               'learning_rate': 0.0238149998729586,\n",
       "               'max_delta_step': 16,\n",
       "               'max_depth': 19,\n",
       "               'min_child_weight': 2,\n",
       "               'n_estimators': 77,\n",
       "               'reg_alpha': 0.011683028450342707,\n",
       "               'reg_lambda': 0.0048879464985534336,\n",
       "               'scale_pos_weight': 0.13267482411031659,\n",
       "               'subsample': 0.5689543694097536},\n",
       "              {'colsample_bylevel': 0.6209085649172932,\n",
       "               'colsample_bytree': 0.7776107350396038,\n",
       "               'gamma': 1.3277909848852635e-06,\n",
       "               'learning_rate': 0.5605967693796124,\n",
       "               'max_delta_step': 12,\n",
       "               'max_depth': 30,\n",
       "               'min_child_weight': 3,\n",
       "               'n_estimators': 71,\n",
       "               'reg_alpha': 0.004026635957416632,\n",
       "               'reg_lambda': 0.040887904512512056,\n",
       "               'scale_pos_weight': 109.72255122430063,\n",
       "               'subsample': 0.6612742297240571},\n",
       "              {'colsample_bylevel': 0.5479690370134094,\n",
       "               'colsample_bytree': 0.9208091341729433,\n",
       "               'gamma': 2.083286323303108e-05,\n",
       "               'learning_rate': 0.4734922490673386,\n",
       "               'max_delta_step': 6,\n",
       "               'max_depth': 3,\n",
       "               'min_child_weight': 3,\n",
       "               'n_estimators': 51,\n",
       "               'reg_alpha': 2.9618722230360503e-06,\n",
       "               'reg_lambda': 8.153638964242,\n",
       "               'scale_pos_weight': 0.0015718563651880596,\n",
       "               'subsample': 0.577028860872224},\n",
       "              {'colsample_bylevel': 0.955923206446829,\n",
       "               'colsample_bytree': 0.7036152301751524,\n",
       "               'gamma': 0.03823613443879595,\n",
       "               'learning_rate': 0.06786442521779147,\n",
       "               'max_delta_step': 8,\n",
       "               'max_depth': 11,\n",
       "               'min_child_weight': 0,\n",
       "               'n_estimators': 69,\n",
       "               'reg_alpha': 0.00022356829889037284,\n",
       "               'reg_lambda': 1.2908532337409298e-07,\n",
       "               'scale_pos_weight': 4.73588486119117,\n",
       "               'subsample': 0.4499578015509351},\n",
       "              {'colsample_bylevel': 0.013594004182195795,\n",
       "               'colsample_bytree': 0.819651719467114,\n",
       "               'gamma': 0.002807995180059625,\n",
       "               'learning_rate': 0.03229300915669146,\n",
       "               'max_delta_step': 12,\n",
       "               'max_depth': 14,\n",
       "               'min_child_weight': 2,\n",
       "               'n_estimators': 58,\n",
       "               'reg_alpha': 0.11080071157037095,\n",
       "               'reg_lambda': 5.745523087821567,\n",
       "               'scale_pos_weight': 3.573713830065675,\n",
       "               'subsample': 0.029649078936835577}]})"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb_result.cv_results_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LightGBM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model #1\n",
      "Best ROC-AUC: 0.5\n",
      "Best params: {'colsample_bytree': 0.4160029192647807, 'learning_rate': 0.28539836866041823, 'max_bin': 940, 'max_depth': 16, 'min_child_samples': 34, 'min_child_weight': 4, 'n_estimators': 68, 'num_leaves': 74, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216, 'subsample_for_bin': 171234, 'subsample_freq': 6}\n",
      "\n",
      "Model #2\n",
      "Best ROC-AUC: 0.5\n",
      "Best params: {'colsample_bytree': 0.4160029192647807, 'learning_rate': 0.28539836866041823, 'max_bin': 940, 'max_depth': 16, 'min_child_samples': 34, 'min_child_weight': 4, 'n_estimators': 68, 'num_leaves': 74, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216, 'subsample_for_bin': 171234, 'subsample_freq': 6}\n",
      "\n",
      "Model #3\n",
      "Best ROC-AUC: 0.5108\n",
      "Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.6877728743793542, 'max_bin': 194, 'max_depth': 22, 'min_child_samples': 9, 'min_child_weight': 5, 'n_estimators': 58, 'num_leaves': 75, 'reg_alpha': 0.00010376808625045426, 'reg_lambda': 476.96194787286544, 'scale_pos_weight': 1.3165669602830552, 'subsample': 0.387658500562527, 'subsample_for_bin': 179142, 'subsample_freq': 5}\n",
      "\n",
      "Model #4\n",
      "Best ROC-AUC: 0.5108\n",
      "Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.6877728743793542, 'max_bin': 194, 'max_depth': 22, 'min_child_samples': 9, 'min_child_weight': 5, 'n_estimators': 58, 'num_leaves': 75, 'reg_alpha': 0.00010376808625045426, 'reg_lambda': 476.96194787286544, 'scale_pos_weight': 1.3165669602830552, 'subsample': 0.387658500562527, 'subsample_for_bin': 179142, 'subsample_freq': 5}\n",
      "\n",
      "Model #5\n",
      "Best ROC-AUC: 0.5108\n",
      "Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.6877728743793542, 'max_bin': 194, 'max_depth': 22, 'min_child_samples': 9, 'min_child_weight': 5, 'n_estimators': 58, 'num_leaves': 75, 'reg_alpha': 0.00010376808625045426, 'reg_lambda': 476.96194787286544, 'scale_pos_weight': 1.3165669602830552, 'subsample': 0.387658500562527, 'subsample_for_bin': 179142, 'subsample_freq': 5}\n",
      "\n",
      "Model #6\n",
      "Best ROC-AUC: 0.9245\n",
      "Best params: {'colsample_bytree': 0.7366877378057127, 'learning_rate': 0.7563790218678241, 'max_bin': 247, 'max_depth': 9, 'min_child_samples': 40, 'min_child_weight': 4, 'n_estimators': 73, 'num_leaves': 54, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536, 'subsample_for_bin': 108942, 'subsample_freq': 5}\n",
      "\n",
      "Model #7\n",
      "Best ROC-AUC: 0.9455\n",
      "Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}\n",
      "\n",
      "Model #8\n",
      "Best ROC-AUC: 0.9455\n",
      "Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}\n",
      "\n",
      "Model #9\n",
      "Best ROC-AUC: 0.9455\n",
      "Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}\n",
      "\n",
      "Model #10\n",
      "Best ROC-AUC: 0.9455\n",
      "Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "bayes_cv_tuner = BayesSearchCV(\n",
    "    estimator = lgb.LGBMRegressor(\n",
    "        objective='binary',\n",
    "        metric='auc',\n",
    "        n_jobs=1,\n",
    "        verbose=0\n",
    "    ),\n",
    "    search_spaces = {\n",
    "        'learning_rate': (0.01, 1.0, 'log-uniform'),\n",
    "        'num_leaves': (1, 100),      \n",
    "        'max_depth': (0, 50),\n",
    "        'min_child_samples': (0, 50),\n",
    "        'max_bin': (100, 1000),\n",
    "        'subsample': (0.01, 1.0, 'uniform'),\n",
    "        'subsample_freq': (0, 10),\n",
    "        'colsample_bytree': (0.01, 1.0, 'uniform'),\n",
    "        'min_child_weight': (0, 10),\n",
    "        'subsample_for_bin': (100000, 500000),\n",
    "        'reg_lambda': (1e-9, 1000, 'log-uniform'),\n",
    "        'reg_alpha': (1e-9, 1.0, 'log-uniform'),\n",
    "        'scale_pos_weight': (1e-6, 500, 'log-uniform'),\n",
    "        'n_estimators': (50, 100),\n",
    "    },    \n",
    "    scoring = 'roc_auc',\n",
    "    cv = StratifiedKFold(\n",
    "        n_splits=3,\n",
    "        shuffle=True,\n",
    "        random_state=42\n",
    "    ),\n",
    "    n_jobs = 3,\n",
    "    n_iter = ITERATIONS,   \n",
    "    verbose = 0,\n",
    "    refit = True,\n",
    "    random_state = 42\n",
    ")\n",
    "\n",
    "# Fit the model\n",
    "lgbm_result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'colsample_bytree': 0.6209085649172932,\n",
       " 'learning_rate': 0.35540927532494104,\n",
       " 'max_bin': 423,\n",
       " 'max_depth': 44,\n",
       " 'min_child_samples': 30,\n",
       " 'min_child_weight': 6,\n",
       " 'n_estimators': 82,\n",
       " 'num_leaves': 43,\n",
       " 'reg_alpha': 0.004026635957416632,\n",
       " 'reg_lambda': 0.040887904512512056,\n",
       " 'scale_pos_weight': 109.72255122430063,\n",
       " 'subsample': 0.6612742297240571,\n",
       " 'subsample_for_bin': 344698,\n",
       " 'subsample_freq': 3}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lgbm_result.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,\n",
       "       learning_rate=0.1, max_depth=-1, metric='auc', min_child_samples=20,\n",
       "       min_child_weight=0.001, min_split_gain=0.0, n_estimators=100,\n",
       "       n_jobs=1, num_leaves=31, objective='binary', random_state=None,\n",
       "       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,\n",
       "       subsample_for_bin=200000, subsample_freq=1, verbose=0)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lgbm_result.estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "'neg1_mean_squared_error' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m/usr/local/lib/python3.6/site-packages/sklearn/metrics/scorer.py\u001b[0m in \u001b[0;36mget_scorer\u001b[0;34m(scoring)\u001b[0m\n\u001b[1;32m    228\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 229\u001b[0;31m             \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSCORERS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    230\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'neg1_mean_squared_error'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-21-cb4460c089fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     48\u001b[0m \u001b[0;31m# Fit the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbayes_cv_tuner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstatus_print\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/usr/local/lib/python3.6/site-packages/skopt/searchcv.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, callback)\u001b[0m\n\u001b[1;32m    652\u001b[0m                 optim_result = self._step(\n\u001b[1;32m    653\u001b[0m                     \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msearch_space\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 654\u001b[0;31m                     \u001b[0mgroups\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgroups\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_points\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_points_adjusted\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    655\u001b[0m                 )\n\u001b[1;32m    656\u001b[0m                 \u001b[0mn_iter\u001b[0m \u001b[0;34m-=\u001b[0m \u001b[0mn_points\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.6/site-packages/skopt/searchcv.py\u001b[0m in \u001b[0;36m_step\u001b[0;34m(self, X, y, search_space, optimizer, groups, n_points)\u001b[0m\n\u001b[1;32m    548\u001b[0m         \u001b[0mrefit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrefit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrefit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 550\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroups\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams_dict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    551\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrefit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrefit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    552\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.6/site-packages/skopt/searchcv.py\u001b[0m in \u001b[0;36m_fit\u001b[0;34m(self, X, y, groups, parameter_iterable)\u001b[0m\n\u001b[1;32m    374\u001b[0m             self.cv, y, classifier=is_classifier(estimator))\n\u001b[1;32m    375\u001b[0m         self.scorer_ = check_scoring(\n\u001b[0;32m--> 376\u001b[0;31m             self.estimator, scoring=self.scoring)\n\u001b[0m\u001b[1;32m    377\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    378\u001b[0m         \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroups\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroups\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.6/site-packages/sklearn/metrics/scorer.py\u001b[0m in \u001b[0;36mcheck_scoring\u001b[0;34m(estimator, scoring, allow_none)\u001b[0m\n\u001b[1;32m    271\u001b[0m                         \"'fit' method, %r was passed\" % estimator)\n\u001b[1;32m    272\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 273\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mget_scorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    274\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    275\u001b[0m         \u001b[0;31m# Heuristic to ensure user has not passed a metric\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.6/site-packages/sklearn/metrics/scorer.py\u001b[0m in \u001b[0;36mget_scorer\u001b[0;34m(scoring)\u001b[0m\n\u001b[1;32m    231\u001b[0m             raise ValueError('%r is not a valid scoring value. '\n\u001b[1;32m    232\u001b[0m                              \u001b[0;34m'Use sorted(sklearn.metrics.SCORERS.keys()) '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 233\u001b[0;31m                              'to get valid options.' % (scoring))\n\u001b[0m\u001b[1;32m    234\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    235\u001b[0m         \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscoring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: 'neg1_mean_squared_error' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options."
     ]
    }
   ],
   "source": [
    "bayes_cv_tuner = BayesSearchCV(\n",
    "    estimator = lgb.LGBMRegressor(objective='regression', boosting_type='gbdt', subsample=0.6143), #colsample_bytree=0.6453, subsample=0.6143\n",
    "    search_spaces = {\n",
    "        'learning_rate': (0.01, 1.0, 'log-uniform'),\n",
    "        'num_leaves': (10, 100),      \n",
    "        'max_depth': (0, 50),\n",
    "        'min_child_samples': (0, 50),\n",
    "        'max_bin': (100, 1000),\n",
    "        'subsample_freq': (0, 10),\n",
    "        'min_child_weight': (0, 10),\n",
    "        'reg_lambda': (1e-9, 1000, 'log-uniform'),\n",
    "        'reg_alpha': (1e-9, 1.0, 'log-uniform'),\n",
    "        'scale_pos_weight': (1e-6, 500, 'log-uniform'),\n",
    "        'n_estimators': (50, 150),\n",
    "    },    \n",
    "    scoring = 'neg_mean_squared_error', #neg_mean_squared_log_error\n",
    "    cv = KFold(\n",
    "        n_splits=5,\n",
    "        shuffle=True,\n",
    "        random_state=42\n",
    "    ),\n",
    "    n_jobs = 1,\n",
    "    n_iter = 100,   \n",
    "    verbose = 0,\n",
    "    refit = True,\n",
    "    random_state = 42\n",
    ")\n",
    "\n",
    "\n",
    "def status_print(optim_result):\n",
    "    \"\"\"Status callback durring bayesian hyperparameter search\"\"\"\n",
    "    \n",
    "    # Get all the models tested so far in DataFrame format\n",
    "    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    \n",
    "    \n",
    "    # Get current parameters and the best parameters    \n",
    "    best_params = pd.Series(bayes_cv_tuner.best_params_)\n",
    "    print('Model #{}\\nBest MSE: {}\\nBest params: {}\\n'.format(\n",
    "        len(all_models),\n",
    "        np.round(bayes_cv_tuner.best_score_, 4),\n",
    "        bayes_cv_tuner.best_params_\n",
    "    ))\n",
    "    \n",
    "    # Save all model results\n",
    "    clf_name = bayes_cv_tuner.estimator.__class__.__name__\n",
    "    all_models.to_csv(clf_name+\"_cv_results.csv\")\n",
    "\n",
    "# Fit the model\n",
    "result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Scoring에서 쓸 수 있는 값은 아래와 같음"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = sklearn.metrics.SCORERS.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "explained_variance\n",
      "r2\n",
      "neg_median_absolute_error\n",
      "neg_mean_absolute_error\n",
      "neg_mean_squared_error\n",
      "neg_mean_squared_log_error\n",
      "accuracy\n",
      "roc_auc\n",
      "balanced_accuracy\n",
      "average_precision\n",
      "neg_log_loss\n",
      "brier_score_loss\n",
      "adjusted_rand_score\n",
      "homogeneity_score\n",
      "completeness_score\n",
      "v_measure_score\n",
      "mutual_info_score\n",
      "adjusted_mutual_info_score\n",
      "normalized_mutual_info_score\n",
      "fowlkes_mallows_score\n",
      "precision\n",
      "precision_macro\n",
      "precision_micro\n",
      "precision_samples\n",
      "precision_weighted\n",
      "recall\n",
      "recall_macro\n",
      "recall_micro\n",
      "recall_samples\n",
      "recall_weighted\n",
      "f1\n",
      "f1_macro\n",
      "f1_micro\n",
      "f1_samples\n",
      "f1_weighted\n"
     ]
    }
   ],
   "source": [
    "for key in keys:\n",
    "    print(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}