{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from functools import reduce\n",
    "\n",
    "def _get_model_name(model):\n",
    "    \"\"\"\n",
    "            Returns a string with the name of a sklearn model\n",
    "                model: Sklearn stimator class\n",
    "    \"\"\"\n",
    "    if isinstance(model, Pipeline):\n",
    "        estimator = model.steps[-1][1]\n",
    "        name = \"Pipeline_\" + str(estimator)[:str(estimator).find(\"(\")]\n",
    "    else: \n",
    "        name = str(model)[:str(model).find(\"(\")]\n",
    "    return name\n",
    "    \n",
    "    \n",
    "def plot_cv_score(X, y, models_list, cv = 5, scoring_list = None, refit = True, return_scores = True):\n",
    "    \"\"\" \n",
    "            X: numpy_array/pandas dataframe n_rows, m_features\n",
    "            y: numpy_array/pandas dataframe n_rows\n",
    "            Plots min, max and avg kfold crosval_score for a list of models\n",
    "        \n",
    "    \"\"\"\n",
    "    \n",
    "        \n",
    "        \n",
    "    names, mean_score = list(), list()\n",
    "    ldf = list()\n",
    "    mnames = list()\n",
    "    \n",
    "    for i, model in enumerate(models_list):\n",
    "        name = _get_model_name(model)\n",
    "    \n",
    "        if refit:\n",
    "            model.fit(X, y)\n",
    "                \n",
    "        for metric in score_list:\n",
    "            \n",
    "            score = cross_val_score(model, X, y, cv = cv, scoring = metric, n_jobs= -1)\n",
    "            mean_score.append(np.mean(score))\n",
    "    \n",
    "    \n",
    "        tmp = pd.DataFrame({name: mean_score}, index = score_list)\n",
    "        \n",
    "            \n",
    "            \n",
    "        ldf.append(tmp)\n",
    "        \n",
    "        \n",
    "        mean_score = list()\n",
    "        \n",
    "    frame_scores = reduce(lambda x,y: pd.merge(x,y, left_index = True, right_index = True), ldf).T\n",
    "    print(frame_scores)\n",
    "    \n",
    "    \n",
    "    fig, ax  = plt.subplots(1,1, figsize = (10,5))\n",
    "\n",
    "    frame_scores.plot.bar(ax = ax, cmap = 'RdYlBu', edgecolor = \"black\")\n",
    "    ax.legend(loc = 'best')\n",
    "    ax.set_ylabel(\"Score\")\n",
    "    ax.set_title(\"Cross validation model benchmark\")\n",
    "\n",
    "    if return_scores:    \n",
    "        return frame_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score, f1_score, roc_auc_score\n",
    "from sklearn.datasets import load_breast_cancer\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.pipeline import Pipeline\n",
    "\n",
    "X, y = load_breast_cancer(return_X_y= True)\n",
    "\n",
    "models_list =[LogisticRegression(random_state= 42),\n",
    "            #   SVC(probability= True),\n",
    "              RandomForestClassifier(random_state = 42),\n",
    "              GaussianNB()]\n",
    "\n",
    "score_list = [\"roc_auc\", \"accuracy\", \"f1\", \"precision\", \"recall\"]\n",
    "\n",
    "t = plot_cv_score(X = X, y = y, models_list = models_list, cv = 5, scoring_list = score_list, refit = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TEST\n",
    "import pandas as pd  \n",
    "  \n",
    "# assign data of lists.  \n",
    "data = {'Accuracy': [0.9764, 0.9818, 0.9991, 0.9985, 0.9884], \n",
    "        'Precision':[0.9761, 0.9816, 0.9991, 0.9985, 0.9882],\n",
    "        'Recall':[0.9767, 0.9819, 0.9992, 0.9985, 0.9885],\n",
    "        'F1-Score':[0.9763, 0.9817, 0.9991, 0.9985, 0.9883],}  \n",
    "  \n",
    "# Creates pandas DataFrame.  \n",
    "df = pd.DataFrame(data, index =['GradientBoostingClassifier', 'GradientBoostingRegressor', 'HistGradientBoostingClassifier', 'HistGradientBoostingRegressor', 'AdaBoostClassifier'])  \n",
    "  \n",
    "# print the data  \n",
    "print(df)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax  = plt.subplots(1,1, figsize = (12,5))\n",
    "\n",
    "plt.rcParams[\"font.family\"] = \"Times New Roman\"\n",
    "df.plot.bar(ax = ax, cmap = 'GnBu', edgecolor = \"black\")\n",
    "plt.ylim(0.95,1)\n",
    "ax.legend(loc = 'best')\n",
    "ax.set_ylabel(\"Score\", fontproperties = \"Times New Roman\", fontweight = 'bold', fontsize = 14)\n",
    "ax.set_xlabel(\"Model\", fontproperties = \"Times New Roman\", fontweight = 'bold', fontsize = 14)\n",
    "plt.yticks(fontproperties = \"Times New Roman\", size = 12, weight = \"bold\")\n",
    "plt.xticks(fontproperties = \"Times New Roman\", size = 12, weight = \"bold\", rotation = 355)\n",
    "ax.set_title(\"Evaluation metrics among different models\", fontproperties = \"Times New Roman\", fontweight = 'bold', fontsize = 15)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.6 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.7.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "3499bcc59dd07de3752bcaf4b431b7cc0c8d7df018f3c7c8f72730d6f0400322"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}