{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": 3
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python_defaultSpec_1597439255963",
   "display_name": "Python 3.8.4 64-bit"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This simple demo notebook illustrates how OptimalFlow's autoCV module is typically used on a classification problem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install external packages in the Binder environment.\n",
    "# Use the %pip magic rather than !pip so the package is installed into the\n",
    "# environment of the running kernel, not whichever pip is first on PATH.\n",
    "%pip install xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left  [####################] 100.0%\n\n    *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds:\nBest Parameters: {'fit_intercept': 'False'}\n\nBest CV Score: 0.803456180567801\n\nlgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 1.0s\nsvm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.5s\nmlp -- Accuracy: 0.787 / Precision: 0.745 / Recall: 0.631 / Latency: 1.0s\nrf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 45.6s\nada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 20.6s\ngb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 3.2s\nxgb -- Accuracy: 0.815 / Precision: 0.786 / Recall: 0.677 / Latency: 2.0s\nlsvc -- Accuracy: 0.753 / Precision: 0.667 / Recall: 0.646 / Latency: 4.1s\nsgd -- Accuracy: 0.775 / Precision: 0.658 / Recall: 0.8 / Latency: 0.0s\nhgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 9.1s\nrgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 1.0s\n"
    }
   ],
   "source": [
    "# Classification demo: cross-validated model selection with dynaClassifier,\n",
    "# followed by an evaluation of each saved model on the validation set.\n",
    "import pandas as pd\n",
    "from optimalflow.autoCV import dynaClassifier,evaluate_model\n",
    "import joblib\n",
    "\n",
    "# Load the train / validation / test feature and label CSVs.\n",
    "tr_features = pd.read_csv('./data/classification/train_features.csv')\n",
    "tr_labels = pd.read_csv('./data/classification/train_labels.csv')\n",
    "val_features = pd.read_csv('./data/classification/val_features.csv')\n",
    "val_labels = pd.read_csv('./data/classification/val_labels.csv')\n",
    "te_features = pd.read_csv('./data/classification/test_features.csv')\n",
    "te_labels = pd.read_csv('./data/classification/test_labels.csv')\n",
    "\n",
    "# Short names of the estimators passed to the cross-validation search.\n",
    "custom_ml = ['lgr','svm','mlp','rf','ada','gb','xgb','lsvc','sgd','hgboost','rgcv']\n",
    "\n",
    "clf_cv_demo = dynaClassifier(custom_estimators = custom_ml,random_state = 13,cv_num = 5)\n",
    "\n",
    "clf_cv_demo.fit(tr_features,tr_labels)\n",
    "\n",
    "# Reload one pickled model per estimator name. Iterating over custom_ml keeps\n",
    "# this in sync with the estimators that were actually searched.\n",
    "# NOTE(review): the ./pkl files are presumably written by fit() above -- confirm.\n",
    "# joblib.load unpickles its input, so only load files you produced yourself.\n",
    "models = {\n",
    "    name: joblib.load('./pkl/{}_clf_model.pkl'.format(name))\n",
    "    for name in custom_ml\n",
    "}\n",
    "\n",
    "# Evaluate every reloaded model on the validation set. A failing model is\n",
    "# reported by name together with the error, and the loop continues.\n",
    "for name, mdl in models.items():\n",
    "    try:\n",
    "        ml_evl = evaluate_model(model_type = \"cls\")\n",
    "        ml_evl.fit(name, mdl, val_features, val_labels)\n",
    "    except Exception as err:\n",
    "        print(f\"Failed to evaluate the {name} model: {err}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left  [####################] 100.0%\n\n    *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds:\nBest Parameters: {'fit_intercept': 'False'}\n\nBest CV Score: 0.803456180567801\n\nlgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 3.0s\nsvm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.4s\nmlp -- Accuracy: 0.77 / Precision: 0.7 / Recall: 0.646 / Latency: 5.6s\nrf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 38.2s\nada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 21.1s\ngb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 4.0s\nxgb -- Accuracy: 0.82 / Precision: 0.811 / Recall: 0.662 / Latency: 2.0s\nlsvc -- Accuracy: 0.747 / Precision: 0.661 / Recall: 0.631 / Latency: 5.2s\nsgd -- Accuracy: 0.64 / Precision: 0.6 / Recall: 0.046 / Latency: 0.0s\nhgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 5.3s\nrgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 6.3s\n"
    }
   ],
   "source": [
    "# Fast classification demo: cross-validated model selection with\n",
    "# fastClassifier (n_comb = 12), followed by an evaluation of each saved model\n",
    "# on the validation set.\n",
    "import pandas as pd\n",
    "from optimalflow.autoCV import fastClassifier,evaluate_model\n",
    "import joblib\n",
    "\n",
    "# Load the train / validation / test feature and label CSVs.\n",
    "tr_features = pd.read_csv('./data/classification/train_features.csv')\n",
    "tr_labels = pd.read_csv('./data/classification/train_labels.csv')\n",
    "val_features = pd.read_csv('./data/classification/val_features.csv')\n",
    "val_labels = pd.read_csv('./data/classification/val_labels.csv')\n",
    "te_features = pd.read_csv('./data/classification/test_features.csv')\n",
    "te_labels = pd.read_csv('./data/classification/test_labels.csv')\n",
    "\n",
    "# Short names of the estimators passed to the cross-validation search.\n",
    "custom_ml = ['lgr','svm','mlp','rf','ada','gb','xgb','lsvc','sgd','hgboost','rgcv']\n",
    "\n",
    "clf_cv_demo = fastClassifier(custom_estimators = custom_ml,random_state = 13,cv_num = 5,n_comb = 12)\n",
    "\n",
    "clf_cv_demo.fit(tr_features,tr_labels)\n",
    "\n",
    "# Reload one pickled model per estimator name. Iterating over custom_ml keeps\n",
    "# this in sync with the estimators that were actually searched.\n",
    "# NOTE(review): the ./pkl files are presumably written by fit() above -- confirm.\n",
    "# joblib.load unpickles its input, so only load files you produced yourself.\n",
    "models = {\n",
    "    name: joblib.load('./pkl/{}_clf_model.pkl'.format(name))\n",
    "    for name in custom_ml\n",
    "}\n",
    "\n",
    "# Evaluate every reloaded model on the validation set. A failing model is\n",
    "# reported by name together with the error, and the loop continues.\n",
    "for name, mdl in models.items():\n",
    "    try:\n",
    "        ml_evl = evaluate_model(model_type = \"cls\")\n",
    "        ml_evl.fit(name, mdl, val_features, val_labels)\n",
    "    except Exception as err:\n",
    "        print(f\"Failed to evaluate the {name} model: {err}\")"
   ]
  }
 ]
}