{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from itertools import product\n",
    "from sklearn.base import clone\n",
    "from sklearn.datasets import load_boston, load_iris\n",
    "from sklearn.svm import SVC, SVR\n",
    "from sklearn.model_selection import KFold, StratifiedKFold\n",
    "from sklearn.model_selection import GridSearchCV as skGridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class GridSearchCV():\n",
    "    def __init__(self, estimator, param_grid):\n",
    "        self.estimator = estimator\n",
    "        self.param_grid = param_grid\n",
    "\n",
    "    def generate_grid(self):\n",
    "        items = sorted(self.param_grid.items())\n",
    "        keys, values = zip(*items)\n",
    "        for v in product(*values):\n",
    "            params = dict(zip(keys, v))\n",
    "            yield params\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        if self.estimator._estimator_type == \"regressor\":\n",
    "            cv = KFold()\n",
    "        else:  # estimator._estimator_type == \"classifier\"\n",
    "            cv = StratifiedKFold()\n",
    "        train_scores, test_scores  = [], []\n",
    "        params = []\n",
    "        for i, cur_param in enumerate(self.generate_grid()):\n",
    "            cur_train_score, cur_test_score = [], []\n",
    "            for j, (train, test) in enumerate(cv.split(X, y)):\n",
    "                est = clone(self.estimator)\n",
    "                est.set_params(**cur_param)\n",
    "                est.fit(X[train], y[train])\n",
    "                cur_train_score.append(est.score(X[train], y[train]))\n",
    "                cur_test_score.append(est.score(X[test], y[test]))\n",
    "            params.append(cur_param)\n",
    "            train_scores.append(cur_train_score)\n",
    "            test_scores.append(cur_test_score)\n",
    "        train_scores = np.array(train_scores)\n",
    "        test_scores = np.array(test_scores)\n",
    "        cv_results = {}\n",
    "        for i in range(cv.n_splits):\n",
    "            cv_results[\"split\" + str(i) + \"_train_score\"] = train_scores[:, i]\n",
    "            cv_results[\"split\" + str(i) + \"_test_score\"] = test_scores[:, i]\n",
    "        cv_results[\"mean_train_score\"] = np.mean(train_scores, axis=1)\n",
    "        cv_results[\"std_train_score\"] = np.std(train_scores, axis=1)\n",
    "        cv_results[\"mean_test_score\"] = np.mean(test_scores, axis=1)\n",
    "        cv_results[\"std_test_score\"] = np.std(test_scores, axis=1)\n",
    "        cv_results['params'] = params\n",
    "        self.cv_results_ = cv_results\n",
    "        self.best_params_ = cv_results['params'][np.argmax(cv_results['mean_test_score'])]\n",
    "        self.best_estimator_ = clone(self.estimator)\n",
    "        self.best_estimator_.set_params(**self.best_params_)\n",
    "        self.best_estimator_.fit(X, y)\n",
    "        return self\n",
    "\n",
    "    def decision_function(self, X):\n",
    "        return self.best_estimator_.decision_function(X)\n",
    "\n",
    "    def predict(self, X):\n",
    "        return self.best_estimator_.predict(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# regressor: the hand-written search must reproduce scikit-learn's numbers\n",
    "X, y = load_boston(return_X_y=True)\n",
    "param_grid = dict(C=[0.1, 1, 10], gamma=[0.1, 1, 10])\n",
    "clf1 = GridSearchCV(SVR(), param_grid).fit(X, y)\n",
    "clf2 = skGridSearchCV(SVR(), param_grid, return_train_score=True).fit(X, y)\n",
    "\n",
    "# per-fold scores, train before test for each of the five folds\n",
    "for i in range(5):\n",
    "    for suffix in (\"_train_score\", \"_test_score\"):\n",
    "        key = \"split\" + str(i) + suffix\n",
    "        assert np.allclose(clf1.cv_results_[key], clf2.cv_results_[key])\n",
    "\n",
    "# aggregated scores over the folds\n",
    "for key in (\"mean_train_score\", \"std_train_score\",\n",
    "            \"mean_test_score\", \"std_test_score\"):\n",
    "    assert np.allclose(clf1.cv_results_[key], clf2.cv_results_[key])\n",
    "\n",
    "# both searches must select the same candidate ...\n",
    "for name in (\"C\", \"gamma\"):\n",
    "    assert np.allclose(clf1.best_params_[name], clf2.best_params_[name])\n",
    "\n",
    "# ... and the refitted models must predict the same values\n",
    "pred1 = clf1.predict(X)\n",
    "pred2 = clf2.predict(X)\n",
    "assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# classification: same comparison, plus the decision function of the winner\n",
    "X, y = load_iris(return_X_y=True)\n",
    "param_grid = dict(C=[0.1, 1, 10], gamma=[0.1, 1, 10])\n",
    "clf1 = GridSearchCV(SVC(random_state=0), param_grid).fit(X, y)\n",
    "clf2 = skGridSearchCV(SVC(random_state=0), param_grid, return_train_score=True).fit(X, y)\n",
    "\n",
    "# per-fold scores, train before test for each of the five folds\n",
    "for i in range(5):\n",
    "    for suffix in (\"_train_score\", \"_test_score\"):\n",
    "        key = \"split\" + str(i) + suffix\n",
    "        assert np.allclose(clf1.cv_results_[key], clf2.cv_results_[key])\n",
    "\n",
    "# aggregated scores over the folds\n",
    "for key in (\"mean_train_score\", \"std_train_score\",\n",
    "            \"mean_test_score\", \"std_test_score\"):\n",
    "    assert np.allclose(clf1.cv_results_[key], clf2.cv_results_[key])\n",
    "\n",
    "# both searches must select the same candidate\n",
    "for name in (\"C\", \"gamma\"):\n",
    "    assert np.allclose(clf1.best_params_[name], clf2.best_params_[name])\n",
    "\n",
    "# the refitted models must agree on decision values and on labels\n",
    "prob1 = clf1.decision_function(X)\n",
    "prob2 = clf2.decision_function(X)\n",
    "assert np.allclose(prob1, prob2)\n",
    "pred1 = clf1.predict(X)\n",
    "pred2 = clf2.predict(X)\n",
    "assert np.array_equal(pred1, pred2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev",
   "language": "python",
   "name": "dev"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}