{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Grid search with cross-validation, from scratch\n",
    "\n",
    "A minimal re-implementation of scikit-learn's `GridSearchCV`. Its per-split scores, aggregated scores, selected parameters and predictions are asserted to match the reference implementation on one regression task and one classification task."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from itertools import product\n",
    "\n",
    "import numpy as np\n",
    "from sklearn.base import clone, is_classifier\n",
    "from sklearn.datasets import load_diabetes, load_iris\n",
    "from sklearn.model_selection import GridSearchCV as skGridSearchCV\n",
    "from sklearn.model_selection import KFold, StratifiedKFold\n",
    "from sklearn.svm import SVC, SVR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shared configuration: both implementations get the same folds and grid.\n",
    "N_SPLITS = 5\n",
    "PARAM_GRID = {\"C\": [0.1, 1, 10], \"gamma\": [0.1, 1, 10]}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class GridSearchCV():\n",
    "    \"\"\"Exhaustive search over a parameter grid, scored by k-fold cross-validation.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    estimator : estimator object\n",
    "        Cloned and refitted for every parameter combination and every fold.\n",
    "    param_grid : dict\n",
    "        Maps each parameter name to the sequence of values to try.\n",
    "    cv : int, default=5\n",
    "        Number of folds. Classifiers are split with StratifiedKFold,\n",
    "        every other estimator with KFold.\n",
    "\n",
    "    Attributes\n",
    "    ----------\n",
    "    cv_results_ : dict\n",
    "        Per-split train/test scores, their mean and std over the splits,\n",
    "        and the evaluated settings under the key \"params\".\n",
    "    best_params_ : dict\n",
    "        The setting with the highest mean test score (first one on ties).\n",
    "    best_estimator_ : estimator object\n",
    "        Clone of ``estimator`` with ``best_params_``, refitted on all of X, y.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, estimator, param_grid, cv=5):\n",
    "        self.estimator = estimator\n",
    "        self.param_grid = param_grid\n",
    "        self.cv = cv\n",
    "\n",
    "    def generate_grid(self):\n",
    "        \"\"\"Yield one dict per parameter combination, iterating keys in sorted order.\"\"\"\n",
    "        items = sorted(self.param_grid.items())\n",
    "        if not items:\n",
    "            # zip(*[]) cannot be unpacked into (keys, values); an empty grid\n",
    "            # means \"evaluate the estimator exactly as configured\".\n",
    "            yield {}\n",
    "            return\n",
    "        keys, values = zip(*items)\n",
    "        for combination in product(*values):\n",
    "            yield dict(zip(keys, combination))\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        \"\"\"Score every grid point by cross-validation, then refit the best one.\n",
    "\n",
    "        X and y are indexed with integer arrays (``X[train]``), so both must\n",
    "        support NumPy-style fancy indexing.\n",
    "\n",
    "        Returns\n",
    "        -------\n",
    "        self\n",
    "        \"\"\"\n",
    "        # Use the public helper rather than the private _estimator_type\n",
    "        # attribute. Only classifiers are stratified; everything else gets\n",
    "        # plain KFold, which is what scikit-learn does for an integer cv.\n",
    "        if is_classifier(self.estimator):\n",
    "            cv = StratifiedKFold(n_splits=self.cv)\n",
    "        else:\n",
    "            cv = KFold(n_splits=self.cv)\n",
    "\n",
    "        params, train_scores, test_scores = [], [], []\n",
    "        for cur_param in self.generate_grid():\n",
    "            cur_train_score, cur_test_score = [], []\n",
    "            for train, test in cv.split(X, y):\n",
    "                # Fresh clone per fold so no fitted state leaks between folds.\n",
    "                est = clone(self.estimator).set_params(**cur_param)\n",
    "                est.fit(X[train], y[train])\n",
    "                cur_train_score.append(est.score(X[train], y[train]))\n",
    "                cur_test_score.append(est.score(X[test], y[test]))\n",
    "            params.append(cur_param)\n",
    "            train_scores.append(cur_train_score)\n",
    "            test_scores.append(cur_test_score)\n",
    "\n",
    "        # Rows are parameter settings, columns are folds.\n",
    "        train_scores = np.array(train_scores)\n",
    "        test_scores = np.array(test_scores)\n",
    "\n",
    "        cv_results = {}\n",
    "        for i in range(cv.n_splits):\n",
    "            cv_results[\"split\" + str(i) + \"_train_score\"] = train_scores[:, i]\n",
    "            cv_results[\"split\" + str(i) + \"_test_score\"] = test_scores[:, i]\n",
    "        cv_results[\"mean_train_score\"] = np.mean(train_scores, axis=1)\n",
    "        cv_results[\"std_train_score\"] = np.std(train_scores, axis=1)\n",
    "        cv_results[\"mean_test_score\"] = np.mean(test_scores, axis=1)\n",
    "        cv_results[\"std_test_score\"] = np.std(test_scores, axis=1)\n",
    "        cv_results[\"params\"] = params\n",
    "        self.cv_results_ = cv_results\n",
    "\n",
    "        # np.argmax returns the first maximum, so ties go to the earliest setting.\n",
    "        self.best_params_ = params[np.argmax(cv_results[\"mean_test_score\"])]\n",
    "        self.best_estimator_ = clone(self.estimator).set_params(**self.best_params_)\n",
    "        self.best_estimator_.fit(X, y)\n",
    "        return self\n",
    "\n",
    "    def decision_function(self, X):\n",
    "        \"\"\"Return ``decision_function(X)`` of the refitted best estimator.\"\"\"\n",
    "        return self.best_estimator_.decision_function(X)\n",
    "\n",
    "    def predict(self, X):\n",
    "        \"\"\"Return ``predict(X)`` of the refitted best estimator.\"\"\"\n",
    "        return self.best_estimator_.predict(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check against scikit-learn\n",
    "\n",
    "Both searches use the same grid and the same number of unshuffled folds, so every score is expected to agree up to floating-point tolerance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def assert_matches_sklearn(ours, ref, n_splits=N_SPLITS):\n",
    "    \"\"\"Assert that ``ours`` reproduces the scores and best parameters of ``ref``.\"\"\"\n",
    "    keys = []\n",
    "    for i in range(n_splits):\n",
    "        keys.append(\"split\" + str(i) + \"_train_score\")\n",
    "        keys.append(\"split\" + str(i) + \"_test_score\")\n",
    "    keys += [\"mean_train_score\", \"std_train_score\", \"mean_test_score\", \"std_test_score\"]\n",
    "    for key in keys:\n",
    "        assert np.allclose(ours.cv_results_[key], ref.cv_results_[key]), key\n",
    "    assert ours.best_params_ == ref.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regression\n",
    "\n",
    "Uses the bundled diabetes dataset; `load_boston` was removed from scikit-learn in version 1.2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_reg, y_reg = load_diabetes(return_X_y=True)\n",
    "reg_ours = GridSearchCV(SVR(), PARAM_GRID, cv=N_SPLITS).fit(X_reg, y_reg)\n",
    "reg_ref = skGridSearchCV(\n",
    "    SVR(), PARAM_GRID, cv=N_SPLITS, return_train_score=True\n",
    ").fit(X_reg, y_reg)\n",
    "\n",
    "assert_matches_sklearn(reg_ours, reg_ref)\n",
    "assert np.allclose(reg_ours.predict(X_reg), reg_ref.predict(X_reg))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_clf, y_clf = load_iris(return_X_y=True)\n",
    "clf_ours = GridSearchCV(SVC(random_state=0), PARAM_GRID, cv=N_SPLITS).fit(X_clf, y_clf)\n",
    "clf_ref = skGridSearchCV(\n",
    "    SVC(random_state=0), PARAM_GRID, cv=N_SPLITS, return_train_score=True\n",
    ").fit(X_clf, y_clf)\n",
    "\n",
    "assert_matches_sklearn(clf_ours, clf_ref)\n",
    "assert np.allclose(clf_ours.decision_function(X_clf), clf_ref.decision_function(X_clf))\n",
    "assert np.array_equal(clf_ours.predict(X_clf), clf_ref.predict(X_clf))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev",
   "language": "python",
   "name": "dev"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}