{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from scipy.linalg import inv, svd\n", "from scipy.optimize import minimize\n", "from scipy.sparse.linalg import lsqr\n", "from sklearn.datasets import load_boston\n", "from sklearn.linear_model import Ridge as skRidge" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Implementation 1\n", "- based on scipy.sparse.linalg.lsqr\n", "- center the dataset and calculate the intercept manually\n", "- similar to scikit-learn solver='lsqr'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "class Ridge:\n", " def __init__(self, alpha=1.0):\n", " self.alpha = alpha\n", "\n", " def fit(self, X, y):\n", " X_mean = np.mean(X, axis=0)\n", " y_mean = np.mean(y)\n", " X_train = X - X_mean\n", " y_train = y - y_mean\n", " info = lsqr(X_train, y_train, np.sqrt(self.alpha))\n", " self.coef_ = info[0]\n", " self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n", " return self\n", "\n", " def predict(self, X):\n", " y_pred = np.dot(X, self.coef_) + self.intercept_\n", " return y_pred" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "X, y = load_boston(return_X_y=True)\n", "for alpha in [0.1, 1, 10]:\n", " reg1 = Ridge(alpha=alpha).fit(X, y)\n", " reg2 = skRidge(alpha=alpha).fit(X, y)\n", " assert np.allclose(reg1.coef_, reg2.coef_)\n", " assert np.isclose(reg1.intercept_, reg2.intercept_)\n", " pred1 = reg1.predict(X)\n", " pred2 = reg2.predict(X)\n", " assert np.allclose(pred1, pred2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Implementation 2\n", "- based on formula\n", "- center the dataset and calculate the intercept manually\n", "- similar to scikit-learn solver='cholesky' when n_features <= n_samples" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "class Ridge:\n", " def __init__(self, alpha=1.0):\n", " self.alpha = alpha\n", "\n", " def fit(self, X, y):\n", " X_mean = np.mean(X, axis=0)\n", " y_mean = np.mean(y)\n", " X_train = X - X_mean\n", " y_train = y - y_mean\n", " A = np.dot(X_train.T, X_train) + np.diag(np.full(X_train.shape[1], alpha))\n", " Xy = np.dot(X_train.T, y_train)\n", " self.coef_ = np.dot(inv(A), Xy).ravel()\n", " self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n", " return self\n", "\n", " def predict(self, X):\n", " y_pred = np.dot(X, self.coef_) + self.intercept_\n", " return y_pred" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "X, y = load_boston(return_X_y=True)\n", "for alpha in [0.1, 1, 10]:\n", " reg1 = Ridge(alpha=alpha).fit(X, y)\n", " reg2 = skRidge(alpha=alpha).fit(X, y)\n", " assert np.allclose(reg1.coef_, reg2.coef_)\n", " assert np.isclose(reg1.intercept_, reg2.intercept_)\n", " pred1 = reg1.predict(X)\n", " pred2 = reg2.predict(X)\n", " assert np.allclose(pred1, pred2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Implementation 3\n", "- based on formula\n", "- center the dataset and calculate the intercept manually\n", "- similar to scikit-learn solver='cholesky' when n_features > n_samples\n", "- ref: https://www.ics.uci.edu/~welling/classnotes/papers_class/Kernel-Ridge.pdf\n", "- ref: https://stat.fandom.com/wiki/Kernel_Ridge_Regression" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "class Ridge:\n", " def __init__(self, alpha=1.0):\n", " self.alpha = alpha\n", "\n", " def fit(self, X, y):\n", " X_mean = np.mean(X, axis=0)\n", " y_mean = np.mean(y)\n", " X_train = X - X_mean\n", " y_train = y - y_mean\n", " A = np.dot(X_train, X_train.T) + np.diag(np.full(X_train.shape[0], alpha))\n", " self.coef_ = np.dot(np.dot(X_train.T, inv(A)), y_train).ravel()\n", " self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n", " return self\n", "\n", " def predict(self, X):\n", " y_pred = np.dot(X, self.coef_) + self.intercept_\n", " return y_pred" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "X, y = load_boston(return_X_y=True)\n", "for alpha in [0.1, 1, 10]:\n", " reg1 = Ridge(alpha=alpha).fit(X, y)\n", " reg2 = skRidge(alpha=alpha).fit(X, y)\n", " assert np.allclose(reg1.coef_, reg2.coef_)\n", " assert np.isclose(reg1.intercept_, reg2.intercept_)\n", " pred1 = reg1.predict(X)\n", " pred2 = reg2.predict(X)\n", " assert np.allclose(pred1, pred2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Implementation 4\n", "- based on svd\n", "- center the dataset and calculate the intercept manually\n", "- similar to scikit-learn solver='svd'" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "class Ridge:\n", " def __init__(self, alpha=1.0):\n", " self.alpha = alpha\n", "\n", " def fit(self, X, y):\n", " X_mean = np.mean(X, axis=0)\n", " y_mean = np.mean(y)\n", " X_train = X - X_mean\n", " y_train = y - y_mean\n", " U, S, VT = svd(X_train, full_matrices=False)\n", " self.coef_ = np.dot(np.dot(np.dot(VT.T, np.diag(S / (np.square(S) + self.alpha))), U.T), y).ravel()\n", " self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n", " return self\n", "\n", " def predict(self, X):\n", " y_pred = np.dot(X, self.coef_) + self.intercept_\n", " return y_pred" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "X, y = load_boston(return_X_y=True)\n", "for alpha in [0.1, 1, 10]:\n", " reg1 = Ridge(alpha=alpha).fit(X, y)\n", " reg2 = skRidge(alpha=alpha).fit(X, y)\n", " assert np.allclose(reg1.coef_, reg2.coef_)\n", " assert np.isclose(reg1.intercept_, reg2.intercept_)\n", " pred1 = reg1.predict(X)\n", " pred2 = reg2.predict(X)\n", " assert np.allclose(pred1, pred2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Implementation 5\n", "- based on gradient decent\n", "- center the dataset and calculate the intercept manually" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def cost_gradient(theta, X, y, alpha):\n", " h = np.dot(X, theta) - y\n", " J = np.dot(h, h) / (2 * X.shape[0]) + alpha * np.dot(theta, theta) / (2 * X.shape[0])\n", " grad = np.dot(X.T, h) / X.shape[0] + alpha * theta / X.shape[0]\n", " return J, grad\n", "\n", "\n", "class Ridge:\n", " def __init__(self, alpha=1.0):\n", " self.alpha = alpha\n", "\n", " def fit(self, X, y):\n", " X_mean = np.mean(X, axis=0)\n", " y_mean = np.mean(y)\n", " X_train = X - X_mean\n", " y_train = y - y_mean\n", " theta = np.zeros(X_train.shape[1])\n", " res = minimize(fun=cost_gradient, jac=True, x0=theta,\n", " args=(X_train, y_train, alpha), method='L-BFGS-B')\n", " self.coef_ = res.x\n", " self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n", " return self\n", "\n", " def predict(self, X):\n", " y_pred = np.dot(X, self.coef_) + self.intercept_\n", " return y_pred" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "X, y = load_boston(return_X_y=True)\n", "# center and scale the dataset to improve performance\n", "X = (X - X.mean(axis=0)) / X.std(axis=0)\n", "for alpha in [0.1, 1, 10]:\n", " reg1 = Ridge(alpha=alpha).fit(X, y)\n", " reg2 = skRidge(alpha=alpha).fit(X, y)\n", " assert np.allclose(reg1.coef_, reg2.coef_, atol=1e-3)\n", " assert np.isclose(reg1.intercept_, reg2.intercept_)\n", " pred1 = reg1.predict(X)\n", " pred2 = reg2.predict(X)\n", " assert np.allclose(pred1, pred2, atol=1e-3)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }