{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.linalg import inv, svd\n",
    "from scipy.optimize import minimize\n",
    "from scipy.sparse.linalg import lsqr\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.linear_model import Ridge as skRidge"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 1\n",
    "- based on scipy.sparse.linalg.lsqr\n",
    "- center the dataset and calculate the intercept manually\n",
    "- similar to scikit-learn solver='lsqr'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Ridge:\n",
    "    def __init__(self, alpha=1.0):\n",
    "        self.alpha = alpha\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        X_mean = np.mean(X, axis=0)\n",
    "        y_mean = np.mean(y)\n",
    "        X_train = X - X_mean\n",
    "        y_train = y - y_mean\n",
    "        info = lsqr(X_train, y_train, np.sqrt(self.alpha))\n",
    "        self.coef_ = info[0]\n",
    "        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_boston(return_X_y=True)\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    reg1 = Ridge(alpha=alpha).fit(X, y)\n",
    "    reg2 = skRidge(alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(reg1.coef_, reg2.coef_)\n",
    "    assert np.isclose(reg1.intercept_, reg2.intercept_)\n",
    "    pred1 = reg1.predict(X)\n",
    "    pred2 = reg2.predict(X)\n",
    "    assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 2\n",
    "- based on formula\n",
    "- center the dataset and calculate the intercept manually\n",
    "- similar to scikit-learn solver='cholesky' when n_features <= n_samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Ridge:\n",
    "    def __init__(self, alpha=1.0):\n",
    "        self.alpha = alpha\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        X_mean = np.mean(X, axis=0)\n",
    "        y_mean = np.mean(y)\n",
    "        X_train = X - X_mean\n",
    "        y_train = y - y_mean\n",
    "        A = np.dot(X_train.T, X_train) + np.diag(np.full(X_train.shape[1], alpha))\n",
    "        Xy = np.dot(X_train.T, y_train)\n",
    "        self.coef_ = np.dot(inv(A), Xy).ravel()\n",
    "        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_boston(return_X_y=True)\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    reg1 = Ridge(alpha=alpha).fit(X, y)\n",
    "    reg2 = skRidge(alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(reg1.coef_, reg2.coef_)\n",
    "    assert np.isclose(reg1.intercept_, reg2.intercept_)\n",
    "    pred1 = reg1.predict(X)\n",
    "    pred2 = reg2.predict(X)\n",
    "    assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 3\n",
    "- based on formula\n",
    "- center the dataset and calculate the intercept manually\n",
    "- similar to scikit-learn solver='cholesky' when n_features > n_samples\n",
    "- ref: https://www.ics.uci.edu/~welling/classnotes/papers_class/Kernel-Ridge.pdf\n",
    "- ref: https://stat.fandom.com/wiki/Kernel_Ridge_Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Ridge:\n",
    "    def __init__(self, alpha=1.0):\n",
    "        self.alpha = alpha\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        X_mean = np.mean(X, axis=0)\n",
    "        y_mean = np.mean(y)\n",
    "        X_train = X - X_mean\n",
    "        y_train = y - y_mean\n",
    "        A = np.dot(X_train, X_train.T) + np.diag(np.full(X_train.shape[0], alpha))\n",
    "        self.coef_ = np.dot(np.dot(X_train.T, inv(A)), y_train).ravel()\n",
    "        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_boston(return_X_y=True)\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    reg1 = Ridge(alpha=alpha).fit(X, y)\n",
    "    reg2 = skRidge(alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(reg1.coef_, reg2.coef_)\n",
    "    assert np.isclose(reg1.intercept_, reg2.intercept_)\n",
    "    pred1 = reg1.predict(X)\n",
    "    pred2 = reg2.predict(X)\n",
    "    assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 4\n",
    "- based on svd\n",
    "- center the dataset and calculate the intercept manually\n",
    "- similar to scikit-learn solver='svd'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Ridge:\n",
    "    def __init__(self, alpha=1.0):\n",
    "        self.alpha = alpha\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        X_mean = np.mean(X, axis=0)\n",
    "        y_mean = np.mean(y)\n",
    "        X_train = X - X_mean\n",
    "        y_train = y - y_mean\n",
    "        U, S, VT = svd(X_train, full_matrices=False)\n",
    "        self.coef_ = np.dot(np.dot(np.dot(VT.T, np.diag(S / (np.square(S) + self.alpha))), U.T), y).ravel()\n",
    "        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_boston(return_X_y=True)\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    reg1 = Ridge(alpha=alpha).fit(X, y)\n",
    "    reg2 = skRidge(alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(reg1.coef_, reg2.coef_)\n",
    "    assert np.isclose(reg1.intercept_, reg2.intercept_)\n",
    "    pred1 = reg1.predict(X)\n",
    "    pred2 = reg2.predict(X)\n",
    "    assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 5\n",
    "- based on gradient decent\n",
    "- center the dataset and calculate the intercept manually"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cost_gradient(theta, X, y, alpha):\n",
    "    h = np.dot(X, theta) - y\n",
    "    J = np.dot(h, h) / (2 * X.shape[0]) + alpha * np.dot(theta, theta) / (2 * X.shape[0])\n",
    "    grad = np.dot(X.T, h) / X.shape[0] + alpha * theta / X.shape[0]\n",
    "    return J, grad\n",
    "\n",
    "\n",
    "class Ridge:\n",
    "    def __init__(self, alpha=1.0):\n",
    "        self.alpha = alpha\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        X_mean = np.mean(X, axis=0)\n",
    "        y_mean = np.mean(y)\n",
    "        X_train = X - X_mean\n",
    "        y_train = y - y_mean\n",
    "        theta = np.zeros(X_train.shape[1])\n",
    "        res = minimize(fun=cost_gradient, jac=True, x0=theta,\n",
    "                       args=(X_train, y_train, alpha), method='L-BFGS-B')\n",
    "        self.coef_ = res.x\n",
    "        self.intercept_ = y_mean - np.dot(X_mean, self.coef_)\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_boston(return_X_y=True)\n",
    "# center and scale the dataset to improve performance\n",
    "X = (X - X.mean(axis=0)) / X.std(axis=0)\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    reg1 = Ridge(alpha=alpha).fit(X, y)\n",
    "    reg2 = skRidge(alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(reg1.coef_, reg2.coef_, atol=1e-3)\n",
    "    assert np.isclose(reg1.intercept_, reg2.intercept_)\n",
    "    pred1 = reg1.predict(X)\n",
    "    pred2 = reg2.predict(X)\n",
    "    assert np.allclose(pred1, pred2, atol=1e-3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}