{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.linear_model import SGDRegressor as skSGDRegressor"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 1\n",
    "- scikit-learn loss = \"squared_loss\", penalty=\"l2\"/\"none\"\n",
    "- similar to sklearn.linear_model.LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _loss(x, y, coef, intercept):\n",
    "    p = np.dot(x, coef) + intercept \n",
    "    return 0.5 * (p - y) * (p - y)\n",
    "\n",
    "def _grad(x, y, coef, intercept):\n",
    "    p = np.dot(x, coef) + intercept\n",
    "    # clip gradient (consistent with scikit-learn)\n",
    "    dloss = np.clip(p - y, -1e12, 1e12)\n",
    "    coef_grad = dloss * x\n",
    "    intercept_grad = dloss\n",
    "    return coef_grad, intercept_grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGDRegressor():\n",
    "    def __init__(self, penalty=\"l2\", alpha=0.0001, max_iter=1000, tol=1e-3,\n",
    "                 shuffle=True, random_state=0,\n",
    "                 eta0=0.01, power_t=0.25, n_iter_no_change=5):\n",
    "        self.penalty = penalty\n",
    "        self.alpha = alpha\n",
    "        self.max_iter = max_iter\n",
    "        self.tol = tol\n",
    "        self.shuffle = shuffle\n",
    "        self.random_state = random_state\n",
    "        self.eta0 = eta0\n",
    "        self.power_t = power_t\n",
    "        self.n_iter_no_change = n_iter_no_change\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        coef = np.zeros(X.shape[1])\n",
    "        intercept = 0\n",
    "        best_loss = np.inf\n",
    "        no_improvement_count = 0\n",
    "        t = 1\n",
    "        rng = np.random.RandomState(self.random_state)\n",
    "        for epoch in range(self.max_iter):\n",
    "            # different from how data is shuffled in scikit-learn\n",
    "            if self.shuffle:\n",
    "                ind = rng.permutation(X.shape[0])\n",
    "                X, y = X[ind], y[ind]\n",
    "            sumloss = 0\n",
    "            for i in range(X.shape[0]):\n",
    "                sumloss += _loss(X[i], y[i], coef, intercept)\n",
    "                eta = self.eta0 / np.power(t, self.power_t)\n",
    "                coef_grad, intercept_grad = _grad(X[i], y[i], coef, intercept)\n",
    "                if self.penalty == \"l2\":\n",
    "                    coef *= 1 - eta * self.alpha\n",
    "                coef -= eta * coef_grad\n",
    "                intercept -= eta * intercept_grad\n",
    "                t += 1\n",
    "            if sumloss > best_loss - self.tol * X.shape[0]:\n",
    "                no_improvement_count += 1\n",
    "            else:\n",
    "                no_improvement_count = 0\n",
    "            if no_improvement_count == self.n_iter_no_change:\n",
    "                break\n",
    "            if sumloss < best_loss:\n",
    "                best_loss = sumloss\n",
    "        self.coef_ = coef\n",
    "        self.intercept_ = np.array([intercept])\n",
    "        self.n_iter_ = epoch + 1\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shuffle=False penalty=\"none\"\n",
    "X, y = load_boston(return_X_y=True)\n",
    "X = StandardScaler().fit_transform(X)\n",
    "clf1 = SGDRegressor(shuffle=False, penalty=\"none\").fit(X, y)\n",
    "clf2 = skSGDRegressor(shuffle=False, penalty=\"none\").fit(X, y)\n",
    "assert np.allclose(clf1.coef_, clf2.coef_)\n",
    "assert np.allclose(clf1.intercept_, clf2.intercept_)\n",
    "pred1 = clf1.predict(X)\n",
    "pred2 = clf2.predict(X)\n",
    "assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shuffle=False penalty=\"l2\"\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    X, y = load_boston(return_X_y=True)\n",
    "    X = StandardScaler().fit_transform(X)\n",
    "    clf1 = SGDRegressor(shuffle=False, alpha=alpha).fit(X, y)\n",
    "    clf2 = skSGDRegressor(shuffle=False, alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(clf1.coef_, clf2.coef_)\n",
    "    assert np.allclose(clf1.intercept_, clf2.intercept_)\n",
    "    pred1 = clf1.predict(X)\n",
    "    pred2 = clf2.predict(X)\n",
    "    assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementation 2\n",
    "- scikit-learn loss = \"huber\", penalty=\"l2\"/\"none\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _loss(x, y, coef, intercept, epsilon):\n",
    "    p = np.dot(x, coef) + intercept\n",
    "    r = p - y\n",
    "    if np.abs(r) <= epsilon:\n",
    "        return 0.5 * r * r\n",
    "    else:\n",
    "        return epsilon * (np.abs(r) - 0.5 * epsilon)\n",
    "\n",
    "def _grad(x, y, coef, intercept, epsilon):\n",
    "    p = np.dot(x, coef) + intercept\n",
    "    r = p - y\n",
    "    if np.abs(r) <= epsilon:\n",
    "        dloss = r\n",
    "    elif r > epsilon:\n",
    "        dloss = epsilon\n",
    "    else:\n",
    "        dloss = -epsilon\n",
    "    dloss = np.clip(dloss, -1e12, 1e12)\n",
    "    coef_grad = dloss * x\n",
    "    intercept_grad = dloss\n",
    "    return coef_grad, intercept_grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGDRegressor():\n",
    "    def __init__(self, penalty=\"l2\", alpha=0.0001, max_iter=1000, tol=1e-3,\n",
    "                 shuffle=True, epsilon=0.1, random_state=0,\n",
    "                 eta0=0.01, power_t=0.25, n_iter_no_change=5):\n",
    "        self.penalty = penalty\n",
    "        self.alpha = alpha\n",
    "        self.max_iter = max_iter\n",
    "        self.tol = tol\n",
    "        self.shuffle = shuffle\n",
    "        self.epsilon = epsilon\n",
    "        self.random_state = random_state\n",
    "        self.eta0 = eta0\n",
    "        self.power_t = power_t\n",
    "        self.n_iter_no_change = n_iter_no_change\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        coef = np.zeros(X.shape[1])\n",
    "        intercept = 0\n",
    "        best_loss = np.inf\n",
    "        no_improvement_count = 0\n",
    "        t = 1\n",
    "        rng = np.random.RandomState(self.random_state)\n",
    "        for epoch in range(self.max_iter):\n",
    "            # different from how data is shuffled in scikit-learn\n",
    "            if self.shuffle:\n",
    "                ind = rng.permutation(X.shape[0])\n",
    "                X, y = X[ind], y[ind]\n",
    "            sumloss = 0\n",
    "            for i in range(X.shape[0]):\n",
    "                sumloss += _loss(X[i], y[i], coef, intercept, self.epsilon)\n",
    "                eta = self.eta0 / np.power(t, self.power_t)\n",
    "                coef_grad, intercept_grad = _grad(X[i], y[i], coef, intercept, self.epsilon)\n",
    "                if self.penalty == \"l2\":\n",
    "                    coef *= 1 - eta * self.alpha\n",
    "                coef -= eta * coef_grad\n",
    "                intercept -= eta * intercept_grad\n",
    "                t += 1\n",
    "            if sumloss > best_loss - self.tol * X.shape[0]:\n",
    "                no_improvement_count += 1\n",
    "            else:\n",
    "                no_improvement_count = 0\n",
    "            if no_improvement_count == self.n_iter_no_change:\n",
    "                break\n",
    "            if sumloss < best_loss:\n",
    "                best_loss = sumloss\n",
    "        self.coef_ = coef\n",
    "        self.intercept_ = np.array([intercept])\n",
    "        self.n_iter_ = epoch + 1\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        y_pred = np.dot(X, self.coef_) + self.intercept_\n",
    "        return y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shuffle=False penalty=\"none\"\n",
    "X, y = load_boston(return_X_y=True)\n",
    "X = StandardScaler().fit_transform(X)\n",
    "clf1 = SGDRegressor(shuffle=False, penalty=\"none\").fit(X, y)\n",
    "clf2 = skSGDRegressor(loss=\"huber\", shuffle=False, penalty=\"none\").fit(X, y)\n",
    "assert np.allclose(clf1.coef_, clf2.coef_)\n",
    "assert np.allclose(clf1.intercept_, clf2.intercept_)\n",
    "pred1 = clf1.predict(X)\n",
    "pred2 = clf2.predict(X)\n",
    "assert np.allclose(pred1, pred2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# shuffle=False penalty=\"l2\"\n",
    "for alpha in [0.1, 1, 10]:\n",
    "    X, y = load_boston(return_X_y=True)\n",
    "    X = StandardScaler().fit_transform(X)\n",
    "    clf1 = SGDRegressor(shuffle=False, alpha=alpha).fit(X, y)\n",
    "    clf2 = skSGDRegressor(loss=\"huber\", shuffle=False, alpha=alpha).fit(X, y)\n",
    "    assert np.allclose(clf1.coef_, clf2.coef_)\n",
    "    assert np.allclose(clf1.intercept_, clf2.intercept_)\n",
    "    pred1 = clf1.predict(X)\n",
    "    pred2 = clf2.predict(X)\n",
    "    assert np.allclose(pred1, pred2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev",
   "language": "python",
   "name": "dev"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}