{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Linear regression model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Generating a training set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(71)\n",
    "X = 2 * np.random.rand(100, 1)\n",
    "y = 4 + 3 * X + np.random.randn(100, 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Exploring the training set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(6,4))\n",
    "plt.plot(X, y, 'b.')\n",
    "plt.xlabel('$x_1$')\n",
    "plt.ylabel('$y$', rotation = 0, fontsize=14)\n",
    "plt.axis([0, 2, 0, 15])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Computing model's parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_b = np.c_[np.ones((100, 1)), X]\n",
    "theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_new = np.array([[0], [2]])\n",
    "# add x0 = 1 to each instance\n",
    "X_new_b = np.c_[np.ones((2, 1)), X_new]\n",
    "y_predict = X_new_b @ theta\n",
    "y_predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(X_new, y_predict, 'r-', label = \"Predictions\")\n",
    "plt.plot(X, y, 'b.')\n",
    "plt.axis([0, 2, 0, 15])\n",
    "plt.xlabel(\"$x_1$\")\n",
    "plt.ylabel(\"$y$\", rotation = 0)\n",
    "plt.legend(loc=\"upper left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn\n",
    "assert sklearn.__version__ >= '0.21.3'\n",
    "\n",
    "from sklearn.linear_model import LinearRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lm = LinearRegression()\n",
    "lm.fit(X, y)\n",
    "lm.intercept_, lm.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lm.predict(X_new)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Linear regression using batch gradient descent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "alpha = 0.1\n",
    "\n",
    "# limit the number of iterations to avoid to take long time to search for the optimal value, i.e,, only good enough\n",
    "n_iterations = 1000\n",
    "\n",
    "m, n = X.shape\n",
    "# randommly initiliazes theta\n",
    "theta = np.random.randn(2, 1)\n",
    "\n",
    "for iteration in range(n_iterations):\n",
    "    gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)\n",
    "    theta = theta - alpha * gradients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_new_b @ theta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.1. Visualizing the gradient descent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta_path_bgd = []\n",
    "\n",
    "def plot_gradient_descent(theta, alpha, theta_path=None):\n",
    "    m = len(X_b)\n",
    "    plt.plot(X, y, \"b.\")\n",
    "    n_iterations = 1000\n",
    "    for iteration in range(n_iterations):\n",
    "        if iteration < 10:\n",
    "            y_predict = X_new_b.dot(theta)\n",
    "            style = \"b-\" if iteration > 0 else \"r--\"\n",
    "            plt.plot(X_new, y_predict, style)\n",
    "        gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n",
    "        theta = theta - alpha * gradients\n",
    "        if theta_path is not None:\n",
    "            theta_path.append(theta)\n",
    "    plt.xlabel(\"$x_1$\", fontsize=18)\n",
    "    plt.axis([0, 2, 0, 15])\n",
    "    plt.title(r\"$\\eta = {}$\".format(alpha), fontsize=16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(42)\n",
    "theta = np.random.randn(2,1)\n",
    "\n",
    "plt.figure(figsize=(10,4))\n",
    "plt.subplot(131); plot_gradient_descent(theta, alpha=0.02)\n",
    "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n",
    "plt.subplot(132); plot_gradient_descent(theta, alpha=0.1, theta_path=theta_path_bgd)\n",
    "plt.subplot(133); plot_gradient_descent(theta, alpha=0.5)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Stochastic Gradient Descent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta_path_sgd = []\n",
    "m = len(X_b)\n",
    "np.random.seed(42)\n",
    "\n",
    "n_epochs = 50\n",
    "\n",
    "# learning schedule hyperparameters\n",
    "t0, t1 = 5, 50  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def learning_schedule(t):\n",
    "    return t0 / (t + t1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta = np.random.randn(2,1)  # random initialization\n",
    "\n",
    "for epoch in range(n_epochs):\n",
    "    for i in range(m):\n",
    "        if epoch == 0 and i < 20:                    \n",
    "            y_predict = X_new_b.dot(theta)           \n",
    "            style = \"b-\" if i > 0 else \"r--\"         \n",
    "            plt.plot(X_new, y_predict, style)        \n",
    "        random_index = np.random.randint(m)\n",
    "        xi = X_b[random_index:random_index+1]\n",
    "        yi = y[random_index:random_index+1]\n",
    "        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n",
    "        eta = learning_schedule(epoch * m + i)\n",
    "        theta = theta - eta * gradients\n",
    "        theta_path_sgd.append(theta)  \n",
    "\n",
    "plt.plot(X, y, \"b.\")                                 \n",
    "plt.xlabel(\"$x_1$\", fontsize=18)                     \n",
    "plt.ylabel(\"$y$\", rotation=0, fontsize=18)           \n",
    "plt.axis([0, 2, 0, 15])                              "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import SGDRegressor\n",
    "\n",
    "sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1, random_state=42)\n",
    "sgd_reg.fit(X, y.ravel())\n",
    "\n",
    "sgd_reg.intercept_, sgd_reg.coef_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mini-batch gradient descent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta_path_mgd = []\n",
    "\n",
    "n_iterations = 50\n",
    "minibatch_size = 20\n",
    "\n",
    "np.random.seed(42)\n",
    "theta = np.random.randn(2,1)  # random initialization\n",
    "\n",
    "t0, t1 = 200, 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def learning_schedule(t):\n",
    "    return t0 / (t + t1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = 0\n",
    "for epoch in range(n_iterations):\n",
    "    shuffled_indices = np.random.permutation(m)\n",
    "    X_b_shuffled = X_b[shuffled_indices]\n",
    "    y_shuffled = y[shuffled_indices]\n",
    "    for i in range(0, m, minibatch_size):\n",
    "        t += 1\n",
    "        xi = X_b_shuffled[i:i+minibatch_size]\n",
    "        yi = y_shuffled[i:i+minibatch_size]\n",
    "        gradients = 2/minibatch_size * xi.T.dot(xi.dot(theta) - yi)\n",
    "        eta = learning_schedule(t)\n",
    "        theta = theta - eta * gradients\n",
    "        theta_path_mgd.append(theta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "theta_path_bgd = np.array(theta_path_bgd)\n",
    "theta_path_sgd = np.array(theta_path_sgd)\n",
    "theta_path_mgd = np.array(theta_path_mgd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,5))\n",
    "plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], \"r-s\", linewidth=1, label=\"Stochastic\")\n",
    "plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], \"g-+\", linewidth=2, label=\"Mini-batch\")\n",
    "plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], \"b-o\", linewidth=3, label=\"Batch\")\n",
    "plt.legend(loc=\"upper right\", fontsize=16)\n",
    "plt.xlabel(r\"$\\theta_0$\", fontsize=20)\n",
    "plt.ylabel(r\"$\\theta_1$   \", fontsize=20, rotation=0)\n",
    "plt.axis([2.5, 4.5, 2.3, 3.9])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}