{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Linear regression model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Generating a training set" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.random.seed(71)\n", "X = 2 * np.random.rand(100, 1)\n", "y = 4 + 3 * X + np.random.randn(100, 1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Exploring the training set" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(6,4))\n", "plt.plot(X, y, 'b.')\n", "plt.xlabel('$x_1$')\n", "plt.ylabel('$y$', rotation = 0, fontsize=14)\n", "plt.axis([0, 2, 0, 15])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Computing model's parameters" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_b = np.c_[np.ones((100, 1)), X]\n", "theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_new = np.array([[0], [2]])\n", "# add x0 = 1 to each instance\n", "X_new_b = np.c_[np.ones((2, 1)), X_new]\n", "y_predict = X_new_b @ theta\n", "y_predict" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.plot(X_new, y_predict, 'r-', label = \"Predictions\")\n", "plt.plot(X, y, 'b.')\n", "plt.axis([0, 2, 0, 15])\n", "plt.xlabel(\"$x_1$\")\n", "plt.ylabel(\"$y$\", rotation = 0)\n", "plt.legend(loc=\"upper left\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sklearn\n", "assert sklearn.__version__ >= '0.21.3'\n", "\n", "from sklearn.linear_model import LinearRegression" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lm = LinearRegression()\n", "lm.fit(X, y)\n", "lm.intercept_, lm.coef_" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lm.predict(X_new)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Linear regression using batch gradient descent" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "alpha = 0.1\n", "\n", "# limit the number of iterations to avoid to take long time to search for the optimal value, i.e,, only good enough\n", "n_iterations = 1000\n", "\n", "m, n = X.shape\n", "# randommly initiliazes theta\n", "theta = np.random.randn(2, 1)\n", "\n", "for iteration in range(n_iterations):\n", " gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)\n", " theta = theta - alpha * gradients" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_new_b @ theta" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4.1. Visualizing the gradient descent" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta_path_bgd = []\n", "\n", "def plot_gradient_descent(theta, alpha, theta_path=None):\n", " m = len(X_b)\n", " plt.plot(X, y, \"b.\")\n", " n_iterations = 1000\n", " for iteration in range(n_iterations):\n", " if iteration < 10:\n", " y_predict = X_new_b.dot(theta)\n", " style = \"b-\" if iteration > 0 else \"r--\"\n", " plt.plot(X_new, y_predict, style)\n", " gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)\n", " theta = theta - alpha * gradients\n", " if theta_path is not None:\n", " theta_path.append(theta)\n", " plt.xlabel(\"$x_1$\", fontsize=18)\n", " plt.axis([0, 2, 0, 15])\n", " plt.title(r\"$\\eta = {}$\".format(alpha), fontsize=16)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.random.seed(42)\n", "theta = np.random.randn(2,1)\n", "\n", "plt.figure(figsize=(10,4))\n", "plt.subplot(131); plot_gradient_descent(theta, alpha=0.02)\n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18)\n", "plt.subplot(132); plot_gradient_descent(theta, alpha=0.1, theta_path=theta_path_bgd)\n", "plt.subplot(133); plot_gradient_descent(theta, alpha=0.5)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Stochastic Gradient Descent" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta_path_sgd = []\n", "m = len(X_b)\n", "np.random.seed(42)\n", "\n", "n_epochs = 50\n", "\n", "# learning schedule hyperparameters\n", "t0, t1 = 5, 50 " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def learning_schedule(t):\n", " return t0 / (t + t1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta = np.random.randn(2,1) # random initialization\n", "\n", "for epoch in range(n_epochs):\n", " for i in range(m):\n", " if epoch == 0 and i < 20: \n", " y_predict = X_new_b.dot(theta) \n", " style = \"b-\" if i > 0 else \"r--\" \n", " plt.plot(X_new, y_predict, style) \n", " random_index = np.random.randint(m)\n", " xi = X_b[random_index:random_index+1]\n", " yi = y[random_index:random_index+1]\n", " gradients = 2 * xi.T.dot(xi.dot(theta) - yi)\n", " eta = learning_schedule(epoch * m + i)\n", " theta = theta - eta * gradients\n", " theta_path_sgd.append(theta) \n", "\n", "plt.plot(X, y, \"b.\") \n", "plt.xlabel(\"$x_1$\", fontsize=18) \n", "plt.ylabel(\"$y$\", rotation=0, fontsize=18) \n", "plt.axis([0, 2, 0, 15]) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import SGDRegressor\n", "\n", "sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1, random_state=42)\n", "sgd_reg.fit(X, y.ravel())\n", "\n", "sgd_reg.intercept_, sgd_reg.coef_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mini-batch gradient descent" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta_path_mgd = []\n", "\n", "n_iterations = 50\n", "minibatch_size = 20\n", "\n", "np.random.seed(42)\n", "theta = np.random.randn(2,1) # random initialization\n", "\n", "t0, t1 = 200, 1000" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def learning_schedule(t):\n", " return t0 / (t + t1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t = 0\n", "for epoch in range(n_iterations):\n", " shuffled_indices = np.random.permutation(m)\n", " X_b_shuffled = X_b[shuffled_indices]\n", " y_shuffled = y[shuffled_indices]\n", " for i in range(0, m, minibatch_size):\n", " t += 1\n", " xi = X_b_shuffled[i:i+minibatch_size]\n", " yi = y_shuffled[i:i+minibatch_size]\n", " gradients = 2/minibatch_size * xi.T.dot(xi.dot(theta) - yi)\n", " eta = learning_schedule(t)\n", " theta = theta - eta * gradients\n", " theta_path_mgd.append(theta)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "theta_path_bgd = np.array(theta_path_bgd)\n", "theta_path_sgd = np.array(theta_path_sgd)\n", "theta_path_mgd = np.array(theta_path_mgd)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(12,5))\n", "plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], \"r-s\", linewidth=1, label=\"Stochastic\")\n", "plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], \"g-+\", linewidth=2, label=\"Mini-batch\")\n", "plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], \"b-o\", linewidth=3, label=\"Batch\")\n", "plt.legend(loc=\"upper right\", fontsize=16)\n", "plt.xlabel(r\"$\\theta_0$\", fontsize=20)\n", "plt.ylabel(r\"$\\theta_1$ \", fontsize=20, rotation=0)\n", "plt.axis([2.5, 4.5, 2.3, 3.9])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }