{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Class demo 1: data generation, linear regression and gradient descent" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAO0UlEQVR4nO3df4hlZ33H8fdn3QQ6aI24U7FJZscWf7Vg2jjW0NZ2VahJhAbBQuOQ0KAMQSsW+kdKl5o/ZKEiLSKiyzRdFmGIf9SgsfgDodVtibGdSNxsslS2kV2XBHZiipbMH2WTb/+4M2ayzuy9d+fMvXOfeb9guHOe8+w534cZPvPsc849N1WFJGny7Rt3AZKkbhjoktQIA12SGmGgS1IjDHRJasT+cZ34wIEDNTs7O67TS9JEeuSRR56pqunN9o0t0GdnZ1leXh7X6SVpIiU5u9U+l1wkqREGuiQ1wkCXpEb0DfQk1yf51ySnkzye5GOb9EmSzyQ5k+Rkkht3plxJ0lYGuSh6EfjLqvp+klcAjyT5VlU9saHPLcDr177eDnx+7VWSNCJ9Z+hV9XRVfX/t+/8FTgPXXtLtNuAL1fMwcE2S13ZerSRNsqUlmJ2Ffft6r0tLnR5+qNsWk8wCvw1875Jd1wI/3rB9fq3t6e0UJ0nNWFqChQVYXe1tnz3b2waYn+/kFANfFE3ycuBLwF9U1c8u3b3JP/mF5/ImWUiynGR5ZWVluEolaZIdPvximK9bXe21d2SgQE9yFb0wX6qqBzbpch64fsP2dcBTl3aqqsWqmququenpTd/oJEltOnduuPYrMMhdLgH+EThdVX+/RbcHgTvX7na5CfhpVbncIknrZmaGa78Cg8zQfw+4A3hXkkfXvm5NcneSu9f6fA14EjgD/APw4c4qlKQWHDkCU1MvbZua6rV3pO9F0ar6dzZfI9/Yp4CPdFWUJDVn/cLn4cO9ZZaZmV6Yd3RBFMb4cC5J2nPm5zsN8Ev51n9JaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEsajx1+lOxe5BuLJI3eCB4luxc5Q5c0eiN4lOxeZKBLGr0RPEp2LzLQJY3eCB4luxcZ6JJGbwSPkt2LDHRJozc/D4uLcPAgJL3XxUUviG6Td7lIGo8dfpTsXuQMXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmN6BvoSY4luZDk1Bb7X5nkq0l+kOTxJHd1X6YkqZ9BZujHgZsvs/8jwBNVdQNwCPi7JFdvvzRJ0jD6BnpVnQCevVwX4BVJArx8re/FbsqTJA2qizX0zwJvBp4CHgM+VlUvbNYxyUKS5STLKysrHZxakrSui0B/D/Ao8KvAbwGfTfLLm3WsqsWqmququenp6Q5OLUla10Wg3wU8UD1ngB8Bb+rguJKkIXQR6OeAdwMkeQ3wRuDJDo4rSRpC388UTXI/vbtXDiQ5D9wLXAVQVUeBTwDHkzwGBLinqp7ZsYolSZvqG+hVdXuf/U8Bf9RZRZKkK+I7RSWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktSIvoGe5FiSC0lOXabPoSS
PJnk8yXe6LVGSNIhBZujHgZu32pnkGuBzwB9X1W8Cf9JJZZKkofQN9Ko6ATx7mS4fAB6oqnNr/S90VJskaQhdrKG/AXhVkm8neSTJnVt1TLKQZDnJ8srKSgenliSt6yLQ9wNvBd4LvAf4myRv2KxjVS1W1VxVzU1PT3dwaknSuv0dHOM88ExVPQc8l+QEcAPwww6OLUkaUBcz9K8A70iyP8kU8HbgdAfHlSQNoe8MPcn9wCHgQJLzwL3AVQBVdbSqTif5BnASeAG4r6q2vMVRkrQz+gZ6Vd0+QJ9PAZ/qpCJJ0hXxnaKS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjo0m6wtASzs7BvX+91aWncFWkCdfFwLknbsbQECwuwutrbPnu2tw0wPz++ujRxnKFL43b48Ithvm51tdcuDcFAl8bt3Lnh2qUtGOjSuM3MDNcubcFAl8btyBGYmnpp29RUr10agoEujdv8PCwuwsGDkPReFxe9IKqheZeLtBvMzxvg2jZn6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqRN9AT3IsyYUkp/r0e1uS55O8v7vyJEmDGmSGfhy4+XIdkrwM+CTwzQ5qkiRdgb6BXlUngGf7dPso8CXgQhdFSZKGt+019CTXAu8Djg7QdyHJcpLllZWV7Z5akrRBFxdFPw3cU1XP9+tYVYtVNVdVc9PT0x2cWpK0rotPLJoDvpgE4ABwa5KLVfXlDo4tSRrQtgO9ql63/n2S48A/G+aSNHqD3LZ4P/Bd4I1Jzif5YJK7k9y98+VpT1pagtlZ2Lev97q0NO6KpInQd4ZeVbcPerCq+rNtVSMtLcHCAqyu9rbPnu1tgx+iLPXhO0W1uxw+/GKYr1td7bVLuiwDfbfba8sP584N1y7p5wz03Wx9+eHsWah6cfmh5VCfmRmuXdLPGei72V5cfjhyBKamXto2NdVrl3RZBvputheXH+bnYXERDh6EpPe6uOgFUWkAXbyxSDtlZqa3zLJZe8vm5w1w6Qo4Q9/NXH6QNAQDfTdz+UHSEFxy2e1cfpA0IGfoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RGGOiS1AgDXZIa0TfQkxxLciHJqS32zyc5ufb1UJIbui9TktTPIDP048DNl9n/I+APq+otwCeAxQ7qkiQNqe+HRFfViSSzl9n/0IbNh4HrOqhLkjSkrtfQPwh8faudSRaSLCdZXllZ6fjUkrS3dRboSd5JL9Dv2apPVS1W1VxVzU1PT3d1akkSAyy5DCLJW4D7gFuq6iddHFOSNJxtz9CTzAAPAHdU1Q+3X5Ik6Ur0naEnuR84BBxIch64F7gKoKqOAh8HXg18LgnAxaqa26mCJUmbG+Qul9v77P8Q8KHOKpIkXRHfKSpJjTDQJakRBrokNcJAl6RGGOiS1AgDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXCQJekRhjoktQIA12SGmGgS1IjDHRJaoSBLkmNMNAlqREGuiQ1wkCXpEYY6JLUCANdkhphoEtSIwx0SWqEgS5JjTDQJakRBrokNcJAl6RG9A30JMeSXEhyaov9SfKZJGeSnExyY/dlSpL6GWSGfhy4+TL7bwFev/a1AHx++2VJkobVN9Cr6gTw7GW63AZ8oXoeBq5J8tquCpQkDaaLNfRrgR9v2D6/1vYLkiwkWU6yvLKy0sGpJUnrugj0bNJWm3WsqsWqmququenp6Q5OLUla10Wgnweu37B9HfBUB8eVJA2hi0B/ELhz7W6Xm4CfVtXTHRxXkjSE/f06JLkfOAQcSHIeuBe4CqCqjgJfA24FzgCrwF07VawkaWt9A72qbu+zv4CPdFaRJOmK+E5RSWqEgS5JjTDQJakRBro
kNcJAl6RGGOiS1AgDXZIaMVmBvrQEs7Owb1/vdWlp3BVJ0q7R941Fu8bSEiwswOpqb/vs2d42wPz8+OqSpF1icmbohw+/GObrVld77ZKkCQr0c+eGa5ekPWZyAn1mZrh2SdpjJifQjxyBqamXtk1N9dolSRMU6PPzsLgIBw9C0ntdXPSCqCStmZy7XKAX3ga4JG1qcmbokqTLMtAlqREGuiQ1wkCXpEYY6JLUiPQ+43kMJ05WgLNX+M8PAM90WM4kcMx7g2PeG7Yz5oNVNb3ZjrEF+nYkWa6quXHXMUqOeW9wzHvDTo3ZJRdJaoSBLkmNmNRAXxx3AWPgmPcGx7w37MiYJ3INXZL0iyZ1hi5JuoSBLkmN2NWBnuTmJP+V5EySv9pkf5J8Zm3/ySQ3jqPOLg0w5vm1sZ5M8lCSG8ZRZ5f6jXlDv7cleT7J+0dZ304YZMxJDiV5NMnjSb4z6hq7NsDv9iuTfDXJD9bGfNc46uxKkmNJLiQ5tcX+7vOrqnblF/Ay4L+BXwOuBn4A/MYlfW4Fvg4EuAn43rjrHsGYfxd41dr3t+yFMW/o9y/A14D3j7vuEfycrwGeAGbWtn9l3HWPYMx/DXxy7ftp4Fng6nHXvo0x/wFwI3Bqi/2d59dunqH/DnCmqp6sqv8Dvgjcdkmf24AvVM/DwDVJXjvqQjvUd8xV9VBV/c/a5sPAdSOusWuD/JwBPgp8CbgwyuJ2yCBj/gDwQFWdA6iqSR/3IGMu4BVJArycXqBfHG2Z3amqE/TGsJXO82s3B/q1wI83bJ9faxu2zyQZdjwfpPcXfpL1HXOSa4H3AUdHWNdOGuTn/AbgVUm+neSRJHeOrLqdMciYPwu8GXgKeAz4WFW9MJryxqLz/NrNn1iUTdouvcdykD6TZODxJHknvUD//R2taOcNMuZPA/dU1fO9ydvEG2TM+4G3Au8Gfgn4bpKHq+qHO13cDhlkzO8BHgXeBfw68K0k/1ZVP9vh2sal8/zazYF+Hrh+w/Z19P5yD9tnkgw0niRvAe4Dbqmqn4yotp0yyJjngC+uhfkB4NYkF6vqyyOpsHuD/m4/U1XPAc8lOQHcAExqoA8y5ruAv63eAvOZJD8C3gT8x2hKHLnO82s3L7n8J/D6JK9LcjXwp8CDl/R5ELhz7WrxTcBPq+rpURfaob5jTjIDPADcMcGztY36jrmqXldVs1U1C/wT8OEJDnMY7Hf7K8A7kuxPMgW8HTg94jq7NMiYz9H7HwlJXgO8EXhypFWOVuf5tWtn6FV1McmfA9+kd4X8WFU9nuTutf1H6d3xcCtwBlil9xd+Yg045o8DrwY+tzZjvVgT/KS6AcfclEHGXFWnk3wDOAm8ANxXVZve/jYJBvw5fwI4nuQxessR91TVxD5WN8n9wCHgQJLzwL3AVbBz+eVb/yWpEbt5yUWSNAQDXZIaYaBLUiMMdElqhIEuSY0w0CWpEQa6JDXi/wEhL89cv1y9sgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Seed NumPy's global generator so that Restart Kernel + Run All reproduces\n",
 "# the same noise here and the same random draws in the cells below.\n",
 "SEED = 0\n",
 "np.random.seed(SEED)\n",
 "\n",
 "# generating six equispaced points in [0, 1]\n",
 "x = np.linspace(0, 1, 6)\n",
 "\n",
 "# ground-truth coefficients (intercept, slope), i.e. the line t = 1 + x\n",
 "betaTrue = np.asarray([1, 1])\n",
 "\n",
 "# design matrix with a leading column of ones for the intercept, shape (6, 2)\n",
 "Xtilde = np.column_stack((np.ones(len(x)), x))\n",
 "\n",
 "# noise-free targets as a column vector, shape (6, 1)\n",
 "t = np.matmul(Xtilde, betaTrue.reshape(-1, 1))\n",
 "\n",
 "# adding gaussian noise (mean 0, standard deviation 0.15)\n",
 "tnoisy = t + np.random.normal(0, 0.15, len(t)).reshape(-1, 1)\n",
 "\n",
 "plt.scatter(Xtilde[:, 1], tnoisy, c='r')\n",
 "plt.xlabel('x')\n",
 "plt.ylabel('t')\n",
 "plt.show()\n"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "maxIter = 10\n",
 "eta = 0.1\n",
 "\n",
 "# random initial guess for (intercept, slope), drawn around 1\n",
 "beta = np.random.normal(1, .1, (2, 1))\n",
 "\n",
 "# The gradient used below is X^T X beta - X^T t. Both matrix products are\n",
 "# independent of beta, so they are computed once outside the loop.\n",
 "XtX = np.matmul(Xtilde.T, Xtilde)\n",
 "Xtt = np.matmul(Xtilde.T, tnoisy)\n",
 "\n",
 "# gradient descent iterations\n",
 "for k in range(maxIter):\n",
 "    grad = np.matmul(XtX, beta) - Xtt\n",
 "    beta = beta - eta*grad\n",
 "\n",
 "# evaluate the true line and the fitted line on 10 equispaced test points\n",
 "xtest = np.linspace(0, 1, 10)\n",
 "Xtilde_test = np.column_stack((np.ones(len(xtest)), xtest))\n",
 "t_test = np.matmul(Xtilde_test, betaTrue.reshape(-1, 1))\n",
 "predictions = np.matmul(Xtilde_test, beta.reshape(-1, 1))\n",
 "\n",
 "# label every artist so the two lines can be told apart\n",
 "plt.plot(xtest, t_test, label='true line')\n",
 "plt.plot(xtest, predictions, label='gradient descent fit')\n",
 "plt.scatter(Xtilde[:, 1], tnoisy, c='r', label='noisy samples')\n",
 "plt.xlabel('x')\n",
 "plt.ylabel('t')\n",
 "plt.legend()\n",
 "plt.show()"
 ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# same with points distributed along a parabola\n",
 "\n",
 "# noise-free parabola t = x^2 + x + 1 on 20 equispaced points, for reference\n",
 "x = np.linspace(-2, 1, 20)\n",
 "t = x**2 + x + 1\n",
 "\n",
 "# ten training points on the same interval, perturbed by gaussian noise\n",
 "# (mean 0, standard deviation 0.1)\n",
 "xi = np.linspace(-2, 1, 10)\n",
 "epsilon = np.random.normal(0, .1, len(xi))\n",
 "ti = xi**2 + xi + 1 + epsilon\n",
 "\n",
 "plt.plot(x, t, label='true parabola')\n",
 "plt.scatter(xi, ti, c='r', label='noisy samples')\n",
 "plt.xlabel('x')\n",
 "plt.ylabel('t')\n",
 "plt.legend()\n",
 "plt.show()\n"
 ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1. , -2. , 4. ],\n", " [ 1. , -1.66666667, 2.77777778],\n", " [ 1. , -1.33333333, 1.77777778],\n", " [ 1. , -1. , 1. ],\n", " [ 1. , -0.66666667, 0.44444444],\n", " [ 1. , -0.33333333, 0.11111111],\n", " [ 1. , 0. , 0. ],\n", " [ 1. , 0.33333333, 0.11111111],\n", " [ 1. , 0.66666667, 0.44444444],\n", " [ 1. , 1. , 1. ]])" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Design matrix of the quadratic model: one row (1, xi, xi^2) per training\n",
 "# point. It gets its own name so that the design matrix of the linear example\n",
 "# is not overwritten and the earlier cells can still be re-run.\n",
 "xi1 = xi\n",
 "xi2 = xi**2\n",
 "\n",
 "Xtilde_quad = np.column_stack((np.ones(len(xi)), xi1, xi2))\n",
 "\n",
 "Xtilde_quad\n"
 ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.94017014],\n", " [1.04532901],\n", " [1.02221728]])" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "maxIter = 10\n",
 "eta = 0.01\n",
 "\n",
 "# random initial guess for the three coefficients, drawn around 1\n",
 "beta = np.random.normal(1, .1, (3, 1))\n",
 "\n",
 "# solving the linear regression problem through gradient descent on\n",
 "# the features x_1 = xi, x_2 = xi^2\n",
 "\n",
 "# The gradient is X^T X beta - X^T t; both products are independent of beta\n",
 "# and are therefore computed once outside the loop.\n",
 "XtX_quad = np.matmul(Xtilde_quad.T, Xtilde_quad)\n",
 "Xtt_quad = np.matmul(Xtilde_quad.T, ti.reshape(-1, 1))\n",
 "\n",
 "for k in range(maxIter):\n",
 "    grad = np.matmul(XtX_quad, beta) - Xtt_quad\n",
 "    beta = beta - eta*grad\n",
 "\n",
 "# display the fitted coefficients as the cell output\n",
 "beta"
 ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "image/png": "\n",
"text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# displaying the solution by first generating the features xtest and xtest^2\n",
 "# on 20 equispaced test points\n",
 "\n",
 "xtest = np.linspace(-2, 1, 20)\n",
 "\n",
 "xtest1 = xtest\n",
 "xtest2 = xtest**2\n",
 "\n",
 "# The test design matrix gets its own name: reusing the name of the training\n",
 "# design matrix would overwrite it, and the cells above could then no longer\n",
 "# be re-run (20 test rows would be paired with the 10 training targets).\n",
 "Xtilde_quad_test = np.column_stack((np.ones(len(xtest)), xtest1, xtest2))\n",
 "\n",
 "prediction = np.matmul(Xtilde_quad_test, beta.reshape(-1, 1))\n",
 "\n",
 "plt.plot(xtest1.reshape(-1, 1), prediction, label='gradient descent fit')\n",
 "plt.scatter(xi, ti, c='r', label='noisy samples')\n",
 "plt.xlabel('x')\n",
 "plt.ylabel('t')\n",
 "plt.legend()\n",
 "plt.show()\n"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }