{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "\n", "np.random.seed(123)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def make_data():\n", " N = 20 # sample size\n", " X = np.random.random(N) * 10\n", " β = 3\n", " ϵ = np.random.normal(loc=0, scale=1, size=N)\n", " y = β * X + ϵ\n", " return pd.DataFrame([X, y], index=['x', 'y']).T" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data = make_data()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAR1ElEQVR4nO3df2zc9X3H8dfrYuMEjFqTeFGasKUalIqiYJDXdc1UtaWtaMdCu0xVmaBoq5b+URid0BLWf9r90Eaz/lj/mDqlkJZplA5hqkQIdUXAhKpWCIcFF5pudB0Up4EYN4y4S4yde++PO2u2seOz7773+Z4/z4cUfPe9u+/3zUl55ePPr68jQgCAfFRSFwAAaC+CHwAyQ/ADQGYIfgDIDMEPAJnpSl1AIzZs2BBbt25NXQYAdJRDhw69HBH98493RPBv3bpVw8PDqcsAgI5i+/mFjtPVAwCZIfgBIDMEPwBkhuAHgMwQ/ACQGYIfAEpqfGJST73wisYnJlt63o6YzgkAuTlw+Kj2DI2ou1LRVLWqvTu3acfA5pacmxY/AJTM+MSk9gyN6PRUVScnp3V6qqrdQyMta/kT/ABQMqMnTqm7MjeeuysVjZ441ZLzE/wAUDJb+tZpqlqdc2yqWtWWvnUtOT/BDwAls763R3t3btPa7orO7+nS2u6K9u7cpvW9PS05P4O7AFBCOwY2a/tFGzR64pS29K1rWehLBD8AlNb63p6WBv4MunoAIDMEPwBkhuAHgMwQ/ACQGYIfADJD8ANAZgh+AMgMwQ8AmSH4ASAzBD8AZIbgB4DMEPwAkBmCHwAyQ/ADQGYKC37bF9p+1PaPbD9j+5b68c/ZPmr7cP3Ph4qqAQDwekXuxz8t6daIeNL2+ZIO2X6o/tqXI+ILBV4bALCIwoI/Io5JOlZ/fNL2EUmbi7oeAKAxbenjt71V0hWSHq8fusn2iO39tvsW+cwu28O2h8fGxtpRJgBkofDgt90raUjSpyPiVUlflfTrkgZU+43giwt9LiL2RcRgRAz29/cXXSYAZKPQ4LfdrVro3x0R90tSRLwUEWcioirpa5LeXmQNAIC5ipzVY0l3SjoSEV+adXzTrLd9RNLTRdUAAHi9Imf1bJd0g6Qf2j5cP/YZSdfZHpAUkp6T9MkCawAAzFPkrJ7vSfICLz1Y1DUBAEtj5S4AZIbgB4DMEPwAkBmCHwAyQ/ADQGYIfgDIDMEPAJkh+AEgMwQ/AGSG4AeAzBD8AJAZgh8AMkPwA1jS+MSknnrhFY1PTKYuBS1Q5LbMAFaBA4ePas/QiLorFU1Vq9q7c5t2DHD77E5Gix/AosYnJrVnaESnp6o6OTmt01NV7R4aoeXf4Qh+AIsaPXFK3ZW5MdFdqWj0xKklP0v3UHnR1QNgUVv61mmqWp1zbKpa1Za+dWf9HN1D5UaLH8Ci1vf2aO/ObVrbXdH5PV1a213R3p3btL63Z9HP0D1UfrT4AZzVjoHN2n7RBo2eOKUtfeuWDP1Hf3xcXZW5d12d6R4622fRPgQ/gCWt7+1ZMrRnunfW2Prla2fmvNZI9xDah+AH0JTxiUk98/NXtfu+EU1Ozx0POK9njc5UY8nuIbQXwQ9gxWZa+RX59aF/zhr9xe++Te95668Q+iVD8ANYkdmDuAs5E0HolxTBD2BFZub4n9bc4D/3nDWqBt07ZVZY8Nu+UNI/SdooKSTti4iv2L5A0r9I2irpOUkfjYgTRdUBoBgLzfHv6bL+8for9bY3vYHQL7Ei5/FPS7o1Ii6V9A5Jn7J9qaTbJD0cERdLerj+HECJNLLqdqE5/n/3+5frXW+he6fsCmvxR8QxScfqj0/aPiJps6RrJb27/ra7JP2bpD1F1QFgeZaz6nY5c/xRHm3p47e9VdIVkh6XtLH+j4IkvahaVxCAEpg9YDvTd797aETbL9qwaKg3Mscf5VL4lg22eyUNSfp0RLw6+7WICNX6/xf63C7bw7aHx8bGii4TgJrblA2do9Dgt92tWujfHRH31w+/ZHtT/fVNko4v9NmI2BcRgxEx2N/fX2SZAOpWuikbOkthwW/bku6UdCQivjTrpYOSbqw/vlHSgaJqALA8K9mUDZ2nyD7+7ZJukPRD24frxz4j6XZJ99r+hKTnJX20wBoALBMDtqtfkbN6vifJi7x8VVHXBdA8BmxXN/bjB4DMEPwAkBmCHwAyQ/ADQGYIfgDIDMEPtEEjm54B7cJ+/EDBlrPpGdAOtPiBAs3e9Ozk5LROT1W1e2iElj+SIviBArHpGcqI4AcKxKZnKCOCHygQm56hjBjcBQrGpmcoG4IfaAM2PUOZ0NUDAJkh+IEWYZEWOgVdPUALsEgLnYQWP9AkFmmh0xD8QJNYpIVOQ/ADTWKRFjoNwQ80aWaRVk9XReees0Y9XSzSQrkR/EALxMx/4/+fAWVF8ANNmhncnZwO/e/UGU1OB4O7KDWCH2gSg7voNAQ/oOYWXzG4i07DAi5kr9nFVzODu7vnnYPBXZRVYcFve7+kayQdj4jL6sc+J+mPJY3V3/aZiHiwqBqApcxefHVatVb77qERbb9ow7KCmx040UmK7Or5hqSrFzj+5YgYqP8h9JFUK/vn1/f26PIL30joo/QKC/6IeEzSL4o6P9AK9M8jRykGd2+yPWJ7v+2+xd5ke5ftYdvDY2Nji70NaAp3yEKOHFHcYhPbWyU9MKuPf6Okl1Vb4fJXkjZFxB8tdZ7BwcEYHh4urE5gfGKS/nmsOrYPRcTg/ONtndUTES/NKuhrkh5o5/WBxXCHLOSkrV09tjfNevoRSU+38/oAgGKnc94j6d2SNtgelfRZSe+2PaBaV89zkj5Z1PUBAAsrLPgj4roFDt9Z1PWQH/rlgZVh5S46Erc6BFaOvXrQcbjVIdAcgh8dh90wgeYsGfy2bz7bQiug3VhtCzSnkRb/RklP2L7X9tW2XXRRwNmw2hZoTkMrd+th/wFJfyhpUNK9ku6MiP8qtrwaVu5iIczqAc6uqZW7ERG2X5T0oqRpSX2S7rP9UETsbm2pQGMWWm3LPwbA0pYMftu3SPq4anvs3CHpzyJiynZF0rOSCH6UAlM8gcY00uK/QNLvRcTzsw9GRNX2NcWUBSxPq26oAuRgycHdiPjs/NCf9dqR1pcELB9TPIHGMY8fqwJTPIHGEfxYFZjiCTSOvXqwanDDc6AxBD9WFW6oAiyNrh4AyAzBDwCZIfgBIDMEPwBkhuAHgMwQ/ACQGYIfADJD8ANAZgh+AMgMwY/SGZ+Y1FMvvKLxicnUpQCrEls2oOWauQvW7JupvHbmjG56z8X6g9/8VbZhAFqosBa/7f22j9t+etaxC2w/ZPvZ+s++oq6PNA4cPqrtn39E19/xuLZ//hEdPHy04c/OvpnKyclpTU6HvvjQf+qdty/vPADOrsiunm9IunresdskPRwRF0t6uP4cq8T84D49VdXuoZGGu2wWupmKJE1OL+88AM6usOCPiMck/WLe4Wsl3VV/fJekDxd1fbRfs3fBWuhmKis5D4Cza/fg7saIOFZ//KKkjYu90fYu28O2h8fGxtpTHZrS7F2wZm6m0tPl173G3bSA1kk2qyciQlKc5fV9ETEYEYP9/f1trAwr1Yq7YO0Y2Kzv33aVbn3/W9TTxd20gCK0e1bPS7Y3RcQx25skHW/z9VGwVtwFa31vj26+qjabh7tpAa3X7uA/KOlGSbfXfx5o8/XRBq26CxZ30wKKUeR0znsk/UDSJbZHbX9CtcB/v+1nJb2v/hwlwKIpIB+Ftfgj4rpFXrqqqGtiZWYvmpqqVrV35zbtGNicuiwABWHLhsw1O/ceQOch+DPX7Nx7AJ2H4M9cs3PvAXQegj9zrZh7D6CzsDsnWjL3HkDnIPghiTnzQE7o6gGAzBD8AJAZgr9DsdIWwErRx9+BWGkLoBm0+DsMK20BNIvg7zCstAXQLIK/w7DSFkCzCP4Ow0pbAM1icLcDsdIWQDMI/g7FSlsAK0VXT2LMxwfQbrT4E2I+PoAUaPEnwnx8AKkQ/IkwHx9AKgR/IszHB5AKwZ8I8/EBpMLgbkLMxweQAsGfGPPxAbQbXT0AkJkkLX7bz0k6KemMpOmIGExRRyrjE5N07wBIJmVXz3si4uWE10+CRVsAUqOrp41YtAWgDFIFf0j6ru1Dtnct9Abbu2wP2x4eGxtrc3nFYNEWgDJIFfy/HRFXSvqgpE/Zftf8N0TEvogYjIjB/v7+9ldYABZtASiDJMEfEUfrP49L+rakt6eoo91YtAWgDNo+uGv7PEmViDhZf/wBSX/Z7jpSYdEWgNRSzOrZKOnbtmeu/82I+E6COpJh0RaAlNoe/BHxU0mXt/u6AIAapnMCQGYIfgDIDMEPAJkh+AEgMwQ/AGSG4AeAzBD8AJAZgh8AMkPwA0BmCH4AyAzBDwCZIfgBIDMEPwBkhuAHgMwQ/ACQGYIfADJD8ANAZgh+AMgMwQ8AmSH4ASAzBD8AZIbgB4DMEPwAkBmCHwAyQ/ADQGaSBL/tq23/h+2f2L4tRQ0AkKu2B7/tNZL+QdIHJV0q6Trbl7a7DgDIVYoW/9sl/SQifhoRr0n6lqRrE9QBAFlKEfybJb0w6/lo/dgctnfZHrY9PDY21rbiAGC1K+3gbkTsi4jBiBjs7+9PXQ4ArBopgv+opAtnPd9SPwYAaIMUwf+EpIttv9n2OZI+JulggjoAIEtd7b5gREzbvknSv0paI2l/RDxTxLXGJyY1euKUtvSt0/reniIuAQAdp+3BL0kR8aCkB4u8xoHDR7VnaETdlYqmqlXt3blNOwZeN4YMANkp7eBuM8YnJrVnaESnp6o6OTmt01NV7R4a0fjEZOrSACC5VRn8oydOqbsy93+tu1LR6IlTiSoCgPJYlcG/pW+dpqrVOcemqlVt6VuXqCIAKI9VGfzre3u0d+c2re2u6PyeLq3trmjvzm0M8AKAEg3utsOOgc3aftEGZvUAwDyrNvilWsufwAeAuVZlVw8AYHEEPwBkhuAHgMwQ/ACQGYIfADLjiEhdw5Jsj0l6PnUdCWyQ9HLqIkqA76GG76GG76Gmke/h1yLidTc06Yjgz5Xt4YgYTF1HanwPNXwPNXwPNc18D3T1AEBmCH4AyAzBX277UhdQEnwPNXwPNXwPNSv+HujjB4DM0OIHgMwQ/ACQGYK/ZGxfaPtR2z+y/YztW1LXlJLtNbb/3fYDqWtJxfYbbd9n+8e2j9j+rdQ1pWD7T+t/J562fY/ttalrahfb+20ft/30rGMX2H7I9rP1n32Nno/gL59pSbdGxKWS3iHpU7YvTVxTSrdIOpK6iMS+Iuk7EfFWSZcrw+/D9mZJfyJpMCIuk7RG0sfSVtVW35B09bxjt0l6OCIulvRw/XlDCP6SiYhjEfFk/fFJ1f6Sb05bVRq2t0j6HUl3pK4lFdtvkPQuSXdKUkS8FhGvJC0qnS5J62x3STpX0s8T19M2EfGYpF/MO3ytpLvqj++S9OFGz0fwl5jtrZKukPR44lJS+XtJuyVVl3jfavZmSWOSvl7v8rrD9nmpi2q3iDgq6QuSfibpmKT/iYjvpq0quY0Rcaz++EVJGxv9IMFfUrZ7JQ1J+nREvJq6nnazfY2k4xFxKHUtiXVJulLSVyPiCkm/1DJ+pV8t6v3X16r2D+GbJJ1n+/q0VZVH1OblNzw3n+AvIdvdqoX+3RFxf+p6EtkuaYft5yR9S9J7bf9z2pKSGJU0GhEzv/Xdp9o/BLl5n6T/joixiJiSdL+kdyauKbWXbG+SpPrP441+kOAvGdtWrT/3SER8KXU9qUTEn0fElojYqtog3iMRkV0LLyJelPSC7Uvqh66S9KOEJaXyM0nvsH1u/e/IVcpwkHueg5JurD++UdKBRj9I8JfPdkk3qNbCPVz/86HURSGpmyXdbXtE0oCkv0lbTvvVf+O5T9KTkn6oWnZls3WD7Xsk/UDSJbZHbX9C0u2S3m/7WdV+I7q94fOxZQMA5IUWPwBkhuAHgMwQ/ACQGYIfADJD8ANAZgh+AMgMwQ8AmSH4gRWw/Ru2R2yvtX1efZ/4y1LXBTSCBVzACtn+a0lrJa1TbT+dv01cEtAQgh9YIdvnSHpC0mlJ74yIM4lLAhpCVw+wcusl9Uo6X7WWP9ARaPEDK2T7oGpbRr9Z0qaIuClxSUBDulIXAHQi2x+XNBUR37S9RtL3bb83Ih5JXRuwFFr8AJAZ+vgBIDMEPwBkhuAHgMwQ/ACQGYIfADJD8ANAZgh+AMjM/wGGu7mDBdhBtQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "data.plot.scatter('x', 'y')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Coefficient of determination (aka. $R^2$)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "results = smf.ols('y ~ x', data=data).fit()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "y_hat = results.predict(data.x)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "var_y = np.var(data.y)\n", "var_y_hat = np.var(y_hat)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "r_squared = var_y_hat / var_y\n", "ρ_squared = np.corrcoef(data.y, y_hat)[0][1] ** 2" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "var_y=47.42, var_y_hat=46.22, r_squared=0.9747, ρ_squared=0.9747\n" ] } ], "source": [ "print(f'{var_y=:.2f}, {var_y_hat=:.2f}, {r_squared=:.4f}, {ρ_squared=:.4f}')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# make sure the calculation of `r_squared` is reasonable.\n", "# note in real implementation, degree of freedoms may matter when sample size is small\n", "assert np.isclose(r_squared, results.rsquared)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Show $\\rho^2 = R^2$" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "assert np.isclose(r_squared, ρ_squared)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# More stats of the fit" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: y R-squared: 0.975\n", "Model: OLS Adj. R-squared: 0.973\n", "Method: Least Squares F-statistic: 693.1\n", "Date: Sat, 02 Jan 2021 Prob (F-statistic): 8.00e-16\n", "Time: 20:57:08 Log-Likelihood: -30.204\n", "No. Observations: 20 AIC: 64.41\n", "Df Residuals: 18 BIC: 66.40\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept -0.1367 0.602 -0.227 0.823 -1.401 1.128\n", "x 2.9906 0.114 26.327 0.000 2.752 3.229\n", "==============================================================================\n", "Omnibus: 1.344 Durbin-Watson: 1.699\n", "Prob(Omnibus): 0.511 Jarque-Bera (JB): 0.972\n", "Skew: -0.519 Prob(JB): 0.615\n", "Kurtosis: 2.706 Cond. No. 12.7\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(results.summary())" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" } }, "nbformat": 4, "nbformat_minor": 2 }