{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "name": "sklearn_regression_diabetetes.ipynb",
   "provenance": [],
   "collapsed_sections": [],
   "authorship_tag": "ABX9TyP+fDr6HFsgo2ApOuOYo6B1",
   "include_colab_link": true
  },
  "kernelspec": { "name": "python3", "display_name": "Python 3" },
  "language_info": { "name": "python" }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": { "id": "view-in-github", "colab_type": "text" },
   "source": [ "\"Open" ]
  },
  {
   "cell_type": "markdown",
   "source": [ "# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html" ],
   "metadata": { "id": "2t4gcEqWUowK" }
  },
  {
   "cell_type": "markdown",
   "source": [ "### https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py" ],
   "metadata": { "id": "VAYkP7QhU73k" }
  },
  {
   "cell_type": "code",
   "source": [
    "# Imports: standard library first, then third-party packages\n",
    "import datetime as dt\n",
    "\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import statsmodels.api as sm\n",
    "import statsmodels.formula.api as smf\n",
    "from scipy import stats\n",
    "from sklearn import datasets\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and later\n",
    "# releases dropped the old names, so use whichever name this install provides.\n",
    "plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')\n",
    "mpl.rcParams['font.family'] = 'DejaVu Sans'\n",
    "mpl.rcParams[\"savefig.dpi\"] = 500\n",
    "# Floats in printed arrays go through the custom formatter below (3 decimals).\n",
    "np.set_printoptions(precision=5, suppress=True, formatter={\"float\": lambda x: f\"{x:6.3f}\"})\n",
    "\n",
    "%matplotlib inline"
   ],
   "metadata": { "id": "UIgP7erHK9Jf" },
   "execution_count": 24,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Load the diabetes dataset\n",
    "diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)\n",
    "diabetes_X"
   ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/" },
"id": "sJ4cSg8aT6zN", "outputId": "0c68d128-a464-47f2-c661-6a1c82279fb2" },
   "execution_count": 6,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n",
       " 0.01990842, -0.01764613],\n",
       " [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
       " -0.06832974, -0.09220405],\n",
       " [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n",
       " 0.00286377, -0.02593034],\n",
       " ...,\n",
       " [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n",
       " -0.04687948, 0.01549073],\n",
       " [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n",
       " 0.04452837, -0.02593034],\n",
       " [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
       " -0.00421986, 0.00306441]])"
      ]
     },
     "metadata": {},
     "execution_count": 6
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Use only one feature. The slice gets its own name so `diabetes_X` keeps all\n",
    "# columns and re-running this cell gives the same result.\n",
    "FEATURE_INDEX = 2\n",
    "N_TEST = 20  # the last N_TEST samples are held out for testing\n",
    "diabetes_X_feature = diabetes_X[:, np.newaxis, FEATURE_INDEX]\n",
    "\n",
    "# Split the data into training/testing sets\n",
    "diabetes_X_train = diabetes_X_feature[:-N_TEST]\n",
    "diabetes_X_test = diabetes_X_feature[-N_TEST:]\n",
    "\n",
    "# Split the targets into training/testing sets\n",
    "diabetes_y_train = diabetes_y[:-N_TEST]\n",
    "diabetes_y_test = diabetes_y[-N_TEST:]\n",
    "\n",
    "# Sanity check: the two parts must cover every sample exactly once\n",
    "assert len(diabetes_X_train) + len(diabetes_X_test) == len(diabetes_y)\n"
   ],
   "metadata": { "id": "n8kFIZ0FULGo" },
   "execution_count": 7,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Create linear regression object\n",
    "regr = LinearRegression()\n",
    "\n",
    "# Train the model using the training sets\n",
    "regr.fit(diabetes_X_train, diabetes_y_train)\n",
    "\n",
    "# Make predictions using the testing set\n",
    "diabetes_y_pred = regr.predict(diabetes_X_test)"
   ],
   "metadata": { "id": "8CHNy0REVqML" },
   "execution_count": 25,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [ "type(regr.coef_)" ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RedHiJErWMZU", "outputId": "813e8aa6-de2c-4afd-8f74-5a2b517e8b99" },
   "execution_count": 14,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": { "text/plain": [ "numpy.ndarray" ] },
     "metadata": {},
"execution_count": 14 } ] },
  {
   "cell_type": "code",
   "source": [ "regr.coef_.round(4)" ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tc2oHOojV-cd", "outputId": "3a46867b-b590-4396-9f5a-6624ac8ab452" },
   "execution_count": 19,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": { "text/plain": [ "array([938.2379])" ] },
     "metadata": {},
     "execution_count": 19
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Fitted slope, rounded to four decimals\n",
    "print(\"Coefficients: \\n\", regr.coef_.round(4))\n",
    "\n",
    "# Mean squared error on the held-out samples\n",
    "test_mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)\n",
    "print(\"Mean squared error: %.2f\" % test_mse)\n",
    "\n",
    "# Coefficient of determination on the held-out samples (1 is a perfect prediction)\n",
    "test_r2 = r2_score(diabetes_y_test, diabetes_y_pred)\n",
    "print(\"Coefficient of determination: %.2f\" % test_r2)\n"
   ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AEkVUO9IT--c", "outputId": "4c93470d-1f98-418b-fbb4-05bcc80f1b4c" },
   "execution_count": 21,
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Coefficients: \n",
      " [938.2379]\n",
      "Mean squared error: 2548.07\n",
      "Coefficient of determination: 0.47\n"
     ]
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Held-out samples (black dots) against the fitted regression line (blue)\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(diabetes_X_test, diabetes_y_test, color=\"black\")\n",
    "ax.plot(diabetes_X_test, diabetes_y_pred, color=\"blue\", linewidth=3)\n",
    "\n",
    "# Empty tick sequences hide the ticks on both axes\n",
    "ax.set_xticks(())\n",
    "ax.set_yticks(())\n",
    "\n",
    "plt.show()"
   ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 252 }, "id": "nRRBgiI7UATN", "outputId": "32877e33-da17-4ca6-f912-70a6aa5851e8" },
   "execution_count": 4,
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [ "
" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAADrCAYAAABXYUzjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQfElEQVR4nO3dbagcZ93H8d9sE2L2pmlMk1hEdkZj09aHIuTUgIjV6G31za1Rmhu7KiTUbREqlFpfuIJCuwqiRRSi3ahUOPNCG4IPL7Slqe2LQO94UqhaKyaNOxuktDX0Cfc0Tzv3i+meycOe3Zk9O3vNXPP9QF5kuM45V9LTX/7nf838xwnDUACA2auY3gAAlBUBDACGEMAAYAgBDACGEMAAYAgBDACGrEqzeOPGjaHneRltBQDsdOTIkX+HYbjp4uupAtjzPC0sLExvVwBQAo7jBMOu04IAAEMIYAAwhAAGAEMIYAAwhAAGAEMIYABYhu/78jxPlUpFnufJ9/2pfv5Ut6EBQFn4vq9Go6FerydJCoJAjUZDklSv16fyNaiAAWCIZrO5FL4DvV5PzWZzal+DAAaAIbrdbqrrkyCAAWCIWq2W6vokCGAAGKLVaqlarV5wrVqtqtVqTe1rEMAAMES9Xle73ZbrunIcR67rqt1uT+0ATpKcNC/lnJubCxnGAwDpOI5zJAzDuYuvUwEDgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAMcfSodM01kuNInic98sj0vwYBDMAavu/L8zxVKhV5niff91N9/KlT0h13RKG7dav0j39E14NAarenv99V0/+UADB7vu+r0Wio1+tJkoIgUKPRkCTV6/WRH/vrX0s7d47+/Hv2TGWbF6ACBmCFZrO5FL4DvV5PzWZz6PoTJ6Qbboiq3VHhe/310vHj0ic+Mc3dRghgAFbodrtjr589KzWbUejWatLCwvKf75e/lMJQeuop6e1vn/ZuIwQwACvUarVlrz/6aBS6q1dL3/728p/jS1+Ser0oeHftymij5yGAAVih1WqpWq2ed2WTKpU/Kgg6+uhHl/84z5OefjoK3XZbWrs2653GOIQDYIV6va5+X7rtts1aXPxvSVK/v/z6n/40OlhznBltcAgCGEDhHTggffazkjT6boddu6R9+6R162ayrbEIYACF9Pzz0lVXjV+3YYP00EPS3Fz2e0qLHjCAwghD6dZbo7bBuPC9+Wbp3Dnp5Ml8hq9EAAOFstInvYrq4MEodCsV6Wc/G722242C+le/itbnGS0IoCBW8qRXEb3ySnSHwssvj1/7i19IX/xi5luaupz/+wBgIO2TXkX19a9H1e769aPD98Mfls6ciardIoavRAUMFEaSJ72K6k9/kt7//mRrn3lGuvbabPczK1TAQEGMetKriBYX43GP48L3vvuiSjcM7QlfiQAGCuPSJ72karWqVqtlaEeTue++KHSr1Xjc4zDXXRc/FnznnbPb3yzRggAKYnDQ1mw21e12VavV1Gq1CnEA98wz0rvelWztwoK0bVu2+8kLJwzDxIvn5ubChVHjgwDgDWfOSB/5iHTo0Pi1zaZ0773Z78kUx3GOhGF4yd3IVMAApuqBB6Tdu8evu/JK6dlnpSuuyHxLuUUAA1ixbldy3WRrDx6UduzIdj9FwSEcgIn0+9JnPhMdqI0L39tui9aHIeF7PipgAKn85
jfSpz+dbO3zz0ubN2e7nyKjAgYw1rPPRpWu44wP3wMH4nt2Cd/RqIABDBWG0qpVo4eaD+zcKe3fn//hN3nDXxeAC+zdG08eGxe+nU4U1AcOEL6ToAIGkHi4uRSNg9yzJ9v9lAUBDJTY1q3S0aPJ1i4uSm96U7b7KRt+aABKZv/++EBtXPg+/HB8oEb4Th8VMFACr72W/EWUH/qQ9Pjj2e4HESpgwGI33RRVuknC96WXokqX8J0dAhiwzGOPxS2Ghx8evXZ+Pm4xrF8/k+3hPLQgAAucPi2tWZNs7dveJp04ke1+kAwVMFBgjUZU6SYJ33/9K6p0Cd/8IICBgnnqqbjFsG/f6LXf/37cYnjrW2ezPyRHCwIogHPnoseCk+r3o4BGvlEBAzn2qU9FQZokfP/+97jaJXyLgQAGcuYvf4lbDL/97ei1X/1qHLrXXDOb/WF6aEEAORCG6YbZnD4trV6d3X4wG9ZXwL7vy/M8VSoVeZ4n3/dNbwlYcued8eSxcX73u7jaJXztYHUF7Pu+Go2Ger2eJCkIAjUaDUkqxKu8YacTJ6RaLdnaTZukF17Idj8wx+rX0nuepyAILrnuuq46nc7sN4RSS3Mw9uqr0uWXZ7cXzNZyr6W3ugXR7XZTXQfON4321Q9+EB+ojXP//XGLgfAtB6tbELVabWgFXEv68x9KayXtq5dflt785uRfK8UPobCM1RVwq9VStVq94Fq1WlWr1TK0IxRFs9lcCt+BXq+nZrO57MesWRNVuknC97nn4moX5WV1ANfrdbXbbbmuK8dx5Lqu2u02B3AYK2n76sEH4xbD6dOjP+c3vhGHbtLX/8BuVgewFIVwp9NRv99Xp9MhfJHIcm2qWq2mU6fi0N21a/znGoTuPfdMeZMzxO2c2bA+gIFJDGtfOc5TCoJOolfznP9YcNEN+uFBECgMw6V+OCG8cgQwMMSgfbV58y2SQkmhwvD6MR9j52PBk/TDkYzVd0EAk+j3pcsuk6T6G7/Gr7d5+A23c2aHChh4w9VXR0Eahe9ohw6VZ/LYqH44VoYARqkdPhwfqB07Nnrt9u1x6H7gA7PZXx5wO2d2aEGglNJUrYuLSnTwZqvBnUPNZlPdble1Wk2tVos7iqaAChilsXNn8seCf/zjuNotc/gOcDtnNqiAYbXjx6UtW5Kvt+G2MRQHAQwrpWkxnDwpbdiQ3V6A5dCCgDU+97nkLYavfS1uMRC+MIUKGIX2wgvSW96SfD0tBuQJFTAKaVDpJgnfv/3NnseCYRcCGIVxzz3JWwzXXhuH7nXXZb83YBK0IJBri4vSRc8AjESViyKhAkYuDSrdJOF78CAtBhQTAZwR5qemt3dv8haDFIfujh3Z7gvICi2IDKzkfWJlE08eS+bs2XTrgTyjAs4A81PHG1S6ScL05z+Pq13CFzahAs4A81OH+8MfpE9+Mvl6erqwHQGcgVqtpiAIhl4vozSPBb/6qnT55dntBcgTWhAZYH6qtG5d8gO1PXviFgPhizKhAs5AWeenPv209J73JF9PiwFl54Qp/i+Ym5sLFxYWMtwOiihNiyEIpJJ2YlBijuMcCcNw7uLrtCAwkY99LHmL4X3vi1sMhC8QowWBxJg8BkwXFTDGSjN57PBhHgsGkiKAMdRdd032WPANN2S7L8AmtCCw5NSpdC+g7PfTHcABuBAVMJYq3STh++CDcbVL+AIrQwVcUvv3SzffnHw9PV1g+qiAS2RQtTpO0vBdLdf1ND/PKE0gCwRwCaxdG4VuJcF/7d27/0/V6n9JciSdXRqlyTxjYPoIYEs98URc7b7++vj1g77uo4/+L6M0gRmhB2yZlU4eY5QmMDtUwBb44AeT37N7112jJ48tNzKzrKM0gSwRwAX1z3/GoXvo0Pj1g9D93vdGr2OUJjA7BHDBDEL3He8Yv/bEifSPBdfrdbXbbbmuK8dx5Lqu2u229aM0ARMYR1kAt98u3X9/srU33RS9+gdAfiw3jpJDuJx66
SVpw4bk63lQAigeWhA54fu+PM9bajEkCd8nn2TyGFBkVMA58IUvLGh+vi5pfJ9106ZoLi+A4iOADTl9WlqzZvC7S1pDl2DyGGAfWhAzNmgxxOE7yv8weQywGAE8A7//fbrh5tEcBkeu++cMdwXANFoQGQnDZMNvBtauvUKLi68u/Z6HHwD7UQFP2e23J5889sMfxncx7Nu3l4cfgJIhgKfg/MeCkzwwMQjdO+6Ir9XrdXU6HfX7fXU6HcJ3Cga39lUqFXmex0hN5A4tiBVY6eQxZMf3fTUajaXRmoO5xpL4xw25QQWc0ne+k/xA7YEHRk8eQ3aazSZzjZF7VMAJnDwpbdyYfD1PppnHXGMUARXwCINKN0n4vvgijwXnCXONUQQE8EXm55O3GO69Nw7dNBVy3tlweMVcYxQBLQhJi4vSRf+vjmRzlWvL4dVgr81mU91uV7VaTa1Wq1B/Btiv1POAr75aOnYs2dpjx6QtW7LdTx54nqcgCC657rquOp3O7DcEWGC5ecCla0E88kjcYhgXvrfeGrcYyhC+EodXwCyVogVx7py0KsWftMyTx2q12tAKmMMrYPqsroC/+c0oSJOE7+HDYvKYOLwCZsm6Cvj48eTtghtvlB57LNPtFA6HV8DsWHEIF4bSl78s/eQnydafOZOuJQEAK2HlIdzjj8eTx8aF7/kthryHrw334QIYL+dRdKnXXpPe+c5k70W75RapaNlly324AMYrTAX8rW9F1e66dePDt9eLKt08hG/aapYhMkB55LoCfvJJadu2ZGv/+lfp3e/Odj9pTVLNch8uUB65q4Bff11673ujandc+H73u3FfN2/hK01WzTJEBiiP3ATwj34Uhe7atVE1u5wtW6T//CcK3bvvnt3+JjFJNct9uEB5GA3go0fjx4K/8pXRa594IgrdY8fSDc4xaZJqtl6vq91u8344oARmHsBnz0o7dkShu3Xr6LV33x23GLZvn83+pmnSapb3wwHlMLNDON+XPv/58evWrZOCQFq/Pvs9ZY2nygCMkvmTcK+8kixMH3pI+vjHU31qACgEY0/CjXpN++7d0eSxMCR8AZRP5i2I7dul1auj+QsDzz0nXXVV1l8ZAPIt8wC+8cYocM+cIXQB4HwzOYS78spZfBUAKJbcPIgBAGVDAAOAIdYEMDN0ARRNrqehJcUMXQBFZEUFzAxdAEVkRQAzQxdAEVkRwMzQLRf6/bCFFQFs+wxdAic26PcHQaAwDJf6/WX+O0GBhWGY+Ne2bdvCvJqfnw9d1w0dxwld1w3n5+dNb2kq5ufnw2q1Gkpa+lWtVhP9+Wz8O3Fd94K/i8Ev13VNbw1YlqSFcEimZj4NDSvjeZ6CILjkuuu66nQ6y37cxXeGSNFPBUUf7l6pVDTse9ZxHPX7fQM7AsYzNg0NKzPpAaOtd4bQ74dNCOCcmzRwbL0zxPZ+P8qFAM65SQPH1kqRd+bBJgRwzk0aODZXirwzD7aYWQBzK9XkJgkcKkUg/2ZyF4StJ/IAkITRuyBsPZEHgJWYSQDbeiIPACsxkwC29UQeAFZiJgFs84k8AEwq8wD2fX+pB3zZZZdJEifyGIk7ZlAWmb4R4+K7H86dO7dU+RK+GIa3m6BMMr0NbdJBMigvvmdgIyO3oXH3A9LiewZlkmkAc/cD0uJ7BmWSaQBz9wPS4nsGZZJpADOPAGnxPYMy4Y0YAJAx3ogBADlDAAOAIQQwABhCAAOAIQQwABiS6i4Ix3FelHTpc6IAgFHcMAw3XXwxVQADAKaHFgQAGEIAA4AhBDAAGEIAA4AhBDAAGEIAA4AhBDAAGEIAA4AhBDAAGPL/Fn14gssvCKAAAAAASUVORK5CYII=\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "import statsmodels.api as sm\n", "\n", "X_ols = 
sm.add_constant(diabetes_X_train)\n", "model = sm.OLS(diabetes_y_train, X_ols).fit()\n", "print(model.summary())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ba00f911-6165-4e2a-c4c1-a2cac8d044e4", "id": "oRIaznjCiM-a" }, "execution_count": 36, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: y R-squared: 0.335\n", "Model: OLS Adj. R-squared: 0.334\n", "Method: Least Squares F-statistic: 211.7\n", "Date: Sun, 29 May 2022 Prob (F-statistic): 3.98e-39\n", "Time: 01:49:43 Log-Likelihood: -2346.5\n", "No. Observations: 422 AIC: 4697.\n", "Df Residuals: 420 BIC: 4705.\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 152.9189 3.069 49.830 0.000 146.887 158.951\n", "x1 938.2379 64.484 14.550 0.000 811.487 1064.989\n", "==============================================================================\n", "Omnibus: 12.587 Durbin-Watson: 1.839\n", "Prob(Omnibus): 0.002 Jarque-Bera (JB): 7.334\n", "Skew: 0.142 Prob(JB): 0.0255\n", "Kurtosis: 2.420 Cond. No. 
21.0\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# Peek at the first rows of the design matrix (constant column + feature);\n",
    "# the full array has one row per training sample and is too long to display.\n",
    "X_ols[:5]"
   ],
   "metadata": { "id": "eZ3PXJwugtVm" },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "mod = sm.OLS(diabetes_y_train, sm.add_constant(diabetes_X_train))\n",
    "res = mod.fit()\n",
    "print(res.summary())"
   ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Bci5XJD0gS5R", "outputId": "777cb9ea-bf19-4c38-d904-de4166d05864" },
   "execution_count": 35,
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      " OLS Regression Results \n",
      "==============================================================================\n",
      "Dep. Variable: y R-squared: 0.335\n",
      "Model: OLS Adj. R-squared: 0.334\n",
      "Method: Least Squares F-statistic: 211.7\n",
      "Date: Sun, 29 May 2022 Prob (F-statistic): 3.98e-39\n",
      "Time: 01:49:37 Log-Likelihood: -2346.5\n",
      "No. Observations: 422 AIC: 4697.\n",
      "Df Residuals: 420 BIC: 4705.\n",
      "Df Model: 1 \n",
      "Covariance Type: nonrobust \n",
      "==============================================================================\n",
      " coef std err t P>|t| [0.025 0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const 152.9189 3.069 49.830 0.000 146.887 158.951\n",
      "x1 938.2379 64.484 14.550 0.000 811.487 1064.989\n",
      "==============================================================================\n",
      "Omnibus: 12.587 Durbin-Watson: 1.839\n",
      "Prob(Omnibus): 0.002 Jarque-Bera (JB): 7.334\n",
      "Skew: 0.142 Prob(JB): 0.0255\n",
      "Kurtosis: 2.420 Cond. No. 
21.0\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
     ]
    }
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Closed-form OLS via the normal equations (X'X) beta = X'y.\n",
    "# Solve the linear system instead of forming an explicit inverse: it is\n",
    "# cheaper and numerically better behaved.\n",
    "gram = X_ols.T @ X_ols\n",
    "moment = X_ols.T @ diabetes_y_train\n",
    "beta = np.linalg.solve(gram, moment)\n",
    "\n",
    "# The hand-computed coefficients should reproduce the statsmodels fit (`model`).\n",
    "np.testing.assert_allclose(beta, model.params, rtol=1e-6)\n",
    "pd.Series(beta)\n"
   ],
   "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u8Z0y-EzUG0i", "outputId": "76535b9d-39c3-47a9-c35b-bc4e31be507c" },
   "execution_count": 39,
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0 152.918862\n",
       "1 938.237861\n",
       "dtype: float64"
      ]
     },
     "metadata": {},
     "execution_count": 39
    }
   ]
  }
 ]
}