{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "sklearn_regression_diabetetes.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyP+fDr6HFsgo2ApOuOYo6B1",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"
"
]
},
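{
"cell_type": "markdown",
"source": [
"# Linear regression on the scikit-learn diabetes dataset\n",
"\n",
"Dataset reference: [sklearn.datasets.load_diabetes](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)"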
],
"metadata": {
"id": "2t4gcEqWUowK"
}
},
{
"cell_type": "markdown",
"source": [
"### Single-feature ordinary least squares\n",
"\n",
"Adapted from the scikit-learn example [plot_ols](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py)"
],
"metadata": {
"id": "VAYkP7QhU73k"
}
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"from pylab import mpl, plt\n",
"import numpy as np\n",
"from sklearn import datasets\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"import datetime as dt\n",
"\n",
"import statsmodels.formula.api as smf\n",
"import statsmodels.api as sm\n",
"from scipy import stats\n",
"\n",
"plt.style.use('seaborn')\n",
"mpl.rcParams['font.family'] = 'DejaVu Sans'\n",
"mpl.rcParams[\"savefig.dpi\"] = 500\n",
"np.set_printoptions(precision=5, suppress=True, formatter={\"float\": lambda x: f\"{x:6.3f}\"})\n",
"\n",
"%matplotlib inline"
],
"metadata": {
"id": "UIgP7erHK9Jf"
},
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "X8PWUgCKVLE_"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Load the diabetes dataset\n",
"diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)\n",
"diabetes_X"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sJ4cSg8aT6zN",
"outputId": "0c68d128-a464-47f2-c661-6a1c82279fb2"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n",
" 0.01990842, -0.01764613],\n",
" [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
" -0.06832974, -0.09220405],\n",
" [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n",
" 0.00286377, -0.02593034],\n",
" ...,\n",
" [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n",
" -0.04687948, 0.01549073],\n",
" [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n",
" 0.04452837, -0.02593034],\n",
" [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
" -0.00421986, 0.00306441]])"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"# Use only one feature\n",
"diabetes_X = diabetes_X[:, np.newaxis, 2]\n",
"\n",
"# Split the data into training/testing sets\n",
"diabetes_X_train = diabetes_X[:-20]\n",
"diabetes_X_test = diabetes_X[-20:]\n",
"\n",
"# Split the targets into training/testing sets\n",
"diabetes_y_train = diabetes_y[:-20]\n",
"diabetes_y_test = diabetes_y[-20:]\n"
],
"metadata": {
"id": "n8kFIZ0FULGo"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Create linear regression object\n",
"regr = LinearRegression()\n",
"\n",
"# Train the model using the training sets\n",
"regr.fit(diabetes_X_train, diabetes_y_train)\n",
"\n",
"# Make predictions using the testing set\n",
"diabetes_y_pred = regr.predict(diabetes_X_test)"
],
"metadata": {
"id": "8CHNy0REVqML"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "code",
"source": [
"type(regr.coef_)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RedHiJErWMZU",
"outputId": "813e8aa6-de2c-4afd-8f74-5a2b517e8b99"
},
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"source": [
"np.ndarray.round(regr.coef_,4)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tc2oHOojV-cd",
"outputId": "3a46867b-b590-4396-9f5a-6624ac8ab452"
},
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([938.2379])"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"source": [
"# The coefficients\n",
"print(\"Coefficients: \\n\", np.ndarray.round(regr.coef_,4))\n",
"# The mean squared error\n",
"print(\"Mean squared error: %.2f\" % mean_squared_error(diabetes_y_test, diabetes_y_pred))\n",
"# The coefficient of determination: 1 is perfect prediction\n",
"print(\"Coefficient of determination: %.2f\" % r2_score(diabetes_y_test, diabetes_y_pred))\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "AEkVUO9IT--c",
"outputId": "4c93470d-1f98-418b-fbb4-05bcc80f1b4c"
},
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Coefficients: \n",
" [938.2379]\n",
"Mean squared error: 2548.07\n",
"Coefficient of determination: 0.47\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Plot outputs\n",
"plt.scatter(diabetes_X_test, diabetes_y_test, color=\"black\")\n",
"plt.plot(diabetes_X_test, diabetes_y_pred, color=\"blue\", linewidth=3)\n",
"\n",
"plt.xticks(())\n",
"plt.yticks(())\n",
"\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 252
},
"id": "nRRBgiI7UATN",
"outputId": "32877e33-da17-4ca6-f912-70a6aa5851e8"
},
"execution_count": 4,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAADrCAYAAABXYUzjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQfElEQVR4nO3dbagcZ93H8d9sE2L2pmlMk1hEdkZj09aHIuTUgIjV6G31za1Rmhu7KiTUbREqlFpfuIJCuwqiRRSi3ahUOPNCG4IPL7Slqe2LQO94UqhaKyaNOxuktDX0Cfc0Tzv3i+meycOe3Zk9O3vNXPP9QF5kuM45V9LTX/7nf838xwnDUACA2auY3gAAlBUBDACGEMAAYAgBDACGEMAAYAgBDACGrEqzeOPGjaHneRltBQDsdOTIkX+HYbjp4uupAtjzPC0sLExvVwBQAo7jBMOu04IAAEMIYAAwhAAGAEMIYAAwhAAGAEMIYABYhu/78jxPlUpFnufJ9/2pfv5Ut6EBQFn4vq9Go6FerydJCoJAjUZDklSv16fyNaiAAWCIZrO5FL4DvV5PzWZzal+DAAaAIbrdbqrrkyCAAWCIWq2W6vokCGAAGKLVaqlarV5wrVqtqtVqTe1rEMAAMES9Xle73ZbrunIcR67rqt1uT+0ATpKcNC/lnJubCxnGAwDpOI5zJAzDuYuvUwEDgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAYQgADgCEEMAAMcfSodM01kuNInic98sj0vwYBDMAavu/L8zxVKhV5niff91N9/KlT0h13RKG7dav0j39E14NAarenv99V0/+UADB7vu+r0Wio1+tJkoIgUKPRkCTV6/WRH/vrX0s7d47+/Hv2TGWbF6ACBmCFZrO5FL4DvV5PzWZz6PoTJ6Qbboiq3VHhe/310vHj0ic+Mc3dRghgAFbodrtjr589KzWbUejWatLCwvKf75e/lMJQeuop6e1vn/ZuIwQwACvUarVlrz/6aBS6q1dL3/728p/jS1+Ser0oeHftymij5yGAAVih1WqpWq2ed2WTKpU/Kgg6+uhHl/84z5OefjoK3XZbWrs2653GOIQDYIV6va5+X7rtts1aXPxvSVK/v/z6n/40OlhznBltcAgCGEDhHTggffazkjT6boddu6R9+6R162ayrbEIYACF9Pzz0lVXjV+3YYP00EPS3Fz2e0qLHjCAwghD6dZbo7bBuPC9+Wbp3Dnp5Ml8hq9EAAOFstInvYrq4MEodCsV6Wc/G722242C+le/itbnGS0IoCBW8qRXEb3ySnSHwssvj1/7i19IX/xi5luaupz/+wBgIO2TXkX19a9H1e769aPD98Mfls6ciardIoavRAUMFEaSJ72K6k9/kt7//mRrn3lGuvbabPczK1TAQEGMetKriBYX43GP48L3vvuiSjcM7QlfiQAGCuPSJ72karWqVqtlaEeTue++KHSr1Xjc4zDXXRc/FnznnbPb3yzRggAKYnDQ1mw21e12VavV1Gq1CnEA98wz0rvelWztwoK0bVu2+8kLJwzDxIvn5ubChVHjgwDgDWfOSB/5iHTo0Pi1zaZ0773Z78kUx3GOhGF4yd3IVMAApuqBB6Tdu8evu/JK6dlnpSuuyHxLuUUAA1ixbldy3WRrDx6UduzIdj9FwSEcgIn0+9JnPhMdqI0L39tui9aHIeF7PipgAKn85jfSpz
+dbO3zz0ubN2e7nyKjAgYw1rPPRpWu44wP3wMH4nt2Cd/RqIABDBWG0qpVo4eaD+zcKe3fn//hN3nDXxeAC+zdG08eGxe+nU4U1AcOEL6ToAIGkHi4uRSNg9yzJ9v9lAUBDJTY1q3S0aPJ1i4uSm96U7b7KRt+aABKZv/++EBtXPg+/HB8oEb4Th8VMFACr72W/EWUH/qQ9Pjj2e4HESpgwGI33RRVuknC96WXokqX8J0dAhiwzGOPxS2Ghx8evXZ+Pm4xrF8/k+3hPLQgAAucPi2tWZNs7dveJp04ke1+kAwVMFBgjUZU6SYJ33/9K6p0Cd/8IICBgnnqqbjFsG/f6LXf/37cYnjrW2ezPyRHCwIogHPnoseCk+r3o4BGvlEBAzn2qU9FQZokfP/+97jaJXyLgQAGcuYvf4lbDL/97ei1X/1qHLrXXDOb/WF6aEEAORCG6YbZnD4trV6d3X4wG9ZXwL7vy/M8VSoVeZ4n3/dNbwlYcued8eSxcX73u7jaJXztYHUF7Pu+Go2Ger2eJCkIAjUaDUkqxKu8YacTJ6RaLdnaTZukF17Idj8wx+rX0nuepyAILrnuuq46nc7sN4RSS3Mw9uqr0uWXZ7cXzNZyr6W3ugXR7XZTXQfON4321Q9+EB+ojXP//XGLgfAtB6tbELVabWgFXEv68x9KayXtq5dflt785uRfK8UPobCM1RVwq9VStVq94Fq1WlWr1TK0IxRFs9lcCt+BXq+nZrO57MesWRNVuknC97nn4moX5WV1ANfrdbXbbbmuK8dx5Lqu2u02B3AYK2n76sEH4xbD6dOjP+c3vhGHbtLX/8BuVgewFIVwp9NRv99Xp9MhfJHIcm2qWq2mU6fi0N21a/znGoTuPfdMeZMzxO2c2bA+gIFJDGtfOc5TCoJOolfznP9YcNEN+uFBECgMw6V+OCG8cgQwMMSgfbV58y2SQkmhwvD6MR9j52PBk/TDkYzVd0EAk+j3pcsuk6T6G7/Gr7d5+A23c2aHChh4w9VXR0Eahe9ohw6VZ/LYqH44VoYARqkdPhwfqB07Nnrt9u1x6H7gA7PZXx5wO2d2aEGglNJUrYuLSnTwZqvBnUPNZlPdble1Wk2tVos7iqaAChilsXNn8seCf/zjuNotc/gOcDtnNqiAYbXjx6UtW5Kvt+G2MRQHAQwrpWkxnDwpbdiQ3V6A5dCCgDU+97nkLYavfS1uMRC+MIUKGIX2wgvSW96SfD0tBuQJFTAKaVDpJgnfv/3NnseCYRcCGIVxzz3JWwzXXhuH7nXXZb83YBK0IJBri4vSRc8AjESViyKhAkYuDSrdJOF78CAtBhQTAZwR5qemt3dv8haDFIfujh3Z7gvICi2IDKzkfWJlE08eS+bs2XTrgTyjAs4A81PHG1S6ScL05z+Pq13CFzahAs4A81OH+8MfpE9+Mvl6erqwHQGcgVqtpiAIhl4vozSPBb/6qnT55dntBcgTWhAZYH6qtG5d8gO1PXviFgPhizKhAs5AWeenPv209J73JF9PiwFl54Qp/i+Ym5sLFxYWMtwOiihNiyEIpJJ2YlBijuMcCcNw7uLrtCAwkY99LHmL4X3vi1sMhC8QowWBxJg8BkwXFTDGSjN57PBhHgsGkiKAMdRdd032WPANN2S7L8AmtCCw5NSpdC+g7PfTHcABuBAVMJYq3STh++CDcbVL+AIrQwVcUvv3SzffnHw9PV1g+qiAS2RQtTpO0vBdLdf1ND/PKE0gCwRwCaxdG4VuJcF/7d27/0/V6n9JciSdXRqlyTxjYPoIYEs98URc7b7++vj1g77uo4/+L6M0gRmhB2yZlU4eY5QmMDtUwBb44AeT37N7112jJ48tNzKzrKM0gSwRwAX1z3/GoXvo0Pj1g9D93vdGr2OUJjA7BHDBDEL3He8Yv/bEifSPBdfrdbXbbbmuK8dx5Lqu2u229aM0ARMYR1kAt98u3X9/srU33RS9+gdAfiw3jpJDuJx66SVpw4
bk63lQAigeWhA54fu+PM9bajEkCd8nn2TyGFBkVMA58IUvLGh+vi5pfJ9106ZoLi+A4iOADTl9WlqzZvC7S1pDl2DyGGAfWhAzNmgxxOE7yv8weQywGAE8A7//fbrh5tEcBkeu++cMdwXANFoQGQnDZMNvBtauvUKLi68u/Z6HHwD7UQFP2e23J5889sMfxncx7Nu3l4cfgJIhgKfg/MeCkzwwMQjdO+6Ir9XrdXU6HfX7fXU6HcJ3Cga39lUqFXmex0hN5A4tiBVY6eQxZMf3fTUajaXRmoO5xpL4xw25QQWc0ne+k/xA7YEHRk8eQ3aazSZzjZF7VMAJnDwpbdyYfD1PppnHXGMUARXwCINKN0n4vvgijwXnCXONUQQE8EXm55O3GO69Nw7dNBVy3tlweMVcYxQBLQhJi4vSRf+vjmRzlWvL4dVgr81mU91uV7VaTa1Wq1B/Btiv1POAr75aOnYs2dpjx6QtW7LdTx54nqcgCC657rquOp3O7DcEWGC5ecCla0E88kjcYhgXvrfeGrcYyhC+EodXwCyVogVx7py0KsWftMyTx2q12tAKmMMrYPqsroC/+c0oSJOE7+HDYvKYOLwCZsm6Cvj48eTtghtvlB57LNPtFA6HV8DsWHEIF4bSl78s/eQnydafOZOuJQEAK2HlIdzjj8eTx8aF7/kthryHrw334QIYL+dRdKnXXpPe+c5k70W75RapaNlly324AMYrTAX8rW9F1e66dePDt9eLKt08hG/aapYhMkB55LoCfvJJadu2ZGv/+lfp3e/Odj9pTVLNch8uUB65q4Bff11673ujandc+H73u3FfN2/hK01WzTJEBiiP3ATwj34Uhe7atVE1u5wtW6T//CcK3bvvnt3+JjFJNct9uEB5GA3go0fjx4K/8pXRa594IgrdY8fSDc4xaZJqtl6vq91u8344oARmHsBnz0o7dkShu3Xr6LV33x23GLZvn83+pmnSapb3wwHlMLNDON+XPv/58evWrZOCQFq/Pvs9ZY2nygCMkvmTcK+8kixMH3pI+vjHU31qACgEY0/CjXpN++7d0eSxMCR8AZRP5i2I7dul1auj+QsDzz0nXXVV1l8ZAPIt8wC+8cYocM+cIXQB4HwzOYS78spZfBUAKJbcPIgBAGVDAAOAIdYEMDN0ARRNrqehJcUMXQBFZEUFzAxdAEVkRQAzQxdAEVkRwMzQLRf6/bCFFQFs+wxdAic26PcHQaAwDJf6/WX+O0GBhWGY+Ne2bdvCvJqfnw9d1w0dxwld1w3n5+dNb2kq5ufnw2q1Gkpa+lWtVhP9+Wz8O3Fd94K/i8Ev13VNbw1YlqSFcEimZj4NDSvjeZ6CILjkuuu66nQ6y37cxXeGSNFPBUUf7l6pVDTse9ZxHPX7fQM7AsYzNg0NKzPpAaOtd4bQ74dNCOCcmzRwbL0zxPZ+P8qFAM65SQPH1kqRd+bBJgRwzk0aODZXirwzD7aYWQBzK9XkJgkcKkUg/2ZyF4StJ/IAkITRuyBsPZEHgJWYSQDbeiIPACsxkwC29UQeAFZiJgFs84k8AEwq8wD2fX+pB3zZZZdJEifyGIk7ZlAWmb4R4+K7H86dO7dU+RK+GIa3m6BMMr0NbdJBMigvvmdgIyO3oXH3A9LiewZlkmkAc/cD0uJ7BmWSaQBz9wPS4nsGZZJpADOPAGnxPYMy4Y0YAJAx3ogBADlDAAOAIQQwABhCAAOAIQQwABiS6i4Ix3FelHTpc6IAgFHcMAw3XXwxVQADAKaHFgQAGEIAA4AhBDAAGEIAA4AhBDAAGEIAA4AhBDAAGEIAA4AhBDAAGPL/Fn14gssvCKAAAAAASUVORK5CYII=\n"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"import statsmodels.api as sm\n",
"\n",
"X_ols = sm.add_constant(diabetes_X_train)\n",
"model = sm.OLS(diabetes_y_train, X_ols).fit()\n",
"print(model.summary())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ba00f911-6165-4e2a-c4c1-a2cac8d044e4",
"id": "oRIaznjCiM-a"
},
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.335\n",
"Model: OLS Adj. R-squared: 0.334\n",
"Method: Least Squares F-statistic: 211.7\n",
"Date: Sun, 29 May 2022 Prob (F-statistic): 3.98e-39\n",
"Time: 01:49:43 Log-Likelihood: -2346.5\n",
"No. Observations: 422 AIC: 4697.\n",
"Df Residuals: 420 BIC: 4705.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 152.9189 3.069 49.830 0.000 146.887 158.951\n",
"x1 938.2379 64.484 14.550 0.000 811.487 1064.989\n",
"==============================================================================\n",
"Omnibus: 12.587 Durbin-Watson: 1.839\n",
"Prob(Omnibus): 0.002 Jarque-Bera (JB): 7.334\n",
"Skew: 0.142 Prob(JB): 0.0255\n",
"Kurtosis: 2.420 Cond. No. 21.0\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"X_ols"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eZ3PXJwugtVm",
"outputId": "15a9c6d8-65de-46c0-bb33-8c8317514a65"
},
"execution_count": 38,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[ 1.000, 0.062],\n",
" [ 1.000, -0.051],\n",
" [ 1.000, 0.044],\n",
" [ 1.000, -0.012],\n",
" [ 1.000, -0.036],\n",
" [ 1.000, -0.041],\n",
" [ 1.000, -0.047],\n",
" [ 1.000, -0.002],\n",
" [ 1.000, 0.062],\n",
" [ 1.000, 0.039],\n",
" [ 1.000, -0.084],\n",
" [ 1.000, 0.018],\n",
" [ 1.000, -0.029],\n",
" [ 1.000, -0.002],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, -0.018],\n",
" [ 1.000, 0.042],\n",
" [ 1.000, 0.012],\n",
" [ 1.000, -0.011],\n",
" [ 1.000, -0.018],\n",
" [ 1.000, -0.057],\n",
" [ 1.000, -0.022],\n",
" [ 1.000, -0.004],\n",
" [ 1.000, 0.061],\n",
" [ 1.000, 0.036],\n",
" [ 1.000, -0.013],\n",
" [ 1.000, -0.077],\n",
" [ 1.000, 0.060],\n",
" [ 1.000, -0.021],\n",
" [ 1.000, -0.006],\n",
" [ 1.000, 0.044],\n",
" [ 1.000, -0.065],\n",
" [ 1.000, 0.125],\n",
" [ 1.000, -0.050],\n",
" [ 1.000, -0.063],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, 0.023],\n",
" [ 1.000, 0.011],\n",
" [ 1.000, 0.071],\n",
" [ 1.000, 0.014],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, -0.068],\n",
" [ 1.000, -0.011],\n",
" [ 1.000, -0.023],\n",
" [ 1.000, 0.068],\n",
" [ 1.000, -0.035],\n",
" [ 1.000, -0.012],\n",
" [ 1.000, -0.073],\n",
" [ 1.000, -0.042],\n",
" [ 1.000, 0.014],\n",
" [ 1.000, -0.007],\n",
" [ 1.000, 0.016],\n",
" [ 1.000, -0.009],\n",
" [ 1.000, -0.016],\n",
" [ 1.000, 0.025],\n",
" [ 1.000, -0.049],\n",
" [ 1.000, 0.041],\n",
" [ 1.000, -0.063],\n",
" [ 1.000, -0.064],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, -0.004],\n",
" [ 1.000, 0.005],\n",
" [ 1.000, -0.007],\n",
" [ 1.000, -0.037],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, -0.018],\n",
" [ 1.000, -0.015],\n",
" [ 1.000, -0.030],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, -0.070],\n",
" [ 1.000, 0.034],\n",
" [ 1.000, -0.004],\n",
" [ 1.000, -0.020],\n",
" [ 1.000, 0.002],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, 0.028],\n",
" [ 1.000, -0.036],\n",
" [ 1.000, -0.058],\n",
" [ 1.000, -0.037],\n",
" [ 1.000, 0.012],\n",
" [ 1.000, -0.022],\n",
" [ 1.000, -0.035],\n",
" [ 1.000, 0.010],\n",
" [ 1.000, -0.040],\n",
" [ 1.000, 0.071],\n",
" [ 1.000, -0.075],\n",
" [ 1.000, -0.006],\n",
" [ 1.000, -0.041],\n",
" [ 1.000, -0.048],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, 0.052],\n",
" [ 1.000, 0.005],\n",
" [ 1.000, -0.064],\n",
" [ 1.000, -0.017],\n",
" [ 1.000, -0.058],\n",
" [ 1.000, 0.010],\n",
" [ 1.000, 0.089],\n",
" [ 1.000, -0.005],\n",
" [ 1.000, -0.064],\n",
" [ 1.000, 0.018],\n",
" [ 1.000, -0.045],\n",
" [ 1.000, 0.028],\n",
" [ 1.000, 0.041],\n",
" [ 1.000, 0.065],\n",
" [ 1.000, -0.032],\n",
" [ 1.000, -0.076],\n",
" [ 1.000, 0.050],\n",
" [ 1.000, 0.046],\n",
" [ 1.000, -0.009],\n",
" [ 1.000, -0.032],\n",
" [ 1.000, 0.005],\n",
" [ 1.000, 0.021],\n",
" [ 1.000, 0.014],\n",
" [ 1.000, 0.110],\n",
" [ 1.000, 0.001],\n",
" [ 1.000, 0.058],\n",
" [ 1.000, -0.021],\n",
" [ 1.000, -0.011],\n",
" [ 1.000, -0.047],\n",
" [ 1.000, 0.005],\n",
" [ 1.000, 0.018],\n",
" [ 1.000, 0.081],\n",
" [ 1.000, 0.035],\n",
" [ 1.000, 0.024],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, -0.061],\n",
" [ 1.000, -0.002],\n",
" [ 1.000, -0.062],\n",
" [ 1.000, 0.016],\n",
" [ 1.000, 0.096],\n",
" [ 1.000, -0.070],\n",
" [ 1.000, -0.021],\n",
" [ 1.000, -0.054],\n",
" [ 1.000, 0.043],\n",
" [ 1.000, 0.056],\n",
" [ 1.000, -0.082],\n",
" [ 1.000, 0.050],\n",
" [ 1.000, 0.111],\n",
" [ 1.000, 0.062],\n",
" [ 1.000, 0.014],\n",
" [ 1.000, 0.048],\n",
" [ 1.000, 0.012],\n",
" [ 1.000, 0.006],\n",
" [ 1.000, 0.047],\n",
" [ 1.000, 0.129],\n",
" [ 1.000, 0.060],\n",
" [ 1.000, 0.093],\n",
" [ 1.000, 0.015],\n",
" [ 1.000, -0.005],\n",
" [ 1.000, 0.070],\n",
" [ 1.000, -0.004],\n",
" [ 1.000, -0.001],\n",
" [ 1.000, -0.044],\n",
" [ 1.000, 0.021],\n",
" [ 1.000, 0.061],\n",
" [ 1.000, -0.011],\n",
" [ 1.000, -0.033],\n",
" [ 1.000, -0.065],\n",
" [ 1.000, 0.043],\n",
" [ 1.000, -0.062],\n",
" [ 1.000, 0.064],\n",
" [ 1.000, 0.030],\n",
" [ 1.000, 0.072],\n",
" [ 1.000, -0.019],\n",
" [ 1.000, -0.067],\n",
" [ 1.000, -0.060],\n",
" [ 1.000, 0.069],\n",
" [ 1.000, 0.060],\n",
" [ 1.000, -0.027],\n",
" [ 1.000, -0.020],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, 0.071],\n",
" [ 1.000, -0.079],\n",
" [ 1.000, 0.010],\n",
" [ 1.000, -0.039],\n",
" [ 1.000, 0.020],\n",
" [ 1.000, 0.027],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, -0.016],\n",
" [ 1.000, 0.005],\n",
" [ 1.000, -0.043],\n",
" [ 1.000, 0.006],\n",
" [ 1.000, -0.035],\n",
" [ 1.000, 0.024],\n",
" [ 1.000, -0.018],\n",
" [ 1.000, 0.042],\n",
" [ 1.000, -0.055],\n",
" [ 1.000, -0.003],\n",
" [ 1.000, -0.067],\n",
" [ 1.000, -0.013],\n",
" [ 1.000, -0.042],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, -0.005],\n",
" [ 1.000, -0.059],\n",
" [ 1.000, 0.025],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, 0.003],\n",
" [ 1.000, 0.054],\n",
" [ 1.000, -0.045],\n",
" [ 1.000, -0.058],\n",
" [ 1.000, -0.056],\n",
" [ 1.000, 0.001],\n",
" [ 1.000, 0.030],\n",
" [ 1.000, 0.007],\n",
" [ 1.000, 0.047],\n",
" [ 1.000, 0.026],\n",
" [ 1.000, 0.046],\n",
" [ 1.000, 0.040],\n",
" [ 1.000, -0.018],\n",
" [ 1.000, 0.014],\n",
" [ 1.000, 0.037],\n",
" [ 1.000, 0.003],\n",
" [ 1.000, -0.071],\n",
" [ 1.000, -0.033],\n",
" [ 1.000, 0.094],\n",
" [ 1.000, 0.036],\n",
" [ 1.000, 0.032],\n",
" [ 1.000, -0.065],\n",
" [ 1.000, -0.042],\n",
" [ 1.000, -0.040],\n",
" [ 1.000, -0.039],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, -0.023],\n",
" [ 1.000, -0.067],\n",
" [ 1.000, 0.033],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, -0.030],\n",
" [ 1.000, -0.013],\n",
" [ 1.000, -0.016],\n",
" [ 1.000, 0.071],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, 0.000],\n",
" [ 1.000, 0.037],\n",
" [ 1.000, 0.039],\n",
" [ 1.000, -0.015],\n",
" [ 1.000, 0.007],\n",
" [ 1.000, -0.069],\n",
" [ 1.000, -0.009],\n",
" [ 1.000, 0.020],\n",
" [ 1.000, 0.075],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, -0.023],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, 0.054],\n",
" [ 1.000, -0.035],\n",
" [ 1.000, -0.032],\n",
" [ 1.000, -0.082],\n",
" [ 1.000, 0.048],\n",
" [ 1.000, 0.061],\n",
" [ 1.000, 0.056],\n",
" [ 1.000, 0.098],\n",
" [ 1.000, 0.060],\n",
" [ 1.000, 0.034],\n",
" [ 1.000, 0.056],\n",
" [ 1.000, -0.065],\n",
" [ 1.000, 0.161],\n",
" [ 1.000, -0.056],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, -0.036],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, -0.042],\n",
" [ 1.000, 0.127],\n",
" [ 1.000, -0.077],\n",
" [ 1.000, 0.028],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, -0.062],\n",
" [ 1.000, -0.001],\n",
" [ 1.000, 0.089],\n",
" [ 1.000, -0.032],\n",
" [ 1.000, 0.030],\n",
" [ 1.000, 0.009],\n",
" [ 1.000, 0.007],\n",
" [ 1.000, -0.020],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, -0.012],\n",
" [ 1.000, 0.026],\n",
" [ 1.000, -0.059],\n",
" [ 1.000, -0.036],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, 0.019],\n",
" [ 1.000, -0.090],\n",
" [ 1.000, -0.005],\n",
" [ 1.000, -0.053],\n",
" [ 1.000, -0.022],\n",
" [ 1.000, -0.020],\n",
" [ 1.000, -0.055],\n",
" [ 1.000, -0.006],\n",
" [ 1.000, -0.017],\n",
" [ 1.000, 0.055],\n",
" [ 1.000, 0.077],\n",
" [ 1.000, 0.019],\n",
" [ 1.000, -0.022],\n",
" [ 1.000, 0.093],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, 0.039],\n",
" [ 1.000, -0.061],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, -0.037],\n",
" [ 1.000, -0.014],\n",
" [ 1.000, 0.074],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, 0.034],\n",
" [ 1.000, 0.035],\n",
" [ 1.000, -0.039],\n",
" [ 1.000, -0.040],\n",
" [ 1.000, -0.002],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, 0.001],\n",
" [ 1.000, 0.065],\n",
" [ 1.000, 0.040],\n",
" [ 1.000, -0.023],\n",
" [ 1.000, 0.053],\n",
" [ 1.000, 0.040],\n",
" [ 1.000, -0.020],\n",
" [ 1.000, 0.014],\n",
" [ 1.000, -0.034],\n",
" [ 1.000, 0.007],\n",
" [ 1.000, 0.005],\n",
" [ 1.000, 0.030],\n",
" [ 1.000, 0.052],\n",
" [ 1.000, 0.062],\n",
" [ 1.000, -0.007],\n",
" [ 1.000, 0.006],\n",
" [ 1.000, 0.054],\n",
" [ 1.000, -0.008],\n",
" [ 1.000, 0.115],\n",
" [ 1.000, 0.067],\n",
" [ 1.000, -0.056],\n",
" [ 1.000, 0.030],\n",
" [ 1.000, -0.026],\n",
" [ 1.000, 0.105],\n",
" [ 1.000, -0.006],\n",
" [ 1.000, -0.047],\n",
" [ 1.000, -0.048],\n",
" [ 1.000, 0.085],\n",
" [ 1.000, -0.013],\n",
" [ 1.000, -0.033],\n",
" [ 1.000, -0.007],\n",
" [ 1.000, -0.014],\n",
" [ 1.000, 0.060],\n",
" [ 1.000, 0.022],\n",
" [ 1.000, 0.019],\n",
" [ 1.000, -0.012],\n",
" [ 1.000, -0.003],\n",
" [ 1.000, 0.018],\n",
" [ 1.000, -0.030],\n",
" [ 1.000, -0.020],\n",
" [ 1.000, -0.058],\n",
" [ 1.000, 0.061],\n",
" [ 1.000, -0.041],\n",
" [ 1.000, -0.072],\n",
" [ 1.000, -0.056],\n",
" [ 1.000, 0.046],\n",
" [ 1.000, -0.009],\n",
" [ 1.000, -0.033],\n",
" [ 1.000, 0.050],\n",
" [ 1.000, -0.085],\n",
" [ 1.000, 0.006],\n",
" [ 1.000, 0.021],\n",
" [ 1.000, -0.007],\n",
" [ 1.000, 0.105],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, -0.006],\n",
" [ 1.000, -0.039],\n",
" [ 1.000, 0.137],\n",
" [ 1.000, 0.171],\n",
" [ 1.000, 0.002],\n",
" [ 1.000, 0.038],\n",
" [ 1.000, -0.058],\n",
" [ 1.000, -0.009],\n",
" [ 1.000, -0.023],\n",
" [ 1.000, -0.011],\n",
" [ 1.000, -0.034],\n",
" [ 1.000, -0.003],\n",
" [ 1.000, 0.068],\n",
" [ 1.000, 0.010],\n",
" [ 1.000, 0.002],\n",
" [ 1.000, -0.039],\n",
" [ 1.000, 0.026],\n",
" [ 1.000, -0.089],\n",
" [ 1.000, 0.061],\n",
" [ 1.000, -0.029],\n",
" [ 1.000, -0.030],\n",
" [ 1.000, -0.019],\n",
" [ 1.000, -0.041],\n",
" [ 1.000, 0.015],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, 0.001],\n",
" [ 1.000, 0.069],\n",
" [ 1.000, -0.070],\n",
" [ 1.000, -0.030],\n",
" [ 1.000, -0.046],\n",
" [ 1.000, 0.019],\n",
" [ 1.000, 0.001],\n",
" [ 1.000, -0.031],\n",
" [ 1.000, -0.004],\n",
" [ 1.000, 0.015],\n",
" [ 1.000, 0.023],\n",
" [ 1.000, 0.046],\n",
" [ 1.000, -0.045],\n",
" [ 1.000, -0.033],\n",
" [ 1.000, 0.097],\n",
" [ 1.000, 0.054],\n",
" [ 1.000, 0.123],\n",
" [ 1.000, -0.081],\n",
" [ 1.000, 0.093],\n",
" [ 1.000, -0.050],\n",
" [ 1.000, -0.012],\n",
" [ 1.000, -0.028],\n",
" [ 1.000, 0.058],\n",
" [ 1.000, 0.085],\n",
" [ 1.000, -0.001],\n",
" [ 1.000, 0.007],\n",
" [ 1.000, 0.009],\n",
" [ 1.000, 0.080],\n",
" [ 1.000, 0.071],\n",
" [ 1.000, -0.025],\n",
" [ 1.000, -0.055],\n",
" [ 1.000, -0.036],\n",
" [ 1.000, 0.016]])"
]
},
"metadata": {},
"execution_count": 38
}
]
},
{
"cell_type": "code",
"source": [
"mod = sm.OLS(diabetes_y_train, sm.add_constant(diabetes_X_train))\n",
"res = mod.fit()\n",
"print(res.summary())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Bci5XJD0gS5R",
"outputId": "777cb9ea-bf19-4c38-d904-de4166d05864"
},
"execution_count": 35,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.335\n",
"Model: OLS Adj. R-squared: 0.334\n",
"Method: Least Squares F-statistic: 211.7\n",
"Date: Sun, 29 May 2022 Prob (F-statistic): 3.98e-39\n",
"Time: 01:49:37 Log-Likelihood: -2346.5\n",
"No. Observations: 422 AIC: 4697.\n",
"Df Residuals: 420 BIC: 4705.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 152.9189 3.069 49.830 0.000 146.887 158.951\n",
"x1 938.2379 64.484 14.550 0.000 811.487 1064.989\n",
"==============================================================================\n",
"Omnibus: 12.587 Durbin-Watson: 1.839\n",
"Prob(Omnibus): 0.002 Jarque-Bera (JB): 7.334\n",
"Skew: 0.142 Prob(JB): 0.0255\n",
"Kurtosis: 2.420 Cond. No. 21.0\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"beta=np.linalg.inv(X_ols.T.dot(X_ols)).dot(X_ols.T.dot(diabetes_y_train))\n",
"pd.Series(beta)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u8Z0y-EzUG0i",
"outputId": "76535b9d-39c3-47a9-c35b-bc4e31be507c"
},
"execution_count": 39,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 152.918862\n",
"1 938.237861\n",
"dtype: float64"
]
},
"metadata": {},
"execution_count": 39
}
]
}
]
}