{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "sklearn_regression_diabetetes.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyP+fDr6HFsgo2ApOuOYo6B1",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"source": [
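"# Linear regression on the scikit-learn diabetes dataset\n",
"\n",
"Dataset reference: [sklearn.datasets.load_diabetes](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)"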
],
"metadata": {
"id": "2t4gcEqWUowK"
}
},
{
"cell_type": "markdown",
"source": [
"### Ordinary least squares example\n",
"\n",
"Source: [scikit-learn OLS example](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py)"
],
"metadata": {
"id": "VAYkP7QhU73k"
}
},
{
"cell_type": "code",
"source": [
"import datetime as dt\n",
"\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"from scipy import stats\n",
"from sklearn import datasets\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and 3.8\n",
"# removed the old names, so use whichever name this installation provides.\n",
"SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
"plt.style.use(SEABORN_STYLE)\n",
"mpl.rcParams['font.family'] = 'DejaVu Sans'\n",
"mpl.rcParams[\"savefig.dpi\"] = 500\n",
"# Print NumPy floats with three decimals in a minimum width of six characters.\n",
"np.set_printoptions(precision=5, suppress=True, formatter={\"float\": lambda x: f\"{x:6.3f}\"})\n",
"\n",
"%matplotlib inline"
],
"metadata": {
"id": "UIgP7erHK9Jf"
},
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "X8PWUgCKVLE_"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Load the diabetes dataset as a (features, target) pair of arrays\n",
"diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)\n",
"\n",
"# Preview only the first rows instead of echoing the whole matrix\n",
"diabetes_X[:5]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sJ4cSg8aT6zN",
"outputId": "0c68d128-a464-47f2-c661-6a1c82279fb2"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n",
" 0.01990842, -0.01764613],\n",
" [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
" -0.06832974, -0.09220405],\n",
" [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n",
" 0.00286377, -0.02593034],\n",
" ...,\n",
" [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n",
" -0.04687948, 0.01549073],\n",
" [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n",
" 0.04452837, -0.02593034],\n",
" [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
" -0.00421986, 0.00306441]])"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"# Column of the feature matrix to regress on (index 2 is `bmi` in the\n",
"# load_diabetes feature list) and the number of trailing samples held out.\n",
"FEATURE_INDEX = 2\n",
"N_TEST = 20\n",
"\n",
"# Use only one feature. The slice is taken from a freshly loaded matrix, not\n",
"# from `diabetes_X` itself, so re-running this cell no longer raises an\n",
"# IndexError on the already reduced (n_samples, 1) array.\n",
"diabetes_X_full, diabetes_y = datasets.load_diabetes(return_X_y=True)\n",
"diabetes_X = diabetes_X_full[:, np.newaxis, FEATURE_INDEX]\n",
"\n",
"# Split the data into training/testing sets\n",
"diabetes_X_train = diabetes_X[:-N_TEST]\n",
"diabetes_X_test = diabetes_X[-N_TEST:]\n",
"\n",
"# Split the targets into training/testing sets\n",
"diabetes_y_train = diabetes_y[:-N_TEST]\n",
"diabetes_y_test = diabetes_y[-N_TEST:]\n"
],
"metadata": {
"id": "n8kFIZ0FULGo"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Build the ordinary least squares model and fit it on the training split\n",
"# (`fit` returns the estimator itself, so construction and fitting chain)\n",
"regr = LinearRegression().fit(diabetes_X_train, diabetes_y_train)\n",
"\n",
"# Predict the targets of the held-out test samples\n",
"diabetes_y_pred = regr.predict(diabetes_X_test)"
],
"metadata": {
"id": "8CHNy0REVqML"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Check which container type holds the fitted coefficients\n",
"type(regr.coef_)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RedHiJErWMZU",
"outputId": "813e8aa6-de2c-4afd-8f74-5a2b517e8b99"
},
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"source": [
"# Fitted slope(s), rounded to four decimals\n",
"regr.coef_.round(4)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tc2oHOojV-cd",
"outputId": "3a46867b-b590-4396-9f5a-6624ac8ab452"
},
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([938.2379])"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"source": [
"# The coefficients\n",
"print(\"Coefficients: \\n\", regr.coef_.round(4))\n",
"# The mean squared error\n",
"mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)\n",
"print(f\"Mean squared error: {mse:.2f}\")\n",
"# The coefficient of determination: 1 is perfect prediction\n",
"r2 = r2_score(diabetes_y_test, diabetes_y_pred)\n",
"print(f\"Coefficient of determination: {r2:.2f}\")\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "AEkVUO9IT--c",
"outputId": "4c93470d-1f98-418b-fbb4-05bcc80f1b4c"
},
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Coefficients: \n",
" [938.2379]\n",
"Mean squared error: 2548.07\n",
"Coefficient of determination: 0.47\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Plot outputs: test samples as black points, model predictions as a blue line\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(diabetes_X_test, diabetes_y_test, color=\"black\")\n",
"ax.plot(diabetes_X_test, diabetes_y_pred, color=\"blue\", linewidth=3)\n",
"\n",
"# Remove the ticks from both axes\n",
"ax.set_xticks(())\n",
"ax.set_yticks(())\n",
"\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 252
},
"id": "nRRBgiI7UATN",
"outputId": "32877e33-da17-4ca6-f912-70a6aa5851e8"
},
"execution_count": 4,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"