{ "cells": [ { "cell_type": "markdown", "id": "86266ad7-ffe1-4601-a749-2907c4c9c05c", "metadata": {}, "source": [ "## Heart Disease Health Indicators Machine Learning Project\n", "\n", "#### Author: Michael Breen-McKay\n", "#### Date: 03/15/2023" ] }, { "cell_type": "code", "execution_count": 25, "id": "e6ea2147-fca5-45c7-9c2c-8d79ad54bbf8", "metadata": {}, "outputs": [], "source": [ "#import packages\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV\n", "from sklearn.metrics import mean_squared_error, f1_score, recall_score" ] }, { "cell_type": "markdown", "id": "23cee9f6-9b74-418c-8ae5-ccae93dd325a", "metadata": {}, "source": [ "### Data Review" ] }, { "cell_type": "code", "execution_count": 2, "id": "c0368d2b-580b-4455-a3f4-908e6f162837", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | HeartDiseaseorAttack | \n", "HighBP | \n", "HighChol | \n", "CholCheck | \n", "BMI | \n", "Smoker | \n", "Stroke | \n", "Diabetes | \n", "PhysActivity | \n", "Fruits | \n", "... | \n", "AnyHealthcare | \n", "NoDocbcCost | \n", "GenHlth | \n", "MentHlth | \n", "PhysHlth | \n", "DiffWalk | \n", "Sex | \n", "Age | \n", "Education | \n", "Income | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "40.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "5.0 | \n", "18.0 | \n", "15.0 | \n", "1.0 | \n", "0.0 | \n", "9.0 | \n", "4.0 | \n", "3.0 | \n", "
| 1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "25.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "1.0 | \n", "3.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "7.0 | \n", "6.0 | \n", "1.0 | \n", "
| 2 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "28.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "... | \n", "1.0 | \n", "1.0 | \n", "5.0 | \n", "30.0 | \n", "30.0 | \n", "1.0 | \n", "0.0 | \n", "9.0 | \n", "4.0 | \n", "8.0 | \n", "
| 3 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "27.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "2.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "11.0 | \n", "3.0 | \n", "6.0 | \n", "
| 4 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "24.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "2.0 | \n", "3.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "11.0 | \n", "5.0 | \n", "4.0 | \n", "
5 rows × 22 columns
\n", "| \n", " | HeartDiseaseorAttack | \n", "HighBP | \n", "HighChol | \n", "CholCheck | \n", "BMI | \n", "Smoker | \n", "Stroke | \n", "Diabetes | \n", "PhysActivity | \n", "Fruits | \n", "... | \n", "AnyHealthcare | \n", "NoDocbcCost | \n", "GenHlth | \n", "MentHlth | \n", "PhysHlth | \n", "DiffWalk | \n", "Sex | \n", "Age | \n", "Education | \n", "Income | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "... | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "253680.000000 | \n", "
| mean | \n", "0.094186 | \n", "0.429001 | \n", "0.424121 | \n", "0.962670 | \n", "28.382364 | \n", "0.443169 | \n", "0.040571 | \n", "0.296921 | \n", "0.756544 | \n", "0.634256 | \n", "... | \n", "0.951053 | \n", "0.084177 | \n", "2.511392 | \n", "3.184772 | \n", "4.242081 | \n", "0.168224 | \n", "0.440342 | \n", "8.032119 | \n", "5.050434 | \n", "6.053875 | \n", "
| std | \n", "0.292087 | \n", "0.494934 | \n", "0.494210 | \n", "0.189571 | \n", "6.608694 | \n", "0.496761 | \n", "0.197294 | \n", "0.698160 | \n", "0.429169 | \n", "0.481639 | \n", "... | \n", "0.215759 | \n", "0.277654 | \n", "1.068477 | \n", "7.412847 | \n", "8.717951 | \n", "0.374066 | \n", "0.496429 | \n", "3.054220 | \n", "0.985774 | \n", "2.071148 | \n", "
| min | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "12.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "
| 25% | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "24.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "0.000000 | \n", "... | \n", "1.000000 | \n", "0.000000 | \n", "2.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "6.000000 | \n", "4.000000 | \n", "5.000000 | \n", "
| 50% | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "27.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "... | \n", "1.000000 | \n", "0.000000 | \n", "2.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "8.000000 | \n", "5.000000 | \n", "7.000000 | \n", "
| 75% | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "31.000000 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "... | \n", "1.000000 | \n", "0.000000 | \n", "3.000000 | \n", "2.000000 | \n", "3.000000 | \n", "0.000000 | \n", "1.000000 | \n", "10.000000 | \n", "6.000000 | \n", "8.000000 | \n", "
| max | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "98.000000 | \n", "1.000000 | \n", "1.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "... | \n", "1.000000 | \n", "1.000000 | \n", "5.000000 | \n", "30.000000 | \n", "30.000000 | \n", "1.000000 | \n", "1.000000 | \n", "13.000000 | \n", "6.000000 | \n", "8.000000 | \n", "
8 rows × 22 columns
\n", "| \n", " | HeartDiseaseorAttack | \n", "HighBP | \n", "HighChol | \n", "CholCheck | \n", "BMI | \n", "Smoker | \n", "Stroke | \n", "Diabetes | \n", "PhysActivity | \n", "Fruits | \n", "... | \n", "AnyHealthcare | \n", "NoDocbcCost | \n", "GenHlth | \n", "MentHlth | \n", "PhysHlth | \n", "DiffWalk | \n", "Sex | \n", "Age | \n", "Education | \n", "Income | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| HeartDiseaseorAttack | \n", "1.000000 | \n", "0.209361 | \n", "0.180765 | \n", "0.044206 | \n", "0.052904 | \n", "0.114441 | \n", "0.203002 | \n", "0.180272 | \n", "-0.087299 | \n", "-0.019790 | \n", "... | \n", "0.018734 | \n", "0.031000 | \n", "0.258383 | \n", "0.064621 | \n", "0.181698 | \n", "0.212709 | \n", "0.086096 | \n", "0.221618 | \n", "-0.099600 | \n", "-0.141011 | \n", "
| HighBP | \n", "0.209361 | \n", "1.000000 | \n", "0.298199 | \n", "0.098508 | \n", "0.213748 | \n", "0.096991 | \n", "0.129575 | \n", "0.271596 | \n", "-0.125267 | \n", "-0.040555 | \n", "... | \n", "0.038425 | \n", "0.017358 | \n", "0.300530 | \n", "0.056456 | \n", "0.161212 | \n", "0.223618 | \n", "0.052207 | \n", "0.344452 | \n", "-0.141358 | \n", "-0.171235 | \n", "
| HighChol | \n", "0.180765 | \n", "0.298199 | \n", "1.000000 | \n", "0.085642 | \n", "0.106722 | \n", "0.091299 | \n", "0.092620 | \n", "0.209085 | \n", "-0.078046 | \n", "-0.040859 | \n", "... | \n", "0.042230 | \n", "0.013310 | \n", "0.208426 | \n", "0.062069 | \n", "0.121751 | \n", "0.144672 | \n", "0.031205 | \n", "0.272318 | \n", "-0.070802 | \n", "-0.085459 | \n", "
| CholCheck | \n", "0.044206 | \n", "0.098508 | \n", "0.085642 | \n", "1.000000 | \n", "0.034495 | \n", "-0.009929 | \n", "0.024158 | \n", "0.067546 | \n", "0.004190 | \n", "0.023849 | \n", "... | \n", "0.117626 | \n", "-0.058255 | \n", "0.046589 | \n", "-0.008366 | \n", "0.031775 | \n", "0.040585 | \n", "-0.022115 | \n", "0.090321 | \n", "0.001510 | \n", "0.014259 | \n", "
| BMI | \n", "0.052904 | \n", "0.213748 | \n", "0.106722 | \n", "0.034495 | \n", "1.000000 | \n", "0.013804 | \n", "0.020153 | \n", "0.224379 | \n", "-0.147294 | \n", "-0.087518 | \n", "... | \n", "-0.018471 | \n", "0.058206 | \n", "0.239185 | \n", "0.085310 | \n", "0.121141 | \n", "0.197078 | \n", "0.042950 | \n", "-0.036618 | \n", "-0.103932 | \n", "-0.100069 | \n", "
| Smoker | \n", "0.114441 | \n", "0.096991 | \n", "0.091299 | \n", "-0.009929 | \n", "0.013804 | \n", "1.000000 | \n", "0.061173 | \n", "0.062914 | \n", "-0.087401 | \n", "-0.077666 | \n", "... | \n", "-0.023251 | \n", "0.048946 | \n", "0.163143 | \n", "0.092196 | \n", "0.116460 | \n", "0.122463 | \n", "0.093662 | \n", "0.120641 | \n", "-0.161955 | \n", "-0.123937 | \n", "
| Stroke | \n", "0.203002 | \n", "0.129575 | \n", "0.092620 | \n", "0.024158 | \n", "0.020153 | \n", "0.061173 | \n", "1.000000 | \n", "0.107179 | \n", "-0.069151 | \n", "-0.013389 | \n", "... | \n", "0.008776 | \n", "0.034804 | \n", "0.177942 | \n", "0.070172 | \n", "0.148944 | \n", "0.176567 | \n", "0.002978 | \n", "0.126974 | \n", "-0.076009 | \n", "-0.128599 | \n", "
| Diabetes | \n", "0.180272 | \n", "0.271596 | \n", "0.209085 | \n", "0.067546 | \n", "0.224379 | \n", "0.062914 | \n", "0.107179 | \n", "1.000000 | \n", "-0.121947 | \n", "-0.042192 | \n", "... | \n", "0.015410 | \n", "0.035436 | \n", "0.302587 | \n", "0.073507 | \n", "0.176287 | \n", "0.224239 | \n", "0.031040 | \n", "0.185026 | \n", "-0.130517 | \n", "-0.171483 | \n", "
| PhysActivity | \n", "-0.087299 | \n", "-0.125267 | \n", "-0.078046 | \n", "0.004190 | \n", "-0.147294 | \n", "-0.087401 | \n", "-0.069151 | \n", "-0.121947 | \n", "1.000000 | \n", "0.142756 | \n", "... | \n", "0.035505 | \n", "-0.061638 | \n", "-0.266186 | \n", "-0.125587 | \n", "-0.219230 | \n", "-0.253174 | \n", "0.032482 | \n", "-0.092511 | \n", "0.199658 | \n", "0.198539 | \n", "
| Fruits | \n", "-0.019790 | \n", "-0.040555 | \n", "-0.040859 | \n", "0.023849 | \n", "-0.087518 | \n", "-0.077666 | \n", "-0.013389 | \n", "-0.042192 | \n", "0.142756 | \n", "1.000000 | \n", "... | \n", "0.031544 | \n", "-0.044243 | \n", "-0.103854 | \n", "-0.068217 | \n", "-0.044633 | \n", "-0.048352 | \n", "-0.091175 | \n", "0.064547 | \n", "0.110187 | \n", "0.079929 | \n", "
| Veggies | \n", "-0.039167 | \n", "-0.061266 | \n", "-0.039874 | \n", "0.006121 | \n", "-0.062275 | \n", "-0.030678 | \n", "-0.041124 | \n", "-0.058972 | \n", "0.153150 | \n", "0.254342 | \n", "... | \n", "0.029584 | \n", "-0.032232 | \n", "-0.123066 | \n", "-0.058884 | \n", "-0.064290 | \n", "-0.080506 | \n", "-0.064765 | \n", "-0.009771 | \n", "0.154329 | \n", "0.151087 | \n", "
| HvyAlcoholConsump | \n", "-0.028991 | \n", "-0.003972 | \n", "-0.011543 | \n", "-0.023730 | \n", "-0.048736 | \n", "0.101619 | \n", "-0.016950 | \n", "-0.057882 | \n", "0.012392 | \n", "-0.035288 | \n", "... | \n", "-0.010488 | \n", "0.004684 | \n", "-0.036724 | \n", "0.024716 | \n", "-0.026415 | \n", "-0.037668 | \n", "0.005740 | \n", "-0.034578 | \n", "0.023997 | \n", "0.053619 | \n", "
| AnyHealthcare | \n", "0.018734 | \n", "0.038425 | \n", "0.042230 | \n", "0.117626 | \n", "-0.018471 | \n", "-0.023251 | \n", "0.008776 | \n", "0.015410 | \n", "0.035505 | \n", "0.031544 | \n", "... | \n", "1.000000 | \n", "-0.232532 | \n", "-0.040817 | \n", "-0.052707 | \n", "-0.008276 | \n", "0.007074 | \n", "-0.019405 | \n", "0.138046 | \n", "0.122514 | \n", "0.157999 | \n", "
| NoDocbcCost | \n", "0.031000 | \n", "0.017358 | \n", "0.013310 | \n", "-0.058255 | \n", "0.058206 | \n", "0.048946 | \n", "0.034804 | \n", "0.035436 | \n", "-0.061638 | \n", "-0.044243 | \n", "... | \n", "-0.232532 | \n", "1.000000 | \n", "0.166397 | \n", "0.192107 | \n", "0.148998 | \n", "0.118447 | \n", "-0.044931 | \n", "-0.119777 | \n", "-0.100701 | \n", "-0.203182 | \n", "
| GenHlth | \n", "0.258383 | \n", "0.300530 | \n", "0.208426 | \n", "0.046589 | \n", "0.239185 | \n", "0.163143 | \n", "0.177942 | \n", "0.302587 | \n", "-0.266186 | \n", "-0.103854 | \n", "... | \n", "-0.040817 | \n", "0.166397 | \n", "1.000000 | \n", "0.301674 | \n", "0.524364 | \n", "0.456920 | \n", "-0.006091 | \n", "0.152450 | \n", "-0.284912 | \n", "-0.370014 | \n", "
| MentHlth | \n", "0.064621 | \n", "0.056456 | \n", "0.062069 | \n", "-0.008366 | \n", "0.085310 | \n", "0.092196 | \n", "0.070172 | \n", "0.073507 | \n", "-0.125587 | \n", "-0.068217 | \n", "... | \n", "-0.052707 | \n", "0.192107 | \n", "0.301674 | \n", "1.000000 | \n", "0.353619 | \n", "0.233688 | \n", "-0.080705 | \n", "-0.092068 | \n", "-0.101830 | \n", "-0.209806 | \n", "
| PhysHlth | \n", "0.181698 | \n", "0.161212 | \n", "0.121751 | \n", "0.031775 | \n", "0.121141 | \n", "0.116460 | \n", "0.148944 | \n", "0.176287 | \n", "-0.219230 | \n", "-0.044633 | \n", "... | \n", "-0.008276 | \n", "0.148998 | \n", "0.524364 | \n", "0.353619 | \n", "1.000000 | \n", "0.478417 | \n", "-0.043137 | \n", "0.099130 | \n", "-0.155093 | \n", "-0.266799 | \n", "
| DiffWalk | \n", "0.212709 | \n", "0.223618 | \n", "0.144672 | \n", "0.040585 | \n", "0.197078 | \n", "0.122463 | \n", "0.176567 | \n", "0.224239 | \n", "-0.253174 | \n", "-0.048352 | \n", "... | \n", "0.007074 | \n", "0.118447 | \n", "0.456920 | \n", "0.233688 | \n", "0.478417 | \n", "1.000000 | \n", "-0.070299 | \n", "0.204450 | \n", "-0.192642 | \n", "-0.320124 | \n", "
| Sex | \n", "0.086096 | \n", "0.052207 | \n", "0.031205 | \n", "-0.022115 | \n", "0.042950 | \n", "0.093662 | \n", "0.002978 | \n", "0.031040 | \n", "0.032482 | \n", "-0.091175 | \n", "... | \n", "-0.019405 | \n", "-0.044931 | \n", "-0.006091 | \n", "-0.080705 | \n", "-0.043137 | \n", "-0.070299 | \n", "1.000000 | \n", "-0.027340 | \n", "0.019480 | \n", "0.127141 | \n", "
| Age | \n", "0.221618 | \n", "0.344452 | \n", "0.272318 | \n", "0.090321 | \n", "-0.036618 | \n", "0.120641 | \n", "0.126974 | \n", "0.185026 | \n", "-0.092511 | \n", "0.064547 | \n", "... | \n", "0.138046 | \n", "-0.119777 | \n", "0.152450 | \n", "-0.092068 | \n", "0.099130 | \n", "0.204450 | \n", "-0.027340 | \n", "1.000000 | \n", "-0.101901 | \n", "-0.127775 | \n", "
| Education | \n", "-0.099600 | \n", "-0.141358 | \n", "-0.070802 | \n", "0.001510 | \n", "-0.103932 | \n", "-0.161955 | \n", "-0.076009 | \n", "-0.130517 | \n", "0.199658 | \n", "0.110187 | \n", "... | \n", "0.122514 | \n", "-0.100701 | \n", "-0.284912 | \n", "-0.101830 | \n", "-0.155093 | \n", "-0.192642 | \n", "0.019480 | \n", "-0.101901 | \n", "1.000000 | \n", "0.449106 | \n", "
| Income | \n", "-0.141011 | \n", "-0.171235 | \n", "-0.085459 | \n", "0.014259 | \n", "-0.100069 | \n", "-0.123937 | \n", "-0.128599 | \n", "-0.171483 | \n", "0.198539 | \n", "0.079929 | \n", "... | \n", "0.157999 | \n", "-0.203182 | \n", "-0.370014 | \n", "-0.209806 | \n", "-0.266799 | \n", "-0.320124 | \n", "0.127141 | \n", "-0.127775 | \n", "0.449106 | \n", "1.000000 | \n", "
22 rows × 22 columns
\n", "| \n", " | Model_Name | \n", "Accuracy | \n", "Recall | \n", "F1_Score | \n", "
|---|---|---|---|---|
| 0 | \n", "GNB | \n", "0.834109 | \n", "0.48499 | \n", "0.35917 | \n", "
| 1 | \n", "BNB | \n", "0.878548 | \n", "0.293763 | \n", "0.316801 | \n", "
| 2 | \n", "SVC | \n", "0.905761 | \n", "0.038382 | \n", "0.072426 | \n", "
| 3 | \n", "LR | \n", "0.905892 | \n", "0.107608 | \n", "0.179798 | \n", "
| 4 | \n", "KNN | \n", "0.903027 | \n", "0.050446 | \n", "0.090685 | \n", "
| 5 | \n", "DTC | \n", "0.870454 | \n", "0.217958 | \n", "0.243884 | \n", "
| 6 | \n", "RFC | \n", "0.894815 | \n", "0.142152 | \n", "0.205774 | \n", "