{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Diabetes Onset Detection\n",
"> This project builds a deep neural network for detecting diabetes onset and uses GridSearch for hyperparameter tuning. The original data (the PIMA Indians Diabetes dataset) comes from the UCI Machine Learning Repository.\n",
"\n",
"- toc: true \n",
"- badges: true\n",
"- comments: true\n",
"- author: Chanseok Kang\n",
"- categories: [Python, Machine_Learning]\n",
"- image: images/pima.jpg"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Required Packages"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import datetime\n",
"import numpy as np\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"import sklearn\n",
"import tensorflow as tf\n",
"\n",
"plt.rcParams['figure.figsize'] = (8, 8)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Version check"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Python: 3.7.6 (default, Jan 8 2020, 20:23:39) [MSC v.1916 64 bit (AMD64)]\n",
"Numpy: 1.18.1\n",
"Matplotlib: 3.1.3\n",
"Seaborn: 0.10.0\n",
"Pandas: 1.0.1\n",
"Scikit-learn: 0.22.1\n",
"Tensorflow: 2.1.0\n"
]
}
],
"source": [
"print('Python: {}'.format(sys.version))\n",
"print('Numpy: {}'.format(np.__version__))\n",
"print('Matplotlib: {}'.format(mpl.__version__))\n",
"print('Seaborn: {}'.format(sns.__version__))\n",
"print('Pandas: {}'.format(pd.__version__))\n",
"print('Scikit-learn: {}'.format(sklearn.__version__))\n",
"print('Tensorflow: {}'.format(tf.__version__))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare Dataset\n",
"The PIMA Indians Diabetes dataset is currently available from [Kaggle](https://www.kaggle.com/uciml/pima-indians-diabetes-database/)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 6 | \n",
" 148 | \n",
" 72 | \n",
" 35 | \n",
" 0 | \n",
" 33.6 | \n",
" 0.627 | \n",
" 50 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 85 | \n",
" 66 | \n",
" 29 | \n",
" 0 | \n",
" 26.6 | \n",
" 0.351 | \n",
" 31 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 8 | \n",
" 183 | \n",
" 64 | \n",
" 0 | \n",
" 0 | \n",
" 23.3 | \n",
" 0.672 | \n",
" 32 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 89 | \n",
" 66 | \n",
" 23 | \n",
" 94 | \n",
" 28.1 | \n",
" 0.167 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 137 | \n",
" 40 | \n",
" 35 | \n",
" 168 | \n",
" 43.1 | \n",
" 2.288 | \n",
" 33 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"0 6 148 72 35 0 33.6 \n",
"1 1 85 66 29 0 26.6 \n",
"2 8 183 64 0 0 23.3 \n",
"3 1 89 66 23 94 28.1 \n",
"4 0 137 40 35 168 43.1 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"0 0.627 50 1 \n",
"1 0.351 31 0 \n",
"2 0.672 32 1 \n",
"3 0.167 21 0 \n",
"4 2.288 33 1 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Import the dataset\n",
"df = pd.read_csv('./dataset/datasets_228_482_diabetes.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 3.845052 | \n",
" 120.894531 | \n",
" 69.105469 | \n",
" 20.536458 | \n",
" 79.799479 | \n",
" 31.992578 | \n",
" 0.471876 | \n",
" 33.240885 | \n",
" 0.348958 | \n",
"
\n",
" \n",
" std | \n",
" 3.369578 | \n",
" 31.972618 | \n",
" 19.355807 | \n",
" 15.952218 | \n",
" 115.244002 | \n",
" 7.884160 | \n",
" 0.331329 | \n",
" 11.760232 | \n",
" 0.476951 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.078000 | \n",
" 21.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.000000 | \n",
" 99.000000 | \n",
" 62.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 27.300000 | \n",
" 0.243750 | \n",
" 24.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.000000 | \n",
" 117.000000 | \n",
" 72.000000 | \n",
" 23.000000 | \n",
" 30.500000 | \n",
" 32.000000 | \n",
" 0.372500 | \n",
" 29.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 6.000000 | \n",
" 140.250000 | \n",
" 80.000000 | \n",
" 32.000000 | \n",
" 127.250000 | \n",
" 36.600000 | \n",
" 0.626250 | \n",
" 41.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 17.000000 | \n",
" 199.000000 | \n",
" 122.000000 | \n",
" 99.000000 | \n",
" 846.000000 | \n",
" 67.100000 | \n",
" 2.420000 | \n",
" 81.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
"count 768.000000 768.000000 768.000000 768.000000 768.000000 \n",
"mean 3.845052 120.894531 69.105469 20.536458 79.799479 \n",
"std 3.369578 31.972618 19.355807 15.952218 115.244002 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 1.000000 99.000000 62.000000 0.000000 0.000000 \n",
"50% 3.000000 117.000000 72.000000 23.000000 30.500000 \n",
"75% 6.000000 140.250000 80.000000 32.000000 127.250000 \n",
"max 17.000000 199.000000 122.000000 99.000000 846.000000 \n",
"\n",
" BMI DiabetesPedigreeFunction Age Outcome \n",
"count 768.000000 768.000000 768.000000 768.000000 \n",
"mean 31.992578 0.471876 33.240885 0.348958 \n",
"std 7.884160 0.331329 11.760232 0.476951 \n",
"min 0.000000 0.078000 21.000000 0.000000 \n",
"25% 27.300000 0.243750 24.000000 0.000000 \n",
"50% 32.000000 0.372500 29.000000 0.000000 \n",
"75% 36.600000 0.626250 41.000000 1.000000 \n",
"max 67.100000 2.420000 81.000000 1.000000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Describe the data\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 75 | \n",
" 1 | \n",
" 0 | \n",
" 48 | \n",
" 20 | \n",
" 0 | \n",
" 24.7 | \n",
" 0.140 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 182 | \n",
" 1 | \n",
" 0 | \n",
" 74 | \n",
" 20 | \n",
" 23 | \n",
" 27.7 | \n",
" 0.299 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 342 | \n",
" 1 | \n",
" 0 | \n",
" 68 | \n",
" 35 | \n",
" 0 | \n",
" 32.0 | \n",
" 0.389 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 349 | \n",
" 5 | \n",
" 0 | \n",
" 80 | \n",
" 32 | \n",
" 0 | \n",
" 41.0 | \n",
" 0.346 | \n",
" 37 | \n",
" 1 | \n",
"
\n",
" \n",
" 502 | \n",
" 6 | \n",
" 0 | \n",
" 68 | \n",
" 41 | \n",
" 0 | \n",
" 39.0 | \n",
" 0.727 | \n",
" 41 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"75 1 0 48 20 0 24.7 \n",
"182 1 0 74 20 23 27.7 \n",
"342 1 0 68 35 0 32.0 \n",
"349 5 0 80 32 0 41.0 \n",
"502 6 0 68 41 0 39.0 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"75 0.140 22 0 \n",
"182 0.299 21 0 \n",
"342 0.389 22 0 \n",
"349 0.346 37 1 \n",
"502 0.727 41 1 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check missing data\n",
"df[df['Glucose'] == 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preprocess Dataset"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 768.000000 | \n",
" 763.000000 | \n",
" 733.000000 | \n",
" 541.000000 | \n",
" 394.000000 | \n",
" 757.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
" 768.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 3.845052 | \n",
" 121.686763 | \n",
" 72.405184 | \n",
" 29.153420 | \n",
" 155.548223 | \n",
" 32.457464 | \n",
" 0.471876 | \n",
" 33.240885 | \n",
" 0.348958 | \n",
"
\n",
" \n",
" std | \n",
" 3.369578 | \n",
" 30.535641 | \n",
" 12.382158 | \n",
" 10.476982 | \n",
" 118.775855 | \n",
" 6.924988 | \n",
" 0.331329 | \n",
" 11.760232 | \n",
" 0.476951 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
" 44.000000 | \n",
" 24.000000 | \n",
" 7.000000 | \n",
" 14.000000 | \n",
" 18.200000 | \n",
" 0.078000 | \n",
" 21.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.000000 | \n",
" 99.000000 | \n",
" 64.000000 | \n",
" 22.000000 | \n",
" 76.250000 | \n",
" 27.500000 | \n",
" 0.243750 | \n",
" 24.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.000000 | \n",
" 117.000000 | \n",
" 72.000000 | \n",
" 29.000000 | \n",
" 125.000000 | \n",
" 32.300000 | \n",
" 0.372500 | \n",
" 29.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 6.000000 | \n",
" 141.000000 | \n",
" 80.000000 | \n",
" 36.000000 | \n",
" 190.000000 | \n",
" 36.600000 | \n",
" 0.626250 | \n",
" 41.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 17.000000 | \n",
" 199.000000 | \n",
" 122.000000 | \n",
" 99.000000 | \n",
" 846.000000 | \n",
" 67.100000 | \n",
" 2.420000 | \n",
" 81.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
"count 768.000000 763.000000 733.000000 541.000000 394.000000 \n",
"mean 3.845052 121.686763 72.405184 29.153420 155.548223 \n",
"std 3.369578 30.535641 12.382158 10.476982 118.775855 \n",
"min 0.000000 44.000000 24.000000 7.000000 14.000000 \n",
"25% 1.000000 99.000000 64.000000 22.000000 76.250000 \n",
"50% 3.000000 117.000000 72.000000 29.000000 125.000000 \n",
"75% 6.000000 141.000000 80.000000 36.000000 190.000000 \n",
"max 17.000000 199.000000 122.000000 99.000000 846.000000 \n",
"\n",
" BMI DiabetesPedigreeFunction Age Outcome \n",
"count 757.000000 768.000000 768.000000 768.000000 \n",
"mean 32.457464 0.471876 33.240885 0.348958 \n",
"std 6.924988 0.331329 11.760232 0.476951 \n",
"min 18.200000 0.078000 21.000000 0.000000 \n",
"25% 27.500000 0.243750 24.000000 0.000000 \n",
"50% 32.300000 0.372500 29.000000 0.000000 \n",
"75% 36.600000 0.626250 41.000000 1.000000 \n",
"max 67.100000 2.420000 81.000000 1.000000 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preprocess the data, mark zero values as NaN and drop\n",
"columns = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']\n",
"\n",
"for col in columns:\n",
" df[col].replace(0, np.nan, inplace=True)\n",
" \n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
" 392.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 3.301020 | \n",
" 122.627551 | \n",
" 70.663265 | \n",
" 29.145408 | \n",
" 156.056122 | \n",
" 33.086224 | \n",
" 0.523046 | \n",
" 30.864796 | \n",
" 0.331633 | \n",
"
\n",
" \n",
" std | \n",
" 3.211424 | \n",
" 30.860781 | \n",
" 12.496092 | \n",
" 10.516424 | \n",
" 118.841690 | \n",
" 7.027659 | \n",
" 0.345488 | \n",
" 10.200777 | \n",
" 0.471401 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
" 56.000000 | \n",
" 24.000000 | \n",
" 7.000000 | \n",
" 14.000000 | \n",
" 18.200000 | \n",
" 0.085000 | \n",
" 21.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.000000 | \n",
" 99.000000 | \n",
" 62.000000 | \n",
" 21.000000 | \n",
" 76.750000 | \n",
" 28.400000 | \n",
" 0.269750 | \n",
" 23.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 2.000000 | \n",
" 119.000000 | \n",
" 70.000000 | \n",
" 29.000000 | \n",
" 125.500000 | \n",
" 33.200000 | \n",
" 0.449500 | \n",
" 27.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 5.000000 | \n",
" 143.000000 | \n",
" 78.000000 | \n",
" 37.000000 | \n",
" 190.000000 | \n",
" 37.100000 | \n",
" 0.687000 | \n",
" 36.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 17.000000 | \n",
" 198.000000 | \n",
" 110.000000 | \n",
" 63.000000 | \n",
" 846.000000 | \n",
" 67.100000 | \n",
" 2.420000 | \n",
" 81.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
"count 392.000000 392.000000 392.000000 392.000000 392.000000 \n",
"mean 3.301020 122.627551 70.663265 29.145408 156.056122 \n",
"std 3.211424 30.860781 12.496092 10.516424 118.841690 \n",
"min 0.000000 56.000000 24.000000 7.000000 14.000000 \n",
"25% 1.000000 99.000000 62.000000 21.000000 76.750000 \n",
"50% 2.000000 119.000000 70.000000 29.000000 125.500000 \n",
"75% 5.000000 143.000000 78.000000 37.000000 190.000000 \n",
"max 17.000000 198.000000 110.000000 63.000000 846.000000 \n",
"\n",
" BMI DiabetesPedigreeFunction Age Outcome \n",
"count 392.000000 392.000000 392.000000 392.000000 \n",
"mean 33.086224 0.523046 30.864796 0.331633 \n",
"std 7.027659 0.345488 10.200777 0.471401 \n",
"min 18.200000 0.085000 21.000000 0.000000 \n",
"25% 28.400000 0.269750 23.000000 0.000000 \n",
"50% 33.200000 0.449500 27.000000 0.000000 \n",
"75% 37.100000 0.687000 36.000000 1.000000 \n",
"max 67.100000 2.420000 81.000000 1.000000 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop rows with missing values\n",
"df.dropna(inplace=True)\n",
"\n",
"# Summarize the number of rows and columns in df\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 1. 89. 66. ... 0.167 21. 0. ]\n",
" [ 0. 137. 40. ... 2.288 33. 1. ]\n",
" [ 3. 78. 50. ... 0.248 26. 1. ]\n",
" ...\n",
" [ 2. 88. 58. ... 0.766 22. 0. ]\n",
" [ 10. 101. 76. ... 0.171 63. 0. ]\n",
" [ 5. 121. 72. ... 0.245 30. 0. ]]\n",
"(392, 9)\n"
]
}
],
"source": [
"# Convert the dataframe to numpy array\n",
"dataset = df.values\n",
"print(dataset)\n",
"print(dataset.shape)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(392, 8) (392,)\n"
]
}
],
"source": [
"# Split into input and output\n",
"X = dataset[:, :-1]\n",
"y = dataset[:, -1].astype(int)\n",
"print(X.shape, y.shape)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Normalize the data \n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler().fit(X)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
" 3.920000e+02 | \n",
"
\n",
" \n",
" mean | \n",
" -4.021726e-17 | \n",
" 3.129583e-17 | \n",
" -4.641624e-16 | \n",
" 1.042250e-16 | \n",
" 6.485742e-17 | \n",
" 1.543550e-16 | \n",
" 3.880116e-17 | \n",
" 1.028089e-16 | \n",
"
\n",
" \n",
" std | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
" 1.001278e+00 | \n",
"
\n",
" \n",
" min | \n",
" -1.029213e+00 | \n",
" -2.161731e+00 | \n",
" -3.739001e+00 | \n",
" -2.108484e+00 | \n",
" -1.196867e+00 | \n",
" -2.120941e+00 | \n",
" -1.269525e+00 | \n",
" -9.682991e-01 | \n",
"
\n",
" \n",
" 25% | \n",
" -7.174265e-01 | \n",
" -7.665958e-01 | \n",
" -6.941640e-01 | \n",
" -7.755315e-01 | \n",
" -6.681786e-01 | \n",
" -6.676780e-01 | \n",
" -7.340909e-01 | \n",
" -7.719850e-01 | \n",
"
\n",
" \n",
" 50% | \n",
" -4.056403e-01 | \n",
" -1.176959e-01 | \n",
" -5.314565e-02 | \n",
" -1.384444e-02 | \n",
" -2.574448e-01 | \n",
" 1.621036e-02 | \n",
" -2.131475e-01 | \n",
" -3.793569e-01 | \n",
"
\n",
" \n",
" 75% | \n",
" 5.297185e-01 | \n",
" 6.609841e-01 | \n",
" 5.878727e-01 | \n",
" 7.478426e-01 | \n",
" 2.859877e-01 | \n",
" 5.718696e-01 | \n",
" 4.751644e-01 | \n",
" 5.040564e-01 | \n",
"
\n",
" \n",
" max | \n",
" 4.271153e+00 | \n",
" 2.445459e+00 | \n",
" 3.151946e+00 | \n",
" 3.223325e+00 | \n",
" 5.812990e+00 | \n",
" 4.846172e+00 | \n",
" 5.497667e+00 | \n",
" 4.921123e+00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 \\\n",
"count 3.920000e+02 3.920000e+02 3.920000e+02 3.920000e+02 3.920000e+02 \n",
"mean -4.021726e-17 3.129583e-17 -4.641624e-16 1.042250e-16 6.485742e-17 \n",
"std 1.001278e+00 1.001278e+00 1.001278e+00 1.001278e+00 1.001278e+00 \n",
"min -1.029213e+00 -2.161731e+00 -3.739001e+00 -2.108484e+00 -1.196867e+00 \n",
"25% -7.174265e-01 -7.665958e-01 -6.941640e-01 -7.755315e-01 -6.681786e-01 \n",
"50% -4.056403e-01 -1.176959e-01 -5.314565e-02 -1.384444e-02 -2.574448e-01 \n",
"75% 5.297185e-01 6.609841e-01 5.878727e-01 7.478426e-01 2.859877e-01 \n",
"max 4.271153e+00 2.445459e+00 3.151946e+00 3.223325e+00 5.812990e+00 \n",
"\n",
" 5 6 7 \n",
"count 3.920000e+02 3.920000e+02 3.920000e+02 \n",
"mean 1.543550e-16 3.880116e-17 1.028089e-16 \n",
"std 1.001278e+00 1.001278e+00 1.001278e+00 \n",
"min -2.120941e+00 -1.269525e+00 -9.682991e-01 \n",
"25% -6.676780e-01 -7.340909e-01 -7.719850e-01 \n",
"50% 1.621036e-02 -2.131475e-01 -3.793569e-01 \n",
"75% 5.718696e-01 4.751644e-01 5.040564e-01 \n",
"max 4.846172e+00 5.497667e+00 4.921123e+00 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Transform and display the training data\n",
"X_standard = scaler.transform(X)\n",
"data = pd.DataFrame(X_standard)\n",
"data.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build Neural Network"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import GridSearchCV, KFold\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.wrappers.scikit_learn import KerasClassifier\n",
"from tensorflow.keras.optimizers import Adam"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential_1\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"dense_3 (Dense) (None, 8) 72 \n",
"_________________________________________________________________\n",
"dense_4 (Dense) (None, 4) 36 \n",
"_________________________________________________________________\n",
"dense_5 (Dense) (None, 1) 5 \n",
"=================================================================\n",
"Total params: 113\n",
"Trainable params: 113\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"None\n"
]
}
],
"source": [
"def create_model():\n",
" # Create model\n",
" model = Sequential()\n",
" model.add(Dense(8, input_shape=(8, ), kernel_initializer='normal', activation='relu'))\n",
" model.add(Dense(4, kernel_initializer='normal', activation='relu'))\n",
" model.add(Dense(1, activation='sigmoid'))\n",
" \n",
" # Compile model\n",
" model.compile(optimizer=Adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])\n",
" return model\n",
"\n",
"model = create_model()\n",
"print(model.summary())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Define a random seed\n",
"seed = 6\n",
"np.random.seed(seed)\n",
"\n",
"# Create model with KerasClassifier\n",
"model = KerasClassifier(build_fn=create_model, verbose=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define grid Search"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\kcsgo\\anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:296: FutureWarning: Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.\n",
" FutureWarning\n",
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 5 folds for each of 9 candidates, totalling 45 fits\n",
"[CV] batch_size=10, epochs=10 ........................................\n",
"[CV] ............ batch_size=10, epochs=10, score=0.759, total= 1.5s\n",
"[CV] batch_size=10, epochs=10 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.4s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=10, score=0.633, total= 0.9s\n",
"[CV] batch_size=10, epochs=10 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 2.3s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=10, score=0.833, total= 0.9s\n",
"[CV] batch_size=10, epochs=10 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 3.2s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=10, score=0.859, total= 0.9s\n",
"[CV] batch_size=10, epochs=10 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 4.1s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=10, score=0.808, total= 0.9s\n",
"[CV] batch_size=10, epochs=50 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 4.9s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=50, score=0.709, total= 3.2s\n",
"[CV] batch_size=10, epochs=50 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 8.2s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=50, score=0.646, total= 3.2s\n",
"[CV] batch_size=10, epochs=50 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 11.4s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=50, score=0.846, total= 3.2s\n",
"[CV] batch_size=10, epochs=50 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 14.6s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=50, score=0.808, total= 3.2s\n",
"[CV] batch_size=10, epochs=50 ........................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 17.9s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ............ batch_size=10, epochs=50, score=0.795, total= 3.2s\n",
"[CV] batch_size=10, epochs=100 .......................................\n",
"[CV] ........... batch_size=10, epochs=100, score=0.772, total= 6.1s\n",
"[CV] batch_size=10, epochs=100 .......................................\n",
"[CV] ........... batch_size=10, epochs=100, score=0.646, total= 6.1s\n",
"[CV] batch_size=10, epochs=100 .......................................\n",
"[CV] ........... batch_size=10, epochs=100, score=0.756, total= 6.1s\n",
"[CV] batch_size=10, epochs=100 .......................................\n",
"[CV] ........... batch_size=10, epochs=100, score=0.833, total= 6.1s\n",
"[CV] batch_size=10, epochs=100 .......................................\n",
"[CV] ........... batch_size=10, epochs=100, score=0.846, total= 6.1s\n",
"[CV] batch_size=20, epochs=10 ........................................\n",
"[CV] ............ batch_size=20, epochs=10, score=0.759, total= 0.6s\n",
"[CV] batch_size=20, epochs=10 ........................................\n",
"[CV] ............ batch_size=20, epochs=10, score=0.608, total= 1.0s\n",
"[CV] batch_size=20, epochs=10 ........................................\n",
"[CV] ............ batch_size=20, epochs=10, score=0.808, total= 0.6s\n",
"[CV] batch_size=20, epochs=10 ........................................\n",
"[CV] ............ batch_size=20, epochs=10, score=0.808, total= 0.6s\n",
"[CV] batch_size=20, epochs=10 ........................................\n",
"[CV] ............ batch_size=20, epochs=10, score=0.846, total= 0.6s\n",
"[CV] batch_size=20, epochs=50 ........................................\n",
"[CV] ............ batch_size=20, epochs=50, score=0.785, total= 1.8s\n",
"[CV] batch_size=20, epochs=50 ........................................\n",
"[CV] ............ batch_size=20, epochs=50, score=0.658, total= 1.8s\n",
"[CV] batch_size=20, epochs=50 ........................................\n",
"[CV] ............ batch_size=20, epochs=50, score=0.808, total= 1.8s\n",
"[CV] batch_size=20, epochs=50 ........................................\n",
"[CV] ............ batch_size=20, epochs=50, score=0.808, total= 1.8s\n",
"[CV] batch_size=20, epochs=50 ........................................\n",
"[CV] ............ batch_size=20, epochs=50, score=0.872, total= 1.8s\n",
"[CV] batch_size=20, epochs=100 .......................................\n",
"[CV] ........... batch_size=20, epochs=100, score=0.772, total= 3.2s\n",
"[CV] batch_size=20, epochs=100 .......................................\n",
"[CV] ........... batch_size=20, epochs=100, score=0.684, total= 3.2s\n",
"[CV] batch_size=20, epochs=100 .......................................\n",
"[CV] ........... batch_size=20, epochs=100, score=0.795, total= 3.2s\n",
"[CV] batch_size=20, epochs=100 .......................................\n",
"[CV] ........... batch_size=20, epochs=100, score=0.769, total= 3.2s\n",
"[CV] batch_size=20, epochs=100 .......................................\n",
"[CV] ........... batch_size=20, epochs=100, score=0.859, total= 3.2s\n",
"[CV] batch_size=40, epochs=10 ........................................\n",
"[CV] ............ batch_size=40, epochs=10, score=0.797, total= 0.5s\n",
"[CV] batch_size=40, epochs=10 ........................................\n",
"[CV] ............ batch_size=40, epochs=10, score=0.608, total= 0.5s\n",
"[CV] batch_size=40, epochs=10 ........................................\n",
"[CV] ............ batch_size=40, epochs=10, score=0.808, total= 0.4s\n",
"[CV] batch_size=40, epochs=10 ........................................\n",
"[CV] ............ batch_size=40, epochs=10, score=0.821, total= 0.5s\n",
"[CV] batch_size=40, epochs=10 ........................................\n",
"[CV] ............ batch_size=40, epochs=10, score=0.833, total= 0.4s\n",
"[CV] batch_size=40, epochs=50 ........................................\n",
"[CV] ............ batch_size=40, epochs=50, score=0.747, total= 1.5s\n",
"[CV] batch_size=40, epochs=50 ........................................\n",
"[CV] ............ batch_size=40, epochs=50, score=0.671, total= 1.0s\n",
"[CV] batch_size=40, epochs=50 ........................................\n",
"[CV] ............ batch_size=40, epochs=50, score=0.821, total= 1.0s\n",
"[CV] batch_size=40, epochs=50 ........................................\n",
"[CV] ............ batch_size=40, epochs=50, score=0.821, total= 1.0s\n",
"[CV] batch_size=40, epochs=50 ........................................\n",
"[CV] ............ batch_size=40, epochs=50, score=0.833, total= 1.0s\n",
"[CV] batch_size=40, epochs=100 .......................................\n",
"[CV] ........... batch_size=40, epochs=100, score=0.696, total= 1.8s\n",
"[CV] batch_size=40, epochs=100 .......................................\n",
"[CV] ........... batch_size=40, epochs=100, score=0.696, total= 1.8s\n",
"[CV] batch_size=40, epochs=100 .......................................\n",
"[CV] ........... batch_size=40, epochs=100, score=0.756, total= 1.8s\n",
"[CV] batch_size=40, epochs=100 .......................................\n",
"[CV] ........... batch_size=40, epochs=100, score=0.821, total= 1.8s\n",
"[CV] batch_size=40, epochs=100 .......................................\n",
"[CV] ........... batch_size=40, epochs=100, score=0.808, total= 1.8s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 45 out of 45 | elapsed: 1.6min finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best: 0.7860434889793396, using {'batch_size': 20, 'epochs': 50} \n",
"0.7784809947013855 (0.07986524360686026) with: {'batch_size': 10, 'epochs': 10}\n",
"0.760629665851593 (0.07296661628343429) with: {'batch_size': 10, 'epochs': 50}\n",
"0.7707237839698792 (0.07138645524930724) with: {'batch_size': 10, 'epochs': 100}\n",
"0.7657253980636597 (0.08370813995870807) with: {'batch_size': 20, 'epochs': 10}\n",
"0.7860434889793396 (0.07018404097720617) with: {'batch_size': 20, 'epochs': 50}\n",
"0.7757546186447144 (0.05630146625584115) with: {'batch_size': 20, 'epochs': 100}\n",
"0.7733203411102295 (0.08373549454888415) with: {'batch_size': 40, 'epochs': 10}\n",
"0.778416109085083 (0.06183883425960871) with: {'batch_size': 40, 'epochs': 50}\n",
"0.7554040789604187 (0.052884532540700184) with: {'batch_size': 40, 'epochs': 100}\n"
]
}
],
"source": [
"# Define Grid Search parameter\n",
"batch_size = [10, 20, 40]\n",
"epochs = [10, 50, 100]\n",
"\n",
"# Make a dictionary of the grid search parameters\n",
"param_grid = {\n",
" 'batch_size':batch_size,\n",
" 'epochs':epochs\n",
"}\n",
"\n",
"# Build and fit the GridSearchCV\n",
"grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(random_state=seed), verbose=10)\n",
"grid_results = grid.fit(X_standard, y)\n",
"\n",
"# Summarize the results\n",
"print('Best: {0}, using {1} '.format(grid_results.best_score_, grid_results.best_params_))\n",
"means = grid_results.cv_results_['mean_test_score']\n",
"stds = grid_results.cv_results_['std_test_score']\n",
"params = grid_results.cv_results_['params']\n",
"\n",
"for mean, stdev, param in zip(means, stds, params):\n",
" print('{0} ({1}) with: {2}'.format(mean, stdev, param))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Applying Dropout, Optimizing learning rate"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 5 folds for each of 9 candidates, totalling 45 fits\n",
"[CV] dropout_rate=0.0, learning_rate=0.001 ...........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.001, score=0.785, total= 1.8s\n",
"[CV] dropout_rate=0.0, learning_rate=0.001 ...........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.7s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.001, score=0.595, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.001 ...........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 3.4s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.001, score=0.821, total= 1.8s\n",
"[CV] dropout_rate=0.0, learning_rate=0.001 ...........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 5.2s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.001, score=0.821, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.001 ...........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 6.9s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.001, score=0.859, total= 2.1s\n",
"[CV] dropout_rate=0.0, learning_rate=0.01 ............................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 9.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.01, score=0.747, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.01 ............................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 10.8s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.01, score=0.633, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.01 ............................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 12.5s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.01, score=0.833, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.01 ............................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 14.2s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.01, score=0.821, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.01 ............................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 16.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] dropout_rate=0.0, learning_rate=0.01, score=0.795, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.0, learning_rate=0.1, score=0.722, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.0, learning_rate=0.1, score=0.722, total= 1.8s\n",
"[CV] dropout_rate=0.0, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.0, learning_rate=0.1, score=0.833, total= 1.8s\n",
"[CV] dropout_rate=0.0, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.0, learning_rate=0.1, score=0.756, total= 1.7s\n",
"[CV] dropout_rate=0.0, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.0, learning_rate=0.1, score=0.885, total= 1.7s\n",
"[CV] dropout_rate=0.1, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.1, learning_rate=0.001, score=0.734, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.1, learning_rate=0.001, score=0.620, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.1, learning_rate=0.001, score=0.808, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.1, learning_rate=0.001, score=0.821, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.1, learning_rate=0.001, score=0.859, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.1, learning_rate=0.01, score=0.709, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.1, learning_rate=0.01, score=0.658, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.1, learning_rate=0.01, score=0.795, total= 2.2s\n",
"[CV] dropout_rate=0.1, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.1, learning_rate=0.01, score=0.795, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.1, learning_rate=0.01, score=0.821, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.1, learning_rate=0.1, score=0.709, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.1, learning_rate=0.1, score=0.658, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.1, learning_rate=0.1, score=0.756, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.1, learning_rate=0.1, score=0.769, total= 1.8s\n",
"[CV] dropout_rate=0.1, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.1, learning_rate=0.1, score=0.821, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.2, learning_rate=0.001, score=0.772, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.2, learning_rate=0.001, score=0.608, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.2, learning_rate=0.001, score=0.833, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.2, learning_rate=0.001, score=0.833, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.001 ...........................\n",
"[CV] dropout_rate=0.2, learning_rate=0.001, score=0.821, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.2, learning_rate=0.01, score=0.747, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.2, learning_rate=0.01, score=0.620, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.2, learning_rate=0.01, score=0.795, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.2, learning_rate=0.01, score=0.808, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.01 ............................\n",
"[CV] dropout_rate=0.2, learning_rate=0.01, score=0.833, total= 1.9s\n",
"[CV] dropout_rate=0.2, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.2, learning_rate=0.1, score=0.759, total= 1.9s\n",
"[CV] dropout_rate=0.2, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.2, learning_rate=0.1, score=0.608, total= 2.3s\n",
"[CV] dropout_rate=0.2, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.2, learning_rate=0.1, score=0.782, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.2, learning_rate=0.1, score=0.756, total= 1.8s\n",
"[CV] dropout_rate=0.2, learning_rate=0.1 .............................\n",
"[CV] . dropout_rate=0.2, learning_rate=0.1, score=0.808, total= 1.8s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 45 out of 45 | elapsed: 1.4min finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best: 0.783479380607605, using {'dropout_rate': 0.0, 'learning_rate': 0.1} \n",
"0.7759493708610534 (0.0934976350249817) with: {'dropout_rate': 0.0, 'learning_rate': 0.001}\n",
"0.7656929612159729 (0.0726889161193493) with: {'dropout_rate': 0.0, 'learning_rate': 0.01}\n",
"0.783479380607605 (0.06499972119101476) with: {'dropout_rate': 0.0, 'learning_rate': 0.1}\n",
"0.7683219671249389 (0.08435875729999517) with: {'dropout_rate': 0.1, 'learning_rate': 0.001}\n",
"0.7554690122604371 (0.06158576138404698) with: {'dropout_rate': 0.1, 'learning_rate': 0.01}\n",
"0.7426484942436218 (0.055185710792480795) with: {'dropout_rate': 0.1, 'learning_rate': 0.1}\n",
"0.7733852505683899 (0.08590286195286612) with: {'dropout_rate': 0.2, 'learning_rate': 0.001}\n",
"0.7605972051620483 (0.07558978386106281) with: {'dropout_rate': 0.2, 'learning_rate': 0.01}\n",
"0.742648470401764 (0.06999412819842643) with: {'dropout_rate': 0.2, 'learning_rate': 0.1}\n"
]
}
],
"source": [
"from tensorflow.keras.layers import Dropout\n",
"\n",
"def create_model(learning_rate, dropout_rate):\n",
" # Create model\n",
" model = Sequential()\n",
" model.add(Dense(8, input_shape=(8, ), kernel_initializer='normal', activation='relu'))\n",
" model.add(Dropout(dropout_rate))\n",
" model.add(Dense(4, kernel_initializer='normal', activation='relu'))\n",
" model.add(Dropout(dropout_rate))\n",
" model.add(Dense(1, activation='sigmoid'))\n",
" \n",
" # Compile model\n",
" model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])\n",
" return model\n",
"\n",
"# Create model with KerasClassifier\n",
"model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=20, verbose=False)\n",
"\n",
"# Define Grid Search parameter\n",
"learning_rates = [0.001, 0.01, 0.1]\n",
"dropout_rates = [0.0, 0.1, 0.2]\n",
"\n",
"# Make a dictionary of the grid search parameters\n",
"param_grid = {\n",
" 'learning_rate':learning_rates,\n",
" 'dropout_rate':dropout_rates\n",
"}\n",
"\n",
"# Build and fit the GridSearchCV\n",
"grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(random_state=seed), verbose=10)\n",
"grid_results = grid.fit(X_standard, y)\n",
"\n",
"# Summarize the results\n",
"print('Best: {0}, using {1} '.format(grid_results.best_score_, grid_results.best_params_))\n",
"means = grid_results.cv_results_['mean_test_score']\n",
"stds = grid_results.cv_results_['std_test_score']\n",
"params = grid_results.cv_results_['params']\n",
"\n",
"for mean, stdev, param in zip(means, stds, params):\n",
" print('{0} ({1}) with: {2}'.format(mean, stdev, param))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Weight Initialization, Activation function"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\kcsgo\\anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:296: FutureWarning: Setting a random_state has no effect since shuffle is False. This will raise an error in 0.24. You should leave random_state to its default (None), or set shuffle=True.\n",
" FutureWarning\n",
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 5 folds for each of 12 candidates, totalling 60 fits\n",
"[CV] activation=softmax, initializer=normal ..........................\n",
"[CV] activation=softmax, initializer=normal, score=0.696, total= 1.8s\n",
"[CV] activation=softmax, initializer=normal ..........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.7s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=normal, score=0.646, total= 1.7s\n",
"[CV] activation=softmax, initializer=normal ..........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 3.5s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=normal, score=0.833, total= 1.7s\n",
"[CV] activation=softmax, initializer=normal ..........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 5.2s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=normal, score=0.808, total= 1.8s\n",
"[CV] activation=softmax, initializer=normal ..........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 7.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=normal, score=0.795, total= 1.8s\n",
"[CV] activation=softmax, initializer=uniform .........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 8.8s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=uniform, score=0.709, total= 1.7s\n",
"[CV] activation=softmax, initializer=uniform .........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 10.5s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=uniform, score=0.608, total= 1.7s\n",
"[CV] activation=softmax, initializer=uniform .........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 12.3s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=uniform, score=0.821, total= 1.7s\n",
"[CV] activation=softmax, initializer=uniform .........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 14.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=uniform, score=0.795, total= 1.8s\n",
"[CV] activation=softmax, initializer=uniform .........................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 15.8s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] activation=softmax, initializer=uniform, score=0.769, total= 1.7s\n",
"[CV] activation=softmax, initializer=zero ............................\n",
"[CV] activation=softmax, initializer=zero, score=0.646, total= 1.7s\n",
"[CV] activation=softmax, initializer=zero ............................\n",
"[CV] activation=softmax, initializer=zero, score=0.570, total= 1.8s\n",
"[CV] activation=softmax, initializer=zero ............................\n",
"[CV] activation=softmax, initializer=zero, score=0.705, total= 1.8s\n",
"[CV] activation=softmax, initializer=zero ............................\n",
"[CV] activation=softmax, initializer=zero, score=0.744, total= 1.7s\n",
"[CV] activation=softmax, initializer=zero ............................\n",
"[CV] activation=softmax, initializer=zero, score=0.679, total= 1.7s\n",
"[CV] activation=relu, initializer=normal .............................\n",
"[CV] . activation=relu, initializer=normal, score=0.709, total= 1.8s\n",
"[CV] activation=relu, initializer=normal .............................\n",
"[CV] . activation=relu, initializer=normal, score=0.671, total= 1.7s\n",
"[CV] activation=relu, initializer=normal .............................\n",
"[CV] . activation=relu, initializer=normal, score=0.808, total= 2.2s\n",
"[CV] activation=relu, initializer=normal .............................\n",
"[CV] . activation=relu, initializer=normal, score=0.795, total= 1.7s\n",
"[CV] activation=relu, initializer=normal .............................\n",
"[CV] . activation=relu, initializer=normal, score=0.808, total= 1.7s\n",
"[CV] activation=relu, initializer=uniform ............................\n",
"[CV] activation=relu, initializer=uniform, score=0.722, total= 1.7s\n",
"[CV] activation=relu, initializer=uniform ............................\n",
"[CV] activation=relu, initializer=uniform, score=0.570, total= 1.7s\n",
"[CV] activation=relu, initializer=uniform ............................\n",
"[CV] activation=relu, initializer=uniform, score=0.821, total= 1.7s\n",
"[CV] activation=relu, initializer=uniform ............................\n",
"[CV] activation=relu, initializer=uniform, score=0.679, total= 1.8s\n",
"[CV] activation=relu, initializer=uniform ............................\n",
"[CV] activation=relu, initializer=uniform, score=0.782, total= 1.7s\n",
"[CV] activation=relu, initializer=zero ...............................\n",
"[CV] ... activation=relu, initializer=zero, score=0.646, total= 1.7s\n",
"[CV] activation=relu, initializer=zero ...............................\n",
"[CV] ... activation=relu, initializer=zero, score=0.570, total= 1.7s\n",
"[CV] activation=relu, initializer=zero ...............................\n",
"[CV] ... activation=relu, initializer=zero, score=0.705, total= 1.7s\n",
"[CV] activation=relu, initializer=zero ...............................\n",
"[CV] ... activation=relu, initializer=zero, score=0.744, total= 1.7s\n",
"[CV] activation=relu, initializer=zero ...............................\n",
"[CV] ... activation=relu, initializer=zero, score=0.679, total= 1.7s\n",
"[CV] activation=tanh, initializer=normal .............................\n",
"[CV] . activation=tanh, initializer=normal, score=0.696, total= 1.7s\n",
"[CV] activation=tanh, initializer=normal .............................\n",
"[CV] . activation=tanh, initializer=normal, score=0.633, total= 1.7s\n",
"[CV] activation=tanh, initializer=normal .............................\n",
"[CV] . activation=tanh, initializer=normal, score=0.833, total= 1.7s\n",
"[CV] activation=tanh, initializer=normal .............................\n",
"[CV] . activation=tanh, initializer=normal, score=0.808, total= 1.7s\n",
"[CV] activation=tanh, initializer=normal .............................\n",
"[CV] . activation=tanh, initializer=normal, score=0.821, total= 1.7s\n",
"[CV] activation=tanh, initializer=uniform ............................\n",
"[CV] activation=tanh, initializer=uniform, score=0.722, total= 1.7s\n",
"[CV] activation=tanh, initializer=uniform ............................\n",
"[CV] activation=tanh, initializer=uniform, score=0.595, total= 1.7s\n",
"[CV] activation=tanh, initializer=uniform ............................\n",
"[CV] activation=tanh, initializer=uniform, score=0.795, total= 1.7s\n",
"[CV] activation=tanh, initializer=uniform ............................\n",
"[CV] activation=tanh, initializer=uniform, score=0.833, total= 1.7s\n",
"[CV] activation=tanh, initializer=uniform ............................\n",
"[CV] activation=tanh, initializer=uniform, score=0.769, total= 1.8s\n",
"[CV] activation=tanh, initializer=zero ...............................\n",
"[CV] ... activation=tanh, initializer=zero, score=0.646, total= 2.2s\n",
"[CV] activation=tanh, initializer=zero ...............................\n",
"[CV] ... activation=tanh, initializer=zero, score=0.570, total= 1.7s\n",
"[CV] activation=tanh, initializer=zero ...............................\n",
"[CV] ... activation=tanh, initializer=zero, score=0.705, total= 1.7s\n",
"[CV] activation=tanh, initializer=zero ...............................\n",
"[CV] ... activation=tanh, initializer=zero, score=0.744, total= 1.7s\n",
"[CV] activation=tanh, initializer=zero ...............................\n",
"[CV] ... activation=tanh, initializer=zero, score=0.679, total= 1.7s\n",
"[CV] activation=linear, initializer=normal ...........................\n",
"[CV] activation=linear, initializer=normal, score=0.823, total= 1.7s\n",
"[CV] activation=linear, initializer=normal ...........................\n",
"[CV] activation=linear, initializer=normal, score=0.620, total= 1.7s\n",
"[CV] activation=linear, initializer=normal ...........................\n",
"[CV] activation=linear, initializer=normal, score=0.833, total= 1.7s\n",
"[CV] activation=linear, initializer=normal ...........................\n",
"[CV] activation=linear, initializer=normal, score=0.846, total= 1.7s\n",
"[CV] activation=linear, initializer=normal ...........................\n",
"[CV] activation=linear, initializer=normal, score=0.769, total= 1.7s\n",
"[CV] activation=linear, initializer=uniform ..........................\n",
"[CV] activation=linear, initializer=uniform, score=0.823, total= 1.7s\n",
"[CV] activation=linear, initializer=uniform ..........................\n",
"[CV] activation=linear, initializer=uniform, score=0.608, total= 1.7s\n",
"[CV] activation=linear, initializer=uniform ..........................\n",
"[CV] activation=linear, initializer=uniform, score=0.821, total= 1.7s\n",
"[CV] activation=linear, initializer=uniform ..........................\n",
"[CV] activation=linear, initializer=uniform, score=0.846, total= 1.7s\n",
"[CV] activation=linear, initializer=uniform ..........................\n",
"[CV] activation=linear, initializer=uniform, score=0.808, total= 1.7s\n",
"[CV] activation=linear, initializer=zero .............................\n",
"[CV] . activation=linear, initializer=zero, score=0.646, total= 1.7s\n",
"[CV] activation=linear, initializer=zero .............................\n",
"[CV] . activation=linear, initializer=zero, score=0.570, total= 1.7s\n",
"[CV] activation=linear, initializer=zero .............................\n",
"[CV] . activation=linear, initializer=zero, score=0.705, total= 1.7s\n",
"[CV] activation=linear, initializer=zero .............................\n",
"[CV] . activation=linear, initializer=zero, score=0.744, total= 1.7s\n",
"[CV] activation=linear, initializer=zero .............................\n",
"[CV] . activation=linear, initializer=zero, score=0.679, total= 1.7s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 60 out of 60 | elapsed: 1.7min finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best: 0.7809477329254151, using {'activation': 'linear', 'initializer': 'uniform'} \n",
"0.7555339097976684 (0.07201815414751926) with: {'activation': 'softmax', 'initializer': 'normal'}\n",
"0.740214216709137 (0.07595814846646004) with: {'activation': 'softmax', 'initializer': 'uniform'}\n",
"0.6686789989471436 (0.05899773033245815) with: {'activation': 'softmax', 'initializer': 'zero'}\n",
"0.758000648021698 (0.05709934235989969) with: {'activation': 'relu', 'initializer': 'normal'}\n",
"0.714638102054596 (0.08725491912068364) with: {'activation': 'relu', 'initializer': 'uniform'}\n",
"0.6686789989471436 (0.05899773033245815) with: {'activation': 'relu', 'initializer': 'zero'}\n",
"0.7581304669380188 (0.0793955810899181) with: {'activation': 'tanh', 'initializer': 'normal'}\n",
"0.7427783250808716 (0.08236839951084776) with: {'activation': 'tanh', 'initializer': 'uniform'}\n",
"0.6686789989471436 (0.05899773033245815) with: {'activation': 'tanh', 'initializer': 'zero'}\n",
"0.7783511757850647 (0.08327394888280217) with: {'activation': 'linear', 'initializer': 'normal'}\n",
"0.7809477329254151 (0.0875603191939732) with: {'activation': 'linear', 'initializer': 'uniform'}\n",
"0.6686789989471436 (0.05899773033245815) with: {'activation': 'linear', 'initializer': 'zero'}\n"
]
}
],
"source": [
"def create_model(activation, initializer):\n",
" # Create model\n",
" model = Sequential()\n",
" model.add(Dense(8, input_shape=(8, ), kernel_initializer=initializer, activation=activation))\n",
" model.add(Dense(4, kernel_initializer=initializer, activation=activation))\n",
" model.add(Dense(1, activation='sigmoid'))\n",
" \n",
" # Compile model\n",
" model.compile(optimizer=Adam(lr=0.1), loss='binary_crossentropy', metrics=['accuracy'])\n",
" return model\n",
"\n",
"# Create model with KerasClassifier\n",
"model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=20, verbose=False)\n",
"\n",
"# Define Grid Search parameter\n",
"activations = ['softmax', 'relu', 'tanh', 'linear']\n",
"initializers = ['normal', 'uniform', 'zero']\n",
"\n",
"# Make a dictionary of the grid search parameters\n",
"param_grid = {\n",
" 'activation':activations,\n",
" 'initializer':initializers\n",
"}\n",
"\n",
"# Build and fit the GridSearchCV\n",
"grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=KFold(random_state=seed), verbose=10)\n",
"grid_results = grid.fit(X_standard, y)\n",
"\n",
"# Summarize the results\n",
"print('Best: {0}, using {1} '.format(grid_results.best_score_, grid_results.best_params_))\n",
"means = grid_results.cv_results_['mean_test_score']\n",
"stds = grid_results.cv_results_['std_test_score']\n",
"params = grid_results.cv_results_['params']\n",
"\n",
"for mean, stdev, param in zip(means, stds, params):\n",
" print('{0} ({1}) with: {2}'.format(mean, stdev, param))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of Neurons"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 5 folds for each of 9 candidates, totalling 45 fits\n",
"[CV] neuron1=4, neuron2=2 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=2, score=0.835, total= 1.7s\n",
"[CV] neuron1=4, neuron2=2 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 1.6s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=2, score=0.646, total= 1.7s\n",
"[CV] neuron1=4, neuron2=2 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 3.3s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=2, score=0.821, total= 1.7s\n",
"[CV] neuron1=4, neuron2=2 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 3 out of 3 | elapsed: 5.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=2, score=0.846, total= 1.7s\n",
"[CV] neuron1=4, neuron2=2 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 4 out of 4 | elapsed: 6.7s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=2, score=0.808, total= 1.7s\n",
"[CV] neuron1=4, neuron2=4 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 8.3s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=4, score=0.835, total= 1.7s\n",
"[CV] neuron1=4, neuron2=4 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 6 out of 6 | elapsed: 10.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=4, score=0.633, total= 1.7s\n",
"[CV] neuron1=4, neuron2=4 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 7 out of 7 | elapsed: 11.7s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=4, score=0.821, total= 1.6s\n",
"[CV] neuron1=4, neuron2=4 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 8 out of 8 | elapsed: 13.3s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=4, score=0.833, total= 1.6s\n",
"[CV] neuron1=4, neuron2=4 ............................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 9 out of 9 | elapsed: 15.0s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................ neuron1=4, neuron2=4, score=0.795, total= 1.6s\n",
"[CV] neuron1=4, neuron2=8 ............................................\n",
"[CV] ................ neuron1=4, neuron2=8, score=0.823, total= 1.7s\n",
"[CV] neuron1=4, neuron2=8 ............................................\n",
"[CV] ................ neuron1=4, neuron2=8, score=0.608, total= 1.6s\n",
"[CV] neuron1=4, neuron2=8 ............................................\n",
"[CV] ................ neuron1=4, neuron2=8, score=0.808, total= 2.2s\n",
"[CV] neuron1=4, neuron2=8 ............................................\n",
"[CV] ................ neuron1=4, neuron2=8, score=0.846, total= 1.6s\n",
"[CV] neuron1=4, neuron2=8 ............................................\n",
"[CV] ................ neuron1=4, neuron2=8, score=0.756, total= 1.6s\n",
"[CV] neuron1=8, neuron2=2 ............................................\n",
"[CV] ................ neuron1=8, neuron2=2, score=0.810, total= 1.7s\n",
"[CV] neuron1=8, neuron2=2 ............................................\n",
"[CV] ................ neuron1=8, neuron2=2, score=0.671, total= 1.7s\n",
"[CV] neuron1=8, neuron2=2 ............................................\n",
"[CV] ................ neuron1=8, neuron2=2, score=0.808, total= 1.7s\n",
"[CV] neuron1=8, neuron2=2 ............................................\n",
"[CV] ................ neuron1=8, neuron2=2, score=0.859, total= 1.7s\n",
"[CV] neuron1=8, neuron2=2 ............................................\n",
"[CV] ................ neuron1=8, neuron2=2, score=0.808, total= 1.7s\n",
"[CV] neuron1=8, neuron2=4 ............................................\n",
"[CV] ................ neuron1=8, neuron2=4, score=0.810, total= 1.7s\n",
"[CV] neuron1=8, neuron2=4 ............................................\n",
"[CV] ................ neuron1=8, neuron2=4, score=0.608, total= 1.6s\n",
"[CV] neuron1=8, neuron2=4 ............................................\n",
"[CV] ................ neuron1=8, neuron2=4, score=0.833, total= 1.7s\n",
"[CV] neuron1=8, neuron2=4 ............................................\n",
"[CV] ................ neuron1=8, neuron2=4, score=0.808, total= 1.7s\n",
"[CV] neuron1=8, neuron2=4 ............................................\n",
"[CV] ................ neuron1=8, neuron2=4, score=0.808, total= 1.7s\n",
"[CV] neuron1=8, neuron2=8 ............................................\n",
"[CV] ................ neuron1=8, neuron2=8, score=0.823, total= 1.7s\n",
"[CV] neuron1=8, neuron2=8 ............................................\n",
"[CV] ................ neuron1=8, neuron2=8, score=0.620, total= 1.7s\n",
"[CV] neuron1=8, neuron2=8 ............................................\n",
"[CV] ................ neuron1=8, neuron2=8, score=0.846, total= 1.7s\n",
"[CV] neuron1=8, neuron2=8 ............................................\n",
"[CV] ................ neuron1=8, neuron2=8, score=0.833, total= 1.6s\n",
"[CV] neuron1=8, neuron2=8 ............................................\n",
"[CV] ................ neuron1=8, neuron2=8, score=0.808, total= 1.7s\n",
"[CV] neuron1=16, neuron2=2 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=2, score=0.835, total= 1.6s\n",
"[CV] neuron1=16, neuron2=2 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=2, score=0.608, total= 1.7s\n",
"[CV] neuron1=16, neuron2=2 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=2, score=0.821, total= 1.6s\n",
"[CV] neuron1=16, neuron2=2 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=2, score=0.679, total= 1.6s\n",
"[CV] neuron1=16, neuron2=2 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=2, score=0.808, total= 1.6s\n",
"[CV] neuron1=16, neuron2=4 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=4, score=0.810, total= 1.7s\n",
"[CV] neuron1=16, neuron2=4 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=4, score=0.633, total= 1.7s\n",
"[CV] neuron1=16, neuron2=4 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=4, score=0.821, total= 1.6s\n",
"[CV] neuron1=16, neuron2=4 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=4, score=0.821, total= 1.6s\n",
"[CV] neuron1=16, neuron2=4 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=4, score=0.833, total= 1.6s\n",
"[CV] neuron1=16, neuron2=8 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=8, score=0.772, total= 2.2s\n",
"[CV] neuron1=16, neuron2=8 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=8, score=0.608, total= 1.6s\n",
"[CV] neuron1=16, neuron2=8 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=8, score=0.821, total= 1.6s\n",
"[CV] neuron1=16, neuron2=8 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=8, score=0.808, total= 1.7s\n",
"[CV] neuron1=16, neuron2=8 ...........................................\n",
"[CV] ............... neuron1=16, neuron2=8, score=0.808, total= 1.6s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 45 out of 45 | elapsed: 1.3min finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best: 0.7910743236541748, using {'neuron1': 4, 'neuron2': 2} \n",
"0.7910743236541748 (0.07391445944714388) with: {'neuron1': 4, 'neuron2': 2}\n",
"0.7834144711494446 (0.07662584260652404) with: {'neuron1': 4, 'neuron2': 4}\n",
"0.7681272149085998 (0.08549726378903176) with: {'neuron1': 4, 'neuron2': 8}\n",
"0.7910743236541748 (0.06319956338108261) with: {'neuron1': 8, 'neuron2': 2}\n",
"0.7732878804206849 (0.08340747393344203) with: {'neuron1': 8, 'neuron2': 4}\n",
"0.7860434770584106 (0.08385059892312692) with: {'neuron1': 8, 'neuron2': 8}\n",
"0.7501460433006286 (0.09039041145364847) with: {'neuron1': 16, 'neuron2': 2}\n",
"0.7834793925285339 (0.07564279683053157) with: {'neuron1': 16, 'neuron2': 4}\n",
"0.7631288409233093 (0.07941907087664095) with: {'neuron1': 16, 'neuron2': 8}\n"
]
}
],
"source": [
"def create_model(neuron1, neuron2):\n",
"    \"\"\"Build and compile the binary classifier evaluated by the grid search.\n",
"\n",
"    The network takes 8 input features, passes them through two Dense layers\n",
"    (linear activation, uniform kernel initialisation) and ends in a single\n",
"    sigmoid unit.\n",
"\n",
"    Args:\n",
"        neuron1: Number of units in the first Dense layer.\n",
"        neuron2: Number of units in the second Dense layer.\n",
"\n",
"    Returns:\n",
"        A Sequential model compiled with Adam (learning rate 0.1),\n",
"        binary cross-entropy loss and the accuracy metric.\n",
"    \"\"\"\n",
"    # Create model\n",
"    model = Sequential()\n",
"    model.add(Dense(neuron1, input_shape=(8, ), kernel_initializer='uniform', activation='linear'))\n",
"    model.add(Dense(neuron2, kernel_initializer='uniform', activation='linear'))\n",
"    model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"    # Compile model.\n",
"    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated\n",
"    # in tf.keras and rejected by newer Keras releases.\n",
"    model.compile(optimizer=Adam(learning_rate=0.1), loss='binary_crossentropy', metrics=['accuracy'])\n",
"    return model\n",
"\n",
"# Wrap the model builder so scikit-learn can drive it as an estimator\n",
"model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=20, verbose=False)\n",
"\n",
"# Candidate widths for the first and second Dense layers\n",
"neuron1 = [4, 8, 16]\n",
"neuron2 = [2, 4, 8]\n",
"\n",
"# Make a dictionary of the grid search parameters\n",
"param_grid = {\n",
"    'neuron1': neuron1,\n",
"    'neuron2': neuron2\n",
"}\n",
"\n",
"# Build and fit the GridSearchCV.\n",
"# KFold only uses random_state when shuffle=True; without shuffling the seed is\n",
"# ignored by older scikit-learn and raises ValueError from 0.24 onwards.\n",
"grid = GridSearchCV(estimator=model, param_grid=param_grid,\n",
"                    cv=KFold(n_splits=5, shuffle=True, random_state=seed),\n",
"                    refit=True, verbose=10)\n",
"grid_results = grid.fit(X_standard, y)\n",
"\n",
"# Summarize the results: best candidate first, then mean (std) test score per candidate\n",
"print('Best: {0}, using {1} '.format(grid_results.best_score_, grid_results.best_params_))\n",
"means = grid_results.cv_results_['mean_test_score']\n",
"stds = grid_results.cv_results_['std_test_score']\n",
"params = grid_results.cv_results_['params']\n",
"\n",
"for mean, stdev, param in zip(means, stds, params):\n",
"    print('{0} ({1}) with: {2}'.format(mean, stdev, param))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict with Optimal hyperparameters"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
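"# Generate predictions with the refit best estimator.\n",
"# Note: X_standard is the same data the grid search was fitted on, so these are in-sample predictions.\n",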
"y_pred = grid.predict(X_standard)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(392, 1)"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred.shape"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0],\n",
" [1],\n",
" [0],\n",
" [1],\n",
" [1]])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred[:5]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7857142857142857\n",
" precision recall f1-score support\n",
"\n",
" 0 0.84 0.84 0.84 262\n",
" 1 0.67 0.68 0.68 130\n",
"\n",
" accuracy 0.79 392\n",
" macro avg 0.76 0.76 0.76 392\n",
"weighted avg 0.79 0.79 0.79 392\n",
"\n"
]
}
],
"source": [
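"# Generate a classification report.\n",
"# Note: y_pred was predicted on the same data used for fitting, so these scores are in-sample,\n",
"# not a held-out estimate.\n",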
"from sklearn.metrics import classification_report, accuracy_score\n",
"\n",
"print(accuracy_score(y, y_pred))\n",
"print(classification_report(y, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pregnancies 1.000\n",
"Glucose 89.000\n",
"BloodPressure 66.000\n",
"SkinThickness 23.000\n",
"Insulin 94.000\n",
"BMI 28.100\n",
"DiabetesPedigreeFunction 0.167\n",
"Age 21.000\n",
"Outcome 0.000\n",
"Name: 3, dtype: float64"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Example datapoint\n",
"example = df.iloc[0]\n",
"example"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0]])"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prediction = grid.predict(X_standard[0].reshape(1, -1))\n",
"prediction"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}