{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"이 노트북의 코드에 대한 설명은 [다중 평가 지표: cross_validate()](https://tensorflow.blog/2018/03/13/%EB%8B%A4%EC%A4%91-%ED%8F%89%EA%B0%80-%EC%A7%80%ED%91%9C-cross_validate/) 글을 참고하세요."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.datasets import load_digits\n",
"from sklearn.model_selection import train_test_split, cross_val_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"digits = load_digits()\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" digits.data, digits.target == 9, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.svm import SVC"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.90200445, 0.90200445, 0.90200445])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_val_score(SVC(gamma='auto'), X_train, y_train, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.90200445, 0.90200445, 0.90200445])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_val_score(SVC(gamma='auto'), X_train, y_train, scoring='accuracy', cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import cross_validate"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'fit_time': array([0.03770995, 0.03589416, 0.03686881]),\n",
" 'score_time': array([0.12240219, 0.11768389, 0.11690235]),\n",
" 'test_accuracy': array([0.90200445, 0.90200445, 0.90200445]),\n",
" 'train_accuracy': array([1., 1., 1.]),\n",
" 'test_roc_auc': array([0.99657688, 0.99814815, 0.99943883]),\n",
" 'train_roc_auc': array([1., 1., 1.])}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_validate(SVC(gamma='auto'), X_train, y_train, \n",
" scoring=['accuracy', 'roc_auc'], \n",
" return_train_score=True, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.90200445, 0.90200445, 0.90200445])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_validate(SVC(gamma='auto'), X_train, y_train, \n",
" scoring=['accuracy'], cv=3,\n",
" return_train_score=False)['test_accuracy']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'fit_time': array([0.03640604, 0.03584003, 0.03449273]),\n",
" 'score_time': array([0.11128712, 0.10693693, 0.11939406]),\n",
" 'test_acc': array([0.90200445, 0.90200445, 0.90200445]),\n",
" 'test_ra': array([0.99657688, 0.99814815, 0.99943883])}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cross_validate(SVC(gamma='auto'), X_train, y_train, \n",
" scoring={'acc':'accuracy', 'ra':'roc_auc'}, \n",
" return_train_score=False, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import GridSearchCV"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"param_grid = {'gamma': [0.0001, 0.01, 0.1, 1, 10]}"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GridSearchCV(cv=3, estimator=SVC(),\n",
" param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]}, refit='accuracy',\n",
" return_train_score=True, scoring=['accuracy'])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid = GridSearchCV(SVC(), param_grid=param_grid, \n",
" scoring=['accuracy'], refit='accuracy',\n",
" return_train_score=True, cv=3)\n",
"grid.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'gamma': 0.0001}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9651076466221232"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" mean_fit_time | \n",
" 0.006796 | \n",
" 0.033728 | \n",
" 0.036865 | \n",
" 0.029152 | \n",
" 0.028625 | \n",
"
\n",
" \n",
" std_fit_time | \n",
" 0.000076 | \n",
" 0.000763 | \n",
" 0.000255 | \n",
" 0.000303 | \n",
" 0.000275 | \n",
"
\n",
" \n",
" mean_score_time | \n",
" 0.012703 | \n",
" 0.059657 | \n",
" 0.060291 | \n",
" 0.054508 | \n",
" 0.054504 | \n",
"
\n",
" \n",
" std_score_time | \n",
" 0.001003 | \n",
" 0.000545 | \n",
" 0.000915 | \n",
" 0.003451 | \n",
" 0.001073 | \n",
"
\n",
" \n",
" param_gamma | \n",
" 0.0001 | \n",
" 0.01 | \n",
" 0.1 | \n",
" 1 | \n",
" 10 | \n",
"
\n",
" \n",
" params | \n",
" {'gamma': 0.0001} | \n",
" {'gamma': 0.01} | \n",
" {'gamma': 0.1} | \n",
" {'gamma': 1} | \n",
" {'gamma': 10} | \n",
"
\n",
" \n",
" split0_test_accuracy | \n",
" 0.966592 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" split1_test_accuracy | \n",
" 0.96882 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" split2_test_accuracy | \n",
" 0.959911 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" mean_test_accuracy | \n",
" 0.965108 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" std_test_accuracy | \n",
" 0.003785 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" rank_test_accuracy | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" split0_train_accuracy | \n",
" 0.975501 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" split1_train_accuracy | \n",
" 0.962138 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" split2_train_accuracy | \n",
" 0.974388 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" mean_train_accuracy | \n",
" 0.970676 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" std_train_accuracy | \n",
" 0.006054 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 \\\n",
"mean_fit_time 0.006796 0.033728 0.036865 \n",
"std_fit_time 0.000076 0.000763 0.000255 \n",
"mean_score_time 0.012703 0.059657 0.060291 \n",
"std_score_time 0.001003 0.000545 0.000915 \n",
"param_gamma 0.0001 0.01 0.1 \n",
"params {'gamma': 0.0001} {'gamma': 0.01} {'gamma': 0.1} \n",
"split0_test_accuracy 0.966592 0.902004 0.902004 \n",
"split1_test_accuracy 0.96882 0.902004 0.902004 \n",
"split2_test_accuracy 0.959911 0.902004 0.902004 \n",
"mean_test_accuracy 0.965108 0.902004 0.902004 \n",
"std_test_accuracy 0.003785 0.0 0.0 \n",
"rank_test_accuracy 1 2 2 \n",
"split0_train_accuracy 0.975501 1.0 1.0 \n",
"split1_train_accuracy 0.962138 1.0 1.0 \n",
"split2_train_accuracy 0.974388 1.0 1.0 \n",
"mean_train_accuracy 0.970676 1.0 1.0 \n",
"std_train_accuracy 0.006054 0.0 0.0 \n",
"\n",
" 3 4 \n",
"mean_fit_time 0.029152 0.028625 \n",
"std_fit_time 0.000303 0.000275 \n",
"mean_score_time 0.054508 0.054504 \n",
"std_score_time 0.003451 0.001073 \n",
"param_gamma 1 10 \n",
"params {'gamma': 1} {'gamma': 10} \n",
"split0_test_accuracy 0.902004 0.902004 \n",
"split1_test_accuracy 0.902004 0.902004 \n",
"split2_test_accuracy 0.902004 0.902004 \n",
"mean_test_accuracy 0.902004 0.902004 \n",
"std_test_accuracy 0.0 0.0 \n",
"rank_test_accuracy 2 2 \n",
"split0_train_accuracy 1.0 1.0 \n",
"split1_train_accuracy 1.0 1.0 \n",
"split2_train_accuracy 1.0 1.0 \n",
"mean_train_accuracy 1.0 1.0 \n",
"std_train_accuracy 0.0 0.0 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.transpose(pd.DataFrame(grid.cv_results_))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GridSearchCV(cv=3, estimator=SVC(),\n",
" param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]}, refit='ra',\n",
" return_train_score=True,\n",
" scoring={'acc': 'accuracy', 'ra': 'roc_auc'})"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid = GridSearchCV(SVC(), param_grid=param_grid, \n",
" scoring={'acc':'accuracy', 'ra':'roc_auc'}, refit='ra',\n",
" return_train_score=True, cv=3)\n",
"grid.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'gamma': 0.01}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9983352038907594"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.best_score_"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
"
\n",
" \n",
" \n",
" \n",
" mean_fit_time | \n",
" 0.006864 | \n",
" 0.034273 | \n",
" 0.037054 | \n",
" 0.030654 | \n",
" 0.028933 | \n",
"
\n",
" \n",
" std_fit_time | \n",
" 0.000172 | \n",
" 0.000251 | \n",
" 0.000493 | \n",
" 0.000555 | \n",
" 0.000616 | \n",
"
\n",
" \n",
" mean_score_time | \n",
" 0.024493 | \n",
" 0.114614 | \n",
" 0.121366 | \n",
" 0.11359 | \n",
" 0.107006 | \n",
"
\n",
" \n",
" std_score_time | \n",
" 0.002559 | \n",
" 0.001332 | \n",
" 0.003262 | \n",
" 0.003361 | \n",
" 0.00633 | \n",
"
\n",
" \n",
" param_gamma | \n",
" 0.0001 | \n",
" 0.01 | \n",
" 0.1 | \n",
" 1 | \n",
" 10 | \n",
"
\n",
" \n",
" params | \n",
" {'gamma': 0.0001} | \n",
" {'gamma': 0.01} | \n",
" {'gamma': 0.1} | \n",
" {'gamma': 1} | \n",
" {'gamma': 10} | \n",
"
\n",
" \n",
" split0_test_acc | \n",
" 0.966592 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" split1_test_acc | \n",
" 0.96882 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" split2_test_acc | \n",
" 0.959911 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" mean_test_acc | \n",
" 0.965108 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
" 0.902004 | \n",
"
\n",
" \n",
" std_test_acc | \n",
" 0.003785 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" rank_test_acc | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" split0_train_acc | \n",
" 0.975501 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" split1_train_acc | \n",
" 0.962138 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" split2_train_acc | \n",
" 0.974388 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" mean_train_acc | \n",
" 0.970676 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" std_train_acc | \n",
" 0.006054 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" split0_test_ra | \n",
" 0.98367 | \n",
" 0.997419 | \n",
" 0.934007 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" split1_test_ra | \n",
" 0.987149 | \n",
" 0.998148 | \n",
" 0.912458 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" split2_test_ra | \n",
" 0.994388 | \n",
" 0.999439 | \n",
" 0.910494 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" mean_test_ra | \n",
" 0.988403 | \n",
" 0.998335 | \n",
" 0.918986 | \n",
" 0.5 | \n",
" 0.5 | \n",
"
\n",
" \n",
" std_test_ra | \n",
" 0.004465 | \n",
" 0.000835 | \n",
" 0.010651 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" rank_test_ra | \n",
" 2 | \n",
" 1 | \n",
" 3 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" split0_train_ra | \n",
" 0.992017 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" split1_train_ra | \n",
" 0.994935 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" split2_train_ra | \n",
" 0.98945 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" mean_train_ra | \n",
" 0.992134 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" std_train_ra | \n",
" 0.002241 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 \\\n",
"mean_fit_time 0.006864 0.034273 0.037054 \n",
"std_fit_time 0.000172 0.000251 0.000493 \n",
"mean_score_time 0.024493 0.114614 0.121366 \n",
"std_score_time 0.002559 0.001332 0.003262 \n",
"param_gamma 0.0001 0.01 0.1 \n",
"params {'gamma': 0.0001} {'gamma': 0.01} {'gamma': 0.1} \n",
"split0_test_acc 0.966592 0.902004 0.902004 \n",
"split1_test_acc 0.96882 0.902004 0.902004 \n",
"split2_test_acc 0.959911 0.902004 0.902004 \n",
"mean_test_acc 0.965108 0.902004 0.902004 \n",
"std_test_acc 0.003785 0.0 0.0 \n",
"rank_test_acc 1 2 2 \n",
"split0_train_acc 0.975501 1.0 1.0 \n",
"split1_train_acc 0.962138 1.0 1.0 \n",
"split2_train_acc 0.974388 1.0 1.0 \n",
"mean_train_acc 0.970676 1.0 1.0 \n",
"std_train_acc 0.006054 0.0 0.0 \n",
"split0_test_ra 0.98367 0.997419 0.934007 \n",
"split1_test_ra 0.987149 0.998148 0.912458 \n",
"split2_test_ra 0.994388 0.999439 0.910494 \n",
"mean_test_ra 0.988403 0.998335 0.918986 \n",
"std_test_ra 0.004465 0.000835 0.010651 \n",
"rank_test_ra 2 1 3 \n",
"split0_train_ra 0.992017 1.0 1.0 \n",
"split1_train_ra 0.994935 1.0 1.0 \n",
"split2_train_ra 0.98945 1.0 1.0 \n",
"mean_train_ra 0.992134 1.0 1.0 \n",
"std_train_ra 0.002241 0.0 0.0 \n",
"\n",
" 3 4 \n",
"mean_fit_time 0.030654 0.028933 \n",
"std_fit_time 0.000555 0.000616 \n",
"mean_score_time 0.11359 0.107006 \n",
"std_score_time 0.003361 0.00633 \n",
"param_gamma 1 10 \n",
"params {'gamma': 1} {'gamma': 10} \n",
"split0_test_acc 0.902004 0.902004 \n",
"split1_test_acc 0.902004 0.902004 \n",
"split2_test_acc 0.902004 0.902004 \n",
"mean_test_acc 0.902004 0.902004 \n",
"std_test_acc 0.0 0.0 \n",
"rank_test_acc 2 2 \n",
"split0_train_acc 1.0 1.0 \n",
"split1_train_acc 1.0 1.0 \n",
"split2_train_acc 1.0 1.0 \n",
"mean_train_acc 1.0 1.0 \n",
"std_train_acc 0.0 0.0 \n",
"split0_test_ra 0.5 0.5 \n",
"split1_test_ra 0.5 0.5 \n",
"split2_test_ra 0.5 0.5 \n",
"mean_test_ra 0.5 0.5 \n",
"std_test_ra 0.0 0.0 \n",
"rank_test_ra 4 4 \n",
"split0_train_ra 1.0 1.0 \n",
"split1_train_ra 1.0 1.0 \n",
"split2_train_ra 1.0 1.0 \n",
"mean_train_ra 1.0 1.0 \n",
"std_train_ra 0.0 0.0 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.transpose(pd.DataFrame(grid.cv_results_))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SVC(gamma=0.01)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.best_estimator_"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}