{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multinomial AUC/PR AUC demo"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n"
]
},
{
"data": {
"text/html": [
"
H2O_cluster_uptime: | \n",
"38 mins 14 secs |
\n",
"H2O_cluster_timezone: | \n",
"Europe/Berlin |
\n",
"H2O_data_parsing_timezone: | \n",
"UTC |
\n",
"H2O_cluster_version: | \n",
"3.33.0.99999 |
\n",
"H2O_cluster_version_age: | \n",
"47 minutes |
\n",
"H2O_cluster_name: | \n",
"mori |
\n",
"H2O_cluster_total_nodes: | \n",
"1 |
\n",
"H2O_cluster_free_memory: | \n",
"4.851 Gb |
\n",
"H2O_cluster_total_cores: | \n",
"8 |
\n",
"H2O_cluster_allowed_cores: | \n",
"8 |
\n",
"H2O_cluster_status: | \n",
"locked, healthy |
\n",
"H2O_connection_url: | \n",
"http://localhost:54321 |
\n",
"H2O_connection_proxy: | \n",
"{\"http\": null, \"https\": null} |
\n",
"H2O_internal_security: | \n",
"False |
\n",
"H2O_API_Extensions: | \n",
"Algos, Core V3, Core V4 |
\n",
"Python_version: | \n",
"3.7.3 candidate |
"
],
"text/plain": [
"-------------------------- -----------------------------\n",
"H2O_cluster_uptime: 38 mins 14 secs\n",
"H2O_cluster_timezone: Europe/Berlin\n",
"H2O_data_parsing_timezone: UTC\n",
"H2O_cluster_version: 3.33.0.99999\n",
"H2O_cluster_version_age: 47 minutes\n",
"H2O_cluster_name: mori\n",
"H2O_cluster_total_nodes: 1\n",
"H2O_cluster_free_memory: 4.851 Gb\n",
"H2O_cluster_total_cores: 8\n",
"H2O_cluster_allowed_cores: 8\n",
"H2O_cluster_status: locked, healthy\n",
"H2O_connection_url: http://localhost:54321\n",
"H2O_connection_proxy: {\"http\": null, \"https\": null}\n",
"H2O_internal_security: False\n",
"H2O_API_Extensions: Algos, Core V3, Core V4\n",
"Python_version: 3.7.3 candidate\n",
"-------------------------- -----------------------------"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import h2o\n",
"from h2o.estimators import H2OGradientBoostingEstimator\n",
"from h2o.utils.shared_utils import _locate # private function - used to find files within h2o git project directory\n",
"\n",
"h2o.init(strict_version_check=False, port=54321)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parse progress: |█████████████████████████████████████████████████████████| 100%\n",
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n",
"Model Details\n",
"=============\n",
"H2OGradientBoostingEstimator : Gradient Boosting Machine\n",
"Model Key: GBM_model_python_1606223609474_67\n",
"\n",
"\n",
"Model Summary: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" number_of_trees | \n",
" number_of_internal_trees | \n",
" model_size_in_bytes | \n",
" min_depth | \n",
" max_depth | \n",
" mean_depth | \n",
" min_leaves | \n",
" max_leaves | \n",
" mean_leaves | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" | \n",
" 100.0 | \n",
" 500.0 | \n",
" 67451.0 | \n",
" 1.0 | \n",
" 3.0 | \n",
" 2.994 | \n",
" 2.0 | \n",
" 8.0 | \n",
" 6.078 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" number_of_trees number_of_internal_trees model_size_in_bytes \\\n",
"0 100.0 500.0 67451.0 \n",
"\n",
" min_depth max_depth mean_depth min_leaves max_leaves mean_leaves \n",
"0 1.0 3.0 2.994 2.0 8.0 6.078 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"ModelMetricsMultinomial: gbm\n",
"** Reported on train data. **\n",
"\n",
"MSE: 0.07963805354633585\n",
"RMSE: 0.28220215014477806\n",
"LogLoss: 0.32402399831181045\n",
"Mean Per-Class Error: 0.4120046082949308\n",
"AUC: 0.999511857555245\n",
"AUCPR: 0.998951571914844\n",
"\n",
"Multinomial AUC values: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" type | \n",
" first_class_domain | \n",
" second_class_domain | \n",
" auc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 vs Rest | \n",
" 3 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 vs Rest | \n",
" 4 | \n",
" None | \n",
" 0.998556 | \n",
"
\n",
" \n",
" 2 | \n",
" 5 vs Rest | \n",
" 5 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 6 vs Rest | \n",
" 6 | \n",
" None | \n",
" 0.999004 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 vs Rest | \n",
" 8 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" Macro OVR | \n",
" None | \n",
" None | \n",
" 0.999512 | \n",
"
\n",
" \n",
" 6 | \n",
" Weighted OVR | \n",
" None | \n",
" None | \n",
" 0.999032 | \n",
"
\n",
" \n",
" 7 | \n",
" Class 3 vs. 4 | \n",
" 3 | \n",
" 4 | \n",
" 0.994624 | \n",
"
\n",
" \n",
" 8 | \n",
" Class 3 vs. 5 | \n",
" 3 | \n",
" 5 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" Class 3 vs. 6 | \n",
" 3 | \n",
" 6 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" Class 3 vs. 8 | \n",
" 3 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" Class 4 vs. 5 | \n",
" 4 | \n",
" 5 | \n",
" 0.975269 | \n",
"
\n",
" \n",
" 12 | \n",
" Class 4 vs. 6 | \n",
" 4 | \n",
" 6 | \n",
" 0.999482 | \n",
"
\n",
" \n",
" 13 | \n",
" Class 4 vs. 8 | \n",
" 4 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 14 | \n",
" Class 5 vs. 6 | \n",
" 5 | \n",
" 6 | \n",
" 0.988095 | \n",
"
\n",
" \n",
" 15 | \n",
" Class 5 vs. 8 | \n",
" 5 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 16 | \n",
" Class 6 vs. 8 | \n",
" 6 | \n",
" 8 | \n",
" 0.999752 | \n",
"
\n",
" \n",
" 17 | \n",
" Macro OVO | \n",
" None | \n",
" None | \n",
" 0.995722 | \n",
"
\n",
" \n",
" 18 | \n",
" Weighted OVO | \n",
" None | \n",
" None | \n",
" 0.995155 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" type first_class_domain second_class_domain auc\n",
"0 3 vs Rest 3 None 1.000000\n",
"1 4 vs Rest 4 None 0.998556\n",
"2 5 vs Rest 5 None 1.000000\n",
"3 6 vs Rest 6 None 0.999004\n",
"4 8 vs Rest 8 None 1.000000\n",
"5 Macro OVR None None 0.999512\n",
"6 Weighted OVR None None 0.999032\n",
"7 Class 3 vs. 4 3 4 0.994624\n",
"8 Class 3 vs. 5 3 5 1.000000\n",
"9 Class 3 vs. 6 3 6 1.000000\n",
"10 Class 3 vs. 8 3 8 1.000000\n",
"11 Class 4 vs. 5 4 5 0.975269\n",
"12 Class 4 vs. 6 4 6 0.999482\n",
"13 Class 4 vs. 8 4 8 1.000000\n",
"14 Class 5 vs. 6 5 6 0.988095\n",
"15 Class 5 vs. 8 5 8 1.000000\n",
"16 Class 6 vs. 8 6 8 0.999752\n",
"17 Macro OVO None None 0.995722\n",
"18 Weighted OVO None None 0.995155"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Multinomial auc_pr values: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" type | \n",
" first_class_domain | \n",
" second_class_domain | \n",
" auc_pr | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 vs Rest | \n",
" 3 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 vs Rest | \n",
" 4 | \n",
" None | \n",
" 0.998698 | \n",
"
\n",
" \n",
" 2 | \n",
" 5 vs Rest | \n",
" 5 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 6 vs Rest | \n",
" 6 | \n",
" None | \n",
" 0.996059 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 vs Rest | \n",
" 8 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" Macro OVR | \n",
" None | \n",
" None | \n",
" 0.998952 | \n",
"
\n",
" \n",
" 6 | \n",
" Weighted OVR | \n",
" None | \n",
" None | \n",
" 0.998538 | \n",
"
\n",
" \n",
" 7 | \n",
" Class 3 vs. 4 | \n",
" 3 | \n",
" 4 | \n",
" 0.999897 | \n",
"
\n",
" \n",
" 8 | \n",
" Class 3 vs. 5 | \n",
" 3 | \n",
" 5 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" Class 3 vs. 6 | \n",
" 3 | \n",
" 6 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" Class 3 vs. 8 | \n",
" 3 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" Class 4 vs. 5 | \n",
" 4 | \n",
" 5 | \n",
" 0.999489 | \n",
"
\n",
" \n",
" 12 | \n",
" Class 4 vs. 6 | \n",
" 4 | \n",
" 6 | \n",
" 0.998871 | \n",
"
\n",
" \n",
" 13 | \n",
" Class 4 vs. 8 | \n",
" 4 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 14 | \n",
" Class 5 vs. 6 | \n",
" 5 | \n",
" 6 | \n",
" 0.999350 | \n",
"
\n",
" \n",
" 15 | \n",
" Class 5 vs. 8 | \n",
" 5 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 16 | \n",
" Class 6 vs. 8 | \n",
" 6 | \n",
" 8 | \n",
" 0.999689 | \n",
"
\n",
" \n",
" 17 | \n",
" Macro OVO | \n",
" None | \n",
" None | \n",
" 0.999730 | \n",
"
\n",
" \n",
" 18 | \n",
" Weighted OVO | \n",
" None | \n",
" None | \n",
" 0.999642 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" type first_class_domain second_class_domain auc_pr\n",
"0 3 vs Rest 3 None 1.000000\n",
"1 4 vs Rest 4 None 0.998698\n",
"2 5 vs Rest 5 None 1.000000\n",
"3 6 vs Rest 6 None 0.996059\n",
"4 8 vs Rest 8 None 1.000000\n",
"5 Macro OVR None None 0.998952\n",
"6 Weighted OVR None None 0.998538\n",
"7 Class 3 vs. 4 3 4 0.999897\n",
"8 Class 3 vs. 5 3 5 1.000000\n",
"9 Class 3 vs. 6 3 6 1.000000\n",
"10 Class 3 vs. 8 3 8 1.000000\n",
"11 Class 4 vs. 5 4 5 0.999489\n",
"12 Class 4 vs. 6 4 6 0.998871\n",
"13 Class 4 vs. 8 4 8 1.000000\n",
"14 Class 5 vs. 6 5 6 0.999350\n",
"15 Class 5 vs. 8 5 8 1.000000\n",
"16 Class 6 vs. 8 6 8 0.999689\n",
"17 Macro OVO None None 0.999730\n",
"18 Weighted OVO None None 0.999642"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 8 | \n",
" Error | \n",
" Rate | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 3 / 3 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 154.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.006452 | \n",
" 1 / 155 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 3 / 3 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
" 53.0 | \n",
" 1.0 | \n",
" 0.053571 | \n",
" 3 / 56 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 72.0 | \n",
" 0.000000 | \n",
" 0 / 72 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0 | \n",
" 161.0 | \n",
" 0.0 | \n",
" 55.0 | \n",
" 73.0 | \n",
" 0.034602 | \n",
" 10 / 289 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 3 4 5 6 8 Error Rate\n",
"0 0.0 3.0 0.0 0.0 0.0 1.000000 3 / 3\n",
"1 0.0 154.0 0.0 1.0 0.0 0.006452 1 / 155\n",
"2 0.0 2.0 0.0 1.0 0.0 1.000000 3 / 3\n",
"3 0.0 2.0 0.0 53.0 1.0 0.053571 3 / 56\n",
"4 0.0 0.0 0.0 0.0 72.0 0.000000 0 / 72\n",
"5 0.0 161.0 0.0 55.0 73.0 0.034602 10 / 289"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Top-5 Hit Ratios: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" k | \n",
" hit_ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0.965398 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" k hit_ratio\n",
"0 1 0.965398\n",
"1 2 1.000000\n",
"2 3 1.000000\n",
"3 4 1.000000\n",
"4 5 1.000000"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"ModelMetricsMultinomial: gbm\n",
"** Reported on validation data. **\n",
"\n",
"MSE: 0.08366695440378827\n",
"RMSE: 0.2892524060466711\n",
"LogLoss: 0.3345387893847581\n",
"Mean Per-Class Error: 0.2181318681318681\n",
"AUC: 0.7987492282997901\n",
"AUCPR: 0.7975377394397984\n",
"\n",
"Multinomial AUC values: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" type | \n",
" first_class_domain | \n",
" second_class_domain | \n",
" auc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 vs Rest | \n",
" 3 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 vs Rest | \n",
" 4 | \n",
" None | \n",
" 0.996154 | \n",
"
\n",
" \n",
" 2 | \n",
" 5 vs Rest | \n",
" 5 | \n",
" None | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 6 vs Rest | \n",
" 6 | \n",
" None | \n",
" 0.997592 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 vs Rest | \n",
" 8 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" Macro OVR | \n",
" None | \n",
" None | \n",
" 0.798749 | \n",
"
\n",
" \n",
" 6 | \n",
" Weighted OVR | \n",
" None | \n",
" None | \n",
" 0.997714 | \n",
"
\n",
" \n",
" 7 | \n",
" Class 3 vs. 4 | \n",
" 3 | \n",
" 4 | \n",
" 0.990385 | \n",
"
\n",
" \n",
" 8 | \n",
" Class 3 vs. 5 | \n",
" 3 | \n",
" 5 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 9 | \n",
" Class 3 vs. 6 | \n",
" 3 | \n",
" 6 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" Class 3 vs. 8 | \n",
" 3 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" Class 4 vs. 5 | \n",
" 4 | \n",
" 5 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 12 | \n",
" Class 4 vs. 6 | \n",
" 4 | \n",
" 6 | \n",
" 0.995192 | \n",
"
\n",
" \n",
" 13 | \n",
" Class 4 vs. 8 | \n",
" 4 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 14 | \n",
" Class 5 vs. 6 | \n",
" 5 | \n",
" 6 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 15 | \n",
" Class 5 vs. 8 | \n",
" 5 | \n",
" 8 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 16 | \n",
" Class 6 vs. 8 | \n",
" 6 | \n",
" 8 | \n",
" 0.998016 | \n",
"
\n",
" \n",
" 17 | \n",
" Macro OVO | \n",
" None | \n",
" None | \n",
" 0.798359 | \n",
"
\n",
" \n",
" 18 | \n",
" Weighted OVO | \n",
" None | \n",
" None | \n",
" 0.872818 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" type first_class_domain second_class_domain auc\n",
"0 3 vs Rest 3 None 1.000000\n",
"1 4 vs Rest 4 None 0.996154\n",
"2 5 vs Rest 5 None 0.000000\n",
"3 6 vs Rest 6 None 0.997592\n",
"4 8 vs Rest 8 None 1.000000\n",
"5 Macro OVR None None 0.798749\n",
"6 Weighted OVR None None 0.997714\n",
"7 Class 3 vs. 4 3 4 0.990385\n",
"8 Class 3 vs. 5 3 5 0.500000\n",
"9 Class 3 vs. 6 3 6 1.000000\n",
"10 Class 3 vs. 8 3 8 1.000000\n",
"11 Class 4 vs. 5 4 5 0.500000\n",
"12 Class 4 vs. 6 4 6 0.995192\n",
"13 Class 4 vs. 8 4 8 1.000000\n",
"14 Class 5 vs. 6 5 6 0.500000\n",
"15 Class 5 vs. 8 5 8 0.500000\n",
"16 Class 6 vs. 8 6 8 0.998016\n",
"17 Macro OVO None None 0.798359\n",
"18 Weighted OVO None None 0.872818"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Multinomial auc_pr values: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" type | \n",
" first_class_domain | \n",
" second_class_domain | \n",
" auc_pr | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 vs Rest | \n",
" 3 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 vs Rest | \n",
" 4 | \n",
" None | \n",
" 0.994834 | \n",
"
\n",
" \n",
" 2 | \n",
" 5 vs Rest | \n",
" 5 | \n",
" None | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 6 vs Rest | \n",
" 6 | \n",
" None | \n",
" 0.992854 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 vs Rest | \n",
" 8 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" Macro OVR | \n",
" None | \n",
" None | \n",
" 0.797538 | \n",
"
\n",
" \n",
" 6 | \n",
" Weighted OVR | \n",
" None | \n",
" None | \n",
" 0.995994 | \n",
"
\n",
" \n",
" 7 | \n",
" Class 3 vs. 4 | \n",
" 3 | \n",
" 4 | \n",
" 0.999817 | \n",
"
\n",
" \n",
" 8 | \n",
" Class 3 vs. 5 | \n",
" 3 | \n",
" 5 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 9 | \n",
" Class 3 vs. 6 | \n",
" 3 | \n",
" 6 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" Class 3 vs. 8 | \n",
" 3 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" Class 4 vs. 5 | \n",
" 4 | \n",
" 5 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 12 | \n",
" Class 4 vs. 6 | \n",
" 4 | \n",
" 6 | \n",
" 0.996311 | \n",
"
\n",
" \n",
" 13 | \n",
" Class 4 vs. 8 | \n",
" 4 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 14 | \n",
" Class 5 vs. 6 | \n",
" 5 | \n",
" 6 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 15 | \n",
" Class 5 vs. 8 | \n",
" 5 | \n",
" 8 | \n",
" 0.500000 | \n",
"
\n",
" \n",
" 16 | \n",
" Class 6 vs. 8 | \n",
" 6 | \n",
" 8 | \n",
" 0.997536 | \n",
"
\n",
" \n",
" 17 | \n",
" Macro OVO | \n",
" None | \n",
" None | \n",
" 0.799366 | \n",
"
\n",
" \n",
" 18 | \n",
" Weighted OVO | \n",
" None | \n",
" None | \n",
" 0.874012 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" type first_class_domain second_class_domain auc_pr\n",
"0 3 vs Rest 3 None 1.000000\n",
"1 4 vs Rest 4 None 0.994834\n",
"2 5 vs Rest 5 None 0.000000\n",
"3 6 vs Rest 6 None 0.992854\n",
"4 8 vs Rest 8 None 1.000000\n",
"5 Macro OVR None None 0.797538\n",
"6 Weighted OVR None None 0.995994\n",
"7 Class 3 vs. 4 3 4 0.999817\n",
"8 Class 3 vs. 5 3 5 0.500000\n",
"9 Class 3 vs. 6 3 6 1.000000\n",
"10 Class 3 vs. 8 3 8 1.000000\n",
"11 Class 4 vs. 5 4 5 0.500000\n",
"12 Class 4 vs. 6 4 6 0.996311\n",
"13 Class 4 vs. 8 4 8 1.000000\n",
"14 Class 5 vs. 6 5 6 0.500000\n",
"15 Class 5 vs. 8 5 8 0.500000\n",
"16 Class 6 vs. 8 6 8 0.997536\n",
"17 Macro OVO None None 0.799366\n",
"18 Weighted OVO None None 0.874012"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 8 | \n",
" Error | \n",
" Rate | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 1 / 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 51.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.019231 | \n",
" 1 / 52 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" NaN | \n",
" 0 / 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
" 26.0 | \n",
" 0.0 | \n",
" 0.071429 | \n",
" 2 / 28 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 36.0 | \n",
" 0.000000 | \n",
" 0 / 36 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0 | \n",
" 54.0 | \n",
" 0.0 | \n",
" 27.0 | \n",
" 36.0 | \n",
" 0.034188 | \n",
" 4 / 117 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 3 4 5 6 8 Error Rate\n",
"0 0.0 1.0 0.0 0.0 0.0 1.000000 1 / 1\n",
"1 0.0 51.0 0.0 1.0 0.0 0.019231 1 / 52\n",
"2 0.0 0.0 0.0 0.0 0.0 NaN 0 / 0\n",
"3 0.0 2.0 0.0 26.0 0.0 0.071429 2 / 28\n",
"4 0.0 0.0 0.0 0.0 36.0 0.000000 0 / 36\n",
"5 0.0 54.0 0.0 27.0 36.0 0.034188 4 / 117"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Top-5 Hit Ratios: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" k | \n",
" hit_ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0.965812 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" k hit_ratio\n",
"0 1 0.965812\n",
"1 2 1.000000\n",
"2 3 1.000000\n",
"3 4 1.000000\n",
"4 5 1.000000"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Scoring History: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" timestamp | \n",
" duration | \n",
" number_of_trees | \n",
" training_rmse | \n",
" training_logloss | \n",
" training_classification_error | \n",
" training_auc | \n",
" training_pr_auc | \n",
" validation_rmse | \n",
" validation_logloss | \n",
" validation_classification_error | \n",
" validation_auc | \n",
" validation_pr_auc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.001 sec | \n",
" 0.0 | \n",
" 0.800000 | \n",
" 1.609438 | \n",
" 0.643599 | \n",
" 0.500000 | \n",
" 0.200000 | \n",
" 0.800000 | \n",
" 1.609438 | \n",
" 0.615385 | \n",
" 0.400000 | \n",
" 0.200000 | \n",
"
\n",
" \n",
" 1 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.007 sec | \n",
" 1.0 | \n",
" 0.792456 | \n",
" 1.572433 | \n",
" 0.034602 | \n",
" 0.996920 | \n",
" 0.923202 | \n",
" 0.792423 | \n",
" 1.572271 | \n",
" 0.034188 | \n",
" 0.798394 | \n",
" 0.797055 | \n",
"
\n",
" \n",
" 2 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.010 sec | \n",
" 2.0 | \n",
" 0.784944 | \n",
" 1.536933 | \n",
" 0.034602 | \n",
" 0.998389 | \n",
" 0.953458 | \n",
" 0.784923 | \n",
" 1.536834 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 3 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.013 sec | \n",
" 3.0 | \n",
" 0.777463 | \n",
" 1.502819 | \n",
" 0.034602 | \n",
" 0.998044 | \n",
" 0.945566 | \n",
" 0.777452 | \n",
" 1.502770 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 4 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.016 sec | \n",
" 4.0 | \n",
" 0.770014 | \n",
" 1.470001 | \n",
" 0.034602 | \n",
" 0.998044 | \n",
" 0.945566 | \n",
" 0.769983 | \n",
" 1.469861 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 5 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.019 sec | \n",
" 5.0 | \n",
" 0.762600 | \n",
" 1.438399 | \n",
" 0.034602 | \n",
" 0.998054 | \n",
" 0.945574 | \n",
" 0.762554 | \n",
" 1.438200 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 6 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.021 sec | \n",
" 6.0 | \n",
" 0.755219 | \n",
" 1.407921 | \n",
" 0.034602 | \n",
" 0.998092 | \n",
" 0.945606 | \n",
" 0.755176 | \n",
" 1.407739 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 7 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.024 sec | \n",
" 7.0 | \n",
" 0.747874 | \n",
" 1.378506 | \n",
" 0.034602 | \n",
" 0.998092 | \n",
" 0.945606 | \n",
" 0.747814 | \n",
" 1.378260 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 8 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.026 sec | \n",
" 8.0 | \n",
" 0.740564 | \n",
" 1.350089 | \n",
" 0.034602 | \n",
" 0.998092 | \n",
" 0.945606 | \n",
" 0.740519 | \n",
" 1.349903 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 9 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.028 sec | \n",
" 9.0 | \n",
" 0.733293 | \n",
" 1.322618 | \n",
" 0.034602 | \n",
" 0.998092 | \n",
" 0.945606 | \n",
" 0.733238 | \n",
" 1.322396 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 10 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.031 sec | \n",
" 10.0 | \n",
" 0.726058 | \n",
" 1.296014 | \n",
" 0.034602 | \n",
" 0.998102 | \n",
" 0.945614 | \n",
" 0.726020 | \n",
" 1.295866 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 11 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.034 sec | \n",
" 11.0 | \n",
" 0.718861 | \n",
" 1.270251 | \n",
" 0.034602 | \n",
" 0.998102 | \n",
" 0.945614 | \n",
" 0.718823 | \n",
" 1.270106 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 12 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.036 sec | \n",
" 12.0 | \n",
" 0.711704 | \n",
" 1.245292 | \n",
" 0.034602 | \n",
" 0.998102 | \n",
" 0.945614 | \n",
" 0.711680 | \n",
" 1.245194 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 13 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.038 sec | \n",
" 13.0 | \n",
" 0.704588 | \n",
" 1.221078 | \n",
" 0.034602 | \n",
" 0.998102 | \n",
" 0.945614 | \n",
" 0.704565 | \n",
" 1.220990 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 14 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.041 sec | \n",
" 14.0 | \n",
" 0.697512 | \n",
" 1.197577 | \n",
" 0.034602 | \n",
" 0.999277 | \n",
" 0.998774 | \n",
" 0.697507 | \n",
" 1.197554 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 15 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.045 sec | \n",
" 15.0 | \n",
" 0.690478 | \n",
" 1.174763 | \n",
" 0.034602 | \n",
" 0.999292 | \n",
" 0.998837 | \n",
" 0.690506 | \n",
" 1.174846 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 16 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.048 sec | \n",
" 16.0 | \n",
" 0.683489 | \n",
" 1.152605 | \n",
" 0.034602 | \n",
" 0.999292 | \n",
" 0.998837 | \n",
" 0.683503 | \n",
" 1.152644 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 17 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.052 sec | \n",
" 17.0 | \n",
" 0.676542 | \n",
" 1.131059 | \n",
" 0.031142 | \n",
" 0.999311 | \n",
" 0.998856 | \n",
" 0.676589 | \n",
" 1.131201 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 18 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.055 sec | \n",
" 18.0 | \n",
" 0.669640 | \n",
" 1.110108 | \n",
" 0.031142 | \n",
" 0.999311 | \n",
" 0.998856 | \n",
" 0.669721 | \n",
" 1.110354 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
" 19 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.058 sec | \n",
" 19.0 | \n",
" 0.662783 | \n",
" 1.089722 | \n",
" 0.031142 | \n",
" 0.999311 | \n",
" 0.998856 | \n",
" 0.662885 | \n",
" 1.090031 | \n",
" 0.034188 | \n",
" 0.798834 | \n",
" 0.798153 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp duration number_of_trees training_rmse \\\n",
"0 2020-11-24 14:51:44 0.001 sec 0.0 0.800000 \n",
"1 2020-11-24 14:51:44 0.007 sec 1.0 0.792456 \n",
"2 2020-11-24 14:51:44 0.010 sec 2.0 0.784944 \n",
"3 2020-11-24 14:51:44 0.013 sec 3.0 0.777463 \n",
"4 2020-11-24 14:51:44 0.016 sec 4.0 0.770014 \n",
"5 2020-11-24 14:51:44 0.019 sec 5.0 0.762600 \n",
"6 2020-11-24 14:51:44 0.021 sec 6.0 0.755219 \n",
"7 2020-11-24 14:51:44 0.024 sec 7.0 0.747874 \n",
"8 2020-11-24 14:51:44 0.026 sec 8.0 0.740564 \n",
"9 2020-11-24 14:51:44 0.028 sec 9.0 0.733293 \n",
"10 2020-11-24 14:51:44 0.031 sec 10.0 0.726058 \n",
"11 2020-11-24 14:51:44 0.034 sec 11.0 0.718861 \n",
"12 2020-11-24 14:51:44 0.036 sec 12.0 0.711704 \n",
"13 2020-11-24 14:51:44 0.038 sec 13.0 0.704588 \n",
"14 2020-11-24 14:51:44 0.041 sec 14.0 0.697512 \n",
"15 2020-11-24 14:51:44 0.045 sec 15.0 0.690478 \n",
"16 2020-11-24 14:51:44 0.048 sec 16.0 0.683489 \n",
"17 2020-11-24 14:51:44 0.052 sec 17.0 0.676542 \n",
"18 2020-11-24 14:51:44 0.055 sec 18.0 0.669640 \n",
"19 2020-11-24 14:51:44 0.058 sec 19.0 0.662783 \n",
"\n",
" training_logloss training_classification_error training_auc \\\n",
"0 1.609438 0.643599 0.500000 \n",
"1 1.572433 0.034602 0.996920 \n",
"2 1.536933 0.034602 0.998389 \n",
"3 1.502819 0.034602 0.998044 \n",
"4 1.470001 0.034602 0.998044 \n",
"5 1.438399 0.034602 0.998054 \n",
"6 1.407921 0.034602 0.998092 \n",
"7 1.378506 0.034602 0.998092 \n",
"8 1.350089 0.034602 0.998092 \n",
"9 1.322618 0.034602 0.998092 \n",
"10 1.296014 0.034602 0.998102 \n",
"11 1.270251 0.034602 0.998102 \n",
"12 1.245292 0.034602 0.998102 \n",
"13 1.221078 0.034602 0.998102 \n",
"14 1.197577 0.034602 0.999277 \n",
"15 1.174763 0.034602 0.999292 \n",
"16 1.152605 0.034602 0.999292 \n",
"17 1.131059 0.031142 0.999311 \n",
"18 1.110108 0.031142 0.999311 \n",
"19 1.089722 0.031142 0.999311 \n",
"\n",
" training_pr_auc validation_rmse validation_logloss \\\n",
"0 0.200000 0.800000 1.609438 \n",
"1 0.923202 0.792423 1.572271 \n",
"2 0.953458 0.784923 1.536834 \n",
"3 0.945566 0.777452 1.502770 \n",
"4 0.945566 0.769983 1.469861 \n",
"5 0.945574 0.762554 1.438200 \n",
"6 0.945606 0.755176 1.407739 \n",
"7 0.945606 0.747814 1.378260 \n",
"8 0.945606 0.740519 1.349903 \n",
"9 0.945606 0.733238 1.322396 \n",
"10 0.945614 0.726020 1.295866 \n",
"11 0.945614 0.718823 1.270106 \n",
"12 0.945614 0.711680 1.245194 \n",
"13 0.945614 0.704565 1.220990 \n",
"14 0.998774 0.697507 1.197554 \n",
"15 0.998837 0.690506 1.174846 \n",
"16 0.998837 0.683503 1.152644 \n",
"17 0.998856 0.676589 1.131201 \n",
"18 0.998856 0.669721 1.110354 \n",
"19 0.998856 0.662885 1.090031 \n",
"\n",
" validation_classification_error validation_auc validation_pr_auc \n",
"0 0.615385 0.400000 0.200000 \n",
"1 0.034188 0.798394 0.797055 \n",
"2 0.034188 0.798834 0.798153 \n",
"3 0.034188 0.798834 0.798153 \n",
"4 0.034188 0.798834 0.798153 \n",
"5 0.034188 0.798834 0.798153 \n",
"6 0.034188 0.798834 0.798153 \n",
"7 0.034188 0.798834 0.798153 \n",
"8 0.034188 0.798834 0.798153 \n",
"9 0.034188 0.798834 0.798153 \n",
"10 0.034188 0.798834 0.798153 \n",
"11 0.034188 0.798834 0.798153 \n",
"12 0.034188 0.798834 0.798153 \n",
"13 0.034188 0.798834 0.798153 \n",
"14 0.034188 0.798834 0.798153 \n",
"15 0.034188 0.798834 0.798153 \n",
"16 0.034188 0.798834 0.798153 \n",
"17 0.034188 0.798834 0.798153 \n",
"18 0.034188 0.798834 0.798153 \n",
"19 0.034188 0.798834 0.798153 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"See the whole table with table.as_data_frame()\n",
"\n",
"Variable Importances: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" variable | \n",
" relative_importance | \n",
" scaled_importance | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" displacement | \n",
" 6259.065430 | \n",
" 1.000000 | \n",
" 0.979955 | \n",
"
\n",
" \n",
" 1 | \n",
" power | \n",
" 56.853493 | \n",
" 0.009083 | \n",
" 0.008901 | \n",
"
\n",
" \n",
" 2 | \n",
" acceleration | \n",
" 31.531071 | \n",
" 0.005038 | \n",
" 0.004937 | \n",
"
\n",
" \n",
" 3 | \n",
" weight | \n",
" 21.419443 | \n",
" 0.003422 | \n",
" 0.003354 | \n",
"
\n",
" \n",
" 4 | \n",
" year | \n",
" 18.227747 | \n",
" 0.002912 | \n",
" 0.002854 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" variable relative_importance scaled_importance percentage\n",
"0 displacement 6259.065430 1.000000 0.979955\n",
"1 power 56.853493 0.009083 0.008901\n",
"2 acceleration 31.531071 0.005038 0.004937\n",
"3 weight 21.419443 0.003422 0.003354\n",
"4 year 18.227747 0.002912 0.002854"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cars = h2o.import_file(path=_locate(\"smalldata/junit/cars_20mpg.csv\"))\n",
"\n",
"features = [\"displacement\", \"power\", \"weight\", \"acceleration\", \"year\"]\n",
"response = \"cylinders\"\n",
"distribution = \"multinomial\"\n",
"\n",
"cars[response] = cars[response].asfactor()\n",
"\n",
"r = cars[0].runif()\n",
"train = cars[r > .3]\n",
"valid = cars[r <= .3]\n",
" \n",
"# train model\n",
"gbm = H2OGradientBoostingEstimator(distribution=\"multinomial\", \n",
" ntrees=100, \n",
" max_depth=3, \n",
" learn_rate=0.01, \n",
" auc_type=\"MACRO_OVR\")\n",
"gbm.train(x =features, \n",
" y =response, \n",
" training_frame =train,\n",
" validation_frame=valid)\n",
"gbm.show()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.999511857555245"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gbm.auc()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.998951571914844"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gbm.aucpr()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Multinomial AUC values: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" type | \n",
" first_class_domain | \n",
" second_class_domain | \n",
" auc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 vs Rest | \n",
" 3 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 vs Rest | \n",
" 4 | \n",
" None | \n",
" 0.998556 | \n",
"
\n",
" \n",
" 2 | \n",
" 5 vs Rest | \n",
" 5 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 6 vs Rest | \n",
" 6 | \n",
" None | \n",
" 0.999004 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 vs Rest | \n",
" 8 | \n",
" None | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" Macro OVR | \n",
" None | \n",
" None | \n",
" 0.999512 | \n",
"
\n",
" \n",
" 6 | \n",
" Weighted OVR | \n",
" None | \n",
" None | \n",
" 0.999032 | \n",
"
\n",
" \n",
" 7 | \n",
" Class 3 vs. 4 | \n",
" 3 | \n",
" 4 | \n",
" 0.994624 | \n",
"
\n",
" \n",
" 8 | \n",
" Class 3 vs. 5 | \n",
" 3 | \n",
" 5 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" Class 3 vs. 6 | \n",
" 3 | \n",
" 6 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" Class 3 vs. 8 | \n",
" 3 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" Class 4 vs. 5 | \n",
" 4 | \n",
" 5 | \n",
" 0.975269 | \n",
"
\n",
" \n",
" 12 | \n",
" Class 4 vs. 6 | \n",
" 4 | \n",
" 6 | \n",
" 0.999482 | \n",
"
\n",
" \n",
" 13 | \n",
" Class 4 vs. 8 | \n",
" 4 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 14 | \n",
" Class 5 vs. 6 | \n",
" 5 | \n",
" 6 | \n",
" 0.988095 | \n",
"
\n",
" \n",
" 15 | \n",
" Class 5 vs. 8 | \n",
" 5 | \n",
" 8 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 16 | \n",
" Class 6 vs. 8 | \n",
" 6 | \n",
" 8 | \n",
" 0.999752 | \n",
"
\n",
" \n",
" 17 | \n",
" Macro OVO | \n",
" None | \n",
" None | \n",
" 0.995722 | \n",
"
\n",
" \n",
" 18 | \n",
" Weighted OVO | \n",
" None | \n",
" None | \n",
" 0.995155 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" type first_class_domain second_class_domain auc\n",
"0 3 vs Rest 3 None 1.000000\n",
"1 4 vs Rest 4 None 0.998556\n",
"2 5 vs Rest 5 None 1.000000\n",
"3 6 vs Rest 6 None 0.999004\n",
"4 8 vs Rest 8 None 1.000000\n",
"5 Macro OVR None None 0.999512\n",
"6 Weighted OVR None None 0.999032\n",
"7 Class 3 vs. 4 3 4 0.994624\n",
"8 Class 3 vs. 5 3 5 1.000000\n",
"9 Class 3 vs. 6 3 6 1.000000\n",
"10 Class 3 vs. 8 3 8 1.000000\n",
"11 Class 4 vs. 5 4 5 0.975269\n",
"12 Class 4 vs. 6 4 6 0.999482\n",
"13 Class 4 vs. 8 4 8 1.000000\n",
"14 Class 5 vs. 6 5 6 0.988095\n",
"15 Class 5 vs. 8 5 8 1.000000\n",
"16 Class 6 vs. 8 6 8 0.999752\n",
"17 Macro OVO None None 0.995722\n",
"18 Weighted OVO None None 0.995155"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gbm.multinomial_auc_table()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" timestamp | \n",
" duration | \n",
" number_of_trees | \n",
" training_rmse | \n",
" training_logloss | \n",
" training_classification_error | \n",
" training_auc | \n",
" training_pr_auc | \n",
" validation_rmse | \n",
" validation_logloss | \n",
" validation_classification_error | \n",
" validation_auc | \n",
" validation_pr_auc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.003 sec | \n",
" 0.0 | \n",
" 0.800000 | \n",
" 1.609438 | \n",
" 0.643599 | \n",
" 0.500000 | \n",
" 0.387483 | \n",
" 0.800000 | \n",
" 1.609438 | \n",
" 0.615385 | \n",
" 0.500000 | \n",
" 0.349551 | \n",
"
\n",
" \n",
" 1 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.016 sec | \n",
" 1.0 | \n",
" 0.792456 | \n",
" 1.572433 | \n",
" 0.034602 | \n",
" 0.997761 | \n",
" 0.993284 | \n",
" 0.792423 | \n",
" 1.572271 | \n",
" 0.034188 | \n",
" 0.996925 | \n",
" 0.994890 | \n",
"
\n",
" \n",
" 2 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.020 sec | \n",
" 2.0 | \n",
" 0.784944 | \n",
" 1.536933 | \n",
" 0.034602 | \n",
" 0.998140 | \n",
" 0.995454 | \n",
" 0.784923 | \n",
" 1.536834 | \n",
" 0.034188 | \n",
" 0.997573 | \n",
" 0.996420 | \n",
"
\n",
" \n",
" 3 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.024 sec | \n",
" 3.0 | \n",
" 0.777463 | \n",
" 1.502819 | \n",
" 0.034602 | \n",
" 0.998134 | \n",
" 0.995058 | \n",
" 0.777452 | \n",
" 1.502770 | \n",
" 0.034188 | \n",
" 0.997573 | \n",
" 0.996420 | \n",
"
\n",
" \n",
" 4 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.026 sec | \n",
" 4.0 | \n",
" 0.770014 | \n",
" 1.470001 | \n",
" 0.034602 | \n",
" 0.998134 | \n",
" 0.995058 | \n",
" 0.769983 | \n",
" 1.469861 | \n",
" 0.034188 | \n",
" 0.997573 | \n",
" 0.996420 | \n",
"
\n",
" \n",
" 5 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.029 sec | \n",
" 5.0 | \n",
" 0.762600 | \n",
" 1.438399 | \n",
" 0.034602 | \n",
" 0.998160 | \n",
" 0.995082 | \n",
" 0.762554 | \n",
" 1.438200 | \n",
" 0.034188 | \n",
" 0.997573 | \n",
" 0.996420 | \n",
"
\n",
" \n",
" 6 | \n",
" | \n",
" 2020-11-24 14:51:44 | \n",
" 0.031 sec | \n",
" 6.0 | \n",
" 0.755219 | \n",
" 1.407921 | \n",
" 0.034602 | \n",
" 0.998264 | \n",
" 0.995168 | \n",
" 0.755176 | \n",
" 1.407739 | \n",
" 0.034188 | \n",
" 0.997573 | \n",
" 0.996420 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp duration number_of_trees training_rmse \\\n",
"0 2020-11-24 14:51:44 0.003 sec 0.0 0.800000 \n",
"1 2020-11-24 14:51:44 0.016 sec 1.0 0.792456 \n",
"2 2020-11-24 14:51:44 0.020 sec 2.0 0.784944 \n",
"3 2020-11-24 14:51:44 0.024 sec 3.0 0.777463 \n",
"4 2020-11-24 14:51:44 0.026 sec 4.0 0.770014 \n",
"5 2020-11-24 14:51:44 0.029 sec 5.0 0.762600 \n",
"6 2020-11-24 14:51:44 0.031 sec 6.0 0.755219 \n",
"\n",
" training_logloss training_classification_error training_auc \\\n",
"0 1.609438 0.643599 0.500000 \n",
"1 1.572433 0.034602 0.997761 \n",
"2 1.536933 0.034602 0.998140 \n",
"3 1.502819 0.034602 0.998134 \n",
"4 1.470001 0.034602 0.998134 \n",
"5 1.438399 0.034602 0.998160 \n",
"6 1.407921 0.034602 0.998264 \n",
"\n",
" training_pr_auc validation_rmse validation_logloss \\\n",
"0 0.387483 0.800000 1.609438 \n",
"1 0.993284 0.792423 1.572271 \n",
"2 0.995454 0.784923 1.536834 \n",
"3 0.995058 0.777452 1.502770 \n",
"4 0.995058 0.769983 1.469861 \n",
"5 0.995082 0.762554 1.438200 \n",
"6 0.995168 0.755176 1.407739 \n",
"\n",
" validation_classification_error validation_auc validation_pr_auc \n",
"0 0.615385 0.500000 0.349551 \n",
"1 0.034188 0.996925 0.994890 \n",
"2 0.034188 0.997573 0.996420 \n",
"3 0.034188 0.997573 0.996420 \n",
"4 0.034188 0.997573 0.996420 \n",
"5 0.034188 0.997573 0.996420 \n",
"6 0.034188 0.997573 0.996420 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# early stopping\n",
"\n",
"gbm = H2OGradientBoostingEstimator(distribution=\"multinomial\", \n",
" ntrees=100, \n",
" max_depth=3, \n",
" learn_rate=0.01,\n",
" stopping_metric=\"AUCPR\", \n",
" stopping_tolerance=0.01, \n",
" stopping_rounds=3,\n",
" auc_type=\"WEIGHTED_OVR\")\n",
"gbm.train(x =features, \n",
" y =response, \n",
" training_frame =train,\n",
" validation_frame=valid)\n",
"gbm.scoring_history()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gbm Grid Build progress: |████████████████████████████████████████████████| 100%\n"
]
},
{
"data": {
"text/plain": [
"{'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_3': 0.9996445050710137,\n",
" 'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_4': 0.9996445050710137,\n",
" 'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_2': 0.9994134025192564,\n",
" 'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_1': 0.9994134025192564}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# grid search\n",
"\n",
"from h2o.grid.grid_search import H2OGridSearch\n",
"hyper_parameters = {'ntrees': [5, 10], 'max_depth': [10, 20]}\n",
"gs = H2OGridSearch(H2OGradientBoostingEstimator(distribution = \"multinomial\", auc_type=\"MACRO_OVR\"), hyper_parameters)\n",
"gs.train(x=features, y=response, training_frame=train)\n",
"gs.auc(train=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parse progress: |█████████████████████████████████████████████████████████| 100%\n",
"gbm Model Build progress: |███████████████████████████████████████████████| 100%\n",
"Model Details\n",
"=============\n",
"H2OGradientBoostingEstimator : Gradient Boosting Machine\n",
"Model Key: GBM_model_python_1606223609474_70\n",
"\n",
"\n",
"Model Summary: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" number_of_trees | \n",
" number_of_internal_trees | \n",
" model_size_in_bytes | \n",
" min_depth | \n",
" max_depth | \n",
" mean_depth | \n",
" min_leaves | \n",
" max_leaves | \n",
" mean_leaves | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" | \n",
" 100.0 | \n",
" 9800.0 | \n",
" 2008108.0 | \n",
" 1.0 | \n",
" 3.0 | \n",
" 2.99949 | \n",
" 2.0 | \n",
" 8.0 | \n",
" 7.804898 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" number_of_trees number_of_internal_trees model_size_in_bytes \\\n",
"0 100.0 9800.0 2008108.0 \n",
"\n",
" min_depth max_depth mean_depth min_leaves max_leaves mean_leaves \n",
"0 1.0 3.0 2.99949 2.0 8.0 7.804898 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"ModelMetricsMultinomial: gbm\n",
"** Reported on train data. **\n",
"\n",
"MSE: 0.430057090190663\n",
"RMSE: 0.6557873818477015\n",
"LogLoss: 1.3717578431630804\n",
"Mean Per-Class Error: 0.7578920698567744\n",
"AUC: NaN\n",
"AUCPR: NaN\n",
"Multinomial auc values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n",
"Multinomial auc_pr values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n",
"\n",
"Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ABE | \n",
" ABQ | \n",
" ACY | \n",
" ALB | \n",
" ATL | \n",
" AVP | \n",
" BDL | \n",
" BGM | \n",
" BNA | \n",
" BOS | \n",
" ... | \n",
" SNA | \n",
" STL | \n",
" SWF | \n",
" SYR | \n",
" TOL | \n",
" TPA | \n",
" TUS | \n",
" UCA | \n",
" Error | \n",
" Rate | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 102 / 102 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 76 / 76 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 15 / 15 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 81 / 81 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 18 / 18 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 20 / 20 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 15.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.761905 | \n",
" 48 / 63 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 10.0 | \n",
" 8.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.794872 | \n",
" 31 / 39 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 41 / 41 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 114.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.673352 | \n",
" 235 / 349 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 30 / 30 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 36.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 8.0 | \n",
" 0.859813 | \n",
" 92 / 107 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 17.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 0.0 | \n",
" 0.658046 | \n",
" 229 / 348 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 9.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.591503 | \n",
" 181 / 306 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 21 / 21 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 22 / 22 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 1 / 1 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0 / 13 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 40 / 40 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.421769 | \n",
" 62 / 147 | \n",
"
\n",
" \n",
"
\n",
"
20 rows × 100 columns
\n",
"
"
],
"text/plain": [
" ABE ABQ ACY ALB ATL AVP BDL BGM BNA BOS ... SNA STL SWF \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"6 0.0 0.0 0.0 0.0 0.0 0.0 15.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"7 0.0 0.0 0.0 0.0 0.0 0.0 10.0 8.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 114.0 ... 0.0 0.0 0.0 \n",
"10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"11 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 36.0 ... 0.0 0.0 0.0 \n",
"12 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 17.0 0.0 0.0 \n",
"13 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.0 ... 0.0 0.0 0.0 \n",
"14 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"15 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"16 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"17 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"18 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"19 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 ... 0.0 0.0 0.0 \n",
"\n",
" SYR TOL TPA TUS UCA Error Rate \n",
"0 0.0 0.0 0.0 0.0 0.0 1.000000 102 / 102 \n",
"1 0.0 0.0 0.0 0.0 0.0 1.000000 76 / 76 \n",
"2 0.0 0.0 0.0 0.0 0.0 1.000000 15 / 15 \n",
"3 0.0 0.0 0.0 0.0 0.0 1.000000 81 / 81 \n",
"4 0.0 0.0 0.0 0.0 0.0 1.000000 18 / 18 \n",
"5 0.0 0.0 0.0 0.0 0.0 1.000000 20 / 20 \n",
"6 0.0 0.0 0.0 0.0 0.0 0.761905 48 / 63 \n",
"7 0.0 0.0 0.0 0.0 0.0 0.794872 31 / 39 \n",
"8 0.0 0.0 0.0 0.0 0.0 1.000000 41 / 41 \n",
"9 0.0 0.0 0.0 0.0 0.0 0.673352 235 / 349 \n",
"10 0.0 0.0 0.0 0.0 0.0 1.000000 30 / 30 \n",
"11 0.0 0.0 0.0 0.0 8.0 0.859813 92 / 107 \n",
"12 0.0 0.0 0.0 5.0 0.0 0.658046 229 / 348 \n",
"13 0.0 0.0 0.0 0.0 0.0 0.591503 181 / 306 \n",
"14 0.0 0.0 0.0 0.0 0.0 1.000000 21 / 21 \n",
"15 0.0 0.0 0.0 0.0 0.0 1.000000 22 / 22 \n",
"16 0.0 0.0 0.0 0.0 0.0 1.000000 1 / 1 \n",
"17 0.0 0.0 0.0 0.0 0.0 0.000000 0 / 13 \n",
"18 0.0 0.0 0.0 0.0 0.0 1.000000 40 / 40 \n",
"19 0.0 0.0 0.0 0.0 0.0 0.421769 62 / 147 \n",
"\n",
"[20 rows x 100 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"See the whole table with table.as_data_frame()\n",
"\n",
"Top-10 Hit Ratios: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" k | \n",
" hit_ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0.647989 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 0.772855 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 0.834294 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 0.872992 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 0.900168 | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 0.920155 | \n",
"
\n",
" \n",
" 6 | \n",
" 7 | \n",
" 0.933615 | \n",
"
\n",
" \n",
" 7 | \n",
" 8 | \n",
" 0.943201 | \n",
"
\n",
" \n",
" 8 | \n",
" 9 | \n",
" 0.950798 | \n",
"
\n",
" \n",
" 9 | \n",
" 10 | \n",
" 0.956356 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" k hit_ratio\n",
"0 1 0.647989\n",
"1 2 0.772855\n",
"2 3 0.834294\n",
"3 4 0.872992\n",
"4 5 0.900168\n",
"5 6 0.920155\n",
"6 7 0.933615\n",
"7 8 0.943201\n",
"8 9 0.950798\n",
"9 10 0.956356"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"ModelMetricsMultinomial: gbm\n",
"** Reported on validation data. **\n",
"\n",
"MSE: 0.45785658129894297\n",
"RMSE: 0.6766510040626135\n",
"LogLoss: 1.6072449485890636\n",
"Mean Per-Class Error: 0.794834751023698\n",
"AUC: NaN\n",
"AUCPR: NaN\n",
"Multinomial auc values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n",
"Multinomial auc_pr values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n",
"\n",
"Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ABE | \n",
" ABQ | \n",
" ACY | \n",
" ALB | \n",
" ATL | \n",
" AVP | \n",
" BDL | \n",
" BGM | \n",
" BNA | \n",
" BOS | \n",
" ... | \n",
" SNA | \n",
" STL | \n",
" SWF | \n",
" SYR | \n",
" TOL | \n",
" TPA | \n",
" TUS | \n",
" UCA | \n",
" Error | \n",
" Rate | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 28 / 28 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 21 / 21 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 4 / 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 17 / 17 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 5 / 5 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 7 / 7 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 18 / 18 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 14 / 14 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 8 / 8 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 27.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.712766 | \n",
" 67 / 94 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 7 / 7 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 7.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 4.0 | \n",
" 0.950000 | \n",
" 19 / 20 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 3.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.740260 | \n",
" 57 / 77 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.712644 | \n",
" 62 / 87 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 3 / 3 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 3 / 3 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 2 / 2 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0 / 6 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.000000 | \n",
" 12 / 12 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.444444 | \n",
" 16 / 36 | \n",
"
\n",
" \n",
"
\n",
"
20 rows × 100 columns
\n",
"
"
],
"text/plain": [
" ABE ABQ ACY ALB ATL AVP BDL BGM BNA BOS ... SNA STL SWF \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"7 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 27.0 ... 0.0 0.0 0.0 \n",
"10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 \n",
"11 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 7.0 ... 0.0 0.0 0.0 \n",
"12 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 3.0 0.0 0.0 \n",
"13 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 ... 0.0 0.0 0.0 \n",
"14 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"15 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"16 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"17 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"18 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n",
"19 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 \n",
"\n",
" SYR TOL TPA TUS UCA Error Rate \n",
"0 0.0 0.0 0.0 0.0 0.0 1.000000 28 / 28 \n",
"1 0.0 0.0 0.0 0.0 0.0 1.000000 21 / 21 \n",
"2 0.0 0.0 0.0 0.0 0.0 1.000000 4 / 4 \n",
"3 0.0 0.0 0.0 0.0 0.0 1.000000 17 / 17 \n",
"4 0.0 0.0 0.0 0.0 0.0 1.000000 5 / 5 \n",
"5 0.0 0.0 0.0 0.0 0.0 1.000000 7 / 7 \n",
"6 0.0 0.0 0.0 0.0 0.0 1.000000 18 / 18 \n",
"7 0.0 0.0 0.0 0.0 0.0 1.000000 14 / 14 \n",
"8 0.0 0.0 0.0 0.0 0.0 1.000000 8 / 8 \n",
"9 0.0 0.0 0.0 0.0 0.0 0.712766 67 / 94 \n",
"10 0.0 0.0 0.0 0.0 0.0 1.000000 7 / 7 \n",
"11 0.0 0.0 0.0 0.0 4.0 0.950000 19 / 20 \n",
"12 0.0 0.0 0.0 1.0 0.0 0.740260 57 / 77 \n",
"13 0.0 0.0 0.0 0.0 0.0 0.712644 62 / 87 \n",
"14 0.0 0.0 0.0 0.0 0.0 1.000000 3 / 3 \n",
"15 0.0 0.0 0.0 0.0 0.0 1.000000 3 / 3 \n",
"16 0.0 0.0 0.0 0.0 0.0 1.000000 2 / 2 \n",
"17 0.0 0.0 0.0 0.0 0.0 0.000000 0 / 6 \n",
"18 0.0 0.0 0.0 0.0 0.0 1.000000 12 / 12 \n",
"19 0.0 0.0 0.0 0.0 0.0 0.444444 16 / 36 \n",
"\n",
"[20 rows x 100 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"See the whole table with table.as_data_frame()\n",
"\n",
"Top-10 Hit Ratios: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" k | \n",
" hit_ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0.611065 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 0.737729 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 0.778910 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 0.814476 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 0.844218 | \n",
"
\n",
" \n",
" 5 | \n",
" 6 | \n",
" 0.864601 | \n",
"
\n",
" \n",
" 6 | \n",
" 7 | \n",
" 0.878744 | \n",
"
\n",
" \n",
" 7 | \n",
" 8 | \n",
" 0.891015 | \n",
"
\n",
" \n",
" 8 | \n",
" 9 | \n",
" 0.900999 | \n",
"
\n",
" \n",
" 9 | \n",
" 10 | \n",
" 0.909526 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" k hit_ratio\n",
"0 1 0.611065\n",
"1 2 0.737729\n",
"2 3 0.778910\n",
"3 4 0.814476\n",
"4 5 0.844218\n",
"5 6 0.864601\n",
"6 7 0.878744\n",
"7 8 0.891015\n",
"8 9 0.900999\n",
"9 10 0.909526"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Scoring History: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" timestamp | \n",
" duration | \n",
" number_of_trees | \n",
" training_rmse | \n",
" training_logloss | \n",
" training_classification_error | \n",
" training_auc | \n",
" training_pr_auc | \n",
" validation_rmse | \n",
" validation_logloss | \n",
" validation_classification_error | \n",
" validation_auc | \n",
" validation_pr_auc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" | \n",
" 2020-11-24 14:51:45 | \n",
" 0.010 sec | \n",
" 0.0 | \n",
" 0.989796 | \n",
" 4.584967 | \n",
" 0.866007 | \n",
" NaN | \n",
" NaN | \n",
" 0.989796 | \n",
" 4.584967 | \n",
" 0.867512 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" | \n",
" 2020-11-24 14:51:46 | \n",
" 0.345 sec | \n",
" 1.0 | \n",
" 0.981584 | \n",
" 4.064828 | \n",
" 0.365268 | \n",
" NaN | \n",
" NaN | \n",
" 0.981879 | \n",
" 4.084868 | \n",
" 0.386855 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" | \n",
" 2020-11-24 14:51:46 | \n",
" 0.589 sec | \n",
" 2.0 | \n",
" 0.975317 | \n",
" 3.822181 | \n",
" 0.364095 | \n",
" NaN | \n",
" NaN | \n",
" 0.975852 | \n",
" 3.853578 | \n",
" 0.387687 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" | \n",
" 2020-11-24 14:51:46 | \n",
" 0.852 sec | \n",
" 3.0 | \n",
" 0.969404 | \n",
" 3.645022 | \n",
" 0.363840 | \n",
" NaN | \n",
" NaN | \n",
" 0.970166 | \n",
" 3.685062 | \n",
" 0.387479 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" | \n",
" 2020-11-24 14:51:47 | \n",
" 1.110 sec | \n",
" 4.0 | \n",
" 0.963683 | \n",
" 3.503089 | \n",
" 0.363432 | \n",
" NaN | \n",
" NaN | \n",
" 0.964669 | \n",
" 3.550449 | \n",
" 0.387479 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" | \n",
" 2020-11-24 14:51:47 | \n",
" 1.399 sec | \n",
" 5.0 | \n",
" 0.958093 | \n",
" 3.383978 | \n",
" 0.363636 | \n",
" NaN | \n",
" NaN | \n",
" 0.959299 | \n",
" 3.437732 | \n",
" 0.388311 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 6 | \n",
" | \n",
" 2020-11-24 14:51:47 | \n",
" 1.692 sec | \n",
" 6.0 | \n",
" 0.952597 | \n",
" 3.280713 | \n",
" 0.363636 | \n",
" NaN | \n",
" NaN | \n",
" 0.954023 | \n",
" 3.340154 | \n",
" 0.388311 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 7 | \n",
" | \n",
" 2020-11-24 14:51:47 | \n",
" 2.032 sec | \n",
" 7.0 | \n",
" 0.947184 | \n",
" 3.189588 | \n",
" 0.363636 | \n",
" NaN | \n",
" NaN | \n",
" 0.948829 | \n",
" 3.254264 | \n",
" 0.388311 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 8 | \n",
" | \n",
" 2020-11-24 14:51:48 | \n",
" 2.358 sec | \n",
" 8.0 | \n",
" 0.941846 | \n",
" 3.107945 | \n",
" 0.363636 | \n",
" NaN | \n",
" NaN | \n",
" 0.943709 | \n",
" 3.177402 | \n",
" 0.388311 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 9 | \n",
" | \n",
" 2020-11-24 14:51:48 | \n",
" 2.691 sec | \n",
" 9.0 | \n",
" 0.936570 | \n",
" 3.033831 | \n",
" 0.363738 | \n",
" NaN | \n",
" NaN | \n",
" 0.938650 | \n",
" 3.107704 | \n",
" 0.387895 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 10 | \n",
" | \n",
" 2020-11-24 14:51:48 | \n",
" 2.985 sec | \n",
" 10.0 | \n",
" 0.931338 | \n",
" 2.965584 | \n",
" 0.363687 | \n",
" NaN | \n",
" NaN | \n",
" 0.933635 | \n",
" 3.043692 | \n",
" 0.387895 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" | \n",
" 2020-11-24 14:51:49 | \n",
" 3.243 sec | \n",
" 11.0 | \n",
" 0.926156 | \n",
" 2.902436 | \n",
" 0.363687 | \n",
" NaN | \n",
" NaN | \n",
" 0.928669 | \n",
" 2.984427 | \n",
" 0.387895 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 12 | \n",
" | \n",
" 2020-11-24 14:51:49 | \n",
" 3.495 sec | \n",
" 12.0 | \n",
" 0.921028 | \n",
" 2.843769 | \n",
" 0.363534 | \n",
" NaN | \n",
" NaN | \n",
" 0.923761 | \n",
" 2.929714 | \n",
" 0.388311 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 13 | \n",
" | \n",
" 2020-11-24 14:51:49 | \n",
" 3.756 sec | \n",
" 13.0 | \n",
" 0.915957 | \n",
" 2.789250 | \n",
" 0.363687 | \n",
" NaN | \n",
" NaN | \n",
" 0.918907 | \n",
" 2.878823 | \n",
" 0.387895 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 14 | \n",
" | \n",
" 2020-11-24 14:51:49 | \n",
" 4.012 sec | \n",
" 14.0 | \n",
" 0.910934 | \n",
" 2.738108 | \n",
" 0.363126 | \n",
" NaN | \n",
" NaN | \n",
" 0.914103 | \n",
" 2.831229 | \n",
" 0.388103 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 15 | \n",
" | \n",
" 2020-11-24 14:51:54 | \n",
" 8.125 sec | \n",
" 43.0 | \n",
" 0.786695 | \n",
" 1.924839 | \n",
" 0.360424 | \n",
" NaN | \n",
" NaN | \n",
" 0.796223 | \n",
" 2.084962 | \n",
" 0.389559 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 16 | \n",
" | \n",
" 2020-11-24 14:52:03 | \n",
" 17.703 sec | \n",
" 100.0 | \n",
" 0.655787 | \n",
" 1.371758 | \n",
" 0.352011 | \n",
" NaN | \n",
" NaN | \n",
" 0.676651 | \n",
" 1.607245 | \n",
" 0.388935 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp duration number_of_trees training_rmse \\\n",
"0 2020-11-24 14:51:45 0.010 sec 0.0 0.989796 \n",
"1 2020-11-24 14:51:46 0.345 sec 1.0 0.981584 \n",
"2 2020-11-24 14:51:46 0.589 sec 2.0 0.975317 \n",
"3 2020-11-24 14:51:46 0.852 sec 3.0 0.969404 \n",
"4 2020-11-24 14:51:47 1.110 sec 4.0 0.963683 \n",
"5 2020-11-24 14:51:47 1.399 sec 5.0 0.958093 \n",
"6 2020-11-24 14:51:47 1.692 sec 6.0 0.952597 \n",
"7 2020-11-24 14:51:47 2.032 sec 7.0 0.947184 \n",
"8 2020-11-24 14:51:48 2.358 sec 8.0 0.941846 \n",
"9 2020-11-24 14:51:48 2.691 sec 9.0 0.936570 \n",
"10 2020-11-24 14:51:48 2.985 sec 10.0 0.931338 \n",
"11 2020-11-24 14:51:49 3.243 sec 11.0 0.926156 \n",
"12 2020-11-24 14:51:49 3.495 sec 12.0 0.921028 \n",
"13 2020-11-24 14:51:49 3.756 sec 13.0 0.915957 \n",
"14 2020-11-24 14:51:49 4.012 sec 14.0 0.910934 \n",
"15 2020-11-24 14:51:54 8.125 sec 43.0 0.786695 \n",
"16 2020-11-24 14:52:03 17.703 sec 100.0 0.655787 \n",
"\n",
" training_logloss training_classification_error training_auc \\\n",
"0 4.584967 0.866007 NaN \n",
"1 4.064828 0.365268 NaN \n",
"2 3.822181 0.364095 NaN \n",
"3 3.645022 0.363840 NaN \n",
"4 3.503089 0.363432 NaN \n",
"5 3.383978 0.363636 NaN \n",
"6 3.280713 0.363636 NaN \n",
"7 3.189588 0.363636 NaN \n",
"8 3.107945 0.363636 NaN \n",
"9 3.033831 0.363738 NaN \n",
"10 2.965584 0.363687 NaN \n",
"11 2.902436 0.363687 NaN \n",
"12 2.843769 0.363534 NaN \n",
"13 2.789250 0.363687 NaN \n",
"14 2.738108 0.363126 NaN \n",
"15 1.924839 0.360424 NaN \n",
"16 1.371758 0.352011 NaN \n",
"\n",
" training_pr_auc validation_rmse validation_logloss \\\n",
"0 NaN 0.989796 4.584967 \n",
"1 NaN 0.981879 4.084868 \n",
"2 NaN 0.975852 3.853578 \n",
"3 NaN 0.970166 3.685062 \n",
"4 NaN 0.964669 3.550449 \n",
"5 NaN 0.959299 3.437732 \n",
"6 NaN 0.954023 3.340154 \n",
"7 NaN 0.948829 3.254264 \n",
"8 NaN 0.943709 3.177402 \n",
"9 NaN 0.938650 3.107704 \n",
"10 NaN 0.933635 3.043692 \n",
"11 NaN 0.928669 2.984427 \n",
"12 NaN 0.923761 2.929714 \n",
"13 NaN 0.918907 2.878823 \n",
"14 NaN 0.914103 2.831229 \n",
"15 NaN 0.796223 2.084962 \n",
"16 NaN 0.676651 1.607245 \n",
"\n",
" validation_classification_error validation_auc validation_pr_auc \n",
"0 0.867512 NaN NaN \n",
"1 0.386855 NaN NaN \n",
"2 0.387687 NaN NaN \n",
"3 0.387479 NaN NaN \n",
"4 0.387479 NaN NaN \n",
"5 0.388311 NaN NaN \n",
"6 0.388311 NaN NaN \n",
"7 0.388311 NaN NaN \n",
"8 0.388311 NaN NaN \n",
"9 0.387895 NaN NaN \n",
"10 0.387895 NaN NaN \n",
"11 0.387895 NaN NaN \n",
"12 0.388311 NaN NaN \n",
"13 0.387895 NaN NaN \n",
"14 0.388103 NaN NaN \n",
"15 0.389559 NaN NaN \n",
"16 0.388935 NaN NaN "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Variable Importances: \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" variable | \n",
" relative_importance | \n",
" scaled_importance | \n",
" percentage | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Origin | \n",
" 156621.484375 | \n",
" 1.000000 | \n",
" 0.505359 | \n",
"
\n",
" \n",
" 1 | \n",
" UniqueCarrier | \n",
" 144643.531250 | \n",
" 0.923523 | \n",
" 0.466710 | \n",
"
\n",
" \n",
" 2 | \n",
" fDayofMonth | \n",
" 4113.584473 | \n",
" 0.026264 | \n",
" 0.013273 | \n",
"
\n",
" \n",
" 3 | \n",
" fMonth | \n",
" 3239.199219 | \n",
" 0.020682 | \n",
" 0.010452 | \n",
"
\n",
" \n",
" 4 | \n",
" IsDepDelayed | \n",
" 964.104858 | \n",
" 0.006156 | \n",
" 0.003111 | \n",
"
\n",
" \n",
" 5 | \n",
" fDayOfWeek | \n",
" 339.425415 | \n",
" 0.002167 | \n",
" 0.001095 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" variable relative_importance scaled_importance percentage\n",
"0 Origin 156621.484375 1.000000 0.505359\n",
"1 UniqueCarrier 144643.531250 0.923523 0.466710\n",
"2 fDayofMonth 4113.584473 0.026264 0.013273\n",
"3 fMonth 3239.199219 0.020682 0.010452\n",
"4 IsDepDelayed 964.104858 0.006156 0.003111\n",
"5 fDayOfWeek 339.425415 0.002167 0.001095"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Negative example: the response 'Dest' has more categorical levels than the\n",
"# 50-domain maximum, so multinomial AUC / PR AUC are not computed.\n",
"\n",
"air = h2o.import_file(path=_locate(\"smalldata/airlines/AirlinesTrain.csv.zip\"))\n",
"\n",
"# 'Dest' is the response, so it must not also be listed among the predictors.\n",
"features = [\"Origin\", \"IsDepDelayed\", \"UniqueCarrier\", \"fMonth\", \"fDayofMonth\", \"fDayOfWeek\"]\n",
"response = \"Dest\"\n",
"\n",
"# Seeded ~80/20 random split so the train/validation frames are reproducible.\n",
"r = air[0].runif(seed=1234)\n",
"train = air[r < 0.8]\n",
"valid = air[r >= 0.8]\n",
"\n",
"# Too many domains - the AUC / PR AUC columns in the scoring history stay NaN.\n",
"gbm = H2OGradientBoostingEstimator(distribution=\"multinomial\",\n",
"                                   ntrees=100,\n",
"                                   max_depth=3,\n",
"                                   learn_rate=0.01,\n",
"                                   auc_type=\"MACRO_OVO\")\n",
"gbm.train(x=features,\n",
"          y=response,\n",
"          training_frame=train,\n",
"          validation_frame=valid)\n",
"gbm.show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Table is not computed because it is disabled or due to domain size (maximum is 50 domains).'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gbm.multinomial_auc_table()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3rc1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}