{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Multinomial AUC/PR AUC demo" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
H2O_cluster_uptime:38 mins 14 secs
H2O_cluster_timezone:Europe/Berlin
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.33.0.99999
H2O_cluster_version_age:47 minutes
H2O_cluster_name:mori
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:4.851 Gb
H2O_cluster_total_cores:8
H2O_cluster_allowed_cores:8
H2O_cluster_status:locked, healthy
H2O_connection_url:http://localhost:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
H2O_API_Extensions:Algos, Core V3, Core V4
Python_version:3.7.3 candidate
" ], "text/plain": [ "-------------------------- -----------------------------\n", "H2O_cluster_uptime: 38 mins 14 secs\n", "H2O_cluster_timezone: Europe/Berlin\n", "H2O_data_parsing_timezone: UTC\n", "H2O_cluster_version: 3.33.0.99999\n", "H2O_cluster_version_age: 47 minutes\n", "H2O_cluster_name: mori\n", "H2O_cluster_total_nodes: 1\n", "H2O_cluster_free_memory: 4.851 Gb\n", "H2O_cluster_total_cores: 8\n", "H2O_cluster_allowed_cores: 8\n", "H2O_cluster_status: locked, healthy\n", "H2O_connection_url: http://localhost:54321\n", "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", "H2O_internal_security: False\n", "H2O_API_Extensions: Algos, Core V3, Core V4\n", "Python_version: 3.7.3 candidate\n", "-------------------------- -----------------------------" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import h2o\n", "from h2o.estimators import H2OGradientBoostingEstimator\n", "from h2o.utils.shared_utils import _locate # private function - used to find files within h2o git project directory\n", "\n", "h2o.init(strict_version_check=False, port=54321)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parse progress: |█████████████████████████████████████████████████████████| 100%\n", "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", "Model Details\n", "=============\n", "H2OGradientBoostingEstimator : Gradient Boosting Machine\n", "Model Key: GBM_model_python_1606223609474_67\n", "\n", "\n", "Model Summary: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
number_of_treesnumber_of_internal_treesmodel_size_in_bytesmin_depthmax_depthmean_depthmin_leavesmax_leavesmean_leaves
0100.0500.067451.01.03.02.9942.08.06.078
\n", "
" ], "text/plain": [ " number_of_trees number_of_internal_trees model_size_in_bytes \\\n", "0 100.0 500.0 67451.0 \n", "\n", " min_depth max_depth mean_depth min_leaves max_leaves mean_leaves \n", "0 1.0 3.0 2.994 2.0 8.0 6.078 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "ModelMetricsMultinomial: gbm\n", "** Reported on train data. **\n", "\n", "MSE: 0.07963805354633585\n", "RMSE: 0.28220215014477806\n", "LogLoss: 0.32402399831181045\n", "Mean Per-Class Error: 0.4120046082949308\n", "AUC: 0.999511857555245\n", "AUCPR: 0.998951571914844\n", "\n", "Multinomial AUC values: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typefirst_class_domainsecond_class_domainauc
03 vs Rest3None1.000000
14 vs Rest4None0.998556
25 vs Rest5None1.000000
36 vs Rest6None0.999004
48 vs Rest8None1.000000
5Macro OVRNoneNone0.999512
6Weighted OVRNoneNone0.999032
7Class 3 vs. 4340.994624
8Class 3 vs. 5351.000000
9Class 3 vs. 6361.000000
10Class 3 vs. 8381.000000
11Class 4 vs. 5450.975269
12Class 4 vs. 6460.999482
13Class 4 vs. 8481.000000
14Class 5 vs. 6560.988095
15Class 5 vs. 8581.000000
16Class 6 vs. 8680.999752
17Macro OVONoneNone0.995722
18Weighted OVONoneNone0.995155
\n", "
" ], "text/plain": [ " type first_class_domain second_class_domain auc\n", "0 3 vs Rest 3 None 1.000000\n", "1 4 vs Rest 4 None 0.998556\n", "2 5 vs Rest 5 None 1.000000\n", "3 6 vs Rest 6 None 0.999004\n", "4 8 vs Rest 8 None 1.000000\n", "5 Macro OVR None None 0.999512\n", "6 Weighted OVR None None 0.999032\n", "7 Class 3 vs. 4 3 4 0.994624\n", "8 Class 3 vs. 5 3 5 1.000000\n", "9 Class 3 vs. 6 3 6 1.000000\n", "10 Class 3 vs. 8 3 8 1.000000\n", "11 Class 4 vs. 5 4 5 0.975269\n", "12 Class 4 vs. 6 4 6 0.999482\n", "13 Class 4 vs. 8 4 8 1.000000\n", "14 Class 5 vs. 6 5 6 0.988095\n", "15 Class 5 vs. 8 5 8 1.000000\n", "16 Class 6 vs. 8 6 8 0.999752\n", "17 Macro OVO None None 0.995722\n", "18 Weighted OVO None None 0.995155" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Multinomial auc_pr values: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typefirst_class_domainsecond_class_domainauc_pr
03 vs Rest3None1.000000
14 vs Rest4None0.998698
25 vs Rest5None1.000000
36 vs Rest6None0.996059
48 vs Rest8None1.000000
5Macro OVRNoneNone0.998952
6Weighted OVRNoneNone0.998538
7Class 3 vs. 4340.999897
8Class 3 vs. 5351.000000
9Class 3 vs. 6361.000000
10Class 3 vs. 8381.000000
11Class 4 vs. 5450.999489
12Class 4 vs. 6460.998871
13Class 4 vs. 8481.000000
14Class 5 vs. 6560.999350
15Class 5 vs. 8581.000000
16Class 6 vs. 8680.999689
17Macro OVONoneNone0.999730
18Weighted OVONoneNone0.999642
\n", "
" ], "text/plain": [ " type first_class_domain second_class_domain auc_pr\n", "0 3 vs Rest 3 None 1.000000\n", "1 4 vs Rest 4 None 0.998698\n", "2 5 vs Rest 5 None 1.000000\n", "3 6 vs Rest 6 None 0.996059\n", "4 8 vs Rest 8 None 1.000000\n", "5 Macro OVR None None 0.998952\n", "6 Weighted OVR None None 0.998538\n", "7 Class 3 vs. 4 3 4 0.999897\n", "8 Class 3 vs. 5 3 5 1.000000\n", "9 Class 3 vs. 6 3 6 1.000000\n", "10 Class 3 vs. 8 3 8 1.000000\n", "11 Class 4 vs. 5 4 5 0.999489\n", "12 Class 4 vs. 6 4 6 0.998871\n", "13 Class 4 vs. 8 4 8 1.000000\n", "14 Class 5 vs. 6 5 6 0.999350\n", "15 Class 5 vs. 8 5 8 1.000000\n", "16 Class 6 vs. 8 6 8 0.999689\n", "17 Macro OVO None None 0.999730\n", "18 Weighted OVO None None 0.999642" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
34568ErrorRate
00.03.00.00.00.01.0000003 / 3
10.0154.00.01.00.00.0064521 / 155
20.02.00.01.00.01.0000003 / 3
30.02.00.053.01.00.0535713 / 56
40.00.00.00.072.00.0000000 / 72
50.0161.00.055.073.00.03460210 / 289
\n", "
" ], "text/plain": [ " 3 4 5 6 8 Error Rate\n", "0 0.0 3.0 0.0 0.0 0.0 1.000000 3 / 3\n", "1 0.0 154.0 0.0 1.0 0.0 0.006452 1 / 155\n", "2 0.0 2.0 0.0 1.0 0.0 1.000000 3 / 3\n", "3 0.0 2.0 0.0 53.0 1.0 0.053571 3 / 56\n", "4 0.0 0.0 0.0 0.0 72.0 0.000000 0 / 72\n", "5 0.0 161.0 0.0 55.0 73.0 0.034602 10 / 289" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Top-5 Hit Ratios: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
khit_ratio
010.965398
121.000000
231.000000
341.000000
451.000000
\n", "
" ], "text/plain": [ " k hit_ratio\n", "0 1 0.965398\n", "1 2 1.000000\n", "2 3 1.000000\n", "3 4 1.000000\n", "4 5 1.000000" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "ModelMetricsMultinomial: gbm\n", "** Reported on validation data. **\n", "\n", "MSE: 0.08366695440378827\n", "RMSE: 0.2892524060466711\n", "LogLoss: 0.3345387893847581\n", "Mean Per-Class Error: 0.2181318681318681\n", "AUC: 0.7987492282997901\n", "AUCPR: 0.7975377394397984\n", "\n", "Multinomial AUC values: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typefirst_class_domainsecond_class_domainauc
03 vs Rest3None1.000000
14 vs Rest4None0.996154
25 vs Rest5None0.000000
36 vs Rest6None0.997592
48 vs Rest8None1.000000
5Macro OVRNoneNone0.798749
6Weighted OVRNoneNone0.997714
7Class 3 vs. 4340.990385
8Class 3 vs. 5350.500000
9Class 3 vs. 6361.000000
10Class 3 vs. 8381.000000
11Class 4 vs. 5450.500000
12Class 4 vs. 6460.995192
13Class 4 vs. 8481.000000
14Class 5 vs. 6560.500000
15Class 5 vs. 8580.500000
16Class 6 vs. 8680.998016
17Macro OVONoneNone0.798359
18Weighted OVONoneNone0.872818
\n", "
" ], "text/plain": [ " type first_class_domain second_class_domain auc\n", "0 3 vs Rest 3 None 1.000000\n", "1 4 vs Rest 4 None 0.996154\n", "2 5 vs Rest 5 None 0.000000\n", "3 6 vs Rest 6 None 0.997592\n", "4 8 vs Rest 8 None 1.000000\n", "5 Macro OVR None None 0.798749\n", "6 Weighted OVR None None 0.997714\n", "7 Class 3 vs. 4 3 4 0.990385\n", "8 Class 3 vs. 5 3 5 0.500000\n", "9 Class 3 vs. 6 3 6 1.000000\n", "10 Class 3 vs. 8 3 8 1.000000\n", "11 Class 4 vs. 5 4 5 0.500000\n", "12 Class 4 vs. 6 4 6 0.995192\n", "13 Class 4 vs. 8 4 8 1.000000\n", "14 Class 5 vs. 6 5 6 0.500000\n", "15 Class 5 vs. 8 5 8 0.500000\n", "16 Class 6 vs. 8 6 8 0.998016\n", "17 Macro OVO None None 0.798359\n", "18 Weighted OVO None None 0.872818" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Multinomial auc_pr values: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typefirst_class_domainsecond_class_domainauc_pr
03 vs Rest3None1.000000
14 vs Rest4None0.994834
25 vs Rest5None0.000000
36 vs Rest6None0.992854
48 vs Rest8None1.000000
5Macro OVRNoneNone0.797538
6Weighted OVRNoneNone0.995994
7Class 3 vs. 4340.999817
8Class 3 vs. 5350.500000
9Class 3 vs. 6361.000000
10Class 3 vs. 8381.000000
11Class 4 vs. 5450.500000
12Class 4 vs. 6460.996311
13Class 4 vs. 8481.000000
14Class 5 vs. 6560.500000
15Class 5 vs. 8580.500000
16Class 6 vs. 8680.997536
17Macro OVONoneNone0.799366
18Weighted OVONoneNone0.874012
\n", "
" ], "text/plain": [ " type first_class_domain second_class_domain auc_pr\n", "0 3 vs Rest 3 None 1.000000\n", "1 4 vs Rest 4 None 0.994834\n", "2 5 vs Rest 5 None 0.000000\n", "3 6 vs Rest 6 None 0.992854\n", "4 8 vs Rest 8 None 1.000000\n", "5 Macro OVR None None 0.797538\n", "6 Weighted OVR None None 0.995994\n", "7 Class 3 vs. 4 3 4 0.999817\n", "8 Class 3 vs. 5 3 5 0.500000\n", "9 Class 3 vs. 6 3 6 1.000000\n", "10 Class 3 vs. 8 3 8 1.000000\n", "11 Class 4 vs. 5 4 5 0.500000\n", "12 Class 4 vs. 6 4 6 0.996311\n", "13 Class 4 vs. 8 4 8 1.000000\n", "14 Class 5 vs. 6 5 6 0.500000\n", "15 Class 5 vs. 8 5 8 0.500000\n", "16 Class 6 vs. 8 6 8 0.997536\n", "17 Macro OVO None None 0.799366\n", "18 Weighted OVO None None 0.874012" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
34568ErrorRate
00.01.00.00.00.01.0000001 / 1
10.051.00.01.00.00.0192311 / 52
20.00.00.00.00.0NaN0 / 0
30.02.00.026.00.00.0714292 / 28
40.00.00.00.036.00.0000000 / 36
50.054.00.027.036.00.0341884 / 117
\n", "
" ], "text/plain": [ " 3 4 5 6 8 Error Rate\n", "0 0.0 1.0 0.0 0.0 0.0 1.000000 1 / 1\n", "1 0.0 51.0 0.0 1.0 0.0 0.019231 1 / 52\n", "2 0.0 0.0 0.0 0.0 0.0 NaN 0 / 0\n", "3 0.0 2.0 0.0 26.0 0.0 0.071429 2 / 28\n", "4 0.0 0.0 0.0 0.0 36.0 0.000000 0 / 36\n", "5 0.0 54.0 0.0 27.0 36.0 0.034188 4 / 117" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Top-5 Hit Ratios: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
khit_ratio
010.965812
121.000000
231.000000
341.000000
451.000000
\n", "
" ], "text/plain": [ " k hit_ratio\n", "0 1 0.965812\n", "1 2 1.000000\n", "2 3 1.000000\n", "3 4 1.000000\n", "4 5 1.000000" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Scoring History: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampdurationnumber_of_treestraining_rmsetraining_loglosstraining_classification_errortraining_auctraining_pr_aucvalidation_rmsevalidation_loglossvalidation_classification_errorvalidation_aucvalidation_pr_auc
02020-11-24 14:51:440.001 sec0.00.8000001.6094380.6435990.5000000.2000000.8000001.6094380.6153850.4000000.200000
12020-11-24 14:51:440.007 sec1.00.7924561.5724330.0346020.9969200.9232020.7924231.5722710.0341880.7983940.797055
22020-11-24 14:51:440.010 sec2.00.7849441.5369330.0346020.9983890.9534580.7849231.5368340.0341880.7988340.798153
32020-11-24 14:51:440.013 sec3.00.7774631.5028190.0346020.9980440.9455660.7774521.5027700.0341880.7988340.798153
42020-11-24 14:51:440.016 sec4.00.7700141.4700010.0346020.9980440.9455660.7699831.4698610.0341880.7988340.798153
52020-11-24 14:51:440.019 sec5.00.7626001.4383990.0346020.9980540.9455740.7625541.4382000.0341880.7988340.798153
62020-11-24 14:51:440.021 sec6.00.7552191.4079210.0346020.9980920.9456060.7551761.4077390.0341880.7988340.798153
72020-11-24 14:51:440.024 sec7.00.7478741.3785060.0346020.9980920.9456060.7478141.3782600.0341880.7988340.798153
82020-11-24 14:51:440.026 sec8.00.7405641.3500890.0346020.9980920.9456060.7405191.3499030.0341880.7988340.798153
92020-11-24 14:51:440.028 sec9.00.7332931.3226180.0346020.9980920.9456060.7332381.3223960.0341880.7988340.798153
102020-11-24 14:51:440.031 sec10.00.7260581.2960140.0346020.9981020.9456140.7260201.2958660.0341880.7988340.798153
112020-11-24 14:51:440.034 sec11.00.7188611.2702510.0346020.9981020.9456140.7188231.2701060.0341880.7988340.798153
122020-11-24 14:51:440.036 sec12.00.7117041.2452920.0346020.9981020.9456140.7116801.2451940.0341880.7988340.798153
132020-11-24 14:51:440.038 sec13.00.7045881.2210780.0346020.9981020.9456140.7045651.2209900.0341880.7988340.798153
142020-11-24 14:51:440.041 sec14.00.6975121.1975770.0346020.9992770.9987740.6975071.1975540.0341880.7988340.798153
152020-11-24 14:51:440.045 sec15.00.6904781.1747630.0346020.9992920.9988370.6905061.1748460.0341880.7988340.798153
162020-11-24 14:51:440.048 sec16.00.6834891.1526050.0346020.9992920.9988370.6835031.1526440.0341880.7988340.798153
172020-11-24 14:51:440.052 sec17.00.6765421.1310590.0311420.9993110.9988560.6765891.1312010.0341880.7988340.798153
182020-11-24 14:51:440.055 sec18.00.6696401.1101080.0311420.9993110.9988560.6697211.1103540.0341880.7988340.798153
192020-11-24 14:51:440.058 sec19.00.6627831.0897220.0311420.9993110.9988560.6628851.0900310.0341880.7988340.798153
\n", "
" ], "text/plain": [ " timestamp duration number_of_trees training_rmse \\\n", "0 2020-11-24 14:51:44 0.001 sec 0.0 0.800000 \n", "1 2020-11-24 14:51:44 0.007 sec 1.0 0.792456 \n", "2 2020-11-24 14:51:44 0.010 sec 2.0 0.784944 \n", "3 2020-11-24 14:51:44 0.013 sec 3.0 0.777463 \n", "4 2020-11-24 14:51:44 0.016 sec 4.0 0.770014 \n", "5 2020-11-24 14:51:44 0.019 sec 5.0 0.762600 \n", "6 2020-11-24 14:51:44 0.021 sec 6.0 0.755219 \n", "7 2020-11-24 14:51:44 0.024 sec 7.0 0.747874 \n", "8 2020-11-24 14:51:44 0.026 sec 8.0 0.740564 \n", "9 2020-11-24 14:51:44 0.028 sec 9.0 0.733293 \n", "10 2020-11-24 14:51:44 0.031 sec 10.0 0.726058 \n", "11 2020-11-24 14:51:44 0.034 sec 11.0 0.718861 \n", "12 2020-11-24 14:51:44 0.036 sec 12.0 0.711704 \n", "13 2020-11-24 14:51:44 0.038 sec 13.0 0.704588 \n", "14 2020-11-24 14:51:44 0.041 sec 14.0 0.697512 \n", "15 2020-11-24 14:51:44 0.045 sec 15.0 0.690478 \n", "16 2020-11-24 14:51:44 0.048 sec 16.0 0.683489 \n", "17 2020-11-24 14:51:44 0.052 sec 17.0 0.676542 \n", "18 2020-11-24 14:51:44 0.055 sec 18.0 0.669640 \n", "19 2020-11-24 14:51:44 0.058 sec 19.0 0.662783 \n", "\n", " training_logloss training_classification_error training_auc \\\n", "0 1.609438 0.643599 0.500000 \n", "1 1.572433 0.034602 0.996920 \n", "2 1.536933 0.034602 0.998389 \n", "3 1.502819 0.034602 0.998044 \n", "4 1.470001 0.034602 0.998044 \n", "5 1.438399 0.034602 0.998054 \n", "6 1.407921 0.034602 0.998092 \n", "7 1.378506 0.034602 0.998092 \n", "8 1.350089 0.034602 0.998092 \n", "9 1.322618 0.034602 0.998092 \n", "10 1.296014 0.034602 0.998102 \n", "11 1.270251 0.034602 0.998102 \n", "12 1.245292 0.034602 0.998102 \n", "13 1.221078 0.034602 0.998102 \n", "14 1.197577 0.034602 0.999277 \n", "15 1.174763 0.034602 0.999292 \n", "16 1.152605 0.034602 0.999292 \n", "17 1.131059 0.031142 0.999311 \n", "18 1.110108 0.031142 0.999311 \n", "19 1.089722 0.031142 0.999311 \n", "\n", " training_pr_auc validation_rmse validation_logloss \\\n", "0 0.200000 0.800000 1.609438 
\n", "1 0.923202 0.792423 1.572271 \n", "2 0.953458 0.784923 1.536834 \n", "3 0.945566 0.777452 1.502770 \n", "4 0.945566 0.769983 1.469861 \n", "5 0.945574 0.762554 1.438200 \n", "6 0.945606 0.755176 1.407739 \n", "7 0.945606 0.747814 1.378260 \n", "8 0.945606 0.740519 1.349903 \n", "9 0.945606 0.733238 1.322396 \n", "10 0.945614 0.726020 1.295866 \n", "11 0.945614 0.718823 1.270106 \n", "12 0.945614 0.711680 1.245194 \n", "13 0.945614 0.704565 1.220990 \n", "14 0.998774 0.697507 1.197554 \n", "15 0.998837 0.690506 1.174846 \n", "16 0.998837 0.683503 1.152644 \n", "17 0.998856 0.676589 1.131201 \n", "18 0.998856 0.669721 1.110354 \n", "19 0.998856 0.662885 1.090031 \n", "\n", " validation_classification_error validation_auc validation_pr_auc \n", "0 0.615385 0.400000 0.200000 \n", "1 0.034188 0.798394 0.797055 \n", "2 0.034188 0.798834 0.798153 \n", "3 0.034188 0.798834 0.798153 \n", "4 0.034188 0.798834 0.798153 \n", "5 0.034188 0.798834 0.798153 \n", "6 0.034188 0.798834 0.798153 \n", "7 0.034188 0.798834 0.798153 \n", "8 0.034188 0.798834 0.798153 \n", "9 0.034188 0.798834 0.798153 \n", "10 0.034188 0.798834 0.798153 \n", "11 0.034188 0.798834 0.798153 \n", "12 0.034188 0.798834 0.798153 \n", "13 0.034188 0.798834 0.798153 \n", "14 0.034188 0.798834 0.798153 \n", "15 0.034188 0.798834 0.798153 \n", "16 0.034188 0.798834 0.798153 \n", "17 0.034188 0.798834 0.798153 \n", "18 0.034188 0.798834 0.798153 \n", "19 0.034188 0.798834 0.798153 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "See the whole table with table.as_data_frame()\n", "\n", "Variable Importances: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variablerelative_importancescaled_importancepercentage
0displacement6259.0654301.0000000.979955
1power56.8534930.0090830.008901
2acceleration31.5310710.0050380.004937
3weight21.4194430.0034220.003354
4year18.2277470.0029120.002854
\n", "
" ], "text/plain": [ " variable relative_importance scaled_importance percentage\n", "0 displacement 6259.065430 1.000000 0.979955\n", "1 power 56.853493 0.009083 0.008901\n", "2 acceleration 31.531071 0.005038 0.004937\n", "3 weight 21.419443 0.003422 0.003354\n", "4 year 18.227747 0.002912 0.002854" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "cars = h2o.import_file(path=_locate(\"smalldata/junit/cars_20mpg.csv\"))\n", "\n", "features = [\"displacement\", \"power\", \"weight\", \"acceleration\", \"year\"]\n", "response = \"cylinders\"\n", "distribution = \"multinomial\"\n", "\n", "# treat the response as categorical so the model is a multi-class classifier\n", "cars[response] = cars[response].asfactor()\n", "\n", "# seeded, roughly 70/30 train/validation split; without a seed every run\n", "# produces a different split and therefore different metrics\n", "r = cars[0].runif(seed=1234)\n", "train = cars[r > .3]\n", "valid = cars[r <= .3]\n", "\n", "# train model; auc_type selects which aggregate is reported as the model-level AUC / AUCPR\n", "gbm = H2OGradientBoostingEstimator(\n", "    distribution=distribution,\n", "    ntrees=100,\n", "    max_depth=3,\n", "    learn_rate=0.01,\n", "    auc_type=\"MACRO_OVR\",\n", ")\n", "gbm.train(\n", "    x=features,\n", "    y=response,\n", "    training_frame=train,\n", "    validation_frame=valid,\n", ")\n", "gbm.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.999511857555245" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbm.auc()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.998951571914844" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbm.aucpr()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Multinomial AUC values: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typefirst_class_domainsecond_class_domainauc
03 vs Rest3None1.000000
14 vs Rest4None0.998556
25 vs Rest5None1.000000
36 vs Rest6None0.999004
48 vs Rest8None1.000000
5Macro OVRNoneNone0.999512
6Weighted OVRNoneNone0.999032
7Class 3 vs. 4340.994624
8Class 3 vs. 5351.000000
9Class 3 vs. 6361.000000
10Class 3 vs. 8381.000000
11Class 4 vs. 5450.975269
12Class 4 vs. 6460.999482
13Class 4 vs. 8481.000000
14Class 5 vs. 6560.988095
15Class 5 vs. 8581.000000
16Class 6 vs. 8680.999752
17Macro OVONoneNone0.995722
18Weighted OVONoneNone0.995155
\n", "
" ], "text/plain": [ " type first_class_domain second_class_domain auc\n", "0 3 vs Rest 3 None 1.000000\n", "1 4 vs Rest 4 None 0.998556\n", "2 5 vs Rest 5 None 1.000000\n", "3 6 vs Rest 6 None 0.999004\n", "4 8 vs Rest 8 None 1.000000\n", "5 Macro OVR None None 0.999512\n", "6 Weighted OVR None None 0.999032\n", "7 Class 3 vs. 4 3 4 0.994624\n", "8 Class 3 vs. 5 3 5 1.000000\n", "9 Class 3 vs. 6 3 6 1.000000\n", "10 Class 3 vs. 8 3 8 1.000000\n", "11 Class 4 vs. 5 4 5 0.975269\n", "12 Class 4 vs. 6 4 6 0.999482\n", "13 Class 4 vs. 8 4 8 1.000000\n", "14 Class 5 vs. 6 5 6 0.988095\n", "15 Class 5 vs. 8 5 8 1.000000\n", "16 Class 6 vs. 8 6 8 0.999752\n", "17 Macro OVO None None 0.995722\n", "18 Weighted OVO None None 0.995155" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbm.multinomial_auc_table()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampdurationnumber_of_treestraining_rmsetraining_loglosstraining_classification_errortraining_auctraining_pr_aucvalidation_rmsevalidation_loglossvalidation_classification_errorvalidation_aucvalidation_pr_auc
02020-11-24 14:51:440.003 sec0.00.8000001.6094380.6435990.5000000.3874830.8000001.6094380.6153850.5000000.349551
12020-11-24 14:51:440.016 sec1.00.7924561.5724330.0346020.9977610.9932840.7924231.5722710.0341880.9969250.994890
22020-11-24 14:51:440.020 sec2.00.7849441.5369330.0346020.9981400.9954540.7849231.5368340.0341880.9975730.996420
32020-11-24 14:51:440.024 sec3.00.7774631.5028190.0346020.9981340.9950580.7774521.5027700.0341880.9975730.996420
42020-11-24 14:51:440.026 sec4.00.7700141.4700010.0346020.9981340.9950580.7699831.4698610.0341880.9975730.996420
52020-11-24 14:51:440.029 sec5.00.7626001.4383990.0346020.9981600.9950820.7625541.4382000.0341880.9975730.996420
62020-11-24 14:51:440.031 sec6.00.7552191.4079210.0346020.9982640.9951680.7551761.4077390.0341880.9975730.996420
\n", "
" ], "text/plain": [ " timestamp duration number_of_trees training_rmse \\\n", "0 2020-11-24 14:51:44 0.003 sec 0.0 0.800000 \n", "1 2020-11-24 14:51:44 0.016 sec 1.0 0.792456 \n", "2 2020-11-24 14:51:44 0.020 sec 2.0 0.784944 \n", "3 2020-11-24 14:51:44 0.024 sec 3.0 0.777463 \n", "4 2020-11-24 14:51:44 0.026 sec 4.0 0.770014 \n", "5 2020-11-24 14:51:44 0.029 sec 5.0 0.762600 \n", "6 2020-11-24 14:51:44 0.031 sec 6.0 0.755219 \n", "\n", " training_logloss training_classification_error training_auc \\\n", "0 1.609438 0.643599 0.500000 \n", "1 1.572433 0.034602 0.997761 \n", "2 1.536933 0.034602 0.998140 \n", "3 1.502819 0.034602 0.998134 \n", "4 1.470001 0.034602 0.998134 \n", "5 1.438399 0.034602 0.998160 \n", "6 1.407921 0.034602 0.998264 \n", "\n", " training_pr_auc validation_rmse validation_logloss \\\n", "0 0.387483 0.800000 1.609438 \n", "1 0.993284 0.792423 1.572271 \n", "2 0.995454 0.784923 1.536834 \n", "3 0.995058 0.777452 1.502770 \n", "4 0.995058 0.769983 1.469861 \n", "5 0.995082 0.762554 1.438200 \n", "6 0.995168 0.755176 1.407739 \n", "\n", " validation_classification_error validation_auc validation_pr_auc \n", "0 0.615385 0.500000 0.349551 \n", "1 0.034188 0.996925 0.994890 \n", "2 0.034188 0.997573 0.996420 \n", "3 0.034188 0.997573 0.996420 \n", "4 0.034188 0.997573 0.996420 \n", "5 0.034188 0.997573 0.996420 \n", "6 0.034188 0.997573 0.996420 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# early stopping\n", "\n", "gbm = H2OGradientBoostingEstimator(distribution=\"multinomial\", \n", " ntrees=100, \n", " max_depth=3, \n", " learn_rate=0.01,\n", " stopping_metric=\"AUCPR\", \n", " stopping_tolerance=0.01, \n", " stopping_rounds=3,\n", " auc_type=\"WEIGHTED_OVR\")\n", "gbm.train(x =features, \n", " y =response, \n", " training_frame =train,\n", " validation_frame=valid)\n", "gbm.scoring_history()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "gbm Grid Build progress: |████████████████████████████████████████████████| 100%\n" ] }, { "data": { "text/plain": [ "{'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_3': 0.9996445050710137,\n", " 'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_4': 0.9996445050710137,\n", " 'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_2': 0.9994134025192564,\n", " 'Grid_GBM_py_3_sid_b9cb_model_python_1606223609474_69_model_1': 0.9994134025192564}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# grid search\n", "\n", "from h2o.grid.grid_search import H2OGridSearch\n", "hyper_parameters = {'ntrees': [5, 10], 'max_depth': [10, 20]}\n", "gs = H2OGridSearch(H2OGradientBoostingEstimator(distribution = \"multinomial\", auc_type=\"MACRO_OVR\"), hyper_parameters)\n", "gs.train(x=features, y=response, training_frame=train)\n", "gs.auc(train=True)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parse progress: |█████████████████████████████████████████████████████████| 100%\n", "gbm Model Build progress: |███████████████████████████████████████████████| 100%\n", "Model Details\n", "=============\n", "H2OGradientBoostingEstimator : Gradient Boosting Machine\n", "Model Key: GBM_model_python_1606223609474_70\n", "\n", "\n", "Model Summary: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
number_of_treesnumber_of_internal_treesmodel_size_in_bytesmin_depthmax_depthmean_depthmin_leavesmax_leavesmean_leaves
0100.09800.02008108.01.03.02.999492.08.07.804898
\n", "
" ], "text/plain": [ " number_of_trees number_of_internal_trees model_size_in_bytes \\\n", "0 100.0 9800.0 2008108.0 \n", "\n", " min_depth max_depth mean_depth min_leaves max_leaves mean_leaves \n", "0 1.0 3.0 2.99949 2.0 8.0 7.804898 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "ModelMetricsMultinomial: gbm\n", "** Reported on train data. **\n", "\n", "MSE: 0.430057090190663\n", "RMSE: 0.6557873818477015\n", "LogLoss: 1.3717578431630804\n", "Mean Per-Class Error: 0.7578920698567744\n", "AUC: NaN\n", "AUCPR: NaN\n", "Multinomial auc values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n", "Multinomial auc_pr values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n", "\n", "Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABEABQACYALBATLAVPBDLBGMBNABOS...SNASTLSWFSYRTOLTPATUSUCAErrorRate
00.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.000000102 / 102
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000076 / 76
20.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000015 / 15
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000081 / 81
40.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000018 / 18
50.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000020 / 20
60.00.00.00.00.00.015.00.00.00.0...0.00.00.00.00.00.00.00.00.76190548 / 63
70.00.00.00.00.00.010.08.00.00.0...0.00.00.00.00.00.00.00.00.79487231 / 39
80.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000041 / 41
90.00.00.00.00.00.00.00.00.0114.0...0.00.00.00.00.00.00.00.00.673352235 / 349
100.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000030 / 30
110.00.00.00.00.00.00.00.00.036.0...0.00.00.00.00.00.00.08.00.85981392 / 107
120.00.00.00.00.00.00.00.00.00.0...17.00.00.00.00.00.05.00.00.658046229 / 348
130.00.00.00.00.00.00.00.00.09.0...0.00.00.00.00.00.00.00.00.591503181 / 306
140.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000021 / 21
150.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000022 / 22
160.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000001 / 1
170.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.0000000 / 13
180.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000040 / 40
190.00.00.00.00.00.00.00.00.04.0...0.00.00.00.00.00.00.00.00.42176962 / 147
\n", "

20 rows × 100 columns

\n", "
" ], "text/plain": [ " ABE ABQ ACY ALB ATL AVP BDL BGM BNA BOS ... SNA STL SWF \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "6 0.0 0.0 0.0 0.0 0.0 0.0 15.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "7 0.0 0.0 0.0 0.0 0.0 0.0 10.0 8.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 114.0 ... 0.0 0.0 0.0 \n", "10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "11 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 36.0 ... 0.0 0.0 0.0 \n", "12 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 17.0 0.0 0.0 \n", "13 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.0 ... 0.0 0.0 0.0 \n", "14 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "15 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "16 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "17 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "18 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "19 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 ... 
0.0 0.0 0.0 \n", "\n", " SYR TOL TPA TUS UCA Error Rate \n", "0 0.0 0.0 0.0 0.0 0.0 1.000000 102 / 102 \n", "1 0.0 0.0 0.0 0.0 0.0 1.000000 76 / 76 \n", "2 0.0 0.0 0.0 0.0 0.0 1.000000 15 / 15 \n", "3 0.0 0.0 0.0 0.0 0.0 1.000000 81 / 81 \n", "4 0.0 0.0 0.0 0.0 0.0 1.000000 18 / 18 \n", "5 0.0 0.0 0.0 0.0 0.0 1.000000 20 / 20 \n", "6 0.0 0.0 0.0 0.0 0.0 0.761905 48 / 63 \n", "7 0.0 0.0 0.0 0.0 0.0 0.794872 31 / 39 \n", "8 0.0 0.0 0.0 0.0 0.0 1.000000 41 / 41 \n", "9 0.0 0.0 0.0 0.0 0.0 0.673352 235 / 349 \n", "10 0.0 0.0 0.0 0.0 0.0 1.000000 30 / 30 \n", "11 0.0 0.0 0.0 0.0 8.0 0.859813 92 / 107 \n", "12 0.0 0.0 0.0 5.0 0.0 0.658046 229 / 348 \n", "13 0.0 0.0 0.0 0.0 0.0 0.591503 181 / 306 \n", "14 0.0 0.0 0.0 0.0 0.0 1.000000 21 / 21 \n", "15 0.0 0.0 0.0 0.0 0.0 1.000000 22 / 22 \n", "16 0.0 0.0 0.0 0.0 0.0 1.000000 1 / 1 \n", "17 0.0 0.0 0.0 0.0 0.0 0.000000 0 / 13 \n", "18 0.0 0.0 0.0 0.0 0.0 1.000000 40 / 40 \n", "19 0.0 0.0 0.0 0.0 0.0 0.421769 62 / 147 \n", "\n", "[20 rows x 100 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "See the whole table with table.as_data_frame()\n", "\n", "Top-10 Hit Ratios: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
khit_ratio
010.647989
120.772855
230.834294
340.872992
450.900168
560.920155
670.933615
780.943201
890.950798
9100.956356
\n", "
" ], "text/plain": [ " k hit_ratio\n", "0 1 0.647989\n", "1 2 0.772855\n", "2 3 0.834294\n", "3 4 0.872992\n", "4 5 0.900168\n", "5 6 0.920155\n", "6 7 0.933615\n", "7 8 0.943201\n", "8 9 0.950798\n", "9 10 0.956356" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "ModelMetricsMultinomial: gbm\n", "** Reported on validation data. **\n", "\n", "MSE: 0.45785658129894297\n", "RMSE: 0.6766510040626135\n", "LogLoss: 1.6072449485890636\n", "Mean Per-Class Error: 0.794834751023698\n", "AUC: NaN\n", "AUCPR: NaN\n", "Multinomial auc values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n", "Multinomial auc_pr values: Table is not computed because it is disabled or due to domain size (maximum is 50 domains).\n", "\n", "Confusion Matrix: Row labels: Actual class; Column labels: Predicted class\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ABEABQACYALBATLAVPBDLBGMBNABOS...SNASTLSWFSYRTOLTPATUSUCAErrorRate
00.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000028 / 28
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000021 / 21
20.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000004 / 4
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000017 / 17
40.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000005 / 5
50.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000007 / 7
60.00.00.00.00.00.00.04.00.00.0...0.00.00.00.00.00.00.00.01.00000018 / 18
70.00.00.00.00.00.03.00.00.00.0...0.00.00.00.00.00.00.00.01.00000014 / 14
80.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000008 / 8
90.00.00.00.00.00.00.00.00.027.0...0.00.00.00.00.00.00.00.00.71276667 / 94
100.00.00.00.00.00.00.00.00.01.0...0.00.00.00.00.00.00.00.01.0000007 / 7
110.00.00.00.00.00.00.00.00.07.0...0.00.00.00.00.00.00.04.00.95000019 / 20
120.00.00.00.00.00.00.00.00.00.0...3.00.00.00.00.00.01.00.00.74026057 / 77
130.00.00.00.00.00.00.00.00.03.0...0.00.00.00.00.00.00.00.00.71264462 / 87
140.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000003 / 3
150.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000003 / 3
160.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.0000002 / 2
170.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.0000000 / 6
180.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00000012 / 12
190.00.00.00.00.00.00.00.00.01.0...0.00.00.00.00.00.00.00.00.44444416 / 36
\n", "

20 rows × 100 columns

\n", "
" ], "text/plain": [ " ABE ABQ ACY ALB ATL AVP BDL BGM BNA BOS ... SNA STL SWF \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "7 0.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "9 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 27.0 ... 0.0 0.0 0.0 \n", "10 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 \n", "11 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 7.0 ... 0.0 0.0 0.0 \n", "12 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 3.0 0.0 0.0 \n", "13 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 ... 0.0 0.0 0.0 \n", "14 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "15 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "16 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "17 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "18 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n", "19 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 
0.0 0.0 0.0 \n", "\n", " SYR TOL TPA TUS UCA Error Rate \n", "0 0.0 0.0 0.0 0.0 0.0 1.000000 28 / 28 \n", "1 0.0 0.0 0.0 0.0 0.0 1.000000 21 / 21 \n", "2 0.0 0.0 0.0 0.0 0.0 1.000000 4 / 4 \n", "3 0.0 0.0 0.0 0.0 0.0 1.000000 17 / 17 \n", "4 0.0 0.0 0.0 0.0 0.0 1.000000 5 / 5 \n", "5 0.0 0.0 0.0 0.0 0.0 1.000000 7 / 7 \n", "6 0.0 0.0 0.0 0.0 0.0 1.000000 18 / 18 \n", "7 0.0 0.0 0.0 0.0 0.0 1.000000 14 / 14 \n", "8 0.0 0.0 0.0 0.0 0.0 1.000000 8 / 8 \n", "9 0.0 0.0 0.0 0.0 0.0 0.712766 67 / 94 \n", "10 0.0 0.0 0.0 0.0 0.0 1.000000 7 / 7 \n", "11 0.0 0.0 0.0 0.0 4.0 0.950000 19 / 20 \n", "12 0.0 0.0 0.0 1.0 0.0 0.740260 57 / 77 \n", "13 0.0 0.0 0.0 0.0 0.0 0.712644 62 / 87 \n", "14 0.0 0.0 0.0 0.0 0.0 1.000000 3 / 3 \n", "15 0.0 0.0 0.0 0.0 0.0 1.000000 3 / 3 \n", "16 0.0 0.0 0.0 0.0 0.0 1.000000 2 / 2 \n", "17 0.0 0.0 0.0 0.0 0.0 0.000000 0 / 6 \n", "18 0.0 0.0 0.0 0.0 0.0 1.000000 12 / 12 \n", "19 0.0 0.0 0.0 0.0 0.0 0.444444 16 / 36 \n", "\n", "[20 rows x 100 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "See the whole table with table.as_data_frame()\n", "\n", "Top-10 Hit Ratios: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
khit_ratio
010.611065
120.737729
230.778910
340.814476
450.844218
560.864601
670.878744
780.891015
890.900999
9100.909526
\n", "
" ], "text/plain": [ " k hit_ratio\n", "0 1 0.611065\n", "1 2 0.737729\n", "2 3 0.778910\n", "3 4 0.814476\n", "4 5 0.844218\n", "5 6 0.864601\n", "6 7 0.878744\n", "7 8 0.891015\n", "8 9 0.900999\n", "9 10 0.909526" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Scoring History: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampdurationnumber_of_treestraining_rmsetraining_loglosstraining_classification_errortraining_auctraining_pr_aucvalidation_rmsevalidation_loglossvalidation_classification_errorvalidation_aucvalidation_pr_auc
02020-11-24 14:51:450.010 sec0.00.9897964.5849670.866007NaNNaN0.9897964.5849670.867512NaNNaN
12020-11-24 14:51:460.345 sec1.00.9815844.0648280.365268NaNNaN0.9818794.0848680.386855NaNNaN
22020-11-24 14:51:460.589 sec2.00.9753173.8221810.364095NaNNaN0.9758523.8535780.387687NaNNaN
32020-11-24 14:51:460.852 sec3.00.9694043.6450220.363840NaNNaN0.9701663.6850620.387479NaNNaN
42020-11-24 14:51:471.110 sec4.00.9636833.5030890.363432NaNNaN0.9646693.5504490.387479NaNNaN
52020-11-24 14:51:471.399 sec5.00.9580933.3839780.363636NaNNaN0.9592993.4377320.388311NaNNaN
62020-11-24 14:51:471.692 sec6.00.9525973.2807130.363636NaNNaN0.9540233.3401540.388311NaNNaN
72020-11-24 14:51:472.032 sec7.00.9471843.1895880.363636NaNNaN0.9488293.2542640.388311NaNNaN
82020-11-24 14:51:482.358 sec8.00.9418463.1079450.363636NaNNaN0.9437093.1774020.388311NaNNaN
92020-11-24 14:51:482.691 sec9.00.9365703.0338310.363738NaNNaN0.9386503.1077040.387895NaNNaN
102020-11-24 14:51:482.985 sec10.00.9313382.9655840.363687NaNNaN0.9336353.0436920.387895NaNNaN
112020-11-24 14:51:493.243 sec11.00.9261562.9024360.363687NaNNaN0.9286692.9844270.387895NaNNaN
122020-11-24 14:51:493.495 sec12.00.9210282.8437690.363534NaNNaN0.9237612.9297140.388311NaNNaN
132020-11-24 14:51:493.756 sec13.00.9159572.7892500.363687NaNNaN0.9189072.8788230.387895NaNNaN
142020-11-24 14:51:494.012 sec14.00.9109342.7381080.363126NaNNaN0.9141032.8312290.388103NaNNaN
152020-11-24 14:51:548.125 sec43.00.7866951.9248390.360424NaNNaN0.7962232.0849620.389559NaNNaN
162020-11-24 14:52:0317.703 sec100.00.6557871.3717580.352011NaNNaN0.6766511.6072450.388935NaNNaN
\n", "
" ], "text/plain": [ " timestamp duration number_of_trees training_rmse \\\n", "0 2020-11-24 14:51:45 0.010 sec 0.0 0.989796 \n", "1 2020-11-24 14:51:46 0.345 sec 1.0 0.981584 \n", "2 2020-11-24 14:51:46 0.589 sec 2.0 0.975317 \n", "3 2020-11-24 14:51:46 0.852 sec 3.0 0.969404 \n", "4 2020-11-24 14:51:47 1.110 sec 4.0 0.963683 \n", "5 2020-11-24 14:51:47 1.399 sec 5.0 0.958093 \n", "6 2020-11-24 14:51:47 1.692 sec 6.0 0.952597 \n", "7 2020-11-24 14:51:47 2.032 sec 7.0 0.947184 \n", "8 2020-11-24 14:51:48 2.358 sec 8.0 0.941846 \n", "9 2020-11-24 14:51:48 2.691 sec 9.0 0.936570 \n", "10 2020-11-24 14:51:48 2.985 sec 10.0 0.931338 \n", "11 2020-11-24 14:51:49 3.243 sec 11.0 0.926156 \n", "12 2020-11-24 14:51:49 3.495 sec 12.0 0.921028 \n", "13 2020-11-24 14:51:49 3.756 sec 13.0 0.915957 \n", "14 2020-11-24 14:51:49 4.012 sec 14.0 0.910934 \n", "15 2020-11-24 14:51:54 8.125 sec 43.0 0.786695 \n", "16 2020-11-24 14:52:03 17.703 sec 100.0 0.655787 \n", "\n", " training_logloss training_classification_error training_auc \\\n", "0 4.584967 0.866007 NaN \n", "1 4.064828 0.365268 NaN \n", "2 3.822181 0.364095 NaN \n", "3 3.645022 0.363840 NaN \n", "4 3.503089 0.363432 NaN \n", "5 3.383978 0.363636 NaN \n", "6 3.280713 0.363636 NaN \n", "7 3.189588 0.363636 NaN \n", "8 3.107945 0.363636 NaN \n", "9 3.033831 0.363738 NaN \n", "10 2.965584 0.363687 NaN \n", "11 2.902436 0.363687 NaN \n", "12 2.843769 0.363534 NaN \n", "13 2.789250 0.363687 NaN \n", "14 2.738108 0.363126 NaN \n", "15 1.924839 0.360424 NaN \n", "16 1.371758 0.352011 NaN \n", "\n", " training_pr_auc validation_rmse validation_logloss \\\n", "0 NaN 0.989796 4.584967 \n", "1 NaN 0.981879 4.084868 \n", "2 NaN 0.975852 3.853578 \n", "3 NaN 0.970166 3.685062 \n", "4 NaN 0.964669 3.550449 \n", "5 NaN 0.959299 3.437732 \n", "6 NaN 0.954023 3.340154 \n", "7 NaN 0.948829 3.254264 \n", "8 NaN 0.943709 3.177402 \n", "9 NaN 0.938650 3.107704 \n", "10 NaN 0.933635 3.043692 \n", "11 NaN 0.928669 2.984427 \n", "12 NaN 0.923761 
2.929714 \n", "13 NaN 0.918907 2.878823 \n", "14 NaN 0.914103 2.831229 \n", "15 NaN 0.796223 2.084962 \n", "16 NaN 0.676651 1.607245 \n", "\n", " validation_classification_error validation_auc validation_pr_auc \n", "0 0.867512 NaN NaN \n", "1 0.386855 NaN NaN \n", "2 0.387687 NaN NaN \n", "3 0.387479 NaN NaN \n", "4 0.387479 NaN NaN \n", "5 0.388311 NaN NaN \n", "6 0.388311 NaN NaN \n", "7 0.388311 NaN NaN \n", "8 0.388311 NaN NaN \n", "9 0.387895 NaN NaN \n", "10 0.387895 NaN NaN \n", "11 0.387895 NaN NaN \n", "12 0.388311 NaN NaN \n", "13 0.387895 NaN NaN \n", "14 0.388103 NaN NaN \n", "15 0.389559 NaN NaN \n", "16 0.388935 NaN NaN " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Variable Importances: \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
variablerelative_importancescaled_importancepercentage
0Origin156621.4843751.0000000.505359
1UniqueCarrier144643.5312500.9235230.466710
2fDayofMonth4113.5844730.0262640.013273
3fMonth3239.1992190.0206820.010452
4IsDepDelayed964.1048580.0061560.003111
5fDayOfWeek339.4254150.0021670.001095
\n", "
" ], "text/plain": [ " variable relative_importance scaled_importance percentage\n", "0 Origin 156621.484375 1.000000 0.505359\n", "1 UniqueCarrier 144643.531250 0.923523 0.466710\n", "2 fDayofMonth 4113.584473 0.026264 0.013273\n", "3 fMonth 3239.199219 0.020682 0.010452\n", "4 IsDepDelayed 964.104858 0.006156 0.003111\n", "5 fDayOfWeek 339.425415 0.002167 0.001095" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# domain is too big\n", "\n", "air = h2o.import_file(path=_locate(\"smalldata/airlines/AirlinesTrain.csv.zip\"))\n", "\n", "features = [\"Origin\", \"Dest\", \"IsDepDelayed\", \"UniqueCarrier\", \"fMonth\", \"fDayofMonth\", \"fDayOfWeek\"]\n", "response = \"Dest\"\n", "\n", "r = air[0].runif()\n", "train = air[r < 0.8]\n", "valid = air[r >= 0.8]\n", "\n", "#Too many domains - AUC/PR AUC is not calculated\n", "gbm = H2OGradientBoostingEstimator(distribution=\"multinomial\", \n", " ntrees=100, \n", " max_depth=3, \n", " learn_rate=0.01,\n", " auc_type=\"MACRO_OVO\")\n", "gbm.train(x =features, \n", " y =response, \n", " training_frame =train,\n", " validation_frame=valid)\n", "gbm.show()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Table is not computed because it is disabled or due to domain size (maximum is 50 domains).'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbm.multinomial_auc_table()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3rc1" } }, "nbformat": 4, "nbformat_minor": 4 }