{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import h2o\n",
"from h2o.estimators.deeplearning import H2ODeepLearningEstimator"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Version mismatch. H2O is version 3.5.0.99999, but the python package is version UNKNOWN.\n"
]
},
{
"data": {
"text/html": [
"
H2O cluster uptime: | \n",
"58 minutes 48 seconds 43 milliseconds |
\n",
"H2O cluster version: | \n",
"3.5.0.99999 |
\n",
"H2O cluster name: | \n",
"ludirehak |
\n",
"H2O cluster total nodes: | \n",
"1 |
\n",
"H2O cluster total memory: | \n",
"4.44 GB |
\n",
"H2O cluster total cores: | \n",
"8 |
\n",
"H2O cluster allowed cores: | \n",
"8 |
\n",
"H2O cluster healthy: | \n",
"True |
\n",
"H2O Connection ip: | \n",
"127.0.0.1 |
\n",
"H2O Connection port: | \n",
"54321 |
"
],
"text/plain": [
"-------------------------- -------------------------------------\n",
"H2O cluster uptime: 58 minutes 48 seconds 43 milliseconds\n",
"H2O cluster version: 3.5.0.99999\n",
"H2O cluster name: ludirehak\n",
"H2O cluster total nodes: 1\n",
"H2O cluster total memory: 4.44 GB\n",
"H2O cluster total cores: 8\n",
"H2O cluster allowed cores: 8\n",
"H2O cluster healthy: True\n",
"H2O Connection ip: 127.0.0.1\n",
"H2O Connection port: 54321\n",
"-------------------------- -------------------------------------"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"h2o.init()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Parse Progress: [##################################################] 100%\n",
"Uploaded py2a71800e-2ec6-4f71-b955-854a4f22aeb3 into cluster with 380 rows and 9 cols\n",
"Rows: 380 Cols: 9\n",
"\n",
"Chunk compression summary:\n"
]
},
{
"data": {
"text/html": [
"chunk_type | \n",
"chunk_name | \n",
"count | \n",
"count_percentage | \n",
"size | \n",
"size_percentage |
\n",
"CBS | \n",
"Bits | \n",
"1 | \n",
"11.111112 | \n",
" 118 B | \n",
"2.4210093 |
\n",
"C1N | \n",
"1-Byte Integers (w/o NAs) | \n",
"5 | \n",
"55.555557 | \n",
" 2.2 KB | \n",
"45.958145 |
\n",
"C2 | \n",
"2-Byte Integers | \n",
"1 | \n",
"11.111112 | \n",
" 828 B | \n",
"16.9881 |
\n",
"C2S | \n",
"2-Byte Fractions | \n",
"2 | \n",
"22.222223 | \n",
" 1.6 KB | \n",
"34.632744 |
"
],
"text/plain": [
"chunk_type chunk_name count count_percentage size size_percentage\n",
"------------ ------------------------- ------- ------------------ ------ -----------------\n",
"CBS Bits 1 11.1111 118 B 2.42101\n",
"C1N 1-Byte Integers (w/o NAs) 5 55.5556 2.2 KB 45.9581\n",
"C2 2-Byte Integers 1 11.1111 828 B 16.9881\n",
"C2S 2-Byte Fractions 2 22.2222 1.6 KB 34.6327"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Frame distribution summary:\n"
]
},
{
"data": {
"text/html": [
" | \n",
"size | \n",
"number_of_rows | \n",
"number_of_chunks_per_column | \n",
"number_of_chunks |
\n",
"172.16.2.37:54321 | \n",
" 4.8 KB | \n",
"380.0 | \n",
"1.0 | \n",
"9.0 |
\n",
"mean | \n",
" 4.8 KB | \n",
"380.0 | \n",
"1.0 | \n",
"9.0 |
\n",
"min | \n",
" 4.8 KB | \n",
"380.0 | \n",
"1.0 | \n",
"9.0 |
\n",
"max | \n",
" 4.8 KB | \n",
"380.0 | \n",
"1.0 | \n",
"9.0 |
\n",
"stddev | \n",
" 0 B | \n",
"0.0 | \n",
"0.0 | \n",
"0.0 |
\n",
"total | \n",
" 4.8 KB | \n",
"380.0 | \n",
"1.0 | \n",
"9.0 |
"
],
"text/plain": [
" size number_of_rows number_of_chunks_per_column number_of_chunks\n",
"----------------- ------ ---------------- ----------------------------- ------------------\n",
"172.16.2.37:54321 4.8 KB 380 1 9\n",
"mean 4.8 KB 380 1 9\n",
"min 4.8 KB 380 1 9\n",
"max 4.8 KB 380 1 9\n",
"stddev 0 B 0 0 0\n",
"total 4.8 KB 380 1 9"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Column-by-Column Summary:\n",
"\n"
]
},
{
"data": {
"text/html": [
" | \n",
"ID | \n",
"CAPSULE | \n",
"AGE | \n",
"RACE | \n",
"DPROS | \n",
"DCAPS | \n",
"PSA | \n",
"VOL | \n",
"GLEASON |
\n",
"type | \n",
"int | \n",
"int | \n",
"int | \n",
"int | \n",
"int | \n",
"int | \n",
"real | \n",
"real | \n",
"int |
\n",
"mins | \n",
"1.0 | \n",
"0.0 | \n",
"43.0 | \n",
"0.0 | \n",
"1.0 | \n",
"1.0 | \n",
"0.3 | \n",
"0.0 | \n",
"0.0 |
\n",
"maxs | \n",
"380.0 | \n",
"1.0 | \n",
"79.0 | \n",
"2.0 | \n",
"4.0 | \n",
"2.0 | \n",
"139.7 | \n",
"97.6 | \n",
"9.0 |
\n",
"mean | \n",
"190.5 | \n",
"0.4 | \n",
"66.0 | \n",
"1.1 | \n",
"2.3 | \n",
"1.1 | \n",
"15.4 | \n",
"15.8 | \n",
"6.4 |
\n",
"sigma | \n",
"109.8 | \n",
"0.5 | \n",
"6.5 | \n",
"0.3 | \n",
"1.0 | \n",
"0.3 | \n",
"20.0 | \n",
"18.3 | \n",
"1.1 |
\n",
"zero_count | \n",
"0 | \n",
"227 | \n",
"0 | \n",
"3 | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"167 | \n",
"2 |
\n",
"missing_count | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"0 | \n",
"0 |
"
],
"text/plain": [
" ID CAPSULE AGE RACE DPROS DCAPS PSA VOL GLEASON\n",
"------------- ------------- -------------- ------------- -------------- ------------- -------------- ------------- ------------- -------------\n",
"type int int int int int int real real int\n",
"mins 1.0 0.0 43.0 0.0 1.0 1.0 0.3 0.0 0.0\n",
"maxs 380.0 1.0 79.0 2.0 4.0 2.0 139.7 97.6 9.0\n",
"mean 190.5 0.402631578947 66.0394736842 1.08684210526 2.27105263158 1.10789473684 15.4086315789 15.8129210526 6.38421052632\n",
"sigma 109.840793879 0.491074338963 6.52707126917 0.308773258025 1.00010761815 0.310656449351 19.9975726686 18.3476199673 1.09195337443\n",
"zero_count 0 227 0 3 0 0 0 167 2\n",
"missing_count 0 0 0 0 0 0 0 0 0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from h2o.utils.shared_utils import _locate # private function. used to find files within h2o git project directory.\n",
"\n",
"prostate = h2o.upload_file(path=_locate(\"smalldata/logreg/prostate.csv\"))\n",
"prostate.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"deeplearning Model Build Progress: [##################################################] 100%\n",
"Model Details\n",
"=============\n",
"H2ODeepLearningEstimator : Deep Learning\n",
"Model Key: DeepLearning_model_python_1445544453075_137\n",
"\n",
"Status of Neuron Layers: predicting CAPSULE, 2-class classification, bernoulli distribution, CrossEntropy loss, 322 weights/biases, 8.5 KB, 3,800,000 training samples, mini-batch size 1\n",
"\n"
]
},
{
"data": {
"text/html": [
" | \n",
"layer | \n",
"units | \n",
"type | \n",
"dropout | \n",
"l1 | \n",
"l2 | \n",
"mean_rate | \n",
"rate_RMS | \n",
"momentum | \n",
"mean_weight | \n",
"weight_RMS | \n",
"mean_bias | \n",
"bias_RMS |
\n",
" | \n",
"1 | \n",
"7 | \n",
"Input | \n",
"0.0 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" |
\n",
" | \n",
"2 | \n",
"10 | \n",
"Tanh | \n",
"0.0 | \n",
"0.0 | \n",
"0.0 | \n",
"0.1 | \n",
"0.1 | \n",
"0.0 | \n",
"0.1 | \n",
"1.0 | \n",
"0.4 | \n",
"0.7 |
\n",
" | \n",
"3 | \n",
"10 | \n",
"Tanh | \n",
"0.0 | \n",
"0.0 | \n",
"0.0 | \n",
"0.1 | \n",
"0.1 | \n",
"0.0 | \n",
"0.0 | \n",
"1.4 | \n",
"1.1 | \n",
"0.7 |
\n",
" | \n",
"4 | \n",
"10 | \n",
"Tanh | \n",
"0.0 | \n",
"0.0 | \n",
"0.0 | \n",
"0.2 | \n",
"0.2 | \n",
"0.0 | \n",
"-0.2 | \n",
"1.8 | \n",
"-0.2 | \n",
"0.8 |
\n",
" | \n",
"5 | \n",
"2 | \n",
"Softmax | \n",
" | \n",
"0.0 | \n",
"0.0 | \n",
"0.4 | \n",
"0.1 | \n",
"0.0 | \n",
"-0.2 | \n",
"5.7 | \n",
"0.1 | \n",
"0.3 |
"
],
"text/plain": [
" layer units type dropout l1 l2 mean_rate rate_RMS momentum mean_weight weight_RMS mean_bias bias_RMS\n",
"-- ------- ------- ------- --------- ---- ---- -------------- --------------- ---------- --------------- ------------- --------------- --------------\n",
" 1 7 Input 0.0\n",
" 2 10 Tanh 0.0 0.0 0.0 0.113385616509 0.106353402138 0.0 0.0609744639036 1.01239204407 0.39729323576 0.706557273865\n",
" 3 10 Tanh 0.0 0.0 0.0 0.127355974298 0.0778207182884 0.0 0.0452940063318 1.41430282593 1.08699401568 0.748293399811\n",
" 4 10 Tanh 0.0 0.0 0.0 0.247714677732 0.205178380013 0.0 -0.153704360016 1.77612018585 -0.246065597484 0.803660392761\n",
" 5 2 Softmax 0.0 0.0 0.426404607296 0.102490842342 0.0 -0.192942607403 5.6915397644 0.0971629403181 0.319541573524"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"ModelMetricsBinomial: deeplearning\n",
"** Reported on train data. **\n",
"\n",
"MSE: 0.010708193224\n",
"R^2: 0.955478877615\n",
"LogLoss: 0.0689458344205\n",
"AUC: 0.996818404307\n",
"Gini: 0.993636808615\n",
"\n",
"Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.85259659057:\n"
]
},
{
"data": {
"text/html": [
" | \n",
"0 | \n",
"1 | \n",
"Error | \n",
"Rate |
\n",
"0 | \n",
"224.0 | \n",
"3.0 | \n",
"0.0132 | \n",
" (3.0/227.0) |
\n",
"1 | \n",
"0.0 | \n",
"153.0 | \n",
"0.0 | \n",
" (0.0/153.0) |
\n",
"Total | \n",
"224.0 | \n",
"156.0 | \n",
"0.0079 | \n",
" (3.0/380.0) |
"
],
"text/plain": [
" 0 1 Error Rate\n",
"----- --- --- ------- -----------\n",
"0 224 3 0.0132 (3.0/227.0)\n",
"1 0 153 0 (0.0/153.0)\n",
"Total 224 156 0.0079 (3.0/380.0)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Maximum Metrics: Maximum metrics at their respective thresholds\n",
"\n"
]
},
{
"data": {
"text/html": [
"metric | \n",
"threshold | \n",
"value | \n",
"idx |
\n",
"max f1 | \n",
"0.9 | \n",
"1.0 | \n",
"107.0 |
\n",
"max f2 | \n",
"0.9 | \n",
"1.0 | \n",
"107.0 |
\n",
"max f0point5 | \n",
"0.9 | \n",
"1.0 | \n",
"107.0 |
\n",
"max accuracy | \n",
"0.9 | \n",
"1.0 | \n",
"107.0 |
\n",
"max precision | \n",
"1.0 | \n",
"1.0 | \n",
"0.0 |
\n",
"max absolute_MCC | \n",
"0.9 | \n",
"1.0 | \n",
"107.0 |
\n",
"max min_per_class_accuracy | \n",
"0.9 | \n",
"1.0 | \n",
"105.0 |
"
],
"text/plain": [
"metric threshold value idx\n",
"-------------------------- ----------- -------- -----\n",
"max f1 0.852597 0.990291 107\n",
"max f2 0.852597 0.996094 107\n",
"max f0point5 0.852597 0.984556 107\n",
"max accuracy 0.852597 0.992105 107\n",
"max precision 1 1 0\n",
"max absolute_MCC 0.852597 0.983772 107\n",
"max min_per_class_accuracy 0.904469 0.986784 105"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Scoring History:\n"
]
},
{
"data": {
"text/html": [
" | \n",
"timestamp | \n",
"duration | \n",
"training_speed | \n",
"epochs | \n",
"samples | \n",
"training_MSE | \n",
"training_r2 | \n",
"training_logloss | \n",
"training_AUC | \n",
"training_classification_error |
\n",
" | \n",
"2015-10-22 14:06:21 | \n",
" 0.000 sec | \n",
"None | \n",
"0.0 | \n",
"0.0 | \n",
"nan | \n",
"nan | \n",
"nan | \n",
"nan | \n",
"nan |
\n",
" | \n",
"2015-10-22 14:06:21 | \n",
" 0.038 sec | \n",
"115151 rows/sec | \n",
"10.0 | \n",
"3800.0 | \n",
"0.2 | \n",
"0.2 | \n",
"0.6 | \n",
"0.8 | \n",
"0.3 |
\n",
" | \n",
"2015-10-22 14:06:26 | \n",
" 5.047 sec | \n",
"309126 rows/sec | \n",
"4100.0 | \n",
"1558000.0 | \n",
"0.0 | \n",
"0.9 | \n",
"0.1 | \n",
"1.0 | \n",
"0.0 |
\n",
" | \n",
"2015-10-22 14:06:31 | \n",
"10.051 sec | \n",
"308783 rows/sec | \n",
"8160.0 | \n",
"3100800.0 | \n",
"0.0 | \n",
"0.9 | \n",
"0.1 | \n",
"1.0 | \n",
"0.0 |
\n",
" | \n",
"2015-10-22 14:06:34 | \n",
"12.360 sec | \n",
"307717 rows/sec | \n",
"10000.0 | \n",
"3800000.0 | \n",
"0.0 | \n",
"1.0 | \n",
"0.1 | \n",
"1.0 | \n",
"0.0 |
"
],
"text/plain": [
" timestamp duration training_speed epochs samples training_MSE training_r2 training_logloss training_AUC training_classification_error\n",
"-- ------------------- ---------- ---------------- -------- ---------- -------------- ------------- ------------------ -------------- -------------------------------\n",
" 2015-10-22 14:06:21 0.000 sec 0 0 nan nan nan nan nan\n",
" 2015-10-22 14:06:21 0.038 sec 115151 rows/sec 10 3800 0.183857 0.235583 0.550464 0.803288 0.268421\n",
" 2015-10-22 14:06:26 5.047 sec 309126 rows/sec 4100 1.558e+06 0.0152973 0.936399 0.0886012 0.993291 0.0131579\n",
" 2015-10-22 14:06:31 10.051 sec 308783 rows/sec 8160 3.1008e+06 0.0182986 0.92392 0.0792696 0.997121 0.0184211\n",
" 2015-10-22 14:06:34 12.360 sec 307717 rows/sec 10000 3.8e+06 0.0107082 0.955479 0.0689458 0.996818 0.00789474"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"prostate[\"CAPSULE\"] = prostate[\"CAPSULE\"].asfactor()\n",
"model = H2ODeepLearningEstimator(activation = \"Tanh\", hidden = [10, 10, 10], epochs = 10000)\n",
"model.train(x = list(set(prostate.columns) - set([\"ID\",\"CAPSULE\"])), y =\"CAPSULE\", training_frame = prostate)\n",
"model.show()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"H2OFrame with 380 rows and 3 columns: \n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" predict | \n",
" p0 | \n",
" p1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 9.993875e-01 | \n",
" 6.125394e-04 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 9.999998e-01 | \n",
" 1.937478e-07 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 9.999646e-01 | \n",
" 3.535732e-05 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 1.000000e+00 | \n",
" 2.235483e-12 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 9.999950e-01 | \n",
" 5.024862e-06 | \n",
"
\n",
" \n",
" 5 | \n",
" 1 | \n",
" 1.237468e-07 | \n",
" 9.999999e-01 | \n",
"
\n",
" \n",
" 6 | \n",
" 0 | \n",
" 9.992793e-01 | \n",
" 7.206910e-04 | \n",
"
\n",
" \n",
" 7 | \n",
" 0 | \n",
" 1.000000e+00 | \n",
" 9.146884e-19 | \n",
"
\n",
" \n",
" 8 | \n",
" 0 | \n",
" 1.000000e+00 | \n",
" 8.434714e-13 | \n",
"
\n",
" \n",
" 9 | \n",
" 0 | \n",
" 9.999994e-01 | \n",
" 6.112821e-07 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" predict p0 p1\n",
"0 0 9.993875e-01 6.125394e-04\n",
"1 0 9.999998e-01 1.937478e-07\n",
"2 0 9.999646e-01 3.535732e-05\n",
"3 0 1.000000e+00 2.235483e-12\n",
"4 0 9.999950e-01 5.024862e-06\n",
"5 1 1.237468e-07 9.999999e-01\n",
"6 0 9.992793e-01 7.206910e-04\n",
"7 0 1.000000e+00 9.146884e-19\n",
"8 0 1.000000e+00 8.434714e-13\n",
"9 0 9.999994e-01 6.112821e-07"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"predictions = model.predict(prostate)\n",
"predictions.show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"ModelMetricsBinomial: deeplearning\n",
"** Reported on test data. **\n",
"\n",
"MSE: 0.010708193224\n",
"R^2: 0.955478877615\n",
"LogLoss: 0.0689458344205\n",
"AUC: 0.996804007947\n",
"Gini: 0.993608015894\n",
"\n",
"Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.85259659057:\n"
]
},
{
"data": {
"text/html": [
" | \n",
"0 | \n",
"1 | \n",
"Error | \n",
"Rate |
\n",
"0 | \n",
"224.0 | \n",
"3.0 | \n",
"0.0132 | \n",
" (3.0/227.0) |
\n",
"1 | \n",
"0.0 | \n",
"153.0 | \n",
"0.0 | \n",
" (0.0/153.0) |
\n",
"Total | \n",
"224.0 | \n",
"156.0 | \n",
"0.0079 | \n",
" (3.0/380.0) |
"
],
"text/plain": [
" 0 1 Error Rate\n",
"----- --- --- ------- -----------\n",
"0 224 3 0.0132 (3.0/227.0)\n",
"1 0 153 0 (0.0/153.0)\n",
"Total 224 156 0.0079 (3.0/380.0)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Maximum Metrics: Maximum metrics at their respective thresholds\n",
"\n"
]
},
{
"data": {
"text/html": [
"metric | \n",
"threshold | \n",
"value | \n",
"idx |
\n",
"max f1 | \n",
"0.9 | \n",
"1.0 | \n",
"155.0 |
\n",
"max f2 | \n",
"0.9 | \n",
"1.0 | \n",
"155.0 |
\n",
"max f0point5 | \n",
"0.9 | \n",
"1.0 | \n",
"155.0 |
\n",
"max accuracy | \n",
"0.9 | \n",
"1.0 | \n",
"155.0 |
\n",
"max precision | \n",
"1.0 | \n",
"1.0 | \n",
"0.0 |
\n",
"max absolute_MCC | \n",
"0.9 | \n",
"1.0 | \n",
"155.0 |
\n",
"max min_per_class_accuracy | \n",
"0.9 | \n",
"1.0 | \n",
"153.0 |
"
],
"text/plain": [
"metric threshold value idx\n",
"-------------------------- ----------- -------- -----\n",
"max f1 0.852597 0.990291 155\n",
"max f2 0.852597 0.996094 155\n",
"max f0point5 0.852597 0.984556 155\n",
"max accuracy 0.852597 0.992105 155\n",
"max precision 1 1 0\n",
"max absolute_MCC 0.852597 0.983772 155\n",
"max min_per_class_accuracy 0.904469 0.986784 153"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"performance = model.model_performance(prostate)\n",
"performance.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}