{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Neural averaging ensembles.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ScpcrL6quKNs",
"colab_type": "text"
},
"source": [
"# Neural averaging ensembles on benchml data"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tLg7ElixubDE",
"colab_type": "text"
},
"source": [
"Dr. Michael Allgöwer, b.telligent, michael.allgoewer@btelligent.com"
]
},
{
"cell_type": "code",
"metadata": {
"id": "OPyMiAzoTt5g",
"colab_type": "code",
"outputId": "2a7c148b-391a-46c8-e626-0faa715f6870",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"try:\n",
" %tensorflow_version 2.x\n",
"except Exception:\n",
" pass"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"TensorFlow 2.x selected.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "y9IXsNjrTX1O",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import os\n",
"from pathlib import Path\n",
"from collections import OrderedDict"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "22Y9RjpwBhn8",
"colab_type": "code",
"outputId": "c945b494-3a49-43da-f840-3dee26da5e22",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"print(tf.__version__, tf.keras.__version__)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"2.0.0-rc2 2.2.4-tf\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "rVEwxoNfeL7F",
"colab_type": "code",
"colab": {}
},
"source": [
"# Importing data, keeping it all together in a class being able to return either pandas dataframe or tf dataset\n",
"# tf datasets are the TensorFlow 2.0-native way of handling data.\n",
"class Flights():\n",
" '''Flight delay classification data from Szilard Pafka's benchml; derived from the well-known fligts dataset'''\n",
"\n",
" def __init__(self):\n",
" \n",
" # you may want to change these paths, depending on where you put the files\n",
" train_path = 'https://raw.githubusercontent.com/Allgoerithm/neuralaveraging/master/data/train-0.01m.csv' \n",
" test_path = 'https://raw.githubusercontent.com/Allgoerithm/neuralaveraging/master/data/test.csv'\n",
" paths = {'train': train_path, 'test': test_path}\n",
" slices = list(paths.keys())\n",
"\n",
" random_seed = 4711\n",
" self.data = {} # neural-network version of the data, with integer indices for categorial data\n",
" self.data_1h = {} # onehot-encoded version of the data, needed for gradient boosted trees\n",
"\n",
" for (data_slice, input_path) in paths.items():\n",
" self.data[data_slice] = pd.read_csv(input_path, delimiter=',', quotechar='\"', na_values=' ')\n",
" self.data[data_slice]['slice'] = data_slice # add new column with the slice the data belongs to\n",
" data_complete = self.data['train'].append(self.data['test'])\n",
" data_complete.rename(index=str, columns={'dep_delayed_15min': 'target'}, inplace=True) \n",
"\n",
" # change binary target variable from Y/N to 0/1 (new datatype: int)\n",
" all_replacements = {'target': {'Y': 1, 'N': 0}}\n",
" data_complete.replace(all_replacements, inplace=True)\n",
" data_complete_1h = data_complete\n",
" \n",
" # indexing all categorial columns (transform into successive integers)\n",
" self.categorial_columns = [list(data_complete.columns)[i]\n",
" for i in range(len(data_complete.columns))\n",
" if list(data_complete.dtypes)[i] == np.dtype('object')]\n",
" self.categorial_columns.remove('slice')\n",
" self.index_lengths = OrderedDict()\n",
" for column in self.categorial_columns:\n",
" data_complete['catindex_' + column] = -1 + data_complete[column]\\\n",
" .rank(method='dense', numeric_only=False)\n",
" data_complete = data_complete.drop(columns=[column])\n",
" self.index_lengths['catindex_' + column] = 1 + data_complete['catindex_' + column].max()\n",
"\n",
" # onehot-encoding for onehot-version of data \n",
" categorial_column_prefixes = ['onehot_' + name for name in self.categorial_columns]\n",
" data_complete_1h = pd.get_dummies(data_complete_1h, columns=self.categorial_columns,\n",
" prefix=categorial_column_prefixes, drop_first=True) \n",
"\n",
" for (data_slice, input_path) in paths.items():\n",
" self.data[data_slice] = data_complete[data_complete['slice'] == data_slice].drop(columns=['slice'])\n",
" self.data_1h[data_slice] = data_complete_1h[data_complete_1h['slice'] == data_slice].drop(columns=['slice'])\n",
"\n",
" # standardize all columns except categorial columns and target variable \n",
" self.categorial_columns = ['catindex_' + col for col in self.categorial_columns]\n",
" columns_to_standardize = [col for col in self.data['train'].columns \n",
" if col not in self.categorial_columns + ['target']]\n",
" for feature_name in columns_to_standardize:\n",
" # mean and variance equal of noncategorial columns are equal for data and data_1h\n",
" mean = self.data['train'][feature_name].mean() \n",
" std = self.data['train'][feature_name].std()\n",
" if std > 0: # keep only colums with at least some variance\n",
" for data_slice in slices:\n",
" self.data[data_slice][feature_name] = (self.data[data_slice][feature_name] - mean) / std\n",
" self.data_1h[data_slice][feature_name] = (self.data_1h[data_slice][feature_name] - mean) / std\n",
" else: # drop constant columns\n",
" for data_slice in slices:\n",
" self.data[data_slice] = self.data[data_slice].drop(feature_name, axis=1)\n",
" self.data_1h[data_slice] = self.data_1h[data_slice].drop(feature_name, axis=1)\n",
"\n",
" def get_dataframe(self, data_slice: str, categorials_as_onehot: bool = False):\n",
" assert data_slice in ('train', 'test', 'valid')\n",
" result = self.data_1h[data_slice] if categorials_as_onehot else self.data[data_slice]\n",
" return result\n",
"\n",
" def get_dataset(self, data_slice: str):\n",
" assert data_slice in ('train', 'test', 'valid')\n",
" target = self.data[data_slice]['target']\n",
" predictor_cols = [c for c in self.data[data_slice].columns if c != 'target']\n",
" predictors = self.data[data_slice][predictor_cols]\n",
" dataset = tf.data.Dataset.from_tensor_slices((predictors.values, target.values))\n",
" return dataset\n",
" \n",
" def get_index_lengths(self):\n",
" return self.index_lengths\n",
"\n",
" def no_of_predictors(self):\n",
" return len(self.data['train'].columns) - 1\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ONF3C6ZmeU51",
"colab_type": "code",
"colab": {}
},
"source": [
"# instantiate our new class and get test and training data\n",
"flights = Flights()\n",
"data_train_df = flights.get_dataframe(data_slice='train')\n",
"data_test_df = flights.get_dataframe(data_slice='test')\n",
"\n",
"# now for XGboost, with one-hot encoded categorial variables\n",
"data_train_1h_df = flights.get_dataframe(data_slice='train', categorials_as_onehot=True)\n",
"data_test_1h_df = flights.get_dataframe(data_slice='test', categorials_as_onehot=True)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "fsvc0eQvxaL4",
"colab_type": "code",
"colab": {}
},
"source": [
"# fit gradient boosting model to the data as a baseline, to check we can reproduce Szilard Pafkas's findings\n",
"\n",
"import xgboost as xgb\n",
"import numpy as np\n",
"import sklearn.metrics\n",
"\n",
"d_train = xgb.DMatrix(data_train_1h_df.drop(columns=['target']), label=data_train_1h_df['target'])\n",
"d_test = xgb.DMatrix(data_test_1h_df.drop(columns=['target']), label=data_test_1h_df['target'])\n",
"param = {'objective':'binary:logistic', 'max_depth': 16, 'eta': 0.01, 'subsample': 0.5, 'min_obs_node': 1}\n",
"\n",
"gb_model = xgb.train(params=param, dtrain=d_train, num_boost_round=1000) \n",
"\n",
"gb_pred_test = gb_model.predict(d_test)\n",
"gb_auc = sklearn.metrics.roc_auc_score(data_test_df['target'], gb_pred_test)\n",
"gb_mae = sklearn.metrics.mean_absolute_error(data_test_df['target'], gb_pred_test)\n",
"print(f'AUC:{gb_auc}, MAE:{gb_mae}')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "8gFarMekS764",
"colab_type": "code",
"colab": {}
},
"source": [
"import time\n",
"from pathlib import Path\n",
"root_logdir = Path('logs')\n",
"\n",
"def get_log_dir() -> Path:\n",
" run_id = Path(time.strftime('run_%Y_%m_%d-%H_%M_%S'))\n",
" return root_logdir / run_id"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "1WmPqsiZZwnb",
"colab_type": "text"
},
"source": [
"##Building a neural averaging ensemble"
]
},
{
"cell_type": "code",
"metadata": {
"id": "KDwmiLd2ZaYJ",
"colab_type": "code",
"colab": {}
},
"source": [
"import sklearn.metrics # to compute AUC\n",
"import datetime\n",
"\n",
"from functools import partial # higher-order-function for currying\n",
"\n",
"# We use the functional API here which is almost as simple to use as a sequential model,\n",
"# and versatile enough for our needs. To keep things tidy, we place the model definition inside a function.\n",
"# If things get more complicated (especially dynamic nets), the subclassing API is needed.\n",
"\n",
"def averaging_ensemble(inputs_numeric: int, inputs_for_embedding: int, embedding_input_dims: list, \n",
" embedding_output_dims: list, width: int, weak_learners: int, activation_name: \n",
" str = 'tanh', share_embedding_layer: bool = False, sigmoid_layer: bool = True, \n",
" averaging_layer: bool = True):\n",
" r'''Return a generic dense network model\n",
"\n",
" inputs_numeric: number of numeric columns (features) in the input data set; these are expected to be the first \n",
" columns\n",
" inputs_for_embedding: integer columns of input set to be transformed by embeddings\n",
" embedding_input_dims: input dimension (size of the vocabulary) for each column to be transformed by an embedding;\n",
" this is supposed to be a list of length inputs_for_embedding\n",
" embedding_output_dims: output dimensions for each column to be transformed by an embedding;\n",
" this is supposed to be a list of length inputs_for_embedding\n",
" width: number of neurons in the hidden layer of each weak learner\n",
" weak_learners: number of weak learners in the ensemble\n",
" activation_name: string choosing the activation function for the hidden layers,\n",
" 'tanh' for tanh activation,\n",
" 'relu' for ReLU activation,\n",
" 'selu' for SELU activation\n",
" sigmoid_layer: switches sigmoid layer on and off as last layer for each weak learner. The layer is usually needed, \n",
" it is only switched off for hidden layer size checking \n",
" averaging_layer: switches last averaging layer on and off\n",
" '''\n",
" assert width >= 1, 'width is required to be at least 1'\n",
" assert weak_learners >= 1, 'weak_learners is required to be at least 1'\n",
" assert activation_name.lower() in ['tanh', 'relu', 'selu'], \\\n",
" f'Unknown value \"{activation_name}\" for activation_fct. Options are \"tanh\", \"relu\" and \"selu\".'\n",
" assert len(embedding_input_dims) == inputs_for_embedding, \\\n",
" 'length of list embedding_input_dims is supposed to be equal to inputs_for_embedding'\n",
" assert len(embedding_output_dims) == inputs_for_embedding, \\\n",
" 'length of list embedding_output_dims is supposed to be equal to inputs_for_embedding'\n",
"\n",
" if activation_name.lower() == 'tanh':\n",
" activation = tf.keras.activations.tanh\n",
" kernel_initializer = tf.initializers.GlorotUniform()\n",
" elif activation_name.lower() == 'relu':\n",
" activation = tf.keras.activations.relu\n",
" kernel_initializer = tf.initializers.GlorotUniform() \n",
" else:\n",
" activation = tf.keras.activations.selu\n",
" kernel_initializer = tf.initializers.VarianceScaling(scale=1.0, mode='fan_in')\n",
"\n",
" input_layer = tf.keras.Input(shape=(inputs_numeric + inputs_for_embedding,))\n",
" split_input_layer = tf.split(input_layer, [inputs_numeric] + [1]*inputs_for_embedding, axis=1)\n",
"\n",
" hidden = []\n",
" name_hidden = 'hidden' if weak_learners==1 else None\n",
" # add hidden layer as a list of weak learners\n",
" for i in range(weak_learners):\n",
" if i == 0 or not(share_embedding_layer):\n",
" embedded_input_components = [split_input_layer[0]] # use numerical inputs without transformation\n",
" # embedd the other components\n",
" for j in range(inputs_for_embedding):\n",
" prefix = '' if share_embedding_layer else f'wl_{i}_'\n",
" embedding_layer_name = prefix + f'emb_{j}_in{embedding_input_dims[j]}_out{embedding_output_dims[j]}'\n",
" embedded_input = tf.keras.layers.Embedding(input_dim=embedding_input_dims[j], \n",
" output_dim=embedding_output_dims[j], input_length=1, \n",
" name=embedding_layer_name)(split_input_layer[1 + j])\n",
" embedded_input_components.append(tf.keras.layers.Flatten()(embedded_input))\n",
" embedded_input = tf.keras.layers.Concatenate(axis=1)(embedded_input_components)\n",
"\n",
" # create flat dense layer and sigmoid layer for classification \n",
" weak_learner = tf.keras.layers.Dense(units=width, activation=activation, kernel_initializer=kernel_initializer,\n",
" name=name_hidden)(embedded_input)\n",
" weak_learner = tf.keras.layers.Dense(units=1, activation=tf.keras.activations.sigmoid)(weak_learner)\n",
" hidden.append(weak_learner)\n",
"\n",
" if weak_learners > 1 and averaging_layer: \n",
" output_layer = tf.keras.layers.Average()(hidden) # add an averaging layer at the end\n",
" elif weak_learners > 1: # if we have multiple outputs and no averaging layer, we return them all\n",
" output_layer = hidden\n",
" else:\n",
" output_layer = weak_learner # if there's only one weak learner, we use it as output directly\n",
" \n",
" return (input_layer, output_layer) "
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "OWjox3QLMqjQ",
"colab_type": "text"
},
"source": [
"### first, determine layer size"
]
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "esCLU_9_N6e6",
"colab": {}
},
"source": [
"#this function is needed as a helper below\n",
"def compute_correlation_histogram(mat: np.array):\n",
" '''Computes the correlations of the columns of mat and returns a histogram (counts for each binned correlation value)\n",
" '''\n",
" corrmatrix_raw = pd.DataFrame(data=mat, \n",
" columns=[f'n_{i:02}' for i in range(mat.shape[1])])\\\n",
" .corr(method=\"spearman\").abs() # we discard the sign of the correlations\n",
" corrmatrix = corrmatrix_raw.stack().reset_index()\n",
" corrmatrix.rename(index=str, columns={\"level_0\": \"variable_1\", \"level_1\": \"variable_2\", 0: \"correlation\"},\n",
" inplace=True) # set meaningful variable names\n",
" correlations = corrmatrix[corrmatrix['variable_1'] > corrmatrix['variable_2']] # keep only upper triangular entries\n",
" correlations = correlations.reset_index()\n",
" \n",
" return correlations.iloc[correlations['correlation'].idxmax(axis='rows')] # return row with maximum correlation\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "kxesDbL-fTI7",
"colab_type": "code",
"colab": {}
},
"source": [
"output_dimensions = OrderedDict([('catindex_Month', 2), ('catindex_DayofMonth', 2), ('catindex_DayOfWeek', 2),\n",
" ('catindex_UniqueCarrier', 5), ('catindex_Origin', 5), ('catindex_Dest', 5)])\n",
"input_dims = OrderedDict([('catindex_Month', 13),\n",
" ('catindex_DayofMonth', 32),\n",
" ('catindex_DayOfWeek', 8),\n",
" ('catindex_UniqueCarrier', 23),\n",
" ('catindex_Origin', 305),\n",
" ('catindex_Dest', 305)])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "HP8D7MA5TqKC",
"colab_type": "code",
"outputId": "4eec2bed-61b8-4b4d-9bbd-d9480a9da97e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 824
}
},
"source": [
"# Check the size of the hidden layer: Train a model with a single weak learner\n",
"import datetime\n",
"tf.random.set_seed(3141592653) # set a fixed (arbitrary) seed for TensorFlow's random numbers, global level\n",
"np.random.seed(seed=3141592653) # ...and do the same for numpy's random numbers\n",
"\n",
"for x in range(10):\n",
" now=datetime.datetime.now()\n",
"\n",
" log_dir = get_log_dir()\n",
" model_name = 'benchml100k_Layersize_check'\n",
" activation_name ='tanh'\n",
" weak_learners = 1 # when we check the size, we do not use averaging\n",
"\n",
" # We go for a low batch size (slow, but less prone to overfitting).\n",
" # The learning rate has been chosen by some quick trials (going down from 1 by dividing by 10 in each step until\n",
" # learning is sufficiently stable).\n",
" # We combine that with a low number of epochs as we only need a rough estimation to gauge the correlations.\n",
" learning_rate = 0.1\n",
" batch_size = 10\n",
" epochs = 20\n",
" widths = []\n",
" max_correlations = []\n",
" validation_data = (data_test_df[[c for c in data_test_df.columns if c != 'target']].values, \n",
" data_test_df['target'].values)\n",
"\n",
" for width in (10, 20, 30, 40, 50, 60, 80, 100, 120, 140):\n",
" (inputs, outputs) = averaging_ensemble(inputs_numeric=flights.no_of_predictors() - len(flights.get_index_lengths()), \n",
" inputs_for_embedding=len(flights.get_index_lengths()), \n",
" embedding_input_dims=list(input_dims.values()), # list(flights.get_index_lengths().values()), \n",
" embedding_output_dims=list(output_dimensions.values()),\n",
" width=width, weak_learners=weak_learners, activation_name=activation_name)\n",
" model = tf.keras.Model(inputs=inputs, outputs=outputs)\n",
" model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), loss='binary_crossentropy', \n",
" metrics=['mae', 'AUC'])\n",
" model.fit(data_train_df[[c for c in data_train_df.columns if c != 'target']].values, data_train_df['target'].values, \n",
" epochs=epochs, batch_size=batch_size, verbose=0)\n",
" \n",
" # shave the model, i.e., delete the last layer\n",
" layer_name = 'hidden'\n",
" shaved_model = tf.keras.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)\n",
" hidden_layer_output = shaved_model.predict(validation_data)\n",
" max_correlation = compute_correlation_histogram(hidden_layer_output)\n",
" widths.append(width)\n",
" max_correlations.append(max_correlation)\n",
" print(f\"{widths[-1]}: {max_correlation['correlation']}\")\n",
" if max_correlation['correlation'] >= 0.98:\n",
" break"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"10: 0.8881912545780639\n",
"20: 0.9451014211983376\n",
"30: 0.9685687572680682\n",
"40: 0.947551540745585\n",
"50: 0.9645273599570406\n",
"60: 0.9763770528780563\n",
"80: 0.9572017647929024\n",
"100: 0.9846774011987006\n",
"10: 0.9419475646449638\n",
"20: 0.958465299219847\n",
"30: 0.9711734558646341\n",
"40: 0.9801705036474484\n",
"10: 0.7366949374359877\n",
"20: 0.8926622584310067\n",
"30: 0.9508609708677088\n",
"40: 0.9760320661499896\n",
"50: 0.9605671215074669\n",
"60: 0.9374681472720487\n",
"80: 0.9806870175219736\n",
"10: 0.8780105916016454\n",
"20: 0.9002817446860963\n",
"30: 0.9599076665318469\n",
"40: 0.9701751239117474\n",
"50: 0.9358039686850287\n",
"60: 0.93732918693722\n",
"80: 0.9835738119950854\n",
"10: 0.7920294754857148\n"
],
"name": "stdout"
},
{
"output_type": "error",
"ename": "KeyboardInterrupt",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 33\u001b[0m metrics=['mae', 'AUC'])\n\u001b[1;32m 34\u001b[0m model.fit(data_train_df[[c for c in data_train_df.columns if c != 'target']].values, data_train_df['target'].values, \n\u001b[0;32m---> 35\u001b[0;31m epochs=epochs, batch_size=batch_size, verbose=0)\n\u001b[0m\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0;31m# shave the model, i.e., delete the last layer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 727\u001b[0m \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 728\u001b[0;31m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m 729\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 730\u001b[0m def evaluate(self,\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mModeKeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 323\u001b[0m \u001b[0mtraining_context\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 324\u001b[0;31m total_epochs=epochs)\n\u001b[0m\u001b[1;32m 325\u001b[0m \u001b[0mcbks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraining_result\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 326\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[0;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[1;32m 121\u001b[0m step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[1;32m 122\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 123\u001b[0;31m \u001b[0mbatch_outs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 124\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/keras/engine/training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[0;34m(input_fn)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m return nest.map_structure(_non_none_constant_value,\n\u001b[0;32m---> 86\u001b[0;31m distributed_function(input_fn))\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 458\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 459\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_counter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcalled_without_tracing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 487\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 488\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1821\u001b[0m \u001b[0;34m\"\"\"Calls a graph function specialized to the inputs.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1822\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1823\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1824\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1825\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 1139\u001b[0m if isinstance(t, (ops.Tensor,\n\u001b[1;32m 1140\u001b[0m resource_variable_ops.BaseResourceVariable))),\n\u001b[0;32m-> 1141\u001b[0;31m self.captured_inputs)\n\u001b[0m\u001b[1;32m 1142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1143\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1222\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mexecuting_eagerly\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1223\u001b[0m flat_outputs = forward_function.call(\n\u001b[0;32m-> 1224\u001b[0;31m ctx, args, cancellation_manager=cancellation_manager)\n\u001b[0m\u001b[1;32m 1225\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1226\u001b[0m \u001b[0mgradient_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_delayed_rewrite_functions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregister\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 510\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"executor_type\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"config_proto\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 511\u001b[0;31m ctx=ctx)\n\u001b[0m\u001b[1;32m 512\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 513\u001b[0m outputs = execute.execute_with_cancellation(\n",
"\u001b[0;32m/tensorflow-2.0.0-rc2/python3.6/tensorflow_core/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 59\u001b[0m tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,\n\u001b[1;32m 60\u001b[0m \u001b[0mop_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m num_outputs)\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "R2rkANYlRnIy",
"colab_type": "code",
"outputId": "17d533e2-5c39-4175-fd9d-67ab1b142152",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 218
}
},
"source": [
"# Now train the model, with the weak learner size we just determined: From the output of the last cell we estimate\n",
"# that width 80 will probably suffice.\n",
"import datetime\n",
"now=datetime.datetime.now()\n",
"\n",
"tf.random.set_seed(3141592653) # set a fixed (arbitrary) seed for TensorFlow's random numbers, global level\n",
"np.random.seed(seed=3141592653) # ...and do the same for numpy's random numbers\n",
"\n",
"model_name = 'benchml10k'\n",
"activation_name ='tanh'\n",
"weak_learners = 100 # 100 is good for a final model\n",
"\n",
"# We go for a low batch size (slow, but less prone to overfitting).\n",
"# The learning rate has been chosen by some quick trials (going down from 1 by dividing by 10 in each step until\n",
"# learning is sufficiently stable).\n",
"# We combine that with a low number of epochs as we only need a rough estimation to gauge the correlations.\n",
"learning_rate = 1\n",
"batch_size = 10\n",
"epochs = 5\n",
"width = 80\n",
"\n",
"(inputs, outputs) = averaging_ensemble(inputs_numeric=flights.no_of_predictors() - len(flights.get_index_lengths()), \n",
" inputs_for_embedding=len(flights.get_index_lengths()), \n",
" embedding_input_dims=list(input_dims.values()), # list(flights.get_index_lengths().values()), \n",
" embedding_output_dims=list(output_dimensions.values()),\n",
" width=width, weak_learners=weak_learners, activation_name=activation_name,\n",
" share_embedding_layer=True)\n",
"model = tf.keras.Model(inputs=inputs, outputs=outputs)\n",
"validation_data = (data_test_df.drop(columns=['target']).values, \n",
" data_test_df['target'].values)\n",
"model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), loss='binary_crossentropy', \n",
" metrics=['mae', 'AUC'])\n",
"model.fit(data_train_df[[c for c in data_train_df.columns if c != 'target']].values, data_train_df['target'].values, \n",
" epochs=epochs, batch_size=batch_size, validation_data=validation_data) "
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Train on 10000 samples, validate on 100000 samples\n",
"Epoch 1/5\n",
"10000/10000 [==============================] - 173s 17ms/sample - loss: 0.4777 - mae: 0.3229 - AUC: 0.6430 - val_loss: 0.4889 - val_mae: 0.3036 - val_AUC: 0.6842\n",
"Epoch 2/5\n",
"10000/10000 [==============================] - 168s 17ms/sample - loss: 0.4518 - mae: 0.2883 - AUC: 0.6952 - val_loss: 0.4874 - val_mae: 0.2984 - val_AUC: 0.6884\n",
"Epoch 3/5\n",
"10000/10000 [==============================] - 167s 17ms/sample - loss: 0.4470 - mae: 0.2835 - AUC: 0.7048 - val_loss: 0.4836 - val_mae: 0.3005 - val_AUC: 0.6931\n",
"Epoch 4/5\n",
"10000/10000 [==============================] - 166s 17ms/sample - loss: 0.4436 - mae: 0.2826 - AUC: 0.7113 - val_loss: 0.4829 - val_mae: 0.2977 - val_AUC: 0.6959\n",
"Epoch 5/5\n",
"10000/10000 [==============================] - 157s 16ms/sample - loss: 0.4405 - mae: 0.2788 - AUC: 0.7176 - val_loss: 0.4836 - val_mae: 0.3009 - val_AUC: 0.6927\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "IjTU37l8pNVu",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}