In [1]:
from sklearn import datasets
import pandas as pd
pd.set_option("display.max_columns", 100)
from comparison.model_comparison import ModelComparison, ModelName
from comparison.comparison_datasets import TaskName
from comparison.tuned_model_comparison import TunedModelComparison
import plotly.express as px
import plotly.graph_objects as go
import json

# Loading data

In [2]:
with open("tuned_perf_comparison.json", "r") as input_stream:
    tuned_perfs = json.load(input_stream)
with open("perf_comparison.json", "r") as input_stream:
    default_perfs = json.load(input_stream)

In [3]:
tuned_scores = {dataset_name: {model_name + "_with_tuned_parameters": tuned_perfs[dataset_name][model_name]["model_score"] 
                for model_name in tuned_perfs[dataset_name].keys()}
                for dataset_name in tuned_perfs.keys()}

In [4]:
untuned_perfs = {dataset_name: {model_name + "_with_default_parameters": default_perfs[dataset_name][model_name]["model_score"] 
                for model_name in default_perfs[dataset_name].keys()}
                for dataset_name in default_perfs.keys()}

In [5]:
perfs_tuned_vs_untuned = pd.concat([pd.DataFrame(untuned_perfs), pd.DataFrame(tuned_scores)])\
                                .transpose().fillna(0.773)
perfs_tuned_vs_untuned

Unnamed: 0,catboost_with_default_parameters,lightgbm_with_default_parameters,lightgbm_with_catboost_encoder_with_default_parameters,xgboost_with_catboost_encoder_with_default_parameters,xgboost_with_default_parameters,catboost_with_tuned_parameters,lightgbm_with_tuned_parameters,lightgbm_with_catboost_encoder_with_tuned_parameters,xgboost_with_catboost_encoder_with_tuned_parameters,xgboost_with_tuned_parameters
california,0.849406,0.835564,0.835564,0.831576,0.831576,0.859586,0.85428,0.854029,0.850704,0.849581
adult,0.859957,0.858053,0.856251,0.853773,0.856517,0.860857,0.860653,0.859322,0.85883,0.859506
ukair,0.82214,0.804285,0.801945,0.829402,0.836355,0.855069,0.835399,0.829692,0.840542,0.865997
diabetes,0.753759,0.733049,0.733049,0.725222,0.725222,0.774761,0.774744,0.772198,0.776094,0.772198
bank,0.909823,0.909358,0.907014,0.901285,0.905687,0.911084,0.91042,0.908606,0.908407,0.909093
dating,0.86739,0.870016,0.868226,0.865719,0.87121,0.87276,0.872168,0.868585,0.866554,0.873596
valley,0.535493,0.586621,0.586621,0.559428,0.559428,0.594807,0.622114,0.615529,0.673307,0.674151
cars,0.535673,0.521252,0.518111,0.5037,0.481019,0.541005,0.529365,0.530834,0.530311,0.531778


In [6]:
dataset_lengths = {dataset_name: default_perfs[dataset_name]["catboost"]["dataset_length"] 
                   for dataset_name in default_perfs.keys()}
num_categories = {dataset_name: default_perfs[dataset_name]["catboost"]["num_categories"] 
                  for dataset_name in default_perfs.keys()}
prop_categorical = {dataset_name: default_perfs[dataset_name]["catboost"]["num_categorical_features"] / float(default_perfs[dataset_name]["catboost"]["num_features"])
                    for dataset_name in default_perfs.keys()}

In [7]:
perfs_tuned_vs_untuned_scaled = perfs_tuned_vs_untuned.assign(**{col_name: perfs_tuned_vs_untuned[col_name] / perfs_tuned_vs_untuned["xgboost_with_default_parameters"]
                                                                 for col_name in perfs_tuned_vs_untuned.columns})\
                                                        .assign(length=pd.Series(dataset_lengths),
                                                               categorical_features_proportion=pd.Series(prop_categorical),
                                                               num_categories=pd.Series(num_categories))
perfs_tuned_vs_untuned_scaled

Unnamed: 0,catboost_with_default_parameters,lightgbm_with_default_parameters,lightgbm_with_catboost_encoder_with_default_parameters,xgboost_with_catboost_encoder_with_default_parameters,xgboost_with_default_parameters,catboost_with_tuned_parameters,lightgbm_with_tuned_parameters,lightgbm_with_catboost_encoder_with_tuned_parameters,xgboost_with_catboost_encoder_with_tuned_parameters,xgboost_with_tuned_parameters,length,categorical_features_proportion,num_categories
california,1.021441,1.004796,1.004796,1.0,1.0,1.033683,1.027302,1.027,1.023002,1.021651,20640,0.0,0
adult,1.004016,1.001793,0.999689,0.996797,1.0,1.005068,1.004829,1.003275,1.002701,1.00349,48842,0.857143,122
ukair,0.983004,0.961656,0.958858,0.991687,1.0,1.022377,0.998858,0.992034,1.005006,1.035443,394299,0.555556,106
diabetes,1.03935,1.010792,1.010792,1.0,1.0,1.068308,1.068285,1.064774,1.070146,1.064774,768,0.0,0
bank,1.004567,1.004054,1.001465,0.99514,1.0,1.005959,1.005226,1.003224,1.003004,1.003761,45211,0.5625,44
dating,0.995615,0.99863,0.996575,0.993697,1.0,1.001779,1.001099,0.996987,0.994656,1.002739,8378,0.508333,444
valley,0.957214,1.048608,1.048608,1.0,1.0,1.06324,1.112052,1.100282,1.203563,1.205072,1212,0.0,0
cars,1.113619,1.08364,1.077111,1.04715,1.0,1.124704,1.100506,1.103561,1.102473,1.105523,38531,0.793103,1246


# Dataset length impact

In [9]:
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"], 
                                y=perfs_tuned_vs_untuned_scaled["xgboost_with_default_parameters"],
                                mode='markers',
                               marker_color="#189FDD",
                                name="xgboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"], 
                                y=perfs_tuned_vs_untuned_scaled["lightgbm_with_default_parameters"],
                                mode='markers',
                               marker_color="#76B644",
                                name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"], 
                                y=perfs_tuned_vs_untuned_scaled["catboost_with_default_parameters"],
                                mode='markers',
                               marker_color="#FFCC00",
                                name="catboost")
                     ])
fig.update_xaxes(type="log", title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")

fig.update_layout(title="Performance of models with default parameters, given dataset length")
fig.show()

In [10]:
perfs_tuned_vs_untuned_scaled.sort_values("num_categories")

Unnamed: 0,catboost_with_default_parameters,lightgbm_with_default_parameters,lightgbm_with_catboost_encoder_with_default_parameters,xgboost_with_catboost_encoder_with_default_parameters,xgboost_with_default_parameters,catboost_with_tuned_parameters,lightgbm_with_tuned_parameters,lightgbm_with_catboost_encoder_with_tuned_parameters,xgboost_with_catboost_encoder_with_tuned_parameters,xgboost_with_tuned_parameters,length,categorical_features_proportion,num_categories
california,1.021441,1.004796,1.004796,1.0,1.0,1.033683,1.027302,1.027,1.023002,1.021651,20640,0.0,0
diabetes,1.03935,1.010792,1.010792,1.0,1.0,1.068308,1.068285,1.064774,1.070146,1.064774,768,0.0,0
valley,0.957214,1.048608,1.048608,1.0,1.0,1.06324,1.112052,1.100282,1.203563,1.205072,1212,0.0,0
bank,1.004567,1.004054,1.001465,0.99514,1.0,1.005959,1.005226,1.003224,1.003004,1.003761,45211,0.5625,44
ukair,0.983004,0.961656,0.958858,0.991687,1.0,1.022377,0.998858,0.992034,1.005006,1.035443,394299,0.555556,106
adult,1.004016,1.001793,0.999689,0.996797,1.0,1.005068,1.004829,1.003275,1.002701,1.00349,48842,0.857143,122
dating,0.995615,0.99863,0.996575,0.993697,1.0,1.001779,1.001099,0.996987,0.994656,1.002739,8378,0.508333,444
cars,1.113619,1.08364,1.077111,1.04715,1.0,1.124704,1.100506,1.103561,1.102473,1.105523,38531,0.793103,1246


In [11]:
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"], 
                                y=perfs_tuned_vs_untuned_scaled["xgboost_with_default_parameters"],
                                mode='markers',
                               marker_color="#189FDD",
                                name="xgboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"], 
                                y=perfs_tuned_vs_untuned_scaled["lightgbm_with_default_parameters"],
                                mode='markers',
                               marker_color="#76B644",
                                name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["length"], 
                                y=perfs_tuned_vs_untuned_scaled["catboost_with_default_parameters"],
                                mode='markers',
                               marker_color="#FFCC00",
                                name="catboost")
                     ])
fig.update_xaxes(type="log", title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")

fig.update_layout(title="Performance of models with tuned parameters, given dataset length")
fig.show()

# Proportion of categorical features impact

In [12]:
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"], 
                                y=perfs_tuned_vs_untuned_scaled["catboost_with_default_parameters"],
                                mode='markers',
                               marker_color="#FFCC00",
                                name="catboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"], 
                                y=perfs_tuned_vs_untuned_scaled["lightgbm_with_default_parameters"],
                                mode='markers',
                               marker_color="#76B644",
                                name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"], 
                                y=perfs_tuned_vs_untuned_scaled["xgboost_with_default_parameters"],
                                mode='markers',
                               marker_color="#189FDD",
                                name="xgboost")
                     ])
fig.update_xaxes(title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")

fig.update_layout(title="Performance of models with default parameters, given proportion of categorical features")
fig.show()

In [13]:
fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"], 
                                y=perfs_tuned_vs_untuned_scaled["catboost_with_tuned_parameters"],
                                mode='markers',
                               marker_color="#FFCC00",
                                name="catboost"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"], 
                                y=perfs_tuned_vs_untuned_scaled["lightgbm_with_tuned_parameters"],
                                mode='markers',
                               marker_color="#76B644",
                                name="lightgbm"),
                      go.Scatter(x=perfs_tuned_vs_untuned_scaled["categorical_features_proportion"], 
                                y=perfs_tuned_vs_untuned_scaled["xgboost_with_tuned_parameters"],
                                mode='markers',
                               marker_color="#189FDD",
                                name="xgboost")
                     ])
fig.update_xaxes(title="Dataset length")
fig.update_yaxes(title="Performance difference with xgboost")

fig.update_layout(title="Performance of models with tuned parameters, given proportion of categorical features")
fig.show()

# All performances

In [36]:
px.bar(perfs_tuned_vs_untuned[["xgboost_with_default_parameters", "lightgbm_with_default_parameters", "catboost_with_default_parameters",
                              "xgboost_with_tuned_parameters", "lightgbm_with_tuned_parameters", "catboost_with_tuned_parameters"]],
       labels={
             "index": "Dataset",
             "value": "Score",
             "variable": "Model",
                 },
       barmode="group", template='xgridoff',
      color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                              "dodgerblue", "olivedrab", "orange"])

In [20]:
px.bar(perfs_tuned_vs_untuned_scaled[["xgboost_with_default_parameters", "lightgbm_with_default_parameters", "catboost_with_default_parameters",
                              "xgboost_with_tuned_parameters", "lightgbm_with_tuned_parameters", "catboost_with_tuned_parameters"]],
       labels={
             "index": "Dataset",
             "value": "Score",
             "variable": "Model",
                 },
       barmode="group", template='xgridoff',
      color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                              "dodgerblue", "olivedrab", "orange"])

In [21]:
print("Mean score of each model, comparing to xgboost")
perfs_tuned_vs_untuned_scaled.drop(columns=["categorical_features_proportion", "num_categories", "length"]).dropna()\
                             .mean(axis=0)

Mean score of each model, comparing to xgboost


catboost_with_default_parameters                          1.014853
lightgbm_with_default_parameters                          1.014246
lightgbm_with_catboost_encoder_with_default_parameters    1.012237
xgboost_with_catboost_encoder_with_default_parameters     1.003059
xgboost_with_default_parameters                           1.000000
catboost_with_tuned_parameters                            1.040640
lightgbm_with_tuned_parameters                            1.039770
lightgbm_with_catboost_encoder_with_tuned_parameters      1.036392
xgboost_with_catboost_encoder_with_tuned_parameters       1.050569
xgboost_with_tuned_parameters                             1.055307
dtype: float64

In [22]:
print("Median score of each model, comparing to xgboost")
perfs_tuned_vs_untuned_scaled.drop(columns=["categorical_features_proportion", "num_categories", "length"]).dropna()\
                             .median(axis=0)

Median score of each model, comparing to xgboost


catboost_with_default_parameters                          1.004291
lightgbm_with_default_parameters                          1.004425
lightgbm_with_catboost_encoder_with_default_parameters    1.003131
xgboost_with_catboost_encoder_with_default_parameters     0.998398
xgboost_with_default_parameters                           1.000000
catboost_with_tuned_parameters                            1.028030
lightgbm_with_tuned_parameters                            1.016264
lightgbm_with_catboost_encoder_with_tuned_parameters      1.015137
xgboost_with_catboost_encoder_with_tuned_parameters       1.014004
xgboost_with_tuned_parameters                             1.028547
dtype: float64

In [23]:
print("Mean rank of each default model")
perfs_tuned_vs_untuned_scaled[["catboost_with_default_parameters", "lightgbm_with_default_parameters", 
                               "xgboost_with_default_parameters"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()

Mean rank of each default model


catboost_with_default_parameters    1.375
lightgbm_with_default_parameters    1.000
xgboost_with_default_parameters     0.625
dtype: float64

In [24]:
print("Mean rank of each tuned model")
perfs_tuned_vs_untuned_scaled[["catboost_with_tuned_parameters", "lightgbm_with_tuned_parameters", 
                               "xgboost_with_tuned_parameters"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()

Mean rank of each tuned model


catboost_with_tuned_parameters    1.500
lightgbm_with_tuned_parameters    0.625
xgboost_with_tuned_parameters     0.875
dtype: float64

# Training and prediction time

In [25]:
training_times = pd.DataFrame({dataset_name: {model_name: default_perfs[dataset_name][model_name]["training_time"] 
                for model_name in default_perfs[dataset_name].keys()}
                for dataset_name in default_perfs.keys()}).transpose()
prediction_times = pd.DataFrame({dataset_name: {model_name: default_perfs[dataset_name][model_name]["prediction_time"] 
                for model_name in default_perfs[dataset_name].keys()}
                for dataset_name in default_perfs.keys()}).transpose()
times_df = pd.concat([training_times, prediction_times], axis=1)

In [26]:
times_df

Unnamed: 0,catboost,lightgbm,lightgbm_with_catboost_encoder,xgboost_with_catboost_encoder,xgboost,catboost.1,lightgbm.1,lightgbm_with_catboost_encoder.1,xgboost_with_catboost_encoder.1,xgboost.1
california,24.689261,0.178142,0.171883,119.791898,118.781466,0.014021,0.031725,0.031709,0.04888,0.042857
adult,149.724603,0.811872,1.547185,129.368296,112.526765,0.129134,0.100814,0.22269,0.194931,0.149172
ukair,144.291018,3.167551,3.082787,65.907621,33.585652,0.725683,1.181819,1.214606,0.544002,0.187727
diabetes,9.472824,0.070993,0.180932,151.992842,153.275499,0.011634,0.00442,0.010192,0.143612,0.157037
bank,119.778596,0.450672,1.30789,128.35589,117.680134,0.064928,0.087015,0.091556,0.206689,0.127684
dating,218.227686,2.734924,2.686051,240.91434,224.078994,0.189192,0.205373,0.168617,0.556958,0.273364
valley,70.126429,1.343469,2.123457,182.244021,182.811797,0.01628,0.004819,0.004638,0.138728,0.125285
cars,141.749353,1.185837,1.893203,143.05152,135.966851,0.933087,0.122637,0.194635,0.281825,0.178372


In [27]:
px.bar(training_times[["xgboost", "lightgbm", "catboost"]], barmode="group", log_y=True, labels={
             "index": "Dataset",
             "value": "Training time",
             "variable": "Model",
                 },
       template='xgridoff',
      color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                              "dodgerblue", "olivedrab", "orange"])

In [28]:
px.bar(prediction_times[["xgboost", "lightgbm", "catboost"]], barmode="group", log_y=True, labels={
             "index": "Dataset",
             "value": "Training time",
             "variable": "Model",
                 },
       template='xgridoff',
      color_discrete_sequence=['#189FDD', "#76B644", "#FFCC00",
                              "dodgerblue", "olivedrab", "orange"])

In [29]:
print("Mean training times")
training_times.mean(axis=0)

Mean training times


catboost                          109.757471
lightgbm                            1.242933
lightgbm_with_catboost_encoder      1.624173
xgboost_with_catboost_encoder     145.203303
xgboost                           134.838395
dtype: float64

In [30]:
print("Mdian training times comparing to xgboost training time")
training_times.assign(**{col_name: training_times[col_name] / training_times["xgboost"]
                        for col_name in training_times.columns}).median(axis=0)

Mdian training times comparing to xgboost training time


catboost                          0.995860
lightgbm                          0.007282
lightgbm_with_catboost_encoder    0.011801
xgboost_with_catboost_encoder     1.063619
xgboost                           1.000000
dtype: float64

In [31]:
print("Mean prediction times")
prediction_times.mean(axis=0)

Mean prediction times


catboost                          0.260495
lightgbm                          0.217328
lightgbm_with_catboost_encoder    0.242330
xgboost_with_catboost_encoder     0.264453
xgboost                           0.155187
dtype: float64

In [32]:
print("Mdian prediction times comparing to xgboost prediction time")
prediction_times.assign(**{col_name: prediction_times[col_name] / prediction_times["xgboost"]
                        for col_name in prediction_times.columns}).median(axis=0)

Mdian prediction times comparing to xgboost prediction time


catboost                          0.600297
lightgbm                          0.684511
lightgbm_with_catboost_encoder    0.728463
xgboost_with_catboost_encoder     1.443369
xgboost                           1.000000
dtype: float64