{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "intermediate-namibia", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.632701Z", "start_time": "2021-03-22T09:50:11.444255Z" }, "lines_to_next_cell": 2 }, "outputs": [], "source": [ "from sklearn import datasets\n", "import pandas as pd\n", "pd.set_option(\"display.max_columns\", 100)\n", "from comparison.model_comparison import ModelComparison, ModelName\n", "from comparison.comparison_datasets import TaskName\n", "from comparison.tuned_model_comparison import TunedModelComparison\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "import json" ] },
{ "cell_type": "markdown", "id": "attractive-waters", "metadata": {}, "source": [ "# Loading data" ] },
{ "cell_type": "code", "execution_count": 2, "id": "incomplete-joshua", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.642319Z", "start_time": "2021-03-22T09:50:19.639032Z" } }, "outputs": [], "source": [
"# Each file maps dataset name -> model name -> dict of results for that run;\n",
"# the cells below read its \"model_score\" entry.\n",
"# The files are decoded explicitly as UTF-8 so loading does not depend on the platform locale.\n",
"with open(\"tuned_perf_comparison.json\", \"r\", encoding=\"utf-8\") as input_stream:\n",
"    tuned_perfs = json.load(input_stream)\n",
"with open(\"perf_comparison.json\", \"r\", encoding=\"utf-8\") as input_stream:\n",
"    default_perfs = json.load(input_stream)" ] },
{ "cell_type": "code", "execution_count": 3, "id": "built-straight", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.649917Z", "start_time": "2021-03-22T09:50:19.647476Z" } }, "outputs": [], "source": [
"def extract_model_scores(perfs, suffix):\n",
"    \"\"\"Collect the \"model_score\" of every model on every dataset.\n",
"\n",
"    Args:\n",
"        perfs: nested dict, dataset name -> model name -> dict with a \"model_score\" key.\n",
"        suffix: text appended to each model name so that tuned and default runs\n",
"            end up under distinct labels.\n",
"\n",
"    Returns:\n",
"        dict mapping dataset name -> {model name + suffix: model score}.\n",
"    \"\"\"\n",
"    return {dataset_name: {model_name + suffix: model_perf[\"model_score\"]\n",
"                           for model_name, model_perf in dataset_perfs.items()}\n",
"            for dataset_name, dataset_perfs in perfs.items()}\n",
"\n",
"\n",
"tuned_scores = extract_model_scores(tuned_perfs, \"_with_tuned_parameters\")" ] },
{ "cell_type": "code", "execution_count": 4, "id": "color-jackson", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.659772Z", "start_time": "2021-03-22T09:50:19.657400Z" } }, "outputs": [], "source": [
"# Same extraction for the default-parameter runs (helper defined in the previous cell).\n",
"untuned_perfs = extract_model_scores(default_perfs, \"_with_default_parameters\")" ] },
{ "cell_type": "code", "execution_count": 5, "id": "likely-romance", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.686047Z", "start_time": "2021-03-22T09:50:19.666035Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
catboost_with_default_parameterslightgbm_with_default_parameterslightgbm_with_catboost_encoder_with_default_parametersxgboost_with_catboost_encoder_with_default_parametersxgboost_with_default_parameterscatboost_with_tuned_parameterslightgbm_with_tuned_parameterslightgbm_with_catboost_encoder_with_tuned_parametersxgboost_with_catboost_encoder_with_tuned_parametersxgboost_with_tuned_parameters
california0.8494060.8355640.8355640.8315760.8315760.8595860.8542800.8540290.8507040.849581
adult0.8599570.8580530.8562510.8537730.8565170.8608570.8606530.8593220.8588300.859506
ukair0.8221400.8042850.8019450.8294020.8363550.8550690.8353990.8296920.8405420.865997
diabetes0.7537590.7330490.7330490.7252220.7252220.7747610.7747440.7721980.7760940.772198
bank0.9098230.9093580.9070140.9012850.9056870.9110840.9104200.9086060.9084070.909093
dating0.8673900.8700160.8682260.8657190.8712100.8727600.8721680.8685850.8665540.873596
valley0.5354930.5866210.5866210.5594280.5594280.5948070.6221140.6155290.6733070.674151
cars0.5356730.5212520.5181110.5037000.4810190.5410050.5293650.5308340.5303110.531778
\n", "
" ], "text/plain": [ " catboost_with_default_parameters \\\n", "california 0.849406 \n", "adult 0.859957 \n", "ukair 0.822140 \n", "diabetes 0.753759 \n", "bank 0.909823 \n", "dating 0.867390 \n", "valley 0.535493 \n", "cars 0.535673 \n", "\n", " lightgbm_with_default_parameters \\\n", "california 0.835564 \n", "adult 0.858053 \n", "ukair 0.804285 \n", "diabetes 0.733049 \n", "bank 0.909358 \n", "dating 0.870016 \n", "valley 0.586621 \n", "cars 0.521252 \n", "\n", " lightgbm_with_catboost_encoder_with_default_parameters \\\n", "california 0.835564 \n", "adult 0.856251 \n", "ukair 0.801945 \n", "diabetes 0.733049 \n", "bank 0.907014 \n", "dating 0.868226 \n", "valley 0.586621 \n", "cars 0.518111 \n", "\n", " xgboost_with_catboost_encoder_with_default_parameters \\\n", "california 0.831576 \n", "adult 0.853773 \n", "ukair 0.829402 \n", "diabetes 0.725222 \n", "bank 0.901285 \n", "dating 0.865719 \n", "valley 0.559428 \n", "cars 0.503700 \n", "\n", " xgboost_with_default_parameters catboost_with_tuned_parameters \\\n", "california 0.831576 0.859586 \n", "adult 0.856517 0.860857 \n", "ukair 0.836355 0.855069 \n", "diabetes 0.725222 0.774761 \n", "bank 0.905687 0.911084 \n", "dating 0.871210 0.872760 \n", "valley 0.559428 0.594807 \n", "cars 0.481019 0.541005 \n", "\n", " lightgbm_with_tuned_parameters \\\n", "california 0.854280 \n", "adult 0.860653 \n", "ukair 0.835399 \n", "diabetes 0.774744 \n", "bank 0.910420 \n", "dating 0.872168 \n", "valley 0.622114 \n", "cars 0.529365 \n", "\n", " lightgbm_with_catboost_encoder_with_tuned_parameters \\\n", "california 0.854029 \n", "adult 0.859322 \n", "ukair 0.829692 \n", "diabetes 0.772198 \n", "bank 0.908606 \n", "dating 0.868585 \n", "valley 0.615529 \n", "cars 0.530834 \n", "\n", " xgboost_with_catboost_encoder_with_tuned_parameters \\\n", "california 0.850704 \n", "adult 0.858830 \n", "ukair 0.840542 \n", "diabetes 0.776094 \n", "bank 0.908407 \n", "dating 0.866554 \n", "valley 0.673307 \n", "cars 0.530311 \n", "\n", " 
xgboost_with_tuned_parameters \n", "california 0.849581 \n", "adult 0.859506 \n", "ukair 0.865997 \n", "diabetes 0.772198 \n", "bank 0.909093 \n", "dating 0.873596 \n", "valley 0.674151 \n", "cars 0.531778 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Rows are datasets, columns are the suffixed model names (default runs first, then tuned runs).\n",
"# Missing model/dataset pairs are deliberately left as NaN: filling them with a constant\n",
"# would scale and plot an invented score as if it were a real measurement.\n",
"perfs_tuned_vs_untuned = pd.concat(\n",
"    [pd.DataFrame(untuned_perfs), pd.DataFrame(tuned_scores)]\n",
").transpose()\n",
"perfs_tuned_vs_untuned" ] },
{ "cell_type": "code", "execution_count": 6, "id": "governing-demographic", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.701180Z", "start_time": "2021-03-22T09:50:19.698208Z" } }, "outputs": [], "source": [
"# Dataset-level metadata is read from the \"catboost\" entry of each dataset.\n",
"catboost_info = {dataset_name: dataset_perfs[\"catboost\"]\n",
"                 for dataset_name, dataset_perfs in default_perfs.items()}\n",
"dataset_lengths = {dataset_name: info[\"dataset_length\"]\n",
"                   for dataset_name, info in catboost_info.items()}\n",
"num_categories = {dataset_name: info[\"num_categories\"]\n",
"                  for dataset_name, info in catboost_info.items()}\n",
"# Number of categorical features divided by the total number of features.\n",
"prop_categorical = {dataset_name: info[\"num_categorical_features\"] / float(info[\"num_features\"])\n",
"                    for dataset_name, info in catboost_info.items()}" ] },
{ "cell_type": "code", "execution_count": 7, "id": "stuck-stereo", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:19.733202Z", "start_time": "2021-03-22T09:50:19.712113Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
catboost_with_default_parameterslightgbm_with_default_parameterslightgbm_with_catboost_encoder_with_default_parametersxgboost_with_catboost_encoder_with_default_parametersxgboost_with_default_parameterscatboost_with_tuned_parameterslightgbm_with_tuned_parameterslightgbm_with_catboost_encoder_with_tuned_parametersxgboost_with_catboost_encoder_with_tuned_parametersxgboost_with_tuned_parameterslengthcategorical_features_proportionnum_categories
california1.0214411.0047961.0047961.0000001.01.0336831.0273021.0270001.0230021.021651206400.0000000
adult1.0040161.0017930.9996890.9967971.01.0050681.0048291.0032751.0027011.003490488420.857143122
ukair0.9830040.9616560.9588580.9916871.01.0223770.9988580.9920341.0050061.0354433942990.555556106
diabetes1.0393501.0107921.0107921.0000001.01.0683081.0682851.0647741.0701461.0647747680.0000000
bank1.0045671.0040541.0014650.9951401.01.0059591.0052261.0032241.0030041.003761452110.56250044
dating0.9956150.9986300.9965750.9936971.01.0017791.0010990.9969870.9946561.00273983780.508333444
valley0.9572141.0486081.0486081.0000001.01.0632401.1120521.1002821.2035631.20507212120.0000000
cars1.1136191.0836401.0771111.0471501.01.1247041.1005061.1035611.1024731.105523385310.7931031246
\n", "
" ], "text/plain": [ " catboost_with_default_parameters \\\n", "california 1.021441 \n", "adult 1.004016 \n", "ukair 0.983004 \n", "diabetes 1.039350 \n", "bank 1.004567 \n", "dating 0.995615 \n", "valley 0.957214 \n", "cars 1.113619 \n", "\n", " lightgbm_with_default_parameters \\\n", "california 1.004796 \n", "adult 1.001793 \n", "ukair 0.961656 \n", "diabetes 1.010792 \n", "bank 1.004054 \n", "dating 0.998630 \n", "valley 1.048608 \n", "cars 1.083640 \n", "\n", " lightgbm_with_catboost_encoder_with_default_parameters \\\n", "california 1.004796 \n", "adult 0.999689 \n", "ukair 0.958858 \n", "diabetes 1.010792 \n", "bank 1.001465 \n", "dating 0.996575 \n", "valley 1.048608 \n", "cars 1.077111 \n", "\n", " xgboost_with_catboost_encoder_with_default_parameters \\\n", "california 1.000000 \n", "adult 0.996797 \n", "ukair 0.991687 \n", "diabetes 1.000000 \n", "bank 0.995140 \n", "dating 0.993697 \n", "valley 1.000000 \n", "cars 1.047150 \n", "\n", " xgboost_with_default_parameters catboost_with_tuned_parameters \\\n", "california 1.0 1.033683 \n", "adult 1.0 1.005068 \n", "ukair 1.0 1.022377 \n", "diabetes 1.0 1.068308 \n", "bank 1.0 1.005959 \n", "dating 1.0 1.001779 \n", "valley 1.0 1.063240 \n", "cars 1.0 1.124704 \n", "\n", " lightgbm_with_tuned_parameters \\\n", "california 1.027302 \n", "adult 1.004829 \n", "ukair 0.998858 \n", "diabetes 1.068285 \n", "bank 1.005226 \n", "dating 1.001099 \n", "valley 1.112052 \n", "cars 1.100506 \n", "\n", " lightgbm_with_catboost_encoder_with_tuned_parameters \\\n", "california 1.027000 \n", "adult 1.003275 \n", "ukair 0.992034 \n", "diabetes 1.064774 \n", "bank 1.003224 \n", "dating 0.996987 \n", "valley 1.100282 \n", "cars 1.103561 \n", "\n", " xgboost_with_catboost_encoder_with_tuned_parameters \\\n", "california 1.023002 \n", "adult 1.002701 \n", "ukair 1.005006 \n", "diabetes 1.070146 \n", "bank 1.003004 \n", "dating 0.994656 \n", "valley 1.203563 \n", "cars 1.102473 \n", "\n", " xgboost_with_tuned_parameters length 
\\\n", "california 1.021651 20640 \n", "adult 1.003490 48842 \n", "ukair 1.035443 394299 \n", "diabetes 1.064774 768 \n", "bank 1.003761 45211 \n", "dating 1.002739 8378 \n", "valley 1.205072 1212 \n", "cars 1.105523 38531 \n", "\n", " categorical_features_proportion num_categories \n", "california 0.000000 0 \n", "adult 0.857143 122 \n", "ukair 0.555556 106 \n", "diabetes 0.000000 0 \n", "bank 0.562500 44 \n", "dating 0.508333 444 \n", "valley 0.000000 0 \n", "cars 0.793103 1246 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Express every score as a ratio to default-parameter xgboost on the same dataset,\n",
"# then attach the per-dataset metadata columns (aligned on the dataset-name index).\n",
"xgboost_default_scores = perfs_tuned_vs_untuned[\"xgboost_with_default_parameters\"]\n",
"perfs_tuned_vs_untuned_scaled = perfs_tuned_vs_untuned.div(xgboost_default_scores, axis=0).assign(\n",
"    length=pd.Series(dataset_lengths),\n",
"    categorical_features_proportion=pd.Series(prop_categorical),\n",
"    num_categories=pd.Series(num_categories),\n",
")\n",
"perfs_tuned_vs_untuned_scaled" ] },
{ "cell_type": "markdown", "id": "incident-scenario", "metadata": {}, "source": [ "# Dataset length impact" ] },
{ "cell_type": "code", "execution_count": 9, "id": "departmental-summit", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:39.782497Z", "start_time": "2021-03-22T09:50:39.764840Z" }, "lines_to_next_cell": 2 }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "marker": { "color": "#189FDD" }, "mode": "markers", "name": "xgboost", "type": "scatter", "x": [ 20640, 48842, 394299, 768, 45211, 8378, 1212, 38531 ], "y": [ 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "marker": { "color": "#76B644" }, "mode": "markers", "name": "lightgbm", "type": "scatter", "x": [ 20640, 48842, 394299, 768, 45211, 8378, 1212, 38531 ], "y": [ 1.0047958990004422, 1.0017927892142444, 0.9616557411395943, 1.0107917059377947, 1.004054035155882, 0.9986296216549206, 1.0486078660436138, 1.0836403293103205 ] }, { "marker": {
"color": "#FFCC00" }, "mode": "markers", "name": "catboost", "type": "scatter", "x": [ 20640, 48842, 394299, 768, 45211, 8378, 1212, 38531 ], "y": [ 1.0214405078542739, 1.0040158386780615, 0.9830037213493318, 1.039349670122526, 1.0045668602961224, 0.9956152797675948, 0.9572137850467289, 1.1136192390017632 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { 
"marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" 
], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": 
{ "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Performance of models with default parameters, given dataset length" }, "xaxis": { "title": { "text": "Dataset length" }, "type": "log" }, "yaxis": { "title": { "text": "Performance difference with xgboost" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# One marker trace per library, each reading its \"<model>_with_default_parameters\" column.\n",
"# The plotted values are scores divided by the default-parameter xgboost score\n",
"# (xgboost itself is therefore constant at 1), so the y axis is labelled as a ratio.\n",
"default_model_colors = {\"xgboost\": \"#189FDD\", \"lightgbm\": \"#76B644\", \"catboost\": \"#FFCC00\"}\n",
"fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"length\"],\n",
"                                 y=perfs_tuned_vs_untuned_scaled[model_name + \"_with_default_parameters\"],\n",
"                                 mode=\"markers\",\n",
"                                 marker_color=color,\n",
"                                 name=model_name)\n",
"                      for model_name, color in default_model_colors.items()])\n",
"fig.update_xaxes(type=\"log\", title=\"Dataset length\")\n",
"fig.update_yaxes(title=\"Score relative to xgboost with default parameters (ratio)\")\n",
"\n",
"fig.update_layout(title=\"Performance of models with default parameters, given dataset length\")\n",
"fig.show()" ] },
{ "cell_type": "code", "execution_count": 10, "id": "higher-child", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:50:43.555137Z", "start_time": "2021-03-22T09:50:43.539364Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
catboost_with_default_parameterslightgbm_with_default_parameterslightgbm_with_catboost_encoder_with_default_parametersxgboost_with_catboost_encoder_with_default_parametersxgboost_with_default_parameterscatboost_with_tuned_parameterslightgbm_with_tuned_parameterslightgbm_with_catboost_encoder_with_tuned_parametersxgboost_with_catboost_encoder_with_tuned_parametersxgboost_with_tuned_parameterslengthcategorical_features_proportionnum_categories
california1.0214411.0047961.0047961.0000001.01.0336831.0273021.0270001.0230021.021651206400.0000000
diabetes1.0393501.0107921.0107921.0000001.01.0683081.0682851.0647741.0701461.0647747680.0000000
valley0.9572141.0486081.0486081.0000001.01.0632401.1120521.1002821.2035631.20507212120.0000000
bank1.0045671.0040541.0014650.9951401.01.0059591.0052261.0032241.0030041.003761452110.56250044
ukair0.9830040.9616560.9588580.9916871.01.0223770.9988580.9920341.0050061.0354433942990.555556106
adult1.0040161.0017930.9996890.9967971.01.0050681.0048291.0032751.0027011.003490488420.857143122
dating0.9956150.9986300.9965750.9936971.01.0017791.0010990.9969870.9946561.00273983780.508333444
cars1.1136191.0836401.0771111.0471501.01.1247041.1005061.1035611.1024731.105523385310.7931031246
\n", "
" ], "text/plain": [ " catboost_with_default_parameters \\\n", "california 1.021441 \n", "diabetes 1.039350 \n", "valley 0.957214 \n", "bank 1.004567 \n", "ukair 0.983004 \n", "adult 1.004016 \n", "dating 0.995615 \n", "cars 1.113619 \n", "\n", " lightgbm_with_default_parameters \\\n", "california 1.004796 \n", "diabetes 1.010792 \n", "valley 1.048608 \n", "bank 1.004054 \n", "ukair 0.961656 \n", "adult 1.001793 \n", "dating 0.998630 \n", "cars 1.083640 \n", "\n", " lightgbm_with_catboost_encoder_with_default_parameters \\\n", "california 1.004796 \n", "diabetes 1.010792 \n", "valley 1.048608 \n", "bank 1.001465 \n", "ukair 0.958858 \n", "adult 0.999689 \n", "dating 0.996575 \n", "cars 1.077111 \n", "\n", " xgboost_with_catboost_encoder_with_default_parameters \\\n", "california 1.000000 \n", "diabetes 1.000000 \n", "valley 1.000000 \n", "bank 0.995140 \n", "ukair 0.991687 \n", "adult 0.996797 \n", "dating 0.993697 \n", "cars 1.047150 \n", "\n", " xgboost_with_default_parameters catboost_with_tuned_parameters \\\n", "california 1.0 1.033683 \n", "diabetes 1.0 1.068308 \n", "valley 1.0 1.063240 \n", "bank 1.0 1.005959 \n", "ukair 1.0 1.022377 \n", "adult 1.0 1.005068 \n", "dating 1.0 1.001779 \n", "cars 1.0 1.124704 \n", "\n", " lightgbm_with_tuned_parameters \\\n", "california 1.027302 \n", "diabetes 1.068285 \n", "valley 1.112052 \n", "bank 1.005226 \n", "ukair 0.998858 \n", "adult 1.004829 \n", "dating 1.001099 \n", "cars 1.100506 \n", "\n", " lightgbm_with_catboost_encoder_with_tuned_parameters \\\n", "california 1.027000 \n", "diabetes 1.064774 \n", "valley 1.100282 \n", "bank 1.003224 \n", "ukair 0.992034 \n", "adult 1.003275 \n", "dating 0.996987 \n", "cars 1.103561 \n", "\n", " xgboost_with_catboost_encoder_with_tuned_parameters \\\n", "california 1.023002 \n", "diabetes 1.070146 \n", "valley 1.203563 \n", "bank 1.003004 \n", "ukair 1.005006 \n", "adult 1.002701 \n", "dating 0.994656 \n", "cars 1.102473 \n", "\n", " xgboost_with_tuned_parameters length 
\\\n", "california 1.021651 20640 \n", "diabetes 1.064774 768 \n", "valley 1.205072 1212 \n", "bank 1.003761 45211 \n", "ukair 1.035443 394299 \n", "adult 1.003490 48842 \n", "dating 1.002739 8378 \n", "cars 1.105523 38531 \n", "\n", " categorical_features_proportion num_categories \n", "california 0.000000 0 \n", "diabetes 0.000000 0 \n", "valley 0.000000 0 \n", "bank 0.562500 44 \n", "ukair 0.555556 106 \n", "adult 0.857143 122 \n", "dating 0.508333 444 \n", "cars 0.793103 1246 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Display the scaled scores ordered by increasing number of categories.\n",
"perfs_tuned_vs_untuned_scaled.sort_values(by=\"num_categories\", ascending=True)" ] },
{ "cell_type": "code", "execution_count": 11, "id": "regulated-carolina", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:51:07.379531Z", "start_time": "2021-03-22T09:51:07.362680Z" }, "lines_to_next_cell": 2 }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "marker": { "color": "#189FDD" }, "mode": "markers", "name": "xgboost", "type": "scatter", "x": [ 20640, 48842, 394299, 768, 45211, 8378, 1212, 38531 ], "y": [ 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "marker": { "color": "#76B644" }, "mode": "markers", "name": "lightgbm", "type": "scatter", "x": [ 20640, 48842, 394299, 768, 45211, 8378, 1212, 38531 ], "y": [ 1.0047958990004422, 1.0017927892142444, 0.9616557411395943, 1.0107917059377947, 1.004054035155882, 0.9986296216549206, 1.0486078660436138, 1.0836403293103205 ] }, { "marker": { "color": "#FFCC00" }, "mode": "markers", "name": "catboost", "type": "scatter", "x": [ 20640, 48842, 394299, 768, 45211, 8378, 1212, 38531 ], "y": [ 1.0214405078542739, 1.0040158386780615, 0.9830037213493318, 1.039349670122526, 1.0045668602961224, 0.9956152797675948, 0.9572137850467289, 1.1136192390017632 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6",
"width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": 
"histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, 
"ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", 
"#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Performance of models with tuned parameters, given dataset length" }, "xaxis": { "title": { "text": "Dataset length" }, "type": "log" }, "yaxis": { "title": { "text": "Performance difference with xgboost" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"length\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"xgboost_with_default_parameters\"],\n", " mode='markers',\n", " marker_color=\"#189FDD\",\n", " name=\"xgboost\"),\n", " go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"length\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"lightgbm_with_default_parameters\"],\n", " mode='markers',\n", " marker_color=\"#76B644\",\n", " name=\"lightgbm\"),\n", " go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"length\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"catboost_with_default_parameters\"],\n", " mode='markers',\n", " marker_color=\"#FFCC00\",\n", " name=\"catboost\")\n", " ])\n", "fig.update_xaxes(type=\"log\", title=\"Dataset length\")\n", "fig.update_yaxes(title=\"Performance difference with xgboost\")\n", "\n", "fig.update_layout(title=\"Performance of models with tuned parameters, given dataset length\")\n", "fig.show()" ] }, { "cell_type": "markdown", "id": "pacific-corpus", "metadata": { "ExecuteTime": { "end_time": "2021-03-01T19:00:44.819905Z", "start_time": "2021-03-01T19:00:44.814691Z" } }, "source": [ "# Proportion of categorical features impact" ] }, { "cell_type": "code", "execution_count": 12, "id": "professional-equilibrium", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:51:11.225868Z", "start_time": "2021-03-22T09:51:11.211928Z" }, "lines_to_next_cell": 2 }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "marker": { "color": "#FFCC00" }, "mode": "markers", "name": "catboost", "type": "scatter", "x": [ 0, 0.8571428571428571, 0.5555555555555556, 0, 0.5625, 0.5083333333333333, 0, 0.7931034482758621 ], "y": [ 1.0214405078542739, 1.0040158386780615, 0.9830037213493318, 1.039349670122526, 1.0045668602961224, 0.9956152797675948, 0.9572137850467289, 1.1136192390017632 ] }, { "marker": { "color": 
"#76B644" }, "mode": "markers", "name": "lightgbm", "type": "scatter", "x": [ 0, 0.8571428571428571, 0.5555555555555556, 0, 0.5625, 0.5083333333333333, 0, 0.7931034482758621 ], "y": [ 1.0047958990004422, 1.0017927892142444, 0.9616557411395943, 1.0107917059377947, 1.004054035155882, 0.9986296216549206, 1.0486078660436138, 1.0836403293103205 ] }, { "marker": { "color": "#189FDD" }, "mode": "markers", "name": "xgboost", "type": "scatter", "x": [ 0, 0.8571428571428571, 0.5555555555555556, 0, 0.5625, 0.5083333333333333, 0, 0.7931034482758621 ], "y": [ 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, 
"#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { 
"colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 
0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": 
"#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Performance of models with default parameters, given proportion of categorical features" }, "xaxis": { "title": { "text": "Dataset length" } }, "yaxis": { "title": { "text": "Performance difference with xgboost" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"categorical_features_proportion\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"catboost_with_default_parameters\"],\n", " mode='markers',\n", " marker_color=\"#FFCC00\",\n", " name=\"catboost\"),\n", " go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"categorical_features_proportion\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"lightgbm_with_default_parameters\"],\n", " mode='markers',\n", " marker_color=\"#76B644\",\n", " name=\"lightgbm\"),\n", " go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"categorical_features_proportion\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"xgboost_with_default_parameters\"],\n", " mode='markers',\n", " marker_color=\"#189FDD\",\n", " name=\"xgboost\")\n", " ])\n", "fig.update_xaxes(title=\"Proportion of categorical features\")\n", "fig.update_yaxes(title=\"Performance difference with xgboost\")\n", "\n", "fig.update_layout(title=\"Performance of models with default parameters, given proportion of categorical features\")\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": 13, "id": "respected-pulse", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:51:11.520280Z", "start_time": "2021-03-22T09:51:11.505729Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "marker": { "color": "#FFCC00" }, "mode": "markers", "name": "catboost", "type": "scatter", "x": [ 0, 0.8571428571428571, 0.5555555555555556, 0, 0.5625, 0.5083333333333333, 0, 0.7931034482758621 ], "y": [ 1.0336829667258485, 1.0050675715986264, 1.0223765792879222, 1.0683081998114985, 1.0059589636515507, 1.001779333772469, 1.0632398753894081, 1.1247042571958041 ] }, { "marker": { "color": "#76B644" }, "mode": "markers", "name": "lightgbm", "type": "scatter", "x": [ 0, 0.8571428571428571, 0.5555555555555556, 0, 0.5625, 0.5083333333333333, 0, 0.7931034482758621 ], "y": [ 
1.0273017908158903, 1.0048285844579132, 0.9988576651468543, 1.0682846371347785, 1.0052262735808768, 1.0010994416958947, 1.1120521806853583, 1.1005062262100413 ] }, { "marker": { "color": "#189FDD" }, "mode": "markers", "name": "xgboost", "type": "scatter", "x": [ 0, 0.8571428571428571, 0.5555555555555556, 0, 0.5625, 0.5083333333333333, 0, 0.7931034482758621 ], "y": [ 1.021651166881036, 1.0034898175626887, 1.0354426254652978, 1.0647737983034873, 1.0037610382126079, 1.0027390890858732, 1.2050720404984423, 1.1055225067114367 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 
0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, 
"marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, 
"#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": 
"white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Performance of models with tuned parameters, given proportion of categorical features" }, "xaxis": { "title": { "text": "Dataset length" } }, "yaxis": { "title": { "text": "Performance difference with xgboost" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = go.Figure(data=[go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"categorical_features_proportion\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"catboost_with_tuned_parameters\"],\n", " mode='markers',\n", " marker_color=\"#FFCC00\",\n", " name=\"catboost\"),\n", " go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"categorical_features_proportion\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"lightgbm_with_tuned_parameters\"],\n", " mode='markers',\n", " marker_color=\"#76B644\",\n", " name=\"lightgbm\"),\n", " go.Scatter(x=perfs_tuned_vs_untuned_scaled[\"categorical_features_proportion\"], \n", " y=perfs_tuned_vs_untuned_scaled[\"xgboost_with_tuned_parameters\"],\n", " mode='markers',\n", " marker_color=\"#189FDD\",\n", " name=\"xgboost\")\n", " ])\n", "fig.update_xaxes(title=\"Proportion of categorical features\")\n", "fig.update_yaxes(title=\"Performance difference with xgboost\")\n", "\n", "fig.update_layout(title=\"Performance of models with tuned parameters, given proportion of categorical features\")\n", "fig.show()" ] }, { "cell_type": "markdown", "id": "korean-straight", "metadata": {}, "source": [ "# All performances" ] }, { "cell_type": "code", "execution_count": 36, "id": "polish-henry", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T10:00:21.789309Z", "start_time": "2021-03-22T10:00:21.733590Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "Model=xgboost_with_default_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "xgboost_with_default_parameters", "marker": { "color": "#189FDD" }, "name": "xgboost_with_default_parameters", "offsetgroup": "xgboost_with_default_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.8315761386437693, 0.8565169973019453, 0.8363546492423953, 0.7252221462747779, 0.9056866572929987, 0.8712099846922626, 0.5594283722551412, 0.48101945480906416 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=lightgbm_with_default_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "lightgbm_with_default_parameters", "marker": { "color": "#76B644" }, "name": "lightgbm_with_default_parameters", "offsetgroup": "lightgbm_with_default_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.8355642938158825, 0.8580525517365252, 0.8042852500727411, 0.7330485304169516, 0.9093583428418778, 0.8700160973952235, 0.586620991634716, 0.5212520804139651 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=catboost_with_default_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "catboost_with_default_parameters", "marker": { "color": "#FFCC00" }, "name": "catboost_with_default_parameters", "offsetgroup": "catboost_with_default_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.8494055533757878, 0.8599566313881276, 0.8221397325730897, 0.7537593984962406, 0.9098228017289178, 0.867389972645709, 0.5354925496688742, 0.5356725192095131 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=xgboost_with_tuned_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "xgboost_with_tuned_parameters", "marker": { "color": "dodgerblue" }, "name": "xgboost_with_tuned_parameters", "offsetgroup": "xgboost_with_tuned_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.8495807323958331, 0.8595060853618711, 0.865997253831654, 0.7721975393028024, 0.9090929794197267, 0.8735963064528369, 0.6741514900662251, 0.5317778334574853 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=lightgbm_with_tuned_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "lightgbm_with_tuned_parameters", "marker": { "color": "olivedrab" }, "name": "lightgbm_with_tuned_parameters", "offsetgroup": "lightgbm_with_tuned_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.8542796564285073, 0.860652761963056, 0.8353992521769753, 0.7747436773752563, 0.9104200235425617, 0.872167829275313, 0.6221135413035901, 0.5293649049455347 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=catboost_with_tuned_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "catboost_with_tuned_parameters", "marker": { "color": "orange" }, "name": "catboost_with_tuned_parameters", "offsetgroup": "catboost_with_tuned_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.859586090051717, 0.8608574585112133, 0.8550694053639901, 0.7747607655502392, 0.9110836111635021, 0.8727601580409378, 0.5948065528058557, 0.5410046286177592 ], "yaxis": "y" } ], "layout": { "barmode": "group", "legend": { "title": { "text": "Model" }, "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "pie": [ { "automargin": true, "type": "pie" } ] }, "layout": { "xaxis": { "showgrid": false, "title": { "standoff": 15 } }, "yaxis": { "title": { "standoff": 15 } } } }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Dataset" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Score" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "px.bar(perfs_tuned_vs_untuned[[\"xgboost_with_default_parameters\", \"lightgbm_with_default_parameters\", \"catboost_with_default_parameters\",\n", " \"xgboost_with_tuned_parameters\", \"lightgbm_with_tuned_parameters\", \"catboost_with_tuned_parameters\"]],\n", " labels={\n", " \"index\": \"Dataset\",\n", " \"value\": \"Score\",\n", " \"variable\": \"Model\",\n", " },\n", " barmode=\"group\", template='xgridoff',\n", " color_discrete_sequence=['#189FDD', \"#76B644\", \"#FFCC00\",\n", " \"dodgerblue\", \"olivedrab\", \"orange\"])" ] }, { "cell_type": "code", "execution_count": 20, "id": "instant-confidence", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:54:24.800161Z", "start_time": "2021-03-22T09:54:24.743629Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "Model=xgboost_with_default_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "xgboost_with_default_parameters", "marker": { "color": "#189FDD" }, "name": "xgboost_with_default_parameters", "offsetgroup": "xgboost_with_default_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 1, 1, 1, 1, 1, 1, 1, 1 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=lightgbm_with_default_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "lightgbm_with_default_parameters", "marker": { "color": "#76B644" }, "name": "lightgbm_with_default_parameters", "offsetgroup": "lightgbm_with_default_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 1.0047958990004422, 1.0017927892142444, 0.9616557411395943, 1.0107917059377947, 1.004054035155882, 0.9986296216549206, 1.0486078660436138, 1.0836403293103205 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=catboost_with_default_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "catboost_with_default_parameters", "marker": { "color": "#FFCC00" }, "name": "catboost_with_default_parameters", "offsetgroup": "catboost_with_default_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 1.0214405078542739, 1.0040158386780615, 0.9830037213493318, 1.039349670122526, 1.0045668602961224, 0.9956152797675948, 0.9572137850467289, 1.1136192390017632 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=xgboost_with_tuned_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "xgboost_with_tuned_parameters", "marker": { "color": "dodgerblue" }, "name": "xgboost_with_tuned_parameters", "offsetgroup": "xgboost_with_tuned_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 1.021651166881036, 1.0034898175626887, 1.0354426254652978, 1.0647737983034873, 1.0037610382126079, 1.0027390890858732, 1.2050720404984423, 1.1055225067114367 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=lightgbm_with_tuned_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "lightgbm_with_tuned_parameters", "marker": { "color": "olivedrab" }, "name": "lightgbm_with_tuned_parameters", "offsetgroup": "lightgbm_with_tuned_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 1.0273017908158903, 1.0048285844579132, 0.9988576651468543, 1.0682846371347785, 1.0052262735808768, 1.0010994416958947, 1.1120521806853583, 1.1005062262100413 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=catboost_with_tuned_parameters
Dataset=%{x}
Score=%{y}", "legendgroup": "catboost_with_tuned_parameters", "marker": { "color": "orange" }, "name": "catboost_with_tuned_parameters", "offsetgroup": "catboost_with_tuned_parameters", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 1.0336829667258485, 1.0050675715986264, 1.0223765792879222, 1.0683081998114985, 1.0059589636515507, 1.001779333772469, 1.0632398753894081, 1.1247042571958041 ], "yaxis": "y" } ], "layout": { "barmode": "group", "legend": { "title": { "text": "Model" }, "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "pie": [ { "automargin": true, "type": "pie" } ] }, "layout": { "xaxis": { "showgrid": false, "title": { "standoff": 15 } }, "yaxis": { "title": { "standoff": 15 } } } }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Dataset" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Score" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "px.bar(perfs_tuned_vs_untuned_scaled[[\"xgboost_with_default_parameters\", \"lightgbm_with_default_parameters\", \"catboost_with_default_parameters\",\n", " \"xgboost_with_tuned_parameters\", \"lightgbm_with_tuned_parameters\", \"catboost_with_tuned_parameters\"]],\n", " labels={\n", " \"index\": \"Dataset\",\n", " \"value\": \"Score\",\n", " \"variable\": \"Model\",\n", " },\n", " barmode=\"group\", template='xgridoff',\n", " color_discrete_sequence=['#189FDD', \"#76B644\", \"#FFCC00\",\n", " \"dodgerblue\", \"olivedrab\", \"orange\"])" ] }, { "cell_type": "code", "execution_count": 21, "id": "daily-running", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:54:51.489528Z", "start_time": "2021-03-22T09:54:51.480685Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean score of each model, comparing to xgboost\n" ] }, { "data": { "text/plain": [ "catboost_with_default_parameters 1.014853\n", "lightgbm_with_default_parameters 1.014246\n", "lightgbm_with_catboost_encoder_with_default_parameters 1.012237\n", "xgboost_with_catboost_encoder_with_default_parameters 1.003059\n", "xgboost_with_default_parameters 1.000000\n", "catboost_with_tuned_parameters 1.040640\n", "lightgbm_with_tuned_parameters 1.039770\n", "lightgbm_with_catboost_encoder_with_tuned_parameters 1.036392\n", "xgboost_with_catboost_encoder_with_tuned_parameters 1.050569\n", "xgboost_with_tuned_parameters 1.055307\n", "dtype: float64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mean score of each model, comparing to xgboost\")\n", "perfs_tuned_vs_untuned_scaled.drop(columns=[\"categorical_features_proportion\", \"num_categories\", \"length\"]).dropna()\\\n", " .mean(axis=0)" ] }, { "cell_type": "code", "execution_count": 22, "id": "insured-forum", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:54:51.698375Z", "start_time": 
"2021-03-22T09:54:51.688946Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Median score of each model, comparing to xgboost\n" ] }, { "data": { "text/plain": [ "catboost_with_default_parameters 1.004291\n", "lightgbm_with_default_parameters 1.004425\n", "lightgbm_with_catboost_encoder_with_default_parameters 1.003131\n", "xgboost_with_catboost_encoder_with_default_parameters 0.998398\n", "xgboost_with_default_parameters 1.000000\n", "catboost_with_tuned_parameters 1.028030\n", "lightgbm_with_tuned_parameters 1.016264\n", "lightgbm_with_catboost_encoder_with_tuned_parameters 1.015137\n", "xgboost_with_catboost_encoder_with_tuned_parameters 1.014004\n", "xgboost_with_tuned_parameters 1.028547\n", "dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Median score of each model, comparing to xgboost\")\n", "perfs_tuned_vs_untuned_scaled.drop(columns=[\"categorical_features_proportion\", \"num_categories\", \"length\"]).dropna()\\\n", " .median(axis=0)" ] }, { "cell_type": "code", "execution_count": 23, "id": "governing-advice", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:54:53.142495Z", "start_time": "2021-03-22T09:54:53.129330Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean rank of each default model\n" ] }, { "data": { "text/plain": [ "catboost_with_default_parameters 1.375\n", "lightgbm_with_default_parameters 1.000\n", "xgboost_with_default_parameters 0.625\n", "dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mean rank of each default model\")\n", "perfs_tuned_vs_untuned_scaled[[\"catboost_with_default_parameters\", \"lightgbm_with_default_parameters\", \n", " \"xgboost_with_default_parameters\"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()" ] }, { "cell_type": "code", "execution_count": 24, "id": "posted-buddy", "metadata": { "ExecuteTime": { 
"end_time": "2021-03-22T09:54:53.454846Z", "start_time": "2021-03-22T09:54:53.443498Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean rank of each tuned model\n" ] }, { "data": { "text/plain": [ "catboost_with_tuned_parameters 1.500\n", "lightgbm_with_tuned_parameters 0.625\n", "xgboost_with_tuned_parameters 0.875\n", "dtype: float64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mean rank of each tuned model\")\n", "perfs_tuned_vs_untuned_scaled[[\"catboost_with_tuned_parameters\", \"lightgbm_with_tuned_parameters\", \n", " \"xgboost_with_tuned_parameters\"]].apply(lambda x: x.argsort().argsort(), axis=1).mean()" ] }, { "cell_type": "markdown", "id": "perfect-implement", "metadata": {}, "source": [ "# Training and prediction time" ] }, { "cell_type": "code", "execution_count": 25, "id": "atomic-privacy", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:54:54.889555Z", "start_time": "2021-03-22T09:54:54.882603Z" } }, "outputs": [], "source": [ "training_times = pd.DataFrame({dataset_name: {model_name: default_perfs[dataset_name][model_name][\"training_time\"] \n", " for model_name in default_perfs[dataset_name].keys()}\n", " for dataset_name in default_perfs.keys()}).transpose()\n", "prediction_times = pd.DataFrame({dataset_name: {model_name: default_perfs[dataset_name][model_name][\"prediction_time\"] \n", " for model_name in default_perfs[dataset_name].keys()}\n", " for dataset_name in default_perfs.keys()}).transpose()\n", "times_df = pd.concat([training_times, prediction_times], axis=1)" ] }, { "cell_type": "code", "execution_count": 26, "id": "amazing-machine", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:54:55.926343Z", "start_time": "2021-03-22T09:54:55.912082Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
catboostlightgbmlightgbm_with_catboost_encoderxgboost_with_catboost_encoderxgboostcatboostlightgbmlightgbm_with_catboost_encoderxgboost_with_catboost_encoderxgboost
california24.6892610.1781420.171883119.791898118.7814660.0140210.0317250.0317090.0488800.042857
adult149.7246030.8118721.547185129.368296112.5267650.1291340.1008140.2226900.1949310.149172
ukair144.2910183.1675513.08278765.90762133.5856520.7256831.1818191.2146060.5440020.187727
diabetes9.4728240.0709930.180932151.992842153.2754990.0116340.0044200.0101920.1436120.157037
bank119.7785960.4506721.307890128.355890117.6801340.0649280.0870150.0915560.2066890.127684
dating218.2276862.7349242.686051240.914340224.0789940.1891920.2053730.1686170.5569580.273364
valley70.1264291.3434692.123457182.244021182.8117970.0162800.0048190.0046380.1387280.125285
cars141.7493531.1858371.893203143.051520135.9668510.9330870.1226370.1946350.2818250.178372
\n", "
" ], "text/plain": [ " catboost lightgbm lightgbm_with_catboost_encoder \\\n", "california 24.689261 0.178142 0.171883 \n", "adult 149.724603 0.811872 1.547185 \n", "ukair 144.291018 3.167551 3.082787 \n", "diabetes 9.472824 0.070993 0.180932 \n", "bank 119.778596 0.450672 1.307890 \n", "dating 218.227686 2.734924 2.686051 \n", "valley 70.126429 1.343469 2.123457 \n", "cars 141.749353 1.185837 1.893203 \n", "\n", " xgboost_with_catboost_encoder xgboost catboost lightgbm \\\n", "california 119.791898 118.781466 0.014021 0.031725 \n", "adult 129.368296 112.526765 0.129134 0.100814 \n", "ukair 65.907621 33.585652 0.725683 1.181819 \n", "diabetes 151.992842 153.275499 0.011634 0.004420 \n", "bank 128.355890 117.680134 0.064928 0.087015 \n", "dating 240.914340 224.078994 0.189192 0.205373 \n", "valley 182.244021 182.811797 0.016280 0.004819 \n", "cars 143.051520 135.966851 0.933087 0.122637 \n", "\n", " lightgbm_with_catboost_encoder xgboost_with_catboost_encoder \\\n", "california 0.031709 0.048880 \n", "adult 0.222690 0.194931 \n", "ukair 1.214606 0.544002 \n", "diabetes 0.010192 0.143612 \n", "bank 0.091556 0.206689 \n", "dating 0.168617 0.556958 \n", "valley 0.004638 0.138728 \n", "cars 0.194635 0.281825 \n", "\n", " xgboost \n", "california 0.042857 \n", "adult 0.149172 \n", "ukair 0.187727 \n", "diabetes 0.157037 \n", "bank 0.127684 \n", "dating 0.273364 \n", "valley 0.125285 \n", "cars 0.178372 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "times_df" ] }, { "cell_type": "code", "execution_count": 27, "id": "compatible-smooth", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:55:11.500132Z", "start_time": "2021-03-22T09:55:11.454098Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "Model=xgboost
Dataset=%{x}
Training time=%{y}", "legendgroup": "xgboost", "marker": { "color": "#189FDD" }, "name": "xgboost", "offsetgroup": "xgboost", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 118.78146642446518, 112.52676504850388, 33.585652112960815, 153.2754987001419, 117.68013417720795, 224.07899371782938, 182.81179705262184, 135.9668505191803 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=lightgbm
Dataset=%{x}
Training time=%{y}", "legendgroup": "lightgbm", "marker": { "color": "#76B644" }, "name": "lightgbm", "offsetgroup": "lightgbm", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.17814165353775024, 0.8118723034858704, 3.167551279067993, 0.07099325656890869, 0.45067161321640015, 2.7349242766698203, 1.3434689939022064, 1.1858367919921875 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=catboost
Dataset=%{x}
Training time=%{y}", "legendgroup": "catboost", "marker": { "color": "#FFCC00" }, "name": "catboost", "offsetgroup": "catboost", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 24.68926101922989, 149.72460305690765, 144.29101753234863, 9.472824048995971, 119.7785957455635, 218.22768604755402, 70.12642854452133, 141.74935269355774 ], "yaxis": "y" } ], "layout": { "barmode": "group", "legend": { "title": { "text": "Model" }, "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "pie": [ { "automargin": true, "type": "pie" } ] }, "layout": { "xaxis": { "showgrid": false, "title": { "standoff": 15 } }, "yaxis": { "title": { "standoff": 15 } } } }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Dataset" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Training time" }, "type": "log" } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "px.bar(training_times[[\"xgboost\", \"lightgbm\", \"catboost\"]], barmode=\"group\", log_y=True, labels={\n", " \"index\": \"Dataset\",\n", " \"value\": \"Training time\",\n", " \"variable\": \"Model\",\n", " },\n", " template='xgridoff',\n", " color_discrete_sequence=['#189FDD', \"#76B644\", \"#FFCC00\",\n", " \"dodgerblue\", \"olivedrab\", \"orange\"])" ] }, { "cell_type": "code", "execution_count": 28, "id": "studied-appreciation", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:55:25.028732Z", "start_time": "2021-03-22T09:55:24.983053Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "Model=xgboost
Dataset=%{x}
Training time=%{y}", "legendgroup": "xgboost", "marker": { "color": "#189FDD" }, "name": "xgboost", "offsetgroup": "xgboost", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.04285699129104614, 0.14917218685150146, 0.18772661685943604, 0.1570371150970459, 0.1276841163635254, 0.2733643452326457, 0.12528470158576965, 0.17837196588516235 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=lightgbm
Dataset=%{x}
Training time=%{y}", "legendgroup": "lightgbm", "marker": { "color": "#76B644" }, "name": "lightgbm", "offsetgroup": "lightgbm", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.03172546625137329, 0.10081368684768677, 1.181818962097168, 0.004420185089111328, 0.08701473474502563, 0.20537320772806802, 0.004819363355636597, 0.12263745069503784 ], "yaxis": "y" }, { "alignmentgroup": "True", "hovertemplate": "Model=catboost
Dataset=%{x}
Training time=%{y}", "legendgroup": "catboost", "marker": { "color": "#FFCC00" }, "name": "catboost", "offsetgroup": "catboost", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "california", "adult", "ukair", "diabetes", "bank", "dating", "valley", "cars" ], "xaxis": "x", "y": [ 0.01402062177658081, 0.1291337013244629, 0.7256834506988525, 0.011633682250976562, 0.06492817401885986, 0.18919217586517334, 0.016279786825180054, 0.933086633682251 ], "yaxis": "y" } ], "layout": { "barmode": "group", "legend": { "title": { "text": "Model" }, "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "pie": [ { "automargin": true, "type": "pie" } ] }, "layout": { "xaxis": { "showgrid": false, "title": { "standoff": 15 } }, "yaxis": { "title": { "standoff": 15 } } } }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "Dataset" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "Training time" }, "type": "log" } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "px.bar(prediction_times[[\"xgboost\", \"lightgbm\", \"catboost\"]], barmode=\"group\", log_y=True, labels={\n", " \"index\": \"Dataset\",\n", " \"value\": \"Training time\",\n", " \"variable\": \"Model\",\n", " },\n", " template='xgridoff',\n", " color_discrete_sequence=['#189FDD', \"#76B644\", \"#FFCC00\",\n", " \"dodgerblue\", \"olivedrab\", \"orange\"])" ] }, { "cell_type": "code", "execution_count": 29, "id": "stupid-projector", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:55:27.347668Z", "start_time": "2021-03-22T09:55:27.342755Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean training times\n" ] }, { "data": { "text/plain": [ "catboost 109.757471\n", "lightgbm 1.242933\n", "lightgbm_with_catboost_encoder 1.624173\n", "xgboost_with_catboost_encoder 145.203303\n", "xgboost 134.838395\n", "dtype: float64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mean training times\")\n", "training_times.mean(axis=0)" ] }, { "cell_type": "code", "execution_count": 30, "id": "harmful-alfred", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:55:27.542145Z", "start_time": "2021-03-22T09:55:27.534819Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mdian training times comparing to xgboost training time\n" ] }, { "data": { "text/plain": [ "catboost 0.995860\n", "lightgbm 0.007282\n", "lightgbm_with_catboost_encoder 0.011801\n", "xgboost_with_catboost_encoder 1.063619\n", "xgboost 1.000000\n", "dtype: float64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mdian training times comparing to xgboost training time\")\n", "training_times.assign(**{col_name: training_times[col_name] / training_times[\"xgboost\"]\n", " for col_name in training_times.columns}).median(axis=0)" ] }, { "cell_type": "code", "execution_count": 31, 
"id": "opponent-evolution", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:55:27.696150Z", "start_time": "2021-03-22T09:55:27.691026Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean prediction times\n" ] }, { "data": { "text/plain": [ "catboost 0.260495\n", "lightgbm 0.217328\n", "lightgbm_with_catboost_encoder 0.242330\n", "xgboost_with_catboost_encoder 0.264453\n", "xgboost 0.155187\n", "dtype: float64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mean prediction times\")\n", "prediction_times.mean(axis=0)" ] }, { "cell_type": "code", "execution_count": 32, "id": "prepared-lodging", "metadata": { "ExecuteTime": { "end_time": "2021-03-22T09:55:27.877998Z", "start_time": "2021-03-22T09:55:27.869283Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mdian prediction times comparing to xgboost prediction time\n" ] }, { "data": { "text/plain": [ "catboost 0.600297\n", "lightgbm 0.684511\n", "lightgbm_with_catboost_encoder 0.728463\n", "xgboost_with_catboost_encoder 1.443369\n", "xgboost 1.000000\n", "dtype: float64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"Mdian prediction times comparing to xgboost prediction time\")\n", "prediction_times.assign(**{col_name: prediction_times[col_name] / prediction_times[\"xgboost\"]\n", " for col_name in prediction_times.columns}).median(axis=0)" ] } ], "metadata": { "hide_input": false, "jupytext": { "encoding": "# -*- coding: utf-8 -*-", "formats": "ipynb,py:percent" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": 
true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }