{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "![KTS logo](https://raw.githubusercontent.com/konodyuk/kts/master/docs/static/banner_alpha.png)\n", "# Modelling Guide" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
DASHBOARD
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
GENERIC FEATURE
\n", " \n", "
\n", "
name
\n", "
interactions
\n", "
source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
GENERIC FEATURE
\n", " \n", "
\n", "
name
\n", "
num_aggs
\n", "
description
\n", "
Descriptions are also supported.
\n", "
source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
GENERIC FEATURE
\n", " \n", "
\n", "
name
\n", "
tfidf
\n", "
source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "
helpers
\n", "
You've got no helpers so far.
\n", "
\n", "\n", "
\n", "\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "np.random.seed(0)\n", "\n", "import kts\n", "from kts import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Feature constructors and helpers defined earlier are automatically loaded:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
FEATURE CONSTRUCTOR
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
" ], "text/plain": [ "simple_feature" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "simple_feature" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use `kts.ls` to list objects saved in your user cache and `kts.rm` to remove them:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['train', 'test', 'external']\n", "['train', 'test']\n" ] } ], "source": [ "print(kts.ls())\n", "kts.rm('external')\n", "print(kts.ls())" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "train = kts.load('train')\n", "test = kts.load('test')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Models\n", "\n", "`kts.models.{binary, multiclass, regression}` contain the most popular models for each task type. \n", "In particular, all of the corresponding sklearn models are present, as well as CatBoost, LGBM and XGB if they are installed. We'll also add neural nets there soon." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from kts.models import binary, multiclass, regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Init signatures of the original model classes are preserved:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
MODEL
\n", "
name
\n", "
CatBoostClassifierGVB
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
custom_metric = 'AUC'\n",
       "loss_function = 'Logloss'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
source
\n", "
CatBoostClassifier(custom_metric='AUC', loss_function='Logloss', rsm=0.15, iterations=100)\n",
       "
" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cb = binary.CatBoostClassifier(iterations=100, rsm=.15, custom_metric='AUC')\n", "cb" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
MODEL
\n", "
name
\n", "
LogisticRegressionGNM
\n", "
model
\n", "
LogisticRegression
\n", "
params
\n", "
                C = 0.5\n",
       "     class_weight = None\n",
       "             dual = False\n",
       "    fit_intercept = True\n",
       "intercept_scaling = 1\n",
       "         max_iter = 1000\n",
       "      multi_class = 'warn'\n",
       "          penalty = 'l2'\n",
       "     random_state = None\n",
       "           solver = 'lbfgs'\n",
       "              tol = 0.0001\n",
       "       warm_start = False\n",
       "
\n", "
source
\n", "
LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=1000, multi_class='warn', penalty='l2', random_state=None, solver='lbfgs', tol=0.0001, warm_start=False)\n",
       "
" ], "text/plain": [ "LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True,\n", " intercept_scaling=1, max_iter=1000, multi_class='warn',\n", " n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',\n", " tol=0.0001, verbose=0, warm_start=False)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lr = binary.LogisticRegression(C=.5, solver='lbfgs', max_iter=1000)\n", "lr" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from category_encoders import TargetEncoder, WOEEncoder\n", "\n", "fs = FeatureSet([simple_feature, interactions('Pclass', 'Age'), num_aggs('Fare'), tfidf('Name'), stl.one_hot_encode('Embarked')], \n", " [stl.category_encode(TargetEncoder(), 'Embarked', 'Survived'), \n", " stl.category_encode(WOEEncoder(), 'Embarked', 'Survived')],\n", " train_frame=train,\n", " targets='Survived')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Validation\n", "\n", "To define a validation scheme, use `kts.Validator(splitter, metric)`. The splitter is used to split the training set, and the metric is used to evaluate the trained models." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.metrics import roc_auc_score\n", "\n", "skf = StratifiedKFold(5, True, 42)\n", "val = Validator(skf, roc_auc_score)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Running validation is as easy as `val.score(model, feature_set)`:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
COMPUTING FEATURES
feature
progress
\n", "
num_aggs__Fare
\n", "
0s
\n", "
\n", "
simple_feature
\n", "
0s
\n", "
\n", "
interactions__Pclass_Age
\n", "
0s
\n", "
\n", "
tfidf__Name
\n", "
0s
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
FITTING
\n", "
progress
\n", "
\n", "
train
\n", "
0.224
\n", "
valid
\n", "
0.456
\n", "
metric
\n", "
0.862
\n", "
took
\n", "
3s
\n", "
eta
\n", "
0s
\n", "
\n", "
0.218
\n", "
0.516
\n", "
0.821
\n", "
2s
\n", "
0s
\n", "
\n", "
0.229
\n", "
0.487
\n", "
0.85
\n", "
3s
\n", "
0s
\n", "
\n", "
0.240
\n", "
0.415
\n", "
0.892
\n", "
3s
\n", "
0s
\n", "
\n", "
\n", "
0.236
\n", "
0.585
\n", "
0.800
\n", "
2s
\n", "
0s
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'score': 0.8450484134619144, 'id': 'KPBVAI'}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.score(cb, fs)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
FITTING
\n", "
progress
\n", "
\n", "
train
\n", "
\n", "
valid
\n", "
\n", "
metric
\n", "
0.863
\n", "
took
\n", "
0s
\n", "
eta
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.773
\n", "
1s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.812
\n", "
0s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.829
\n", "
0s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.796
\n", "
1s
\n", "
0s
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'score': 0.8145216602070352, 'id': 'FYCMDA'}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.score(lr, fs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Leaderboard\n", "\n", "Right after validation, your experiments are placed in the leaderboard, which is available as `lb`:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LEADERBOARD
\n", "
#
id
score
model
# features
date
took
\n", "\n", "
\n", "
\n", "
EXPERIMENT
\n", " \n", "
\n", "
ID
\n", "
KPBVAI
\n", "
score
\n", "
0.8450484134619144
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
loss_function = 'Logloss'\n",
       "custom_metric = 'AUC'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
columns
\n", "
is_male
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
interactions('Pclass', 'Age')
\n", "
description
\n", "
An instance of generic feature constructor interactions
\n", "
source
\n", "
interactions('Pclass', 'Age')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Pclass_add_Age, Pclass_sub_Age, Pclass_mul_Age
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
num_aggs('Fare')
\n", "
description
\n", "
An instance of generic feature constructor num_aggs
\n", "
source
\n", "
num_aggs('Fare')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Fare_div_mean, Fare_sub_div_mean, Fare_div_std
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
tfidf('Name')
\n", "
description
\n", "
An instance of generic feature constructor tfidf
\n", "
source
\n", "
tfidf('Name')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
columns
\n", "
tfidf__Name_0, tfidf__Name_3, tfidf__Name_1, tfidf__Name_2, tfidf__Name_4
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_TargetEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_WOEEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])
\n", "
source
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])\n",
       "
\n", "
columns
\n", "
Embarked_ce_OneHotEncoder_0, Embarked_ce_OneHotEncoder_3, Embarked_ce_OneHotEncoder_1, Embarked_ce_OneHotEncoder_2
\n", "
\n", "
details
\n", "
\n", "
\n", "
\n", "
FEATURE SET
\n", " \n", "
\n", "
name
\n", "
FSBWBPEK
\n", "
source
\n", "
FeatureSet([simple_feature,\n",
       "            interactions('Pclass', 'Age'),\n",
       "            num_aggs('Fare'),\n",
       "            tfidf('Name')],\n",
       "           [stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(OneHotEncoder(), ['Embarked'], [None])],\n",
       "           targets=['Survived'],\n",
       "           auxiliary=[])\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
MODEL
\n", " \n", "
\n", "
name
\n", "
CatBoostClassifierGVB
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
loss_function = 'Logloss'\n",
       "custom_metric = 'AUC'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
source
\n", "
CatBoostClassifier(loss_function='Logloss', custom_metric='AUC', rsm=0.15, iterations=100)\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
VALIDATOR
\n", " \n", "
\n", "
splitter
\n", "
StratifiedKFold(n_splits=5, random_state=42, shuffle=True)\n",
       "
\n", "
metric
\n", "
roc_auc_score\n",
       "
\n", "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
EXPERIMENT
\n", " \n", "
\n", "
ID
\n", "
FYCMDA
\n", "
score
\n", "
0.8145216602070352
\n", "
model
\n", "
LogisticRegression
\n", "
params
\n", "
                C = 0.5\n",
       "     class_weight = None\n",
       "             dual = False\n",
       "    fit_intercept = True\n",
       "intercept_scaling = 1\n",
       "         max_iter = 1000\n",
       "      multi_class = 'warn'\n",
       "          penalty = 'l2'\n",
       "     random_state = None\n",
       "           solver = 'lbfgs'\n",
       "              tol = 0.0001\n",
       "       warm_start = False\n",
       "
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
columns
\n", "
is_male
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
interactions('Pclass', 'Age')
\n", "
description
\n", "
An instance of generic feature constructor interactions
\n", "
source
\n", "
interactions('Pclass', 'Age')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Pclass_add_Age, Pclass_sub_Age, Pclass_mul_Age
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
num_aggs('Fare')
\n", "
description
\n", "
An instance of generic feature constructor num_aggs
\n", "
source
\n", "
num_aggs('Fare')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Fare_div_mean, Fare_sub_div_mean, Fare_div_std
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
tfidf('Name')
\n", "
description
\n", "
An instance of generic feature constructor tfidf
\n", "
source
\n", "
tfidf('Name')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
columns
\n", "
tfidf__Name_0, tfidf__Name_3, tfidf__Name_1, tfidf__Name_2, tfidf__Name_4
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_TargetEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_WOEEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])
\n", "
source
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])\n",
       "
\n", "
columns
\n", "
Embarked_ce_OneHotEncoder_0, Embarked_ce_OneHotEncoder_3, Embarked_ce_OneHotEncoder_1, Embarked_ce_OneHotEncoder_2
\n", "
\n", "
details
\n", "
\n", "
\n", "
\n", "
FEATURE SET
\n", " \n", "
\n", "
name
\n", "
FSBWBPEK
\n", "
source
\n", "
FeatureSet([simple_feature,\n",
       "            interactions('Pclass', 'Age'),\n",
       "            num_aggs('Fare'),\n",
       "            tfidf('Name')],\n",
       "           [stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(OneHotEncoder(), ['Embarked'], [None])],\n",
       "           targets=['Survived'],\n",
       "           auxiliary=[])\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
MODEL
\n", " \n", "
\n", "
name
\n", "
LogisticRegressionGNM
\n", "
model
\n", "
LogisticRegression
\n", "
params
\n", "
                C = 0.5\n",
       "     class_weight = None\n",
       "             dual = False\n",
       "    fit_intercept = True\n",
       "intercept_scaling = 1\n",
       "         max_iter = 1000\n",
       "      multi_class = 'warn'\n",
       "          penalty = 'l2'\n",
       "     random_state = None\n",
       "           solver = 'lbfgs'\n",
       "              tol = 0.0001\n",
       "       warm_start = False\n",
       "
\n", "
source
\n", "
LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=1000, multi_class='warn', penalty='l2', random_state=None, solver='lbfgs', tol=0.0001, warm_start=False)\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
VALIDATOR
\n", " \n", "
\n", "
splitter
\n", "
StratifiedKFold(n_splits=5, random_state=42, shuffle=True)\n",
       "
\n", "
metric
\n", "
roc_auc_score\n",
       "
\n", "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Multiple Leaderboards" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can also keep multiple leaderboards by passing the `leaderboard` parameter to `val.score()`. The default leaderboard is `main`." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
FITTING
\n", "
progress
\n", "
\n", "
train
\n", "
\n", "
valid
\n", "
\n", "
metric
\n", "
0.819
\n", "
took
\n", "
1s
\n", "
eta
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.788
\n", "
1s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.794
\n", "
1s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.813
\n", "
0s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.76
\n", "
0s
\n", "
0s
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'score': 0.7948359406496035, 'id': 'GWZCPQ'}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "some_model = binary.KNeighborsClassifier()\n", "\n", "val.score(some_model, fs, leaderboard='other')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use `kts.leaderboard_list` or `kts.lbs` to access leaderboards other than `main`:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lbs.main is lb\n", "lbs.other is lbs['other']\n", "leaderboard_list is lbs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the new experiment appears only in the new leaderboard:" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LEADERBOARD
\n", "
#
id
score
model
# features
date
took
\n", "\n", "
\n", "
\n", "
EXPERIMENT
\n", " \n", "
\n", "
ID
\n", "
KPBVAI
\n", "
score
\n", "
0.8450484134619144
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
loss_function = 'Logloss'\n",
       "custom_metric = 'AUC'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
columns
\n", "
is_male
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
interactions('Pclass', 'Age')
\n", "
description
\n", "
An instance of generic feature constructor interactions
\n", "
source
\n", "
interactions('Pclass', 'Age')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Pclass_add_Age, Pclass_sub_Age, Pclass_mul_Age
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
num_aggs('Fare')
\n", "
description
\n", "
An instance of generic feature constructor num_aggs
\n", "
source
\n", "
num_aggs('Fare')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Fare_div_mean, Fare_sub_div_mean, Fare_div_std
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
tfidf('Name')
\n", "
description
\n", "
An instance of generic feature constructor tfidf
\n", "
source
\n", "
tfidf('Name')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
columns
\n", "
tfidf__Name_0, tfidf__Name_3, tfidf__Name_1, tfidf__Name_2, tfidf__Name_4
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_TargetEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_WOEEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])
\n", "
source
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])\n",
       "
\n", "
columns
\n", "
Embarked_ce_OneHotEncoder_0, Embarked_ce_OneHotEncoder_3, Embarked_ce_OneHotEncoder_1, Embarked_ce_OneHotEncoder_2
\n", "
\n", "
details
\n", "
\n", "
\n", "
\n", "
FEATURE SET
\n", " \n", "
\n", "
name
\n", "
FSBWBPEK
\n", "
source
\n", "
FeatureSet([simple_feature,\n",
       "            interactions('Pclass', 'Age'),\n",
       "            num_aggs('Fare'),\n",
       "            tfidf('Name')],\n",
       "           [stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(OneHotEncoder(), ['Embarked'], [None])],\n",
       "           targets=['Survived'],\n",
       "           auxiliary=[])\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
MODEL
\n", " \n", "
\n", "
name
\n", "
CatBoostClassifierGVB
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
loss_function = 'Logloss'\n",
       "custom_metric = 'AUC'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
source
\n", "
CatBoostClassifier(loss_function='Logloss', custom_metric='AUC', rsm=0.15, iterations=100)\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
VALIDATOR
\n", " \n", "
\n", "
splitter
\n", "
StratifiedKFold(n_splits=5, random_state=42, shuffle=True)\n",
       "
\n", "
metric
\n", "
roc_auc_score\n",
       "
\n", "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
EXPERIMENT
\n", " \n", "
\n", "
ID
\n", "
FYCMDA
\n", "
score
\n", "
0.8145216602070352
\n", "
model
\n", "
LogisticRegression
\n", "
params
\n", "
                C = 0.5\n",
       "     class_weight = None\n",
       "             dual = False\n",
       "    fit_intercept = True\n",
       "intercept_scaling = 1\n",
       "         max_iter = 1000\n",
       "      multi_class = 'warn'\n",
       "          penalty = 'l2'\n",
       "     random_state = None\n",
       "           solver = 'lbfgs'\n",
       "              tol = 0.0001\n",
       "       warm_start = False\n",
       "
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
columns
\n", "
is_male
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
interactions('Pclass', 'Age')
\n", "
description
\n", "
An instance of generic feature constructor interactions
\n", "
source
\n", "
interactions('Pclass', 'Age')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Pclass_add_Age, Pclass_sub_Age, Pclass_mul_Age
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
num_aggs('Fare')
\n", "
description
\n", "
An instance of generic feature constructor num_aggs
\n", "
source
\n", "
num_aggs('Fare')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Fare_div_mean, Fare_sub_div_mean, Fare_div_std
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
tfidf('Name')
\n", "
description
\n", "
An instance of generic feature constructor tfidf
\n", "
source
\n", "
tfidf('Name')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
columns
\n", "
tfidf__Name_0, tfidf__Name_3, tfidf__Name_1, tfidf__Name_2, tfidf__Name_4
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_TargetEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_WOEEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])
\n", "
source
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])\n",
       "
\n", "
columns
\n", "
Embarked_ce_OneHotEncoder_0, Embarked_ce_OneHotEncoder_3, Embarked_ce_OneHotEncoder_1, Embarked_ce_OneHotEncoder_2
\n", "
\n", "
details
\n", "
\n", "
\n", "
\n", "
FEATURE SET
\n", " \n", "
\n", "
name
\n", "
FSBWBPEK
\n", "
source
\n", "
FeatureSet([simple_feature,\n",
       "            interactions('Pclass', 'Age'),\n",
       "            num_aggs('Fare'),\n",
       "            tfidf('Name')],\n",
       "           [stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(OneHotEncoder(), ['Embarked'], [None])],\n",
       "           targets=['Survived'],\n",
       "           auxiliary=[])\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
MODEL
\n", " \n", "
\n", "
name
\n", "
LogisticRegressionGNM
\n", "
model
\n", "
LogisticRegression
\n", "
params
\n", "
                C = 0.5\n",
       "     class_weight = None\n",
       "             dual = False\n",
       "    fit_intercept = True\n",
       "intercept_scaling = 1\n",
       "         max_iter = 1000\n",
       "      multi_class = 'warn'\n",
       "          penalty = 'l2'\n",
       "     random_state = None\n",
       "           solver = 'lbfgs'\n",
       "              tol = 0.0001\n",
       "       warm_start = False\n",
       "
\n", "
source
\n", "
LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=1000, multi_class='warn', penalty='l2', random_state=None, solver='lbfgs', tol=0.0001, warm_start=False)\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
VALIDATOR
\n", " \n", "
\n", "
splitter
\n", "
StratifiedKFold(n_splits=5, random_state=42, shuffle=True)\n",
       "
\n", "
metric
\n", "
roc_auc_score\n",
       "
\n", "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/html": [ "
LEADERBOARD
\n", "
#
id
score
model
# features
date
took
\n", "\n", "
\n", "
\n", "
EXPERIMENT
\n", " \n", "
\n", "
ID
\n", "
GWZCPQ
\n", "
score
\n", "
0.7948359406496035
\n", "
model
\n", "
KNeighborsClassifier
\n", "
params
\n", "
    algorithm = 'auto'\n",
       "    leaf_size = 30\n",
       "       metric = 'minkowski'\n",
       "metric_params = None\n",
       "  n_neighbors = 5\n",
       "            p = 2\n",
       "      weights = 'uniform'\n",
       "
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
columns
\n", "
is_male
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
interactions('Pclass', 'Age')
\n", "
description
\n", "
An instance of generic feature constructor interactions
\n", "
source
\n", "
interactions('Pclass', 'Age')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Pclass_add_Age, Pclass_sub_Age, Pclass_mul_Age
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
num_aggs('Fare')
\n", "
description
\n", "
An instance of generic feature constructor num_aggs
\n", "
source
\n", "
num_aggs('Fare')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Fare_div_mean, Fare_sub_div_mean, Fare_div_std
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
tfidf('Name')
\n", "
description
\n", "
An instance of generic feature constructor tfidf
\n", "
source
\n", "
tfidf('Name')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
columns
\n", "
tfidf__Name_0, tfidf__Name_3, tfidf__Name_1, tfidf__Name_2, tfidf__Name_4
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_TargetEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_WOEEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])
\n", "
source
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])\n",
       "
\n", "
columns
\n", "
Embarked_ce_OneHotEncoder_0, Embarked_ce_OneHotEncoder_3, Embarked_ce_OneHotEncoder_1, Embarked_ce_OneHotEncoder_2
\n", "
\n", "
details
\n", "
\n", "
\n", "
\n", "
FEATURE SET
\n", " \n", "
\n", "
name
\n", "
FSBWBPEK
\n", "
source
\n", "
FeatureSet([simple_feature,\n",
       "            interactions('Pclass', 'Age'),\n",
       "            num_aggs('Fare'),\n",
       "            tfidf('Name')],\n",
       "           [stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(OneHotEncoder(), ['Embarked'], [None])],\n",
       "           targets=['Survived'],\n",
       "           auxiliary=[])\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
MODEL
\n", " \n", "
\n", "
name
\n", "
KNeighborsClassifierEJC
\n", "
model
\n", "
KNeighborsClassifier
\n", "
params
\n", "
    algorithm = 'auto'\n",
       "    leaf_size = 30\n",
       "       metric = 'minkowski'\n",
       "metric_params = None\n",
       "  n_neighbors = 5\n",
       "            p = 2\n",
       "      weights = 'uniform'\n",
       "
\n", "
source
\n", "
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_neighbors=5, p=2, weights='uniform')\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
VALIDATOR
\n", " \n", "
\n", "
splitter
\n", "
StratifiedKFold(n_splits=5, random_state=42, shuffle=True)\n",
       "
\n", "
metric
\n", "
roc_auc_score\n",
       "
\n", "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb\n", "lbs.other" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiments\n", "\n", "Experiments are accessible by their identifiers:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb['KPBVAI'] is lb.KPBVAI" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
EXPERIMENT
\n", "
ID
\n", "
KPBVAI
\n", "
score
\n", "
0.8450484134619144
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
loss_function = 'Logloss'\n",
       "custom_metric = 'AUC'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
features
\n", "
\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
simple_feature
\n", "
source
\n", "
@feature\n",
       "def simple_feature(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res['is_male'] = (df.Sex == 'male') + 0\n",
       "    return res\n",
       "
\n", "
columns
\n", "
is_male
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
interactions('Pclass', 'Age')
\n", "
description
\n", "
An instance of generic feature constructor interactions
\n", "
source
\n", "
interactions('Pclass', 'Age')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(left="Pclass", right="SibSp")\n",
       "def interactions(df):\n",
       "    res = stl.empty_like(df)\n",
       "    res[f"{left}_add_{right}"] = df[left] + df[right]\n",
       "    res[f"{left}_sub_{right}"] = df[left] - df[right]\n",
       "    res[f"{left}_mul_{right}"] = df[left] * df[right]\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Pclass_add_Age, Pclass_sub_Age, Pclass_mul_Age
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
num_aggs('Fare')
\n", "
description
\n", "
An instance of generic feature constructor num_aggs
\n", "
source
\n", "
num_aggs('Fare')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col="Parch")\n",
       "def num_aggs(df):\n",
       "    """Descriptions are also supported."""\n",
       "    res = pd.DataFrame(index=df.index)\n",
       "    mean = df[col].mean()\n",
       "    std = df[col].std()\n",
       "    res[f"{col}_div_mean"] = df[col] / mean\n",
       "    res[f"{col}_sub_div_mean"] = (df[col] - mean) / mean\n",
       "    res[f"{col}_div_std"] = df[col] / std\n",
       "    return res\n",
       "
\n", "
columns
\n", "
Fare_div_mean, Fare_sub_div_mean, Fare_div_std
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
tfidf('Name')
\n", "
description
\n", "
An instance of generic feature constructor tfidf
\n", "
source
\n", "
tfidf('Name')\n",
       "
\n", "
additional source
\n", "
@feature\n",
       "@generic(col='Name')\n",
       "def tfidf(df):\n",
       "    if df.train:\n",
       "        enc = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), max_features=5)\n",
       "        res = enc.fit_transform(df[col])\n",
       "        df.state['enc'] = enc\n",
       "    else:\n",
       "        enc = df.state['enc']\n",
       "        res = enc.transform(df[col])\n",
       "    return res.todense()\n",
       "
\n", "
columns
\n", "
tfidf__Name_0, tfidf__Name_3, tfidf__Name_1, tfidf__Name_2, tfidf__Name_4
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_TargetEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])
\n", "
source
\n", "
stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived'])\n",
       "
\n", "
columns
\n", "
Embarked_ce_Survived_WOEEncoder
\n", "
\n", "\n", "
\n", "
\n", "
FEATURE CONSTRUCTOR
\n", " \n", "
\n", "
name
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])
\n", "
source
\n", "
stl.category_encode(OneHotEncoder(), ['Embarked'], [None])\n",
       "
\n", "
columns
\n", "
Embarked_ce_OneHotEncoder_0, Embarked_ce_OneHotEncoder_3, Embarked_ce_OneHotEncoder_1, Embarked_ce_OneHotEncoder_2
\n", "
\n", "
details
\n", "
\n", "
\n", "
\n", "
FEATURE SET
\n", " \n", "
\n", "
name
\n", "
FSBWBPEK
\n", "
source
\n", "
FeatureSet([simple_feature,\n",
       "            interactions('Pclass', 'Age'),\n",
       "            num_aggs('Fare'),\n",
       "            tfidf('Name')],\n",
       "           [stl.category_encode(TargetEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(WOEEncoder(), ['Embarked'], ['Survived']),\n",
       "            stl.category_encode(OneHotEncoder(), ['Embarked'], [None])],\n",
       "           targets=['Survived'],\n",
       "           auxiliary=[])\n",
       "
\n", "
requirements
\n", "
sklearn==0.20.2
\n", "
\n", "\n", "
\n", "
\n", "
MODEL
\n", " \n", "
\n", "
name
\n", "
CatBoostClassifierGVB
\n", "
model
\n", "
CatBoostClassifier
\n", "
params
\n", "
loss_function = 'Logloss'\n",
       "custom_metric = 'AUC'\n",
       "          rsm = 0.15\n",
       "   iterations = 100\n",
       "
\n", "
source
\n", "
CatBoostClassifier(loss_function='Logloss', custom_metric='AUC', rsm=0.15, iterations=100)\n",
       "
\n", "
\n", "\n", "
\n", "
\n", "
VALIDATOR
\n", " \n", "
\n", "
splitter
\n", "
StratifiedKFold(n_splits=5, random_state=42, shuffle=True)\n",
       "
\n", "
metric
\n", "
roc_auc_score\n",
       "
\n", "
\n", "
requirements
\n", "
sklearn==0.20.2
" ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb.KPBVAI" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inference\n", "\n", "Inference is as easy as `experiment.predict(frame)`. Features are computed automatically." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
COMPUTING FEATURES
feature
progress
\n", "
simple_feature
\n", "
0s
\n", "
\n", "
interactions__Pclass_Age
\n", "
0s
\n", "
\n", "
num_aggs__Fare
\n", "
0s
\n", "
\n", "
tfidf__Name
\n", "
0s
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFERENCE
\n", "
id
\n", "
KPBVAI
\n", "
progress
\n", "
\n", "
took
\n", "
1s
\n", "
eta
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "array([0.17121523, 0.57615125, 0.0999157 , 0.23952768, 0.78401192])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb.KPBVAI.predict(test.head(5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Feature Importances" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
EXPERIMENT.FEATURE_IMPORTANCES DOCS
\n", "
signature
\n", "
Experiment.feature_importances(self, plot, estimator, sort_by, n_best, verbose)\n",
       "
\n", "
description
\n", "
Computes feature importance
\n", "
params
\n", "
plot
if true, then returns a graph, otherwise returns a dataframe
\n", "
estimator
importance estimator instance used to compute feature importances
\n", "
sort_by
fold-wise statistic used to sort features. One of min, mean, and max
\n", "
n_best
number of best features to show
\n", "
verbose
whether to produce reports during computing, such as progress bar
and interim feature importances. Useful for long-running estimators
\n", "
\n", "
returns
\n", "
A feature importances graph if plot=True, dataframe with importances otherwise
\n", "
examples
\n", "
>>> from kts.feature_selection import Permutation\n",
       ">>> lb.ABCDEF.feature_importances(plot=False)  # -> pd.DataFrame\n",
       ">>> lb.ABCDEF.feature_importances(estimator=Permutation(train_frame, n_iters=3), sort_by='max')\n",
       "
" ], "text/plain": [ ">" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb.KPBVAI.feature_importances" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
FEATURE IMPORTANCES
feature
mean
importance
\n", "
is_male
37.519
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
Pclass_mul_Age
8.839
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
Pclass_sub_Age
6.410
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
Fare_div_mean
6.262
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
Fare_sub_div_mean
5.858
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_4
4.894
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_2
4.600
\n", "
\n", " \n", "
\n", "
\n", "
\n", "
\n", " \n", "
" ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb.KPBVAI.feature_importances(sort_by='mean', n_best=7)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use `plot=False` to get feature importances by fold:" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tfidf__Name_0Embarked_ce_OneHotEncoder_2Pclass_add_AgeFare_sub_div_meanEmbarked_ce_Survived_WOEEncodertfidf__Name_3Embarked_ce_OneHotEncoder_1Embarked_ce_OneHotEncoder_3Embarked_ce_Survived_TargetEncoderEmbarked_ce_OneHotEncoder_0tfidf__Name_1tfidf__Name_2tfidf__Name_4Fare_div_stdFare_div_meanPclass_sub_AgePclass_mul_Ageis_male
02.946771.834141.961587.294551.275125.175522.048101.12012.875653.168095.482295.951253.488554.002946.072875.7958539.5066
15.999831.119642.714312.00951.272023.338752.5542101.376443.573523.791593.87064.674072.165741.387015.5899110.294534.2684
22.480020.8468255.263185.463352.984473.86421.5688201.769542.114864.455595.138154.276185.26876.559076.863138.5542332.5297
34.315371.303872.747863.782221.870284.349411.4834701.032830.9107956.25025.768455.145166.063016.241311.00837.6706430.0568
41.743470.304580.8486580.7418910.2628540.4929111.8887602.9152404.456622.741654.421250.42841413.12092.517211.881851.2338
\n", "
" ], "text/plain": [ " tfidf__Name_0 Embarked_ce_OneHotEncoder_2 Pclass_add_Age Fare_sub_div_mean \\\n", "0 2.94677 1.83414 1.96158 7.29455 \n", "1 5.99983 1.11964 2.7143 12.0095 \n", "2 2.48002 0.846825 5.26318 5.46335 \n", "3 4.31537 1.30387 2.74786 3.78222 \n", "4 1.74347 0.30458 0.848658 0.741891 \n", "\n", " Embarked_ce_Survived_WOEEncoder tfidf__Name_3 Embarked_ce_OneHotEncoder_1 \\\n", "0 1.27512 5.17552 2.0481 \n", "1 1.27202 3.33875 2.55421 \n", "2 2.98447 3.8642 1.56882 \n", "3 1.87028 4.34941 1.48347 \n", "4 0.262854 0.492911 1.88876 \n", "\n", " Embarked_ce_OneHotEncoder_3 Embarked_ce_Survived_TargetEncoder \\\n", "0 0 1.1201 \n", "1 0 1.37644 \n", "2 0 1.76954 \n", "3 0 1.03283 \n", "4 0 2.91524 \n", "\n", " Embarked_ce_OneHotEncoder_0 tfidf__Name_1 tfidf__Name_2 tfidf__Name_4 \\\n", "0 2.87565 3.16809 5.48229 5.95125 \n", "1 3.57352 3.79159 3.8706 4.67407 \n", "2 2.11486 4.45559 5.13815 4.27618 \n", "3 0.910795 6.2502 5.76845 5.14516 \n", "4 0 4.45662 2.74165 4.42125 \n", "\n", " Fare_div_std Fare_div_mean Pclass_sub_Age Pclass_mul_Age is_male \n", "0 3.48855 4.00294 6.07287 5.79585 39.5066 \n", "1 2.16574 1.38701 5.58991 10.2945 34.2684 \n", "2 5.2687 6.55907 6.86313 8.55423 32.5297 \n", "3 6.06301 6.2413 11.0083 7.67064 30.0568 \n", "4 0.428414 13.1209 2.5172 11.8818 51.2338 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lb.KPBVAI.feature_importances(plot=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify an importance estimator to compute permutation importance:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
COMPUTING IMPORTANCES
\n", "
progress
\n", "
\n", "
Computing Embarked_ce_OneHotEncoder_3
\n", "
took
\n", "
5s
\n", "
eta
\n", "
0s
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
FEATURE IMPORTANCES
feature
mean
importance
\n", "
is_male
0.208
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Pclass_sub_Age
0.018
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Pclass_mul_Age
0.017
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Fare_div_mean
0.011
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Fare_sub_div_mean
7.27e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_0
6.43e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Fare_div_std
4.32e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_4
3.71e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Pclass_add_Age
2.57e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_1
2.54e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Embarked_ce_OneHotEncoder_1
2.46e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Embarked_ce_Survived_WOEEncoder
1.83e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_3
1.78e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Embarked_ce_OneHotEncoder_2
1.33e-03
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
tfidf__Name_2
8.88e-04
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Embarked_ce_OneHotEncoder_0
7.91e-04
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Embarked_ce_Survived_TargetEncoder
5.84e-04
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
Embarked_ce_OneHotEncoder_3
0.e+00
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "lb.KPBVAI.feature_importances(sort_by='mean', estimator=Permutation(train, n_iters=10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom Models\n", "\n", "Suppose you want to use some model which is not in `kts.models`, like [Regularized Greedy Forest](https://github.com/RGF-team/rgf/tree/master/python-package)." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "!pip3 install rgf_python > /dev/null" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "from rgf.sklearn import RGFClassifier" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To use it, you simply need to create a class derived from both your classifier and `kts.CustomModel`. It may optionally include `preprocess` method or inherit it from some mixin, like `kts.NormalizeFillNAMixin`:\n", "\n", "```python\n", "class KTSWrapper(kts.CustomModel, somelib.SomeClassifier):\n", " ignored_params = [...]\n", " \n", " def preprocess(X, y=None):\n", " if y is None:\n", " print('if y is None then .predict is called')\n", " else:\n", " print('otherwise .fit')\n", " return X, y\n", "```\n", "\n", "An alternative approach is using `kts.custom_model(ModelClass, ignored_params, normalize_fillna=True/False)` function:\n", "```python\n", "RGF = custom_model(RGFClassifier, ignored_params=['memory_policy', 'n_jobs', 'verbose'], normalize_fillna=True)\n", "```\n", "However, subclassing gives more freedom in defining custom preprocessing.\n", "\n", "In this example the classifier can't deal with NaN values, and we use `kts.NormalizeFillNAMixin` to add preprocessing method:" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "class RGF(NormalizeFillNAMixin, CustomModel, RGFClassifier):\n", " ignored_params = ['memory_policy', 'n_jobs', 'verbose']" ] }, { "cell_type": 
"code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/rgf/utils.py:225: UserWarning: Cannot find FastRGF executable files. FastRGF estimators will be unavailable for usage.\n", " warnings.warn(\"Cannot find FastRGF executable files. \"\n" ] }, { "data": { "text/html": [ "
MODEL
\n", "
name
\n", "
RGFCVOCZ
\n", "
model
\n", "
RGF
\n", "
params
\n", "
       algorithm = 'RGF'\n",
       "       calc_prob = 'sigmoid'\n",
       "      init_model = None\n",
       "              l2 = 0.1\n",
       "   learning_rate = 0.5\n",
       "            loss = 'Log'\n",
       "        max_leaf = 1000\n",
       "min_samples_leaf = 10\n",
       "          n_iter = None\n",
       "   n_tree_search = 1\n",
       "       normalize = False\n",
       "    opt_interval = 100\n",
       "       reg_depth = 1.0\n",
       "             sl2 = None\n",
       "   test_interval = 100\n",
       "
\n", "
source
\n", "
RGF(algorithm='RGF', calc_prob='sigmoid', init_model=None, l2=0.1, learning_rate=0.5, loss='Log', max_leaf=1000, min_samples_leaf=10, n_iter=None, n_tree_search=1, normalize=False, opt_interval=100, reg_depth=1.0, sl2=None, test_interval=100)\n",
       "
\n", "
custom model class source
\n", "
class RGF(NormalizeFillNAMixin, CustomModel, RGFClassifier):\n",
       "    ignored_params = ['memory_policy', 'n_jobs', 'verbose']\n",
       "
" ], "text/plain": [ "RGF(algorithm='RGF', calc_prob='sigmoid', init_model=None, l2=0.1,\n", " learning_rate=0.5, loss='Log', max_leaf=1000, memory_policy='generous',\n", " min_samples_leaf=10, n_iter=None, n_jobs=-1, n_tree_search=1,\n", " normalize=False, opt_interval=100, reg_depth=1.0, sl2=None,\n", " test_interval=100, verbose=0)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "RGF()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
FITTING
\n", "
progress
\n", "
\n", "
train
\n", "
\n", "
valid
\n", "
\n", "
metric
\n", "
0.754
\n", "
took
\n", "
1s
\n", "
eta
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.733
\n", "
2s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.795
\n", "
1s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.817
\n", "
1s
\n", "
0s
\n", "
\n", "
\n", "
\n", "
0.754
\n", "
1s
\n", "
0s
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'score': 0.7704334423329472, 'id': 'BFELLE'}" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.score(RGF(), fs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom Validators\n", "\n", "*TODO*" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 }