{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "![KTS logo](https://raw.githubusercontent.com/konodyuk/kts/master/docs/static/banner_alpha.png)\n", "# Modelling Guide" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
@feature\n", "def simple_feature(df):\n", " res = stl.empty_like(df)\n", " res['is_male'] = (df.Sex == 'male') + 0\n", " return res\n", "
custom_metric = 'AUC'\n", "loss_function = 'Logloss'\n", " rsm = 0.15\n", " iterations = 100\n", "
CatBoostClassifier(custom_metric='AUC', loss_function='Logloss', rsm=0.15, iterations=100)\n", "
C = 0.5\n", " class_weight = None\n", " dual = False\n", " fit_intercept = True\n", "intercept_scaling = 1\n", " max_iter = 1000\n", " multi_class = 'warn'\n", " penalty = 'l2'\n", " random_state = None\n", " solver = 'lbfgs'\n", " tol = 0.0001\n", " warm_start = False\n", "
LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=1000, multi_class='warn', penalty='l2', random_state=None, solver='lbfgs', tol=0.0001, warm_start=False)\n", "
loss_function = 'Logloss'\n", "custom_metric = 'AUC'\n", " rsm = 0.15\n", " iterations = 100\n", "
Experiment.feature_importances(self, plot, estimator, sort_by, n_best, verbose)\n", "
>>> from kts.feature_selection import Permutation\n", ">>> lb.ABCDEF.feature_importances(plot=False) # -> pd.DataFrame\n", ">>> lb.ABCDEF.feature_importances(estimator=Permutation(train_frame, n_iters=3), sort_by='max')\n", "
\n", " | tfidf__Name_0 | \n", "Embarked_ce_OneHotEncoder_2 | \n", "Pclass_add_Age | \n", "Fare_sub_div_mean | \n", "Embarked_ce_Survived_WOEEncoder | \n", "tfidf__Name_3 | \n", "Embarked_ce_OneHotEncoder_1 | \n", "Embarked_ce_OneHotEncoder_3 | \n", "Embarked_ce_Survived_TargetEncoder | \n", "Embarked_ce_OneHotEncoder_0 | \n", "tfidf__Name_1 | \n", "tfidf__Name_2 | \n", "tfidf__Name_4 | \n", "Fare_div_std | \n", "Fare_div_mean | \n", "Pclass_sub_Age | \n", "Pclass_mul_Age | \n", "is_male | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2.94677 | \n", "1.83414 | \n", "1.96158 | \n", "7.29455 | \n", "1.27512 | \n", "5.17552 | \n", "2.0481 | \n", "0 | \n", "1.1201 | \n", "2.87565 | \n", "3.16809 | \n", "5.48229 | \n", "5.95125 | \n", "3.48855 | \n", "4.00294 | \n", "6.07287 | \n", "5.79585 | \n", "39.5066 | \n", "
1 | \n", "5.99983 | \n", "1.11964 | \n", "2.7143 | \n", "12.0095 | \n", "1.27202 | \n", "3.33875 | \n", "2.55421 | \n", "0 | \n", "1.37644 | \n", "3.57352 | \n", "3.79159 | \n", "3.8706 | \n", "4.67407 | \n", "2.16574 | \n", "1.38701 | \n", "5.58991 | \n", "10.2945 | \n", "34.2684 | \n", "
2 | \n", "2.48002 | \n", "0.846825 | \n", "5.26318 | \n", "5.46335 | \n", "2.98447 | \n", "3.8642 | \n", "1.56882 | \n", "0 | \n", "1.76954 | \n", "2.11486 | \n", "4.45559 | \n", "5.13815 | \n", "4.27618 | \n", "5.2687 | \n", "6.55907 | \n", "6.86313 | \n", "8.55423 | \n", "32.5297 | \n", "
3 | \n", "4.31537 | \n", "1.30387 | \n", "2.74786 | \n", "3.78222 | \n", "1.87028 | \n", "4.34941 | \n", "1.48347 | \n", "0 | \n", "1.03283 | \n", "0.910795 | \n", "6.2502 | \n", "5.76845 | \n", "5.14516 | \n", "6.06301 | \n", "6.2413 | \n", "11.0083 | \n", "7.67064 | \n", "30.0568 | \n", "
4 | \n", "1.74347 | \n", "0.30458 | \n", "0.848658 | \n", "0.741891 | \n", "0.262854 | \n", "0.492911 | \n", "1.88876 | \n", "0 | \n", "2.91524 | \n", "0 | \n", "4.45662 | \n", "2.74165 | \n", "4.42125 | \n", "0.428414 | \n", "13.1209 | \n", "2.5172 | \n", "11.8818 | \n", "51.2338 | \n", "
algorithm = 'RGF'\n", " calc_prob = 'sigmoid'\n", " init_model = None\n", " l2 = 0.1\n", " learning_rate = 0.5\n", " loss = 'Log'\n", " max_leaf = 1000\n", "min_samples_leaf = 10\n", " n_iter = None\n", " n_tree_search = 1\n", " normalize = False\n", " opt_interval = 100\n", " reg_depth = 1.0\n", " sl2 = None\n", " test_interval = 100\n", "
RGF(algorithm='RGF', calc_prob='sigmoid', init_model=None, l2=0.1, learning_rate=0.5, loss='Log', max_leaf=1000, min_samples_leaf=10, n_iter=None, n_tree_search=1, normalize=False, opt_interval=100, reg_depth=1.0, sl2=None, test_interval=100)\n", "
class RGF(NormalizeFillNAMixin, CustomModel, RGFClassifier):\n", " ignored_params = ['memory_policy', 'n_jobs', 'verbose']\n", "