{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: liac-arff>=2.4.0 in /home/hirzel/python3.6venv/lib/python3.6/site-packages (2.4.0)\n",
"\u001b[33mWARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.\n",
"You should consider upgrading via the '/home/hirzel/python3.6venv/bin/python3.6 -m pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"source": [
"# liac-arff is required for accessing OpenML datasets from Lale.\n",
"# Use %pip (not !pip) so the package is installed into the running kernel's environment.\n",
"%pip install 'liac-arff>=2.4.0'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import lale.datasets.openml\n",
"import pandas as pd\n",
"# Load the OpenML credit-g classification dataset (preprocess=True) as train/test splits.\n",
"(train_X, train_y), (test_X, test_y) = lale.datasets.openml.fetch(\n",
"    'credit-g', 'classification', preprocess=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler as Standard, MinMaxScaler as MinMax\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.kernel_approximation import Nystroem as Nys\n",
"from lale.lib.lale import NoOp\n",
"from sklearn.linear_model import LogisticRegression as LR\n",
"from sklearn.ensemble import RandomForestClassifier as RF\n",
"from xgboost import XGBClassifier as XGBoost\n",
"from lale.lib.lale import TopKVotingClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from lale.lib.lale import Hyperopt\n",
"from sklearn.ensemble import VotingClassifier\n",
"lale.wrap_imported_operators()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hyperopt is currently the only supported optimizer. `args_to_optimizer`\n",
"is a dictionary of the arguments that Hyperopt accepts, as documented at\n",
"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.hyperopt.html"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Search space with three stages: scaler choice >> feature-transform choice >> classifier choice.\n",
"planned_pipeline = (\n",
"    (NoOp | Standard | MinMax)\n",
"    >> (NoOp | PCA | Nys)\n",
"    >> (LR | RF | XGBoost)\n",
")\n",
"# Voting ensemble with k=3, using Hyperopt (25 evaluations, accuracy scoring) as the optimizer.\n",
"ensemble = TopKVotingClassifier(\n",
"    estimator=planned_pipeline,\n",
"    k=3,\n",
"    optimizer=Hyperopt,\n",
"    args_to_optimizer={'max_evals': 25, 'scoring': 'accuracy'},\n",
")\n",
"ensemble.visualize()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100%|██████████████████████████████| 25/25 [02:51<00:00, 6.87s/trial, best loss: -0.7417910447761193]\n",
"100%|█████████████████████████████████| 1/1 [01:23<00:00, 83.42s/trial, best loss: -0.755223880597015]\n"
]
}
],
"source": [
"# Run the optimizer search and fit the resulting ensemble on the training split.\n",
"trained_ensemble = ensemble.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Alternative: pass only the planned pipeline; Hyperopt is then used with its default settings.\n",
"# Bound to its own name so the configured `ensemble` is not overwritten; otherwise\n",
"# re-running the fit cell would silently train this default-configured variant instead.\n",
"default_ensemble = TopKVotingClassifier(estimator=planned_pipeline)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7696969696969697\n"
]
}
],
"source": [
"# Score the trained ensemble on the held-out test split.\n",
"predictions = trained_ensemble.predict(test_X)\n",
"print(accuracy_score(y_true=test_y, y_pred=predictions))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Retrieve the pipeline held by the trained ensemble and draw it.\n",
"best_pipeline = trained_ensemble.get_pipeline()\n",
"best_pipeline.visualize()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'class': 'lale.lib.sklearn.voting_classifier.VotingClassifierImpl',\n",
" 'state': 'trained',\n",
" 'operator': 'VotingClassifier',\n",
" 'label': 'VotingClassifier',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.voting_classifier.html',\n",
" 'hyperparams': {'estimators': [('p17', {'$ref': '../steps/pipeline'}),\n",
" ('p9', {'$ref': '../steps/pipeline_0'}),\n",
" ('p16', {'$ref': '../steps/pipeline_1'})],\n",
" 'voting': 'soft'},\n",
" 'steps': {'pipeline': {'class': 'lale.operators.TrainablePipeline',\n",
" 'state': 'trainable',\n",
" 'edges': [['min_max', 'no_op'], ['no_op', 'lr']],\n",
" 'steps': {'min_max': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'MinMax',\n",
" 'label': 'MinMax',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html',\n",
" 'hyperparams': {},\n",
" 'is_frozen_trainable': True},\n",
" 'no_op': {'class': 'lale.lib.lale.no_op.NoOpImpl',\n",
" 'state': 'trained',\n",
" 'operator': 'NoOp',\n",
" 'label': 'NoOp',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html',\n",
" 'hyperparams': {},\n",
" 'is_frozen_trainable': True,\n",
" 'coefs': None,\n",
" 'is_frozen_trained': True},\n",
" 'lr': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'LR',\n",
" 'label': 'LR',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html',\n",
" 'hyperparams': {'C': 22334.598583769228,\n",
" 'fit_intercept': False,\n",
" 'tol': 0.012327172789706938},\n",
" 'is_frozen_trainable': True}}},\n",
" 'pipeline_0': {'class': 'lale.operators.TrainablePipeline',\n",
" 'state': 'trainable',\n",
" 'edges': [['standard', 'pca'], ['pca', 'lr_0']],\n",
" 'steps': {'standard': {'class': 'lale.lib.sklearn.standard_scaler.StandardScalerImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'Standard',\n",
" 'label': 'Standard',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.standard_scaler.html',\n",
" 'hyperparams': {'with_mean': False, 'with_std': False},\n",
" 'is_frozen_trainable': True},\n",
" 'pca': {'class': 'lale.lib.sklearn.pca.PCAImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'PCA',\n",
" 'label': 'PCA',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html',\n",
" 'hyperparams': {'svd_solver': 'randomized'},\n",
" 'is_frozen_trainable': True},\n",
" 'lr_0': {'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'LR',\n",
" 'label': 'LR',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html',\n",
" 'hyperparams': {'C': 31347.843540495694,\n",
" 'penalty': 'l1',\n",
" 'tol': 0.03811266583210131},\n",
" 'is_frozen_trainable': True}}},\n",
" 'pipeline_1': {'class': 'lale.operators.TrainablePipeline',\n",
" 'state': 'trainable',\n",
" 'edges': [['min_max_0', 'no_op_0'], ['no_op_0', 'xg_boost']],\n",
" 'steps': {'min_max_0': {'class': 'lale.lib.sklearn.min_max_scaler.MinMaxScalerImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'MinMax',\n",
" 'label': 'MinMax',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html',\n",
" 'hyperparams': {},\n",
" 'is_frozen_trainable': True},\n",
" 'no_op_0': {'class': 'lale.lib.lale.no_op.NoOpImpl',\n",
" 'state': 'trained',\n",
" 'operator': 'NoOp',\n",
" 'label': 'NoOp',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html',\n",
" 'hyperparams': {},\n",
" 'is_frozen_trainable': True,\n",
" 'coefs': None,\n",
" 'is_frozen_trained': True},\n",
" 'xg_boost': {'class': 'lale.lib.xgboost.xgb_classifier.XGBClassifierImpl',\n",
" 'state': 'trainable',\n",
" 'operator': 'XGBoost',\n",
" 'label': 'XGBoost',\n",
" 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.xgboost.XGBClassifier.html',\n",
" 'hyperparams': {'booster': 'dart',\n",
" 'colsample_bylevel': 0.47096071538468853,\n",
" 'colsample_bytree': 0.7593792234753081,\n",
" 'learning_rate': 0.20970693320349945,\n",
" 'max_depth': 18,\n",
" 'min_child_weight': 2,\n",
" 'n_estimators': 1063,\n",
" 'reg_alpha': 0.31844178826811975,\n",
" 'reg_lambda': 0.7797218315351517,\n",
" 'subsample': 0.64136510248406},\n",
" 'is_frozen_trainable': True}}}},\n",
" 'is_frozen_trainable': True,\n",
" 'coefs': 'coefs_not_available',\n",
" 'is_frozen_trained': False}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the JSON representation of the pipeline, including each step's hyperparameters.\n",
"best_pipeline.to_json()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}