{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from lale.lib.lale import ConcatFeatures as Concat\n", "from lale.lib.lale import NoOp\n", "from lale.lib.lale import Both\n", "from lale.lib.sklearn import KNeighborsClassifier as KNN\n", "from lale.lib.sklearn import LogisticRegression as LR\n", "from lale.lib.sklearn import MinMaxScaler as Scaler\n", "from lale.lib.sklearn import Nystroem\n", "from lale.lib.sklearn import PCA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combinator-Style\n", "| Symbol | Name | Description | Sklearn feature |\n", "| ------ | ---- | ------------ | --------------- |\n", "| >> | pipe | Feed to next | `make_pipeline` |\n", "| & | and | Run both | `make_union`, includes concat |\n", "| &#124; | or | Choose one | (missing) |" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "scl = Scaler | NoOp\n", "scl.visualize()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tfm = (PCA & Nystroem) >> Concat\n", "tfm.visualize()" ] }, { "cell_type": "code", "execution_count": 4, 
"metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "clf = KNN | LR\n", "clf.visualize()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:choice_0\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "cluster:choice_1\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "scaler->pca\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "scaler->nystroem\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "concat->knn\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "pipeline = (Scaler | NoOp) >> (PCA & Nystroem) >> Concat >> (KNN | LR)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "optimizable = scl >> tfm >> clf\n", "optimizable.visualize()\n", "optimizable.pretty_print(ipython_display=True, show_imports=False)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "optimizable.pretty_print(ipython_display='input')" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "# generated by pretty_print(ipython_display='input') from previous cell\n", "from lale.lib.sklearn import MinMaxScaler as Scaler\n", "from lale.lib.lale import NoOp\n", "from lale.lib.sklearn import PCA\n", "from lale.lib.sklearn import Nystroem\n", "from lale.lib.lale import ConcatFeatures as Concat\n", "from lale.lib.sklearn import KNeighborsClassifier as KNN\n", "from lale.lib.sklearn import LogisticRegression as LR\n", "import lale\n", "lale.wrap_imported_operators()\n", "\n", "pipeline = (Scaler | NoOp) >> (PCA & Nystroem) >> Concat >> (KNN | LR)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sklearn-Style" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lale.operators import make_choice, make_pipeline, make_union\n", "scl = make_choice(Scaler, NoOp)\n", "scl.visualize()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tfm = make_union(PCA, Nystroem)\n", "tfm.visualize()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { 
"image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "clf = make_choice(KNN, LR)\n", "clf.visualize()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:choice_0\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "cluster:choice_1\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "scaler->pca\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "scaler->nystroem\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "concat->knn\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "choice_0 = make_choice(Scaler, NoOp)\n", "union = make_union(PCA, Nystroem)\n", "choice_1 = make_choice(KNN, LR)\n", "pipeline = make_pipeline(choice_0, union, choice_1)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "optimizable = make_pipeline(scl, tfm, clf)\n", "optimizable.visualize()\n", "optimizable.pretty_print(ipython_display=True, show_imports=False, combinators=False)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], 
"source": [ "optimizable.pretty_print(ipython_display='input', combinators=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# generated by pretty_print(ipython_display='input') from previous cell\n", "from lale.lib.sklearn import MinMaxScaler as Scaler\n", "from lale.lib.lale import NoOp\n", "from lale.operators import make_choice\n", "from lale.lib.sklearn import PCA\n", "from lale.lib.sklearn import Nystroem\n", "from lale.operators import make_union\n", "from lale.lib.sklearn import KNeighborsClassifier as KNN\n", "from lale.lib.sklearn import LogisticRegression as LR\n", "from lale.operators import make_pipeline\n", "\n", "choice_0 = make_choice(Scaler, NoOp)\n", "union = make_union(PCA, Nystroem)\n", "choice_1 = make_choice(KNN, LR)\n", "pipeline = make_pipeline(choice_0, union, choice_1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Nested" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:choice\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "cluster:pipeline_1\n", "\n", "\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "lr_0\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "pca->lr_0\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "lr_1\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "no_op->lr_1\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "lr_0 = LR(C=0.09)\n", "lr_1 = LR(C=0.19)\n", "pipeline = PCA >> (lr_0 | NoOp >> lr_1)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "nested = PCA >> (LR(C=0.09) | NoOp >> LR(C=0.19))\n", "nested.visualize()\n", "nested.pretty_print(ipython_display=True, 
show_imports=False)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "```python\n", "{\n", " 'class': 'lale.operators.PlannedPipeline',\n", " 'state': 'planned',\n", " 'edges': [['pca', 'choice']],\n", " 'steps': {\n", " 'pca': {\n", " 'class': 'lale.lib.sklearn.pca.PCAImpl',\n", " 'state': 'planned',\n", " 'operator': 'PCA',\n", " 'label': 'PCA',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html'},\n", " 'choice': {\n", " 'class': 'lale.operators.OperatorChoice',\n", " 'state': 'planned',\n", " 'operator': 'OperatorChoice',\n", " 'steps': {\n", " 'lr_0': {\n", " 'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',\n", " 'state': 'trainable',\n", " 'operator': 'LogisticRegression',\n", " 'label': 'LR',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html',\n", " 'hyperparams': {\n", " 'C': 0.09},\n", " 'is_frozen_trainable': false},\n", " 'pipeline_1': {\n", " 'class': 'lale.operators.TrainablePipeline',\n", " 'state': 'trainable',\n", " 'edges': [['no_op', 'lr_1']],\n", " 'steps': {\n", " 'no_op': {\n", " 'class': 'lale.lib.lale.no_op.NoOpImpl',\n", " 'state': 'trained',\n", " 'operator': 'NoOp',\n", " 'label': 'NoOp',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html',\n", " 'hyperparams': null,\n", " 'is_frozen_trainable': true,\n", " 'coefs': null,\n", " 'is_frozen_trained': true},\n", " 'lr_1': {\n", " 'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',\n", " 'state': 'trainable',\n", " 'operator': 'LogisticRegression',\n", " 'label': 'LR',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html',\n", " 'hyperparams': {\n", " 'C': 0.19},\n", " 'is_frozen_trainable': false}}}}}}}\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, 
"output_type": "display_data" } ], "source": [ "from lale.pretty_print import ipython_display\n", "ipython_display(nested.to_json())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Higher-order" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:both\n", "\n", "\n", "Both\n", "\n", "\n", "\n", "cluster:choice\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "pca->knn\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "pca = PCA(n_components=2)\n", "both = Both(op1=pca, op2=Nystroem)\n", "pipeline = both >> (KNN | LR)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "higher_order = Both(op1=PCA(n_components=2), op2=Nystroem) >> (KNN | LR)\n", "higher_order.visualize()\n", "higher_order.pretty_print(ipython_display=True, show_imports=False)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "```python\n", "{\n", " 'class': 'lale.operators.PlannedPipeline',\n", " 'state': 'planned',\n", " 'edges': [['both', 'choice']],\n", " 'steps': {\n", " 'both': {\n", " 'class': 'lale.lib.lale.both.BothImpl',\n", " 'state': 'trainable',\n", " 'operator': 'Both',\n", " 'label': 'Both',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.both.html',\n", " 'hyperparams': {\n", " 'op1': {\n", " '$ref': '../steps/pca'},\n", " 'op2': {\n", " '$ref': '../steps/nystroem'}},\n", " 'steps': {\n", " 'pca': {\n", " 'class': 'lale.lib.sklearn.pca.PCAImpl',\n", 
" 'state': 'trainable',\n", " 'operator': 'PCA',\n", " 'label': 'PCA',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html',\n", " 'hyperparams': {\n", " 'n_components': 2},\n", " 'is_frozen_trainable': false},\n", " 'nystroem': {\n", " 'class': 'lale.lib.sklearn.nystroem.NystroemImpl',\n", " 'state': 'planned',\n", " 'operator': 'Nystroem',\n", " 'label': 'Nystroem',\n", " 'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.Nystroem.html'}},\n", " 'is_frozen_trainable': false},\n", " 'choice': {\n", " 'class': 'lale.operators.OperatorChoice',\n", " 'state': 'planned',\n", " 'operator': 'OperatorChoice',\n", " 'steps': {\n", " 'knn': {\n", " 'class': 'lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl',\n", " 'state': 'planned',\n", " 'operator': 'KNeighborsClassifier',\n", " 'label': 'KNN',\n", " 'documentation_url': 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html'},\n", " 'lr': {\n", " 'class': 'lale.lib.sklearn.logistic_regression.LogisticRegressionImpl',\n", " 'state': 'planned',\n", " 'operator': 'LogisticRegression',\n", " 'label': 'LR',\n", " 'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html'}}}}}\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ipython_display(higher_order.to_json())" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Vote\n", "\n", "\n", "\n", "cluster:pipeline\n", "\n", "\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "pca->lr\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] 
}, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "pipeline = Vote(estimators=[('knn', KNN), ('pipeline', PCA() >> LR)], voting='soft')\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lale.lib.sklearn import VotingClassifier as Vote\n", "vote = Vote(estimators=[('knn',KNN), ('pipeline',PCA()>>LR)], voting='soft')\n", "vote.visualize()\n", "vote.pretty_print(ipython_display=True, show_imports=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }