{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from lale.lib.lale import ConcatFeatures as Concat\n", "from lale.lib.lale import NoOp\n", "from lale.lib.lale import Both\n", "from lale.lib.sklearn import KNeighborsClassifier as KNN\n", "from lale.lib.sklearn import LogisticRegression as LR\n", "from lale.lib.sklearn import MinMaxScaler as Scaler\n", "from lale.lib.sklearn import Nystroem\n", "from lale.lib.sklearn import PCA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combinator-Style\n", "| Symbol | Name | Description | Sklearn feature |\n", "| ------ | ---- | ------------ | --------------- |\n", "| >> | pipe | Feed to next | `make_pipeline` |\n", "| & | and | Run both | `make_union`, includes concat |\n", "| \\| | or | Choose one | (missing) |" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "scl = Scaler | NoOp\n", "scl.visualize()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tfm = (PCA & Nystroem) >> Concat\n", "tfm.visualize()" ] }, { "cell_type": "code", "execution_count": 4, 
"metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "clf = KNN | LR\n", "clf.visualize()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:choice_0\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "cluster:choice_1\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "scaler->pca\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "scaler->nystroem\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "concat->knn\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "pipeline = (Scaler | NoOp) >> (PCA & Nystroem) >> Concat >> (KNN | LR)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "optimizable = scl >> tfm >> clf\n", "optimizable.visualize()\n", "optimizable.pretty_print(ipython_display=True, show_imports=False)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "optimizable.pretty_print(ipython_display='input')" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "# generated by pretty_print(ipython_display='input') from previous cell\n", "from sklearn.preprocessing import MinMaxScaler as Scaler\n", "from lale.lib.lale import NoOp\n", "from sklearn.decomposition import PCA\n", "from sklearn.kernel_approximation import Nystroem\n", "from lale.lib.lale import ConcatFeatures as Concat\n", "from sklearn.neighbors import KNeighborsClassifier as KNN\n", "from sklearn.linear_model import LogisticRegression as LR\n", "import lale\n", "\n", "lale.wrap_imported_operators()\n", "pipeline = (Scaler | NoOp) >> (PCA & Nystroem) >> Concat >> (KNN | LR)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sklearn-Style" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lale.operators import make_choice, make_pipeline, make_union\n", "scl = make_choice(Scaler, NoOp)\n", "scl.visualize()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tfm = make_union(PCA, Nystroem)\n", "tfm.visualize()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, 
"outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "clf = make_choice(KNN, LR)\n", "clf.visualize()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:choice_0\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "cluster:choice_1\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "scaler\n", "\n", "\n", "Scaler\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "scaler->pca\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "scaler->nystroem\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "concat\n", "\n", "\n", "Concat\n", "\n", "\n", "\n", "\n", "pca->concat\n", "\n", "\n", "\n", "\n", "nystroem->concat\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "concat->knn\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "choice_0 = make_choice(Scaler, NoOp)\n", "union = make_union(PCA, Nystroem)\n", "choice_1 = make_choice(KNN, LR)\n", "pipeline = make_pipeline(choice_0, union, choice_1)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "optimizable = make_pipeline(scl, tfm, clf)\n", "optimizable.visualize()\n", "optimizable.pretty_print(ipython_display=True, show_imports=False, combinators=False)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": 
{}, "outputs": [], "source": [ "optimizable.pretty_print(ipython_display='input', combinators=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# generated by pretty_print(ipython_display='input') from previous cell\n", "from sklearn.preprocessing import MinMaxScaler as Scaler\n", "from lale.lib.lale import NoOp\n", "from lale.operators import make_choice\n", "from sklearn.decomposition import PCA\n", "from sklearn.kernel_approximation import Nystroem\n", "from lale.operators import make_union\n", "from sklearn.neighbors import KNeighborsClassifier as KNN\n", "from sklearn.linear_model import LogisticRegression as LR\n", "from lale.operators import make_pipeline\n", "\n", "choice_0 = make_choice(Scaler, NoOp)\n", "union = make_union(PCA, Nystroem)\n", "choice_1 = make_choice(KNN, LR)\n", "pipeline = make_pipeline(choice_0, union, choice_1)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import lale\n", "lale.wrap_imported_operators() #so combinators work" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Nested" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:choice\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "cluster:pipeline_1\n", "\n", "\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "lr_0\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "pca->lr_0\n", "\n", "\n", "\n", "\n", "no_op\n", "\n", "\n", "No-\n", "Op\n", "\n", "\n", "\n", "\n", "lr_1\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "no_op->lr_1\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "lr_0 = LR(C=0.09)\n", "lr_1 = LR(C=0.19)\n", "pipeline = PCA >> (lr_0 | NoOp >> lr_1)\n", "```" ], 
"text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "nested = PCA >> (LR(C=0.09) | NoOp >> LR(C=0.19))\n", "nested.visualize()\n", "nested.pretty_print(ipython_display=True, show_imports=False)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "```python\n", "{\n", " \"class\": \"lale.operators.PlannedPipeline\",\n", " \"state\": \"planned\",\n", " \"edges\": [[\"pca\", \"choice\"]],\n", " \"steps\": {\n", " \"pca\": {\n", " \"class\": \"lale.lib.sklearn.pca.PCAImpl\",\n", " \"state\": \"planned\",\n", " \"operator\": \"PCA\",\n", " \"label\": \"PCA\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\",\n", " },\n", " \"choice\": {\n", " \"class\": \"lale.operators.OperatorChoice\",\n", " \"state\": \"planned\",\n", " \"operator\": \"OperatorChoice\",\n", " \"steps\": {\n", " \"lr_0\": {\n", " \"class\": \"lale.lib.sklearn.logistic_regression.LogisticRegressionImpl\",\n", " \"state\": \"trainable\",\n", " \"operator\": \"LogisticRegression\",\n", " \"label\": \"LR\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\",\n", " \"hyperparams\": {\"C\": 0.09},\n", " \"is_frozen_trainable\": false,\n", " },\n", " \"pipeline_1\": {\n", " \"class\": \"lale.operators.TrainablePipeline\",\n", " \"state\": \"trainable\",\n", " \"edges\": [[\"no_op\", \"lr_1\"]],\n", " \"steps\": {\n", " \"no_op\": {\n", " \"class\": \"lale.lib.lale.no_op.NoOpImpl\",\n", " \"state\": \"trained\",\n", " \"operator\": \"NoOp\",\n", " \"label\": \"NoOp\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\",\n", " \"hyperparams\": null,\n", " \"is_frozen_trainable\": true,\n", " \"coefs\": null,\n", " \"is_frozen_trained\": true,\n", " },\n", " \"lr_1\": {\n", " \"class\": 
\"lale.lib.sklearn.logistic_regression.LogisticRegressionImpl\",\n", " \"state\": \"trainable\",\n", " \"operator\": \"LogisticRegression\",\n", " \"label\": \"LR\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\",\n", " \"hyperparams\": {\"C\": 0.19},\n", " \"is_frozen_trainable\": false,\n", " },\n", " },\n", " },\n", " },\n", " },\n", " },\n", "}\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lale.pretty_print import ipython_display\n", "ipython_display(nested.to_json())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Higher-order" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "\n", "\n", "\n", "cluster:both\n", "\n", "\n", "Both\n", "\n", "\n", "\n", "cluster:choice\n", "\n", "\n", "Choice\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "pca->knn\n", "\n", "\n", "\n", "\n", "nystroem\n", "\n", "\n", "Nystroem\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "pca = PCA(n_components=2)\n", "both = Both(op1=pca, op2=Nystroem)\n", "pipeline = both >> (KNN | LR)\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "higher_order = Both(op1=PCA(n_components=2), op2=Nystroem) >> (KNN | LR)\n", "higher_order.visualize()\n", "higher_order.pretty_print(ipython_display=True, show_imports=False)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "```python\n", "{\n", " \"class\": \"lale.operators.PlannedPipeline\",\n", " \"state\": \"planned\",\n", " 
\"edges\": [[\"both\", \"choice\"]],\n", " \"steps\": {\n", " \"both\": {\n", " \"class\": \"lale.lib.lale.both.BothImpl\",\n", " \"state\": \"trainable\",\n", " \"operator\": \"Both\",\n", " \"label\": \"Both\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.both.html\",\n", " \"hyperparams\": {\n", " \"op1\": {\"$ref\": \"../steps/pca\"},\n", " \"op2\": {\"$ref\": \"../steps/nystroem\"},\n", " },\n", " \"steps\": {\n", " \"pca\": {\n", " \"class\": \"lale.lib.sklearn.pca.PCAImpl\",\n", " \"state\": \"trainable\",\n", " \"operator\": \"PCA\",\n", " \"label\": \"PCA\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\",\n", " \"hyperparams\": {\"n_components\": 2},\n", " \"is_frozen_trainable\": false,\n", " },\n", " \"nystroem\": {\n", " \"class\": \"lale.lib.sklearn.nystroem.NystroemImpl\",\n", " \"state\": \"planned\",\n", " \"operator\": \"Nystroem\",\n", " \"label\": \"Nystroem\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.nystroem.html\",\n", " },\n", " },\n", " \"is_frozen_trainable\": false,\n", " },\n", " \"choice\": {\n", " \"class\": \"lale.operators.OperatorChoice\",\n", " \"state\": \"planned\",\n", " \"operator\": \"OperatorChoice\",\n", " \"steps\": {\n", " \"knn\": {\n", " \"class\": \"lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl\",\n", " \"state\": \"planned\",\n", " \"operator\": \"KNeighborsClassifier\",\n", " \"label\": \"KNN\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.k_neighbors_classifier.html\",\n", " },\n", " \"lr\": {\n", " \"class\": \"lale.lib.sklearn.logistic_regression.LogisticRegressionImpl\",\n", " \"state\": \"planned\",\n", " \"operator\": \"LogisticRegression\",\n", " \"label\": \"LR\",\n", " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\",\n", " 
},\n", " },\n", " },\n", " },\n", "}\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ipython_display(higher_order.to_json())" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "cluster:(root)\n", "\n", "cluster:(root)\n", "\n", "\n", "Vote\n", "\n", "\n", "\n", "cluster:pipeline\n", "\n", "\n", "\n", "\n", "\n", "\n", "knn\n", "\n", "\n", "KNN\n", "\n", "\n", "\n", "\n", "pca\n", "\n", "\n", "PCA\n", "\n", "\n", "\n", "\n", "lr\n", "\n", "\n", "LR\n", "\n", "\n", "\n", "\n", "pca->lr\n", "\n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "```python\n", "pipeline = Vote(\n", " estimators=[(\"knn\", KNN), (\"pipeline\", PCA() >> LR)], voting=\"soft\"\n", ")\n", "```" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lale.lib.sklearn import VotingClassifier as Vote\n", "vote = Vote(estimators=[('knn',KNN), ('pipeline',PCA()>>LR)], voting='soft')\n", "vote.visualize()\n", "vote.pretty_print(ipython_display=True, show_imports=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }