cluster:(root)

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lale Pipeline Syntax\n",
    "\n",
    "This notebook builds a pipeline with Lale's combinators (`>>`, `&`, `|`), builds an equivalent one with the sklearn-style functions `make_pipeline`, `make_union`, and `make_choice`, and then shows nested and higher-order pipelines."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# all imports used anywhere in this notebook live in this one cell\n",
    "import lale\n",
    "from lale.lib.lale import ConcatFeatures as Concat\n",
    "from lale.lib.lale import NoOp\n",
    "from lale.lib.lale import Both\n",
    "from lale.lib.sklearn import KNeighborsClassifier as KNN\n",
    "from lale.lib.sklearn import LogisticRegression as LR\n",
    "from lale.lib.sklearn import MinMaxScaler as Scaler\n",
    "from lale.lib.sklearn import Nystroem\n",
    "from lale.lib.sklearn import PCA\n",
    "from lale.lib.sklearn import VotingClassifier as Vote\n",
    "from lale.operators import make_choice, make_pipeline, make_union\n",
    "from lale.pretty_print import ipython_display"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combinator-Style\n",
    "| Symbol | Name | Description | Sklearn feature |\n",
    "| ------ | ---- | ------------ | --------------- |\n",
    "| >> | pipe | Feed to next | `make_pipeline` |\n",
    "| & | and | Run both | `make_union`, includes concat |\n",
    "| \\| | or | Choose one | (missing) |"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# `|` (or): choose one of the two operators\n",
    "scl = Scaler | NoOp\n",
    "scl.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# `&` (and): run both transformers, then `>>` (pipe) feeds them into Concat\n",
    "tfm = (PCA & Nystroem) >> Concat\n",
    "tfm.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "clf = KNN | LR\n",
    "clf.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "pipeline = (Scaler | NoOp) >> (PCA & Nystroem) >> Concat >> (KNN | LR)\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# chain the three pieces built above into one pipeline\n",
    "optimizable = scl >> tfm >> clf\n",
    "optimizable.visualize()\n",
    "optimizable.pretty_print(ipython_display=True, show_imports=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizable.pretty_print(ipython_display='input')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generated by pretty_print(ipython_display='input') from previous cell\n",
    "from sklearn.preprocessing import MinMaxScaler as Scaler\n",
    "from lale.lib.lale import NoOp\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.kernel_approximation import Nystroem\n",
    "from lale.lib.lale import ConcatFeatures as Concat\n",
    "from sklearn.neighbors import KNeighborsClassifier as KNN\n",
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "import lale\n",
    "\n",
    "lale.wrap_imported_operators()\n",
    "pipeline = (Scaler | NoOp) >> (PCA & Nystroem) >> Concat >> (KNN | LR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sklearn-Style\n",
    "\n",
    "The same structure written with function calls instead of combinators."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "scl = make_choice(Scaler, NoOp)\n",
    "scl.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tfm = make_union(PCA, Nystroem)\n",
    "tfm.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "clf = make_choice(KNN, LR)\n",
    "clf.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "choice_0 = make_choice(Scaler, NoOp)\n",
       "union = make_union(PCA, Nystroem)\n",
       "choice_1 = make_choice(KNN, LR)\n",
       "pipeline = make_pipeline(choice_0, union, choice_1)\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "optimizable = make_pipeline(scl, tfm, clf)\n",
    "optimizable.visualize()\n",
    "optimizable.pretty_print(ipython_display=True, show_imports=False, combinators=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizable.pretty_print(ipython_display='input', combinators=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generated by pretty_print(ipython_display='input') from previous cell\n",
    "from sklearn.preprocessing import MinMaxScaler as Scaler\n",
    "from lale.lib.lale import NoOp\n",
    "from lale.operators import make_choice\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.kernel_approximation import Nystroem\n",
    "from lale.operators import make_union\n",
    "from sklearn.neighbors import KNeighborsClassifier as KNN\n",
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "from lale.operators import make_pipeline\n",
    "\n",
    "choice_0 = make_choice(Scaler, NoOp)\n",
    "union = make_union(PCA, Nystroem)\n",
    "choice_1 = make_choice(KNN, LR)\n",
    "pipeline = make_pipeline(choice_0, union, choice_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the generated cell above imports plain sklearn classes; wrap them so combinators work\n",
    "lale.wrap_imported_operators()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nested\n",
    "\n",
    "Python's operator precedence makes `>>` bind tighter than `|`, so the choice below is between `LR(C=0.09)` and the sub-pipeline `NoOp >> LR(C=0.19)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "lr_0 = LR(C=0.09)\n",
       "lr_1 = LR(C=0.19)\n",
       "pipeline = PCA >> (lr_0 | NoOp >> lr_1)\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "nested = PCA >> (LR(C=0.09) | NoOp >> LR(C=0.19))\n",
    "nested.visualize()\n",
    "nested.pretty_print(ipython_display=True, show_imports=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "{\n",
       " \"class\": \"lale.operators.PlannedPipeline\",\n",
       " \"state\": \"planned\",\n",
       " \"edges\": [[\"pca\", \"choice\"]],\n",
       " \"steps\": {\n",
       " \"pca\": {\n",
       " \"class\": \"lale.lib.sklearn.pca.PCAImpl\",\n",
       " \"state\": \"planned\",\n",
       " \"operator\": \"PCA\",\n",
       " \"label\": \"PCA\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\",\n",
       " },\n",
       " \"choice\": {\n",
       " \"class\": \"lale.operators.OperatorChoice\",\n",
       " \"state\": \"planned\",\n",
       " \"operator\": \"OperatorChoice\",\n",
       " \"steps\": {\n",
       " \"lr_0\": {\n",
       " \"class\": \"lale.lib.sklearn.logistic_regression.LogisticRegressionImpl\",\n",
       " \"state\": \"trainable\",\n",
       " \"operator\": \"LogisticRegression\",\n",
       " \"label\": \"LR\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\",\n",
       " \"hyperparams\": {\"C\": 0.09},\n",
       " \"is_frozen_trainable\": false,\n",
       " },\n",
       " \"pipeline_1\": {\n",
       " \"class\": \"lale.operators.TrainablePipeline\",\n",
       " \"state\": \"trainable\",\n",
       " \"edges\": [[\"no_op\", \"lr_1\"]],\n",
       " \"steps\": {\n",
       " \"no_op\": {\n",
       " \"class\": \"lale.lib.lale.no_op.NoOpImpl\",\n",
       " \"state\": \"trained\",\n",
       " \"operator\": \"NoOp\",\n",
       " \"label\": \"NoOp\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\",\n",
       " \"hyperparams\": null,\n",
       " \"is_frozen_trainable\": true,\n",
       " \"coefs\": null,\n",
       " \"is_frozen_trained\": true,\n",
       " },\n",
       " \"lr_1\": {\n",
       " \"class\": \"lale.lib.sklearn.logistic_regression.LogisticRegressionImpl\",\n",
       " \"state\": \"trainable\",\n",
       " \"operator\": \"LogisticRegression\",\n",
       " \"label\": \"LR\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\",\n",
       " \"hyperparams\": {\"C\": 0.19},\n",
       " \"is_frozen_trainable\": false,\n",
       " },\n",
       " },\n",
       " },\n",
       " },\n",
       " },\n",
       " },\n",
       "}\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ipython_display(nested.to_json())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Higher-order\n",
    "\n",
    "Operators such as `Both` and `VotingClassifier` take other operators, or whole pipelines, as hyperparameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "pca = PCA(n_components=2)\n",
       "both = Both(op1=pca, op2=Nystroem)\n",
       "pipeline = both >> (KNN | LR)\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "higher_order = Both(op1=PCA(n_components=2), op2=Nystroem) >> (KNN | LR)\n",
    "higher_order.visualize()\n",
    "higher_order.pretty_print(ipython_display=True, show_imports=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "{\n",
       " \"class\": \"lale.operators.PlannedPipeline\",\n",
       " \"state\": \"planned\",\n",
       " \"edges\": [[\"both\", \"choice\"]],\n",
       " \"steps\": {\n",
       " \"both\": {\n",
       " \"class\": \"lale.lib.lale.both.BothImpl\",\n",
       " \"state\": \"trainable\",\n",
       " \"operator\": \"Both\",\n",
       " \"label\": \"Both\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.both.html\",\n",
       " \"hyperparams\": {\n",
       " \"op1\": {\"$ref\": \"../steps/pca\"},\n",
       " \"op2\": {\"$ref\": \"../steps/nystroem\"},\n",
       " },\n",
       " \"steps\": {\n",
       " \"pca\": {\n",
       " \"class\": \"lale.lib.sklearn.pca.PCAImpl\",\n",
       " \"state\": \"trainable\",\n",
       " \"operator\": \"PCA\",\n",
       " \"label\": \"PCA\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\",\n",
       " \"hyperparams\": {\"n_components\": 2},\n",
       " \"is_frozen_trainable\": false,\n",
       " },\n",
       " \"nystroem\": {\n",
       " \"class\": \"lale.lib.sklearn.nystroem.NystroemImpl\",\n",
       " \"state\": \"planned\",\n",
       " \"operator\": \"Nystroem\",\n",
       " \"label\": \"Nystroem\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.nystroem.html\",\n",
       " },\n",
       " },\n",
       " \"is_frozen_trainable\": false,\n",
       " },\n",
       " \"choice\": {\n",
       " \"class\": \"lale.operators.OperatorChoice\",\n",
       " \"state\": \"planned\",\n",
       " \"operator\": \"OperatorChoice\",\n",
       " \"steps\": {\n",
       " \"knn\": {\n",
       " \"class\": \"lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl\",\n",
       " \"state\": \"planned\",\n",
       " \"operator\": \"KNeighborsClassifier\",\n",
       " \"label\": \"KNN\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.k_neighbors_classifier.html\",\n",
       " },\n",
       " \"lr\": {\n",
       " \"class\": \"lale.lib.sklearn.logistic_regression.LogisticRegressionImpl\",\n",
       " \"state\": \"planned\",\n",
       " \"operator\": \"LogisticRegression\",\n",
       " \"label\": \"LR\",\n",
       " \"documentation_url\": \"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\",\n",
       " },\n",
       " },\n",
       " },\n",
       " },\n",
       "}\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ipython_display(higher_order.to_json())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": ["\n", "\n", "\n", "\n", "\n"],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "pipeline = Vote(\n",
       " estimators=[(\"knn\", KNN), (\"pipeline\", PCA() >> LR)], voting=\"soft\"\n",
       ")\n",
       "```"
      ],
      "text/plain": [""]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# one of the voting estimators is itself a pipeline\n",
    "vote = Vote(estimators=[('knn', KNN), ('pipeline', PCA() >> LR)], voting='soft')\n",
    "vote.visualize()\n",
    "vote.pretty_print(ipython_display=True, show_imports=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}