{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Example Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: liac-arff>=2.4.0 in /home/hirzel/python3.6venv/lib/python3.6/site-packages (2.4.0)\r\n"
     ]
    }
   ],
   "source": [
    "# Use the %pip magic (not !pip) so the package is installed into the\n",
    "# environment of the running kernel rather than whichever pip is first on PATH.\n",
    "%pip install 'liac-arff>=2.4.0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>y</th>\n",
       "      <th>checking_status</th>\n",
       "      <th>duration</th>\n",
       "      <th>credit_history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>credit_amount</th>\n",
       "      <th>savings_status</th>\n",
       "      <th>employment</th>\n",
       "      <th>installment_commitment</th>\n",
       "      <th>personal_status</th>\n",
       "      <th>...</th>\n",
       "      <th>residence_since</th>\n",
       "      <th>property_magnitude</th>\n",
       "      <th>age</th>\n",
       "      <th>other_payment_plans</th>\n",
       "      <th>housing</th>\n",
       "      <th>existing_credits</th>\n",
       "      <th>job</th>\n",
       "      <th>num_dependents</th>\n",
       "      <th>own_telephone</th>\n",
       "      <th>foreign_worker</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>835</th>\n",
       "      <td>0</td>\n",
       "      <td>&lt;0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>no credits/all paid</td>\n",
       "      <td>new car</td>\n",
       "      <td>1082.0</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>1&lt;=X&lt;4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>car</td>\n",
       "      <td>48.0</td>\n",
       "      <td>bank</td>\n",
       "      <td>own</td>\n",
       "      <td>2.0</td>\n",
       "      <td>skilled</td>\n",
       "      <td>1.0</td>\n",
       "      <td>none</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>0</td>\n",
       "      <td>0&lt;=X&lt;200</td>\n",
       "      <td>27.0</td>\n",
       "      <td>existing paid</td>\n",
       "      <td>business</td>\n",
       "      <td>3915.0</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>1&lt;=X&lt;4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>car</td>\n",
       "      <td>36.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>1.0</td>\n",
       "      <td>skilled</td>\n",
       "      <td>2.0</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>629</th>\n",
       "      <td>1</td>\n",
       "      <td>no checking</td>\n",
       "      <td>9.0</td>\n",
       "      <td>existing paid</td>\n",
       "      <td>education</td>\n",
       "      <td>3832.0</td>\n",
       "      <td>no known savings</td>\n",
       "      <td>&gt;=7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>real estate</td>\n",
       "      <td>64.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>1.0</td>\n",
       "      <td>unskilled resident</td>\n",
       "      <td>1.0</td>\n",
       "      <td>none</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559</th>\n",
       "      <td>0</td>\n",
       "      <td>0&lt;=X&lt;200</td>\n",
       "      <td>18.0</td>\n",
       "      <td>critical/other existing credit</td>\n",
       "      <td>furniture/equipment</td>\n",
       "      <td>1928.0</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>&lt;1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>real estate</td>\n",
       "      <td>31.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>2.0</td>\n",
       "      <td>unskilled resident</td>\n",
       "      <td>1.0</td>\n",
       "      <td>none</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>684</th>\n",
       "      <td>1</td>\n",
       "      <td>0&lt;=X&lt;200</td>\n",
       "      <td>36.0</td>\n",
       "      <td>delayed previously</td>\n",
       "      <td>business</td>\n",
       "      <td>9857.0</td>\n",
       "      <td>100&lt;=X&lt;500</td>\n",
       "      <td>4&lt;=X&lt;7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>life insurance</td>\n",
       "      <td>31.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>2.0</td>\n",
       "      <td>unskilled resident</td>\n",
       "      <td>2.0</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     y checking_status  duration                  credit_history  \\\n",
       "835  0              <0      12.0             no credits/all paid   \n",
       "192  0        0<=X<200      27.0                   existing paid   \n",
       "629  1     no checking       9.0                   existing paid   \n",
       "559  0        0<=X<200      18.0  critical/other existing credit   \n",
       "684  1        0<=X<200      36.0              delayed previously   \n",
       "\n",
       "                 purpose  credit_amount    savings_status employment  \\\n",
       "835              new car         1082.0              <100     1<=X<4   \n",
       "192             business         3915.0              <100     1<=X<4   \n",
       "629            education         3832.0  no known savings        >=7   \n",
       "559  furniture/equipment         1928.0              <100         <1   \n",
       "684             business         9857.0        100<=X<500     4<=X<7   \n",
       "\n",
       "     installment_commitment personal_status  ... residence_since  \\\n",
       "835                     4.0     male single  ...             4.0   \n",
       "192                     4.0     male single  ...             2.0   \n",
       "629                     1.0     male single  ...             4.0   \n",
       "559                     2.0     male single  ...             2.0   \n",
       "684                     1.0     male single  ...             3.0   \n",
       "\n",
       "     property_magnitude   age  other_payment_plans housing existing_credits  \\\n",
       "835                 car  48.0                 bank     own              2.0   \n",
       "192                 car  36.0                 none     own              1.0   \n",
       "629         real estate  64.0                 none     own              1.0   \n",
       "559         real estate  31.0                 none     own              2.0   \n",
       "684      life insurance  31.0                 none     own              2.0   \n",
       "\n",
       "                    job num_dependents  own_telephone foreign_worker  \n",
       "835             skilled            1.0           none            yes  \n",
       "192             skilled            2.0            yes            yes  \n",
       "629  unskilled resident            1.0           none            yes  \n",
       "559  unskilled resident            1.0           none            yes  \n",
       "684  unskilled resident            2.0            yes            yes  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import lale.datasets.openml\n",
    "import pandas as pd\n",
    "\n",
    "# Fetch the 'credit-g' classification dataset from OpenML, requesting the\n",
    "# unpreprocessed columns (preprocess=False).\n",
    "(train_X, train_y), (test_X, test_y) = lale.datasets.openml.fetch(\n",
    "    'credit-g', 'classification', preprocess=False\n",
    ")\n",
    "\n",
    "# Preview the last five training rows, with the label `y` as the first column.\n",
    "label_tail = pd.DataFrame({'y': train_y}, index=train_X.index).tail()\n",
    "feature_tail = train_X.tail()\n",
    "pd.concat([label_tail, feature_tail], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Algorithm Selection and Hyperparameter Tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import Normalizer as Norm\n",
    "from sklearn.preprocessing import OneHotEncoder as OneHot\n",
    "from lale.lib.lale import Project, ConcatFeatures, NoOp\n",
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "from sklearn.svm import LinearSVC\n",
    "from xgboost import XGBClassifier as XGBoost\n",
    "from lale import wrap_imported_operators\n",
    "wrap_imported_operators()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"366pt\" height=\"244pt\"\n",
       " viewBox=\"0.00 0.00 365.87 244.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 240)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-240 361.865,-240 361.865,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:choice_0</title>\n",
       "<g id=\"a_clust1\"><a xlink:title=\"choice_0 = norm | no_op\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"82,-106 82,-228 152,-228 152,-106 82,-106\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-212.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:choice_1</title>\n",
       "<g id=\"a_clust2\"><a xlink:title=\"choice_1 = lr | linear_svc | xg_boost\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"274.468,-8 274.468,-177 349.865,-177 349.865,-8 274.468,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-161.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- project_0 -->\n",
       "<g id=\"node1\" class=\"node\"><title>project_0</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.project.html\" xlink:title=\"project_0 = Project(columns={&#39;type&#39;: &#39;number&#39;})\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"27\" cy=\"-179\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-176.2\" font-family=\"Times,serif\" font-size=\"11.00\">Project</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm -->\n",
       "<g id=\"node2\" class=\"node\"><title>norm</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html\" xlink:title=\"norm = Norm\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"117\" cy=\"-179\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-176.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- project_0&#45;&gt;norm -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>project_0&#45;&gt;norm</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M54.4029,-179C62.3932,-179 71.3106,-179 79.8241,-179\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"72.0002,-182.5 82,-179 71.9998,-175.5 72.0002,-182.5\"/>\n",
       "</g>\n",
       "<!-- concat_features -->\n",
       "<g id=\"node6\" class=\"node\"><title>concat_features</title>\n",
       "<g id=\"a_node6\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.concat_features.html\" xlink:title=\"concat_features = ConcatFeatures()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"213.234\" cy=\"-128\" rx=\"33.4697\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"213.234\" y=\"-131.2\" font-family=\"Times,serif\" font-size=\"11.00\">Concat&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"213.234\" y=\"-119.2\" font-family=\"Times,serif\" font-size=\"11.00\">Features</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm&#45;&gt;concat_features -->\n",
       "<g id=\"edge3\" class=\"edge\"><title>norm&#45;&gt;concat_features</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M151.682,-160.771C160.603,-155.943 170.224,-150.736 179.114,-145.925\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"181.031,-148.867 188.159,-141.029 177.699,-142.711 181.031,-148.867\"/>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node3\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"117\" cy=\"-134\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-137.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-125.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- project_1 -->\n",
       "<g id=\"node4\" class=\"node\"><title>project_1</title>\n",
       "<g id=\"a_node4\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.project.html\" xlink:title=\"project_1 = Project(columns={&#39;type&#39;: &#39;string&#39;})\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"27\" cy=\"-78\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-75.2\" font-family=\"Times,serif\" font-size=\"11.00\">Project</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- one_hot -->\n",
       "<g id=\"node5\" class=\"node\"><title>one_hot</title>\n",
       "<g id=\"a_node5\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html\" xlink:title=\"one_hot = OneHot\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"117\" cy=\"-78\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-81.2\" font-family=\"Times,serif\" font-size=\"11.00\">One&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-69.2\" font-family=\"Times,serif\" font-size=\"11.00\">Hot</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- project_1&#45;&gt;one_hot -->\n",
       "<g id=\"edge2\" class=\"edge\"><title>project_1&#45;&gt;one_hot</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M54.4029,-78C62.3932,-78 71.3106,-78 79.8241,-78\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"79.919,-81.5001 89.919,-78 79.919,-74.5001 79.919,-81.5001\"/>\n",
       "</g>\n",
       "<!-- one_hot&#45;&gt;concat_features -->\n",
       "<g id=\"edge4\" class=\"edge\"><title>one_hot&#45;&gt;concat_features</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M139.383,-89.3458C151.006,-95.513 165.644,-103.279 178.744,-110.23\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"177.471,-113.517 187.945,-115.113 180.752,-107.334 177.471,-113.517\"/>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node7\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node7\"><a xlink:href=\"http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\" xlink:title=\"lr = LR\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"312.167\" cy=\"-128\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-125.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- concat_features&#45;&gt;lr -->\n",
       "<g id=\"edge5\" class=\"edge\"><title>concat_features&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M246.511,-128C252.356,-128 258.555,-128 264.711,-128\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"264.961,-131.5 274.96,-128 264.96,-124.5 264.961,-131.5\"/>\n",
       "</g>\n",
       "<!-- linear_svc -->\n",
       "<g id=\"node8\" class=\"node\"><title>linear_svc</title>\n",
       "<g id=\"a_node8\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html\" xlink:title=\"linear_svc = LinearSVC(dual=False)\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"312.167\" cy=\"-83\" rx=\"29.8983\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-86.2\" font-family=\"Times,serif\" font-size=\"11.00\">Linear&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-74.2\" font-family=\"Times,serif\" font-size=\"11.00\">SVC</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- xg_boost -->\n",
       "<g id=\"node9\" class=\"node\"><title>xg_boost</title>\n",
       "<g id=\"a_node9\"><a xlink:href=\"https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn\" xlink:title=\"xg_boost = XGBoost\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"312.167\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">XG&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Boost</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7f88010fa630>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from lale.operators import make_pipeline, make_union\n",
    "\n",
    "# Numeric columns: either normalize or pass through unchanged (Norm | NoOp).\n",
    "numeric_branch = make_pipeline(Project(columns={'type': 'number'}), Norm | NoOp)\n",
    "# String columns: one-hot encode.\n",
    "string_branch = make_pipeline(Project(columns={'type': 'string'}), OneHot)\n",
    "# Candidate classifiers; `|` expresses a choice that is resolved later.\n",
    "classifier_choice = LR | LinearSVC(dual=False) | XGBoost\n",
    "\n",
    "planned_pipeline = make_pipeline(\n",
    "    make_union(numeric_branch, string_branch),\n",
    "    classifier_choice)\n",
    "planned_pipeline.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|██████████| 10/10 [02:23<00:00, 14.32s/it, best loss: -0.7507273649370062]\n"
     ]
    }
   ],
   "source": [
    "from lale.lib.lale import Hyperopt\n",
    "\n",
    "# Search budget: 3-fold cross-validation, at most 10 evaluations.\n",
    "search_settings = dict(cv=3, max_evals=10)\n",
    "trained_pipeline = planned_pipeline.auto_configure(\n",
    "    train_X, train_y, Hyperopt, **search_settings)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Displaying Automation Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 72.1%\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "# Score the trained pipeline on the test split and report it as a percentage.\n",
    "predictions = trained_pipeline.predict(test_X)\n",
    "test_accuracy = accuracy_score(test_y, predictions)\n",
    "print(f'accuracy {test_accuracy:.1%}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"350pt\" height=\"91pt\"\n",
       " viewBox=\"0.00 0.00 349.87 90.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 86.799)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-86.799 345.865,-86.799 345.865,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<!-- project_0 -->\n",
       "<g id=\"node1\" class=\"node\"><title>project_0</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.project.html\" xlink:title=\"project_0 = Project(columns={&#39;type&#39;: &#39;number&#39;})\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"27\" cy=\"-64.799\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-61.999\" font-family=\"Times,serif\" font-size=\"11.00\">Project</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm -->\n",
       "<g id=\"node2\" class=\"node\"><title>norm</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html\" xlink:title=\"norm = Norm(norm=&#39;l1&#39;)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"117\" cy=\"-64.799\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-61.999\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- project_0&#45;&gt;norm -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>project_0&#45;&gt;norm</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M54.4029,-64.799C62.3932,-64.799 71.3106,-64.799 79.8241,-64.799\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"79.919,-68.2991 89.919,-64.799 79.919,-61.2991 79.919,-68.2991\"/>\n",
       "</g>\n",
       "<!-- concat_features -->\n",
       "<g id=\"node5\" class=\"node\"><title>concat_features</title>\n",
       "<g id=\"a_node5\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.concat_features.html\" xlink:title=\"concat_features = ConcatFeatures()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"213.234\" cy=\"-41.799\" rx=\"33.4697\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"213.234\" y=\"-44.999\" font-family=\"Times,serif\" font-size=\"11.00\">Concat&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"213.234\" y=\"-32.999\" font-family=\"Times,serif\" font-size=\"11.00\">Features</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm&#45;&gt;concat_features -->\n",
       "<g id=\"edge3\" class=\"edge\"><title>norm&#45;&gt;concat_features</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M142.759,-58.756C151.838,-56.54 162.36,-53.972 172.425,-51.5153\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"173.383,-54.8843 182.268,-49.1129 171.723,-48.0839 173.383,-54.8843\"/>\n",
       "</g>\n",
       "<!-- project_1 -->\n",
       "<g id=\"node3\" class=\"node\"><title>project_1</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.project.html\" xlink:title=\"project_1 = Project(columns={&#39;type&#39;: &#39;string&#39;})\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"27\" cy=\"-19.799\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-16.999\" font-family=\"Times,serif\" font-size=\"11.00\">Project</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- one_hot -->\n",
       "<g id=\"node4\" class=\"node\"><title>one_hot</title>\n",
       "<g id=\"a_node4\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html\" xlink:title=\"one_hot = OneHot()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"117\" cy=\"-19.799\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-22.999\" font-family=\"Times,serif\" font-size=\"11.00\">One&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-10.999\" font-family=\"Times,serif\" font-size=\"11.00\">Hot</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- project_1&#45;&gt;one_hot -->\n",
       "<g id=\"edge2\" class=\"edge\"><title>project_1&#45;&gt;one_hot</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M54.4029,-19.799C62.3932,-19.799 71.3106,-19.799 79.8241,-19.799\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"79.919,-23.2991 89.919,-19.799 79.919,-16.2991 79.919,-23.2991\"/>\n",
       "</g>\n",
       "<!-- one_hot&#45;&gt;concat_features -->\n",
       "<g id=\"edge4\" class=\"edge\"><title>one_hot&#45;&gt;concat_features</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M143.253,-25.6946C152.074,-27.7539 162.208,-30.1199 171.945,-32.3931\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"171.343,-35.8465 181.877,-34.7117 172.934,-29.0298 171.343,-35.8465\"/>\n",
       "</g>\n",
       "<!-- linear_svc -->\n",
       "<g id=\"node6\" class=\"node\"><title>linear_svc</title>\n",
       "<g id=\"a_node6\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html\" xlink:title=\"linear_svc = LinearSVC(dual=False, C=9773.459065896624, tol=0.0006905227182226334)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"312.167\" cy=\"-41.799\" rx=\"29.8983\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-44.999\" font-family=\"Times,serif\" font-size=\"11.00\">Linear&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"312.167\" y=\"-32.999\" font-family=\"Times,serif\" font-size=\"11.00\">SVC</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- concat_features&#45;&gt;linear_svc -->\n",
       "<g id=\"edge5\" class=\"edge\"><title>concat_features&#45;&gt;linear_svc</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M246.511,-41.799C254.733,-41.799 263.658,-41.799 272.176,-41.799\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"272.291,-45.2991 282.291,-41.799 272.291,-38.2991 272.291,-45.2991\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7f8800db5e48>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trained_pipeline.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "project_0 = Project(columns={'type': 'number'})\n",
       "norm = Norm(norm='l1')\n",
       "project_1 = Project(columns={'type': 'string'})\n",
       "linear_svc = LinearSVC(dual=False, C=9773.459065896624, tol=0.0006905227182226334)\n",
       "pipeline = ((project_0 >> norm) & (project_1 >> OneHot())) >> ConcatFeatures() >> linear_svc\n",
       "```"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trained_pipeline.pretty_print(ipython_display=True, show_imports=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Call for Users and Contributors\n",
    "\n",
    "- Repository: https://github.com/IBM/lale\n",
    "\n",
    "- [Guide for scikit-learn users](https://nbviewer.jupyter.org/github/IBM/lale/blob/master/examples/docs_guide_for_sklearn_users.ipynb)\n",
    "\n",
    "- [How to add new operators](https://nbviewer.jupyter.org/github/IBM/lale/blob/master/examples/docs_new_operators.ipynb)\n",
    "\n",
    "<img src=\"https://github.com/IBM/lale/raw/master/docs/img/lale_logo.jpg\" alt=\"Lale logo\" style=\"width:300px\" align=\"left\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Grammar Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.decomposition import PCA\n",
    "from sklearn.preprocessing import PolynomialFeatures as PolyFeat\n",
    "wrap_imported_operators()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"409pt\" height=\"395pt\"\n",
       " viewBox=\"0.00 0.00 409.40 395.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 391)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-391 405.397,-391 405.397,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:choice_0</title>\n",
       "<g id=\"a_clust1\"><a xlink:title=\"choice_0 = no_op_0 | pipeline_1\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"8,-8 8,-379 310,-379 310,-8 8,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"159\" y=\"-363.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:pipeline_1</title>\n",
       "<g id=\"a_clust2\"><a xlink:title=\"pipeline_1 = ...\">\n",
       "<path fill=\"#7ec0ee\" stroke=\"black\" d=\"M28,-64C28,-64 290,-64 290,-64 296,-64 302,-70 302,-76 302,-76 302,-336 302,-336 302,-342 296,-348 290,-348 290,-348 28,-348 28,-348 22,-348 16,-342 16,-336 16,-336 16,-76 16,-76 16,-70 22,-64 28,-64\"/>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust3\" class=\"cluster\"><title>cluster:choice_1</title>\n",
       "<g id=\"a_clust3\"><a xlink:title=\"choice_1 = no_op_1 | pipeline_2\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"24,-72 24,-340 216,-340 216,-72 24,-72\"/>\n",
       "<text text-anchor=\"middle\" x=\"120\" y=\"-324.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust4\" class=\"cluster\"><title>cluster:pipeline_2</title>\n",
       "<g id=\"a_clust4\"><a xlink:title=\"pipeline_2 = ...\">\n",
       "<path fill=\"#7ec0ee\" stroke=\"black\" d=\"M44,-128C44,-128 196,-128 196,-128 202,-128 208,-134 208,-140 208,-140 208,-297 208,-297 208,-303 202,-309 196,-309 196,-309 44,-309 44,-309 38,-309 32,-303 32,-297 32,-297 32,-140 32,-140 32,-134 38,-128 44,-128\"/>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust5\" class=\"cluster\"><title>cluster:choice_2</title>\n",
       "<g id=\"a_clust5\"><a xlink:title=\"choice_2 = no_op_2\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"40,-222 40,-301 110,-301 110,-222 40,-222\"/>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-285.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust6\" class=\"cluster\"><title>cluster:choice_3</title>\n",
       "<g id=\"a_clust6\"><a xlink:title=\"choice_3 = pca_0 | norm_0 | poly_feat_0\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"130,-136 130,-301 200,-301 200,-136 130,-136\"/>\n",
       "<text text-anchor=\"middle\" x=\"165\" y=\"-285.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust7\" class=\"cluster\"><title>cluster:choice_4</title>\n",
       "<g id=\"a_clust7\"><a xlink:title=\"choice_4 = pca_1 | norm_1 | poly_feat_1\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"224,-136 224,-301 294,-301 294,-136 224,-136\"/>\n",
       "<text text-anchor=\"middle\" x=\"259\" y=\"-285.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust8\" class=\"cluster\"><title>cluster:choice_5</title>\n",
       "<g id=\"a_clust8\"><a xlink:title=\"choice_5 = lr | linear_svc | xg_boost\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"318,-132 318,-301 393.397,-301 393.397,-132 318,-132\"/>\n",
       "<text text-anchor=\"middle\" x=\"355.698\" y=\"-285.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op_0 -->\n",
       "<g id=\"node1\" class=\"node\"><title>no_op_0</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op_0 = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"75\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op_1 -->\n",
       "<g id=\"node2\" class=\"node\"><title>no_op_1</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op_1 = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"75\" cy=\"-100\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-103.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-91.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op_2 -->\n",
       "<g id=\"node3\" class=\"node\"><title>no_op_2</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op_2 = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"75\" cy=\"-250\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-253.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"75\" y=\"-241.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca_0 -->\n",
       "<g id=\"node4\" class=\"node\"><title>pca_0</title>\n",
       "<g id=\"a_node4\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html\" xlink:title=\"pca_0 = PCA\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"165\" cy=\"-252\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"165\" y=\"-249.2\" font-family=\"Times,serif\" font-size=\"11.00\">PCA</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op_2&#45;&gt;pca_0 -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>no_op_2&#45;&gt;pca_0</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M109.927,-250.771C115.79,-250.904 121.905,-251.043 127.824,-251.178\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"119.688,-253.839 130,-251.407 120.422,-246.878 119.688,-253.839\"/>\n",
       "</g>\n",
       "<!-- pca_1 -->\n",
       "<g id=\"node7\" class=\"node\"><title>pca_1</title>\n",
       "<g id=\"a_node7\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html\" xlink:title=\"pca_1 = PCA\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"259\" cy=\"-252\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"259\" y=\"-249.2\" font-family=\"Times,serif\" font-size=\"11.00\">PCA</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca_0&#45;&gt;pca_1 -->\n",
       "<g id=\"edge2\" class=\"edge\"><title>pca_0&#45;&gt;pca_1</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M215.936,-252C217.906,-252 219.866,-252 221.802,-252\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"214,-255.5 224,-252 214,-248.5 214,-255.5\"/>\n",
       "</g>\n",
       "<!-- norm_0 -->\n",
       "<g id=\"node5\" class=\"node\"><title>norm_0</title>\n",
       "<g id=\"a_node5\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html\" xlink:title=\"norm_0 = Norm\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"165\" cy=\"-209\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"165\" y=\"-206.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- poly_feat_0 -->\n",
       "<g id=\"node6\" class=\"node\"><title>poly_feat_0</title>\n",
       "<g id=\"a_node6\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\" xlink:title=\"poly_feat_0 = PolyFeat\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"165\" cy=\"-164\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"165\" y=\"-167.2\" font-family=\"Times,serif\" font-size=\"11.00\">Poly&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"165\" y=\"-155.2\" font-family=\"Times,serif\" font-size=\"11.00\">Feat</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node10\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node10\"><a xlink:href=\"http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\" xlink:title=\"lr = LR\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"355.698\" cy=\"-252\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"355.698\" y=\"-249.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca_1&#45;&gt;lr -->\n",
       "<g id=\"edge3\" class=\"edge\"><title>pca_1&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M310,-252C312.156,-252 314.312,-252 316.469,-252\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"308.625,-255.5 318.625,-252 308.625,-248.5 308.625,-255.5\"/>\n",
       "</g>\n",
       "<!-- norm_1 -->\n",
       "<g id=\"node8\" class=\"node\"><title>norm_1</title>\n",
       "<g id=\"a_node8\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html\" xlink:title=\"norm_1 = Norm\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"259\" cy=\"-209\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"259\" y=\"-206.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- poly_feat_1 -->\n",
       "<g id=\"node9\" class=\"node\"><title>poly_feat_1</title>\n",
       "<g id=\"a_node9\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\" xlink:title=\"poly_feat_1 = PolyFeat\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"259\" cy=\"-164\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"259\" y=\"-167.2\" font-family=\"Times,serif\" font-size=\"11.00\">Poly&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"259\" y=\"-155.2\" font-family=\"Times,serif\" font-size=\"11.00\">Feat</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- linear_svc -->\n",
       "<g id=\"node11\" class=\"node\"><title>linear_svc</title>\n",
       "<g id=\"a_node11\"><a xlink:href=\"https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html\" xlink:title=\"linear_svc = LinearSVC(dual=False)\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"355.698\" cy=\"-207\" rx=\"29.8983\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"355.698\" y=\"-210.2\" font-family=\"Times,serif\" font-size=\"11.00\">Linear&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"355.698\" y=\"-198.2\" font-family=\"Times,serif\" font-size=\"11.00\">SVC</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- xg_boost -->\n",
       "<g id=\"node12\" class=\"node\"><title>xg_boost</title>\n",
       "<g id=\"a_node12\"><a xlink:href=\"https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn\" xlink:title=\"xg_boost = XGBoost\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"355.698\" cy=\"-160\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"355.698\" y=\"-163.2\" font-family=\"Times,serif\" font-size=\"11.00\">XG&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"355.698\" y=\"-151.2\" font-family=\"Times,serif\" font-size=\"11.00\">Boost</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7f8800dce0b8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from lale.grammar import Grammar\n",
    "\n",
    "# Grammar whose rules describe the candidate pipelines to consider.\n",
    "g = Grammar()\n",
    "\n",
    "# start: transformer part (rec_tfms) piped into one primitive estimator.\n",
    "g.start = make_pipeline(g.rec_tfms, g.prim_est)\n",
    "# rec_tfms refers to itself: either NoOp, or rec_tfms piped into one more prim_tfm.\n",
    "g.rec_tfms = NoOp | make_pipeline(g.rec_tfms, g.prim_tfm)\n",
    "# Alternatives for a single transformer step.\n",
    "g.prim_tfm = PCA | Norm | PolyFeat\n",
    "# Alternatives for the final estimator; LinearSVC is the only one with a preset argument.\n",
    "g.prim_est = LR | LinearSVC(dual=False) | XGBoost\n",
    "\n",
    "# Depth passed to unfold(); the self-referencing rule is only expanded this far.\n",
    "UNFOLD_DEPTH = 3\n",
    "unrolled = g.unfold(UNFOLD_DEPTH)\n",
    "# Last expression of the cell, so the resulting graph is displayed as the cell output.\n",
    "unrolled.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "linear_svc = LinearSVC(dual=False)\n",
       "pipeline = (NoOp | (NoOp | (NoOp) >> (PCA | Norm | PolyFeat)) >> (PCA | Norm | PolyFeat)) >> (LR | linear_svc | XGBoost)\n",
       "```"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show the unfolded pipeline as Python source rendered in the notebook, leaving out import lines.\n",
    "unrolled.pretty_print(show_imports=False, ipython_display=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}