{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MedInc</th>\n",
       "      <th>HouseAge</th>\n",
       "      <th>AveRooms</th>\n",
       "      <th>AveBedrms</th>\n",
       "      <th>Population</th>\n",
       "      <th>AveOccup</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.2596</td>\n",
       "      <td>33.0</td>\n",
       "      <td>5.017657</td>\n",
       "      <td>1.006421</td>\n",
       "      <td>2300.0</td>\n",
       "      <td>3.691814</td>\n",
       "      <td>32.71</td>\n",
       "      <td>-117.03</td>\n",
       "      <td>1.030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.8125</td>\n",
       "      <td>49.0</td>\n",
       "      <td>4.473545</td>\n",
       "      <td>1.041005</td>\n",
       "      <td>1314.0</td>\n",
       "      <td>1.738095</td>\n",
       "      <td>33.77</td>\n",
       "      <td>-118.16</td>\n",
       "      <td>3.821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.1563</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.645833</td>\n",
       "      <td>0.985119</td>\n",
       "      <td>915.0</td>\n",
       "      <td>2.723214</td>\n",
       "      <td>34.66</td>\n",
       "      <td>-120.48</td>\n",
       "      <td>1.726</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.9425</td>\n",
       "      <td>36.0</td>\n",
       "      <td>4.002817</td>\n",
       "      <td>1.033803</td>\n",
       "      <td>1418.0</td>\n",
       "      <td>3.994366</td>\n",
       "      <td>32.69</td>\n",
       "      <td>-117.11</td>\n",
       "      <td>0.934</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.5542</td>\n",
       "      <td>43.0</td>\n",
       "      <td>6.268421</td>\n",
       "      <td>1.134211</td>\n",
       "      <td>874.0</td>\n",
       "      <td>2.300000</td>\n",
       "      <td>36.78</td>\n",
       "      <td>-119.80</td>\n",
       "      <td>0.965</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \\\n",
       "0  3.2596      33.0  5.017657   1.006421      2300.0  3.691814     32.71   \n",
       "1  3.8125      49.0  4.473545   1.041005      1314.0  1.738095     33.77   \n",
       "2  4.1563       4.0  5.645833   0.985119       915.0  2.723214     34.66   \n",
       "3  1.9425      36.0  4.002817   1.033803      1418.0  3.994366     32.69   \n",
       "4  3.5542      43.0  6.268421   1.134211       874.0  2.300000     36.78   \n",
       "\n",
       "   Longitude  target  \n",
       "0    -117.03   1.030  \n",
       "1    -118.16   3.821  \n",
       "2    -120.48   1.726  \n",
       "3    -117.11   0.934  \n",
       "4    -119.80   0.965  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import lale.datasets\n",
    "(train_X, train_y), (test_X, test_y) = lale.datasets.california_housing_df()\n",
    "pd.concat([train_X.head(), train_y.head()], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler as Scale\n",
    "from sklearn.preprocessing import Normalizer as Norm\n",
    "from lale.lib.lale import NoOp\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.tree import DecisionTreeRegressor as Tree\n",
    "from sklearn.linear_model import LinearRegression as Linear\n",
    "from xgboost import XGBRegressor as XGB\n",
    "lale.wrap_imported_operators()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"274pt\" height=\"189pt\"\n",
       " viewBox=\"0.00 0.00 274.00 189.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 185)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-185 270,-185 270,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:choice_0</title>\n",
       "<g id=\"a_clust1\"><a xlink:title=\"choice_0 = scale | norm | no_op_0\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"8,-8 8,-173 78,-173 78,-8 8,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:choice_1</title>\n",
       "<g id=\"a_clust2\"><a xlink:title=\"choice_1 = pca | no_op_1\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"98,-51 98,-173 168,-173 168,-51 98,-51\"/>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust3\" class=\"cluster\"><title>cluster:choice_2</title>\n",
       "<g id=\"a_clust3\"><a xlink:title=\"choice_2 = tree | linear | xgb\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"188,-12 188,-173 258,-173 258,-12 188,-12\"/>\n",
       "<text text-anchor=\"middle\" x=\"223\" y=\"-157.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- scale -->\n",
       "<g id=\"node1\" class=\"node\"><title>scale</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.standard_scaler.html\" xlink:title=\"scale = Scale\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"43\" cy=\"-124\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-121.2\" font-family=\"Times,serif\" font-size=\"11.00\">Scale</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca -->\n",
       "<g id=\"node4\" class=\"node\"><title>pca</title>\n",
       "<g id=\"a_node4\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\" xlink:title=\"pca = PCA\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"133\" cy=\"-124\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-121.2\" font-family=\"Times,serif\" font-size=\"11.00\">PCA</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- scale&#45;&gt;pca -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>scale&#45;&gt;pca</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M77.7296,-124C83.6523,-124 89.838,-124 95.8241,-124\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"88.0002,-127.5 98,-124 87.9998,-120.5 88.0002,-127.5\"/>\n",
       "</g>\n",
       "<!-- norm -->\n",
       "<g id=\"node2\" class=\"node\"><title>norm</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.normalizer.html\" xlink:title=\"norm = Norm\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"43\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op_0 -->\n",
       "<g id=\"node3\" class=\"node\"><title>no_op_0</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op_0 = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"43\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"43\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- tree -->\n",
       "<g id=\"node6\" class=\"node\"><title>tree</title>\n",
       "<g id=\"a_node6\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.decision_tree_regressor.html\" xlink:title=\"tree = Tree\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"223\" cy=\"-124\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"223\" y=\"-121.2\" font-family=\"Times,serif\" font-size=\"11.00\">Tree</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca&#45;&gt;tree -->\n",
       "<g id=\"edge2\" class=\"edge\"><title>pca&#45;&gt;tree</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M167.73,-124C173.652,-124 179.838,-124 185.824,-124\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"178,-127.5 188,-124 178,-120.5 178,-127.5\"/>\n",
       "</g>\n",
       "<!-- no_op_1 -->\n",
       "<g id=\"node5\" class=\"node\"><title>no_op_1</title>\n",
       "<g id=\"a_node5\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op_1 = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"133\" cy=\"-79\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-82.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"133\" y=\"-70.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- linear -->\n",
       "<g id=\"node7\" class=\"node\"><title>linear</title>\n",
       "<g id=\"a_node7\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.linear_regression.html\" xlink:title=\"linear = Linear\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"223\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"223\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">Linear</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- xgb -->\n",
       "<g id=\"node8\" class=\"node\"><title>xgb</title>\n",
       "<g id=\"a_node8\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.xgboost.xgb_regressor.html\" xlink:title=\"xgb = XGB\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"223\" cy=\"-38\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"223\" y=\"-35.2\" font-family=\"Times,serif\" font-size=\"11.00\">XGB</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7f32b77655f8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "planned_pipeline = (Scale | Norm | NoOp) >> (PCA | NoOp) >> (Tree | Linear | XGB)\n",
    "planned_pipeline.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 68%|████▊  | 34/50 [03:11<01:29,  5.62s/trial, best loss: -0.6110921251096775]\n"
     ]
    }
   ],
   "source": [
    "from lale.lib.lale import Hyperopt\n",
    "import sklearn.metrics\n",
    "r2 = sklearn.metrics.make_scorer(sklearn.metrics.r2_score)\n",
    "trained_pipeline = planned_pipeline.auto_configure(\n",
    "    train_X, train_y, optimizer=Hyperopt, scoring=r2,\n",
    "    max_opt_time=3*60, max_eval_time=30, cv=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "R2 score: 0.58\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"242pt\" height=\"48pt\"\n",
       " viewBox=\"0.00 0.00 242.00 47.60\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 43.598)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-43.598 238,-43.598 238,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node1\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp()\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"27\" cy=\"-19.799\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-22.999\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"27\" y=\"-10.999\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca -->\n",
       "<g id=\"node2\" class=\"node\"><title>pca</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html\" xlink:title=\"pca = PCA(svd_solver=&#39;randomized&#39;)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"117\" cy=\"-19.799\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"117\" y=\"-16.999\" font-family=\"Times,serif\" font-size=\"11.00\">PCA</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- no_op&#45;&gt;pca -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>no_op&#45;&gt;pca</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M54.4029,-19.799C62.3932,-19.799 71.3106,-19.799 79.8241,-19.799\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"79.919,-23.2991 89.919,-19.799 79.919,-16.2991 79.919,-23.2991\"/>\n",
       "</g>\n",
       "<!-- linear -->\n",
       "<g id=\"node3\" class=\"node\"><title>linear</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.linear_regression.html\" xlink:title=\"linear = Linear(normalize=True)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"207\" cy=\"-19.799\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"207\" y=\"-16.999\" font-family=\"Times,serif\" font-size=\"11.00\">Linear</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- pca&#45;&gt;linear -->\n",
       "<g id=\"edge2\" class=\"edge\"><title>pca&#45;&gt;linear</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M144.403,-19.799C152.393,-19.799 161.311,-19.799 169.824,-19.799\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"169.919,-23.2991 179.919,-19.799 169.919,-16.2991 169.919,-23.2991\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7f32b76cb4e0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "print(f'R2 score: {r2(trained_pipeline, test_X, test_y):.2f}')\n",
    "trained_pipeline.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "from lale.lib.lale import NoOp\n",
       "from sklearn.decomposition import PCA\n",
       "from sklearn.linear_model import LinearRegression as Linear\n",
       "import lale\n",
       "\n",
       "lale.wrap_imported_operators()\n",
       "pca = PCA(svd_solver=\"randomized\")\n",
       "linear = Linear(normalize=True)\n",
       "pipeline = NoOp() >> pca >> linear\n",
       "```"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "trained_pipeline.pretty_print(ipython_display=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}