{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: liac-arff>=2.4.0 in /home/hirzel/python3.6venv/lib/python3.6/site-packages (2.4.0)\r\n"
     ]
    }
   ],
   "source": [
    "!pip install 'liac-arff>=2.4.0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>class</th>\n",
       "      <th>checking_status</th>\n",
       "      <th>duration</th>\n",
       "      <th>credit_history</th>\n",
       "      <th>purpose</th>\n",
       "      <th>credit_amount</th>\n",
       "      <th>savings_status</th>\n",
       "      <th>employment</th>\n",
       "      <th>installment_commitment</th>\n",
       "      <th>personal_status</th>\n",
       "      <th>...</th>\n",
       "      <th>residence_since</th>\n",
       "      <th>property_magnitude</th>\n",
       "      <th>age</th>\n",
       "      <th>other_payment_plans</th>\n",
       "      <th>housing</th>\n",
       "      <th>existing_credits</th>\n",
       "      <th>job</th>\n",
       "      <th>num_dependents</th>\n",
       "      <th>own_telephone</th>\n",
       "      <th>foreign_worker</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>835</th>\n",
       "      <td>bad</td>\n",
       "      <td>&lt;0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>no credits/all paid</td>\n",
       "      <td>new car</td>\n",
       "      <td>1082.0</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>1&lt;=X&lt;4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>car</td>\n",
       "      <td>48.0</td>\n",
       "      <td>bank</td>\n",
       "      <td>own</td>\n",
       "      <td>2.0</td>\n",
       "      <td>skilled</td>\n",
       "      <td>1.0</td>\n",
       "      <td>none</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>bad</td>\n",
       "      <td>0&lt;=X&lt;200</td>\n",
       "      <td>27.0</td>\n",
       "      <td>existing paid</td>\n",
       "      <td>business</td>\n",
       "      <td>3915.0</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>1&lt;=X&lt;4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>car</td>\n",
       "      <td>36.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>1.0</td>\n",
       "      <td>skilled</td>\n",
       "      <td>2.0</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>629</th>\n",
       "      <td>good</td>\n",
       "      <td>no checking</td>\n",
       "      <td>9.0</td>\n",
       "      <td>existing paid</td>\n",
       "      <td>education</td>\n",
       "      <td>3832.0</td>\n",
       "      <td>no known savings</td>\n",
       "      <td>&gt;=7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>real estate</td>\n",
       "      <td>64.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>1.0</td>\n",
       "      <td>unskilled resident</td>\n",
       "      <td>1.0</td>\n",
       "      <td>none</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559</th>\n",
       "      <td>bad</td>\n",
       "      <td>0&lt;=X&lt;200</td>\n",
       "      <td>18.0</td>\n",
       "      <td>critical/other existing credit</td>\n",
       "      <td>furniture/equipment</td>\n",
       "      <td>1928.0</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>&lt;1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>2.0</td>\n",
       "      <td>real estate</td>\n",
       "      <td>31.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>2.0</td>\n",
       "      <td>unskilled resident</td>\n",
       "      <td>1.0</td>\n",
       "      <td>none</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>684</th>\n",
       "      <td>good</td>\n",
       "      <td>0&lt;=X&lt;200</td>\n",
       "      <td>36.0</td>\n",
       "      <td>delayed previously</td>\n",
       "      <td>business</td>\n",
       "      <td>9857.0</td>\n",
       "      <td>100&lt;=X&lt;500</td>\n",
       "      <td>4&lt;=X&lt;7</td>\n",
       "      <td>1.0</td>\n",
       "      <td>male single</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>life insurance</td>\n",
       "      <td>31.0</td>\n",
       "      <td>none</td>\n",
       "      <td>own</td>\n",
       "      <td>2.0</td>\n",
       "      <td>unskilled resident</td>\n",
       "      <td>2.0</td>\n",
       "      <td>yes</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    class checking_status  duration                  credit_history  \\\n",
       "835   bad              <0      12.0             no credits/all paid   \n",
       "192   bad        0<=X<200      27.0                   existing paid   \n",
       "629  good     no checking       9.0                   existing paid   \n",
       "559   bad        0<=X<200      18.0  critical/other existing credit   \n",
       "684  good        0<=X<200      36.0              delayed previously   \n",
       "\n",
       "                 purpose  credit_amount    savings_status employment  \\\n",
       "835              new car         1082.0              <100     1<=X<4   \n",
       "192             business         3915.0              <100     1<=X<4   \n",
       "629            education         3832.0  no known savings        >=7   \n",
       "559  furniture/equipment         1928.0              <100         <1   \n",
       "684             business         9857.0        100<=X<500     4<=X<7   \n",
       "\n",
       "     installment_commitment personal_status  ... residence_since  \\\n",
       "835                     4.0     male single  ...             4.0   \n",
       "192                     4.0     male single  ...             2.0   \n",
       "629                     1.0     male single  ...             4.0   \n",
       "559                     2.0     male single  ...             2.0   \n",
       "684                     1.0     male single  ...             3.0   \n",
       "\n",
       "     property_magnitude   age  other_payment_plans housing existing_credits  \\\n",
       "835                 car  48.0                 bank     own              2.0   \n",
       "192                 car  36.0                 none     own              1.0   \n",
       "629         real estate  64.0                 none     own              1.0   \n",
       "559         real estate  31.0                 none     own              2.0   \n",
       "684      life insurance  31.0                 none     own              2.0   \n",
       "\n",
       "                    job num_dependents  own_telephone foreign_worker  \n",
       "835             skilled            1.0           none            yes  \n",
       "192             skilled            2.0            yes            yes  \n",
       "629  unskilled resident            1.0           none            yes  \n",
       "559  unskilled resident            1.0           none            yes  \n",
       "684  unskilled resident            2.0            yes            yes  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import lale.datasets.openml\n",
    "import pandas as pd\n",
    "(train_X, train_y), (test_X, test_y) = lale.datasets.openml.fetch(\n",
    "    'credit-g', 'classification', preprocess=False)\n",
    "pd.concat([train_y.tail(), train_X.tail()], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import Normalizer as Norm\n",
    "from lale.lib.lale import NoOp\n",
    "from sklearn.preprocessing import OneHotEncoder as OneHot\n",
    "from sklearn.linear_model import LogisticRegression as LR\n",
    "from xgboost import XGBClassifier as XGBoost\n",
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from lale.operators import make_pipeline\n",
    "lale.wrap_imported_operators()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "num_cols = [col for col in train_X.columns\n",
    "            if np.issubdtype(train_X.dtypes[col], np.number)]\n",
    "cat_cols = [col for col in train_X.columns if col not in num_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"219pt\" height=\"146pt\"\n",
       " viewBox=\"0.00 0.00 219.00 146.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 142)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-142 215,-142 215,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:column_transformer</title>\n",
       "<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.column_transformer.html\" xlink:title=\"column_transformer = ColumnTransformer(transformers=[(&#39;num_tfm&#39;, norm, [&#39;duration&#39;, &#39;credit_amount&#39;, &#39;installment_commitment&#39;, &#39;residence_since&#39;, &#39;age&#39;, &#39;existing_credits&#39;, &#39;num_dependents&#39;]), (&#39;cat_tfm&#39;, one_hot, [&#39;checking_status&#39;, &#39;credit_history&#39;, &#39;purpose&#39;, &#39;savings_status&#39;, &#39;employmen...)\">\n",
       "<polygon fill=\"#b0e2ff\" stroke=\"black\" points=\"0,-8 0,-130 129,-130 129,-8 0,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"64.5\" y=\"-114.8\" font-family=\"Times,serif\" font-size=\"14.00\">ColumnTransformer</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm -->\n",
       "<g id=\"node1\" class=\"node\"><title>norm</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.normalizer.html\" xlink:title=\"norm = Norm(norm=&#39;l1&#39;)\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"64\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node3\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR()\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"184\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"184\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>norm&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M128.758,-81C134.919,-81 141.01,-81 146.761,-81\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"146.885,-84.5001 156.885,-81 146.885,-77.5001 146.885,-84.5001\"/>\n",
       "</g>\n",
       "<!-- one_hot -->\n",
       "<g id=\"node2\" class=\"node\"><title>one_hot</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.one_hot_encoder.html\" xlink:title=\"one_hot = OneHot()\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"64\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">One&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Hot</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7fa36aa14b00>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pipeline_trainable = ColumnTransformer(\n",
    "    transformers=[\n",
    "        ('num_tfm', Norm(norm='l1'), num_cols),\n",
    "        ('cat_tfm', OneHot(), cat_cols)]) >> LR()\n",
    "pipeline_trainable.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1 s, sys: 188 ms, total: 1.19 s\n",
      "Wall time: 1.06 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "pipeline_trained = pipeline_trainable.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 71.2%\n"
     ]
    }
   ],
   "source": [
    "import sklearn.metrics\n",
    "predictions = pipeline_trained.predict(test_X)\n",
    "print(f'accuracy {sklearn.metrics.accuracy_score(test_y, predictions):.1%}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"232pt\" height=\"233pt\"\n",
       " viewBox=\"0.00 0.00 232.40 233.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 229)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-229 228.397,-229 228.397,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:column_transformer</title>\n",
       "<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.column_transformer.html\" xlink:title=\"column_transformer = ColumnTransformer(transformers=[(&#39;num_tfm&#39;, choice, [&#39;duration&#39;, &#39;credit_amount&#39;, &#39;installment_commitment&#39;, &#39;residence_since&#39;, &#39;age&#39;, &#39;existing_credits&#39;, &#39;num_dependents&#39;]), (&#39;cat_tfm&#39;, one_hot, [&#39;checking_status&#39;, &#39;credit_history&#39;, &#39;purpose&#39;, &#39;savings_status&#39;, &#39;employm...)\">\n",
       "<polygon fill=\"#b0e2ff\" stroke=\"black\" points=\"0,-8 0,-217 129,-217 129,-8 0,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"64.5\" y=\"-201.8\" font-family=\"Times,serif\" font-size=\"14.00\">ColumnTransformer</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust2\" class=\"cluster\"><title>cluster:choice</title>\n",
       "<g id=\"a_clust2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.column_transformer.html\" xlink:title=\"choice = norm | no_op\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"29,-64 29,-186 99,-186 99,-64 29,-64\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-170.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<g id=\"clust3\" class=\"cluster\"><title>cluster:choice_0</title>\n",
       "<g id=\"a_clust3\"><a xlink:title=\"choice_0 = lr | linear_svc | xg_boost\">\n",
       "<polygon fill=\"#7ec0ee\" stroke=\"black\" points=\"141,-17 141,-186 216.397,-186 216.397,-17 141,-17\"/>\n",
       "<text text-anchor=\"middle\" x=\"178.698\" y=\"-170.8\" font-family=\"Times,serif\" font-size=\"14.00\">Choice</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm -->\n",
       "<g id=\"node1\" class=\"node\"><title>norm</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.normalizer.html\" xlink:title=\"norm = Norm\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"64\" cy=\"-137\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- lr -->\n",
       "<g id=\"node4\" class=\"node\"><title>lr</title>\n",
       "<g id=\"a_node4\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html\" xlink:title=\"lr = LR\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"178.698\" cy=\"-137\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"178.698\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"11.00\">LR</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm&#45;&gt;lr -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>norm&#45;&gt;lr</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M128.949,-137C129.588,-137 130.226,-137 130.862,-137\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"131.041,-140.5 141.041,-137 131.041,-133.5 131.041,-140.5\"/>\n",
       "</g>\n",
       "<!-- no_op -->\n",
       "<g id=\"node2\" class=\"node\"><title>no_op</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html\" xlink:title=\"no_op = NoOp\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"64\" cy=\"-92\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-95.2\" font-family=\"Times,serif\" font-size=\"11.00\">No&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-83.2\" font-family=\"Times,serif\" font-size=\"11.00\">Op</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- one_hot -->\n",
       "<g id=\"node3\" class=\"node\"><title>one_hot</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.one_hot_encoder.html\" xlink:title=\"one_hot = OneHot\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"64\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">One&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Hot</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- linear_svc -->\n",
       "<g id=\"node5\" class=\"node\"><title>linear_svc</title>\n",
       "<g id=\"a_node5\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.linear_svc.html\" xlink:title=\"linear_svc = LinearSVC(dual=False)\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"178.698\" cy=\"-92\" rx=\"29.8983\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"178.698\" y=\"-95.2\" font-family=\"Times,serif\" font-size=\"11.00\">Linear&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"178.698\" y=\"-83.2\" font-family=\"Times,serif\" font-size=\"11.00\">SVC</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- xg_boost -->\n",
       "<g id=\"node6\" class=\"node\"><title>xg_boost</title>\n",
       "<g id=\"a_node6\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.xgboost.xgb_classifier.html\" xlink:title=\"xg_boost = XGBoost\">\n",
       "<ellipse fill=\"#7ec0ee\" stroke=\"black\" cx=\"178.698\" cy=\"-45\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"178.698\" y=\"-48.2\" font-family=\"Times,serif\" font-size=\"11.00\">XG&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"178.698\" y=\"-36.2\" font-family=\"Times,serif\" font-size=\"11.00\">Boost</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7fa4071df630>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pipeline_planned = make_pipeline(\n",
    "    ColumnTransformer(transformers=[\n",
    "        ('num_tfm', Norm | NoOp, num_cols),\n",
    "        ('cat_tfm', OneHot, cat_cols)]),\n",
    "    LR | LinearSVC(dual=False)| XGBoost)\n",
    "pipeline_planned.visualize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|█████████| 5/5 [00:52<00:00, 10.58s/trial, best loss: -0.7507273649370062]\n",
      "CPU times: user 55.7 s, sys: 3.28 s, total: 59 s\n",
      "Wall time: 54.6 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "from lale.lib.lale import Hyperopt\n",
    "pipeline_trained = pipeline_planned.auto_configure(\n",
    "    train_X, train_y, Hyperopt, cv=3, max_evals=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
       " -->\n",
       "<!-- Title: cluster:(root) Pages: 1 -->\n",
       "<svg width=\"224pt\" height=\"146pt\"\n",
       " viewBox=\"0.00 0.00 224.40 146.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 142)\">\n",
       "<title>cluster:(root)</title>\n",
       "<g id=\"a_graph0\"><a xlink:title=\"(root) = ...\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-142 220.397,-142 220.397,4 -4,4\"/>\n",
       "</a>\n",
       "</g>\n",
       "<g id=\"clust1\" class=\"cluster\"><title>cluster:column_transformer</title>\n",
       "<g id=\"a_clust1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.column_transformer.html\" xlink:title=\"column_transformer = ColumnTransformer(transformers=[(&#39;num_tfm&#39;, norm, [&#39;duration&#39;, &#39;credit_amount&#39;, &#39;installment_commitment&#39;, &#39;residence_since&#39;, &#39;age&#39;, &#39;existing_credits&#39;, &#39;num_dependents&#39;]), (&#39;cat_tfm&#39;, one_hot, [&#39;checking_status&#39;, &#39;credit_history&#39;, &#39;purpose&#39;, &#39;savings_status&#39;, &#39;employmen...)\">\n",
       "<polygon fill=\"white\" stroke=\"black\" points=\"0,-8 0,-130 129,-130 129,-8 0,-8\"/>\n",
       "<text text-anchor=\"middle\" x=\"64.5\" y=\"-114.8\" font-family=\"Times,serif\" font-size=\"14.00\">ColumnTransformer</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm -->\n",
       "<g id=\"node1\" class=\"node\"><title>norm</title>\n",
       "<g id=\"a_node1\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.normalizer.html\" xlink:title=\"norm = Norm(norm=&#39;l1&#39;)\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"64\" cy=\"-81\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-78.2\" font-family=\"Times,serif\" font-size=\"11.00\">Norm</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- linear_svc -->\n",
       "<g id=\"node3\" class=\"node\"><title>linear_svc</title>\n",
       "<g id=\"a_node3\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.linear_svc.html\" xlink:title=\"linear_svc = LinearSVC(dual=False, C=16757.615906506046, fit_intercept=False, tol=0.0006905134087360421)\">\n",
       "<ellipse fill=\"white\" stroke=\"black\" cx=\"186.698\" cy=\"-81\" rx=\"29.8983\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"186.698\" y=\"-84.2\" font-family=\"Times,serif\" font-size=\"11.00\">Linear&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"186.698\" y=\"-72.2\" font-family=\"Times,serif\" font-size=\"11.00\">SVC</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "<!-- norm&#45;&gt;linear_svc -->\n",
       "<g id=\"edge1\" class=\"edge\"><title>norm&#45;&gt;linear_svc</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M128.967,-81C134.97,-81 140.926,-81 146.595,-81\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"146.599,-84.5001 156.599,-81 146.599,-77.5001 146.599,-84.5001\"/>\n",
       "</g>\n",
       "<!-- one_hot -->\n",
       "<g id=\"node2\" class=\"node\"><title>one_hot</title>\n",
       "<g id=\"a_node2\"><a xlink:href=\"https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.one_hot_encoder.html\" xlink:title=\"one_hot = OneHot()\">\n",
       "<ellipse fill=\"#b0e2ff\" stroke=\"black\" cx=\"64\" cy=\"-36\" rx=\"27\" ry=\"19.6\"/>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-39.2\" font-family=\"Times,serif\" font-size=\"11.00\">One&#45;</text>\n",
       "<text text-anchor=\"middle\" x=\"64\" y=\"-27.2\" font-family=\"Times,serif\" font-size=\"11.00\">Hot</text>\n",
       "</a>\n",
       "</g>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.dot.Digraph at 0x7fa4071df6d8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/markdown": [
       "```python\n",
       "norm = Norm(norm=\"l1\")\n",
       "column_transformer = ColumnTransformer(\n",
       "    transformers=[\n",
       "        (\n",
       "            \"num_tfm\",\n",
       "            norm,\n",
       "            [\n",
       "                \"duration\", \"credit_amount\", \"installment_commitment\",\n",
       "                \"residence_since\", \"age\", \"existing_credits\", \"num_dependents\",\n",
       "            ],\n",
       "        ),\n",
       "        (\n",
       "            \"cat_tfm\",\n",
       "            OneHot(),\n",
       "            [\n",
       "                \"checking_status\", \"credit_history\", \"purpose\",\n",
       "                \"savings_status\", \"employment\", \"personal_status\",\n",
       "                \"other_parties\", \"property_magnitude\", \"other_payment_plans\",\n",
       "                \"housing\", \"job\", \"own_telephone\", \"foreign_worker\",\n",
       "            ],\n",
       "        ),\n",
       "    ]\n",
       ")\n",
       "linear_svc = LinearSVC(\n",
       "    dual=False,\n",
       "    C=16757.615906506046,\n",
       "    fit_intercept=False,\n",
       "    tol=0.0006905134087360421,\n",
       ")\n",
       "pipeline = column_transformer >> linear_svc\n",
       "```"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pipeline_trained.visualize()\n",
    "pipeline_trained.pretty_print(ipython_display=True, show_imports=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy 72.1%\n"
     ]
    }
   ],
   "source": [
    "predictions = pipeline_trained.predict(test_X)\n",
    "print(f'accuracy {sklearn.metrics.accuracy_score(test_y, predictions):.1%}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}