{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cheat sheet\n",
    "\n",
    "## Table of contents:\n",
    "\n",
    "- <a href=\"#Pipeline\">Pipeline</a>\n",
    "- <a href=\"#FeatureUnion\">FeatureUnion</a>\n",
    "- <a href=\"#Custom-transformer\">Custom transformer</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pipeline\n",
    "- Create"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.dummy import DummyClassifier\n",
    "\n",
    "pipe = Pipeline(steps=[\n",
    "    # ('estimator_name', estimator_object)\n",
    "    ('pred', DummyClassifier())\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- fit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "data, target = load_iris(return_X_y=True)\n",
    "\n",
    "pipe.fit(data, target)\n",
    "pipe"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipe.predict(data[-1:])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- get parameters of pipeline steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipe.get_params()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- set parameters of pipeline steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipe.set_params(\n",
    "    # stepname__parametername=newvalue\n",
    "    pred__random_state=42\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# dict with stepname__parametername - newvalue pairs\n",
    "newvalues = {'pred__random_state': 42}\n",
    "# using the keyword argument unpacking operator **\n",
    "pipe.set_params(**newvalues)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## FeatureUnion\n",
    "- Create"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import FeatureUnion\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.feature_selection import SelectKBest\n",
    "\n",
    "union = FeatureUnion(transformer_list=[\n",
    "    ('pca', PCA(n_components=2)), \n",
    "    (\"univ_select\", SelectKBest(k=1))\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- fit and transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-2.68420713,  0.32660731,  1.4       ],\n",
       "       [-2.71539062, -0.16955685,  1.4       ],\n",
       "       [-2.88981954, -0.13734561,  1.3       ],\n",
       "       [-2.7464372 , -0.31112432,  1.5       ],\n",
       "       [-2.72859298,  0.33392456,  1.4       ]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "data, target = load_iris(return_X_y=True)\n",
    "\n",
    "transformed = union.fit_transform(data, target)\n",
    "transformed[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Custom transformer\n",
    "- Define"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.base import BaseEstimator, TransformerMixin\n",
    "\n",
    "class ItemSelector(BaseEstimator, TransformerMixin):\n",
    "    \n",
    "    def __init__(self, keys):\n",
    "        self.keys = keys\n",
    "\n",
    "    def fit(self, X, y=None):\n",
    "        return self\n",
    "\n",
    "    def transform(self, X, y=None):\n",
    "        return X[self.keys]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Create"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "sel = ItemSelector('A')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- fit and transform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1\n",
       "1    2\n",
       "2    3\n",
       "3    4\n",
       "4    5\n",
       "Name: A, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame([\n",
    "    {'A': 1, 'B': 2},\n",
    "    {'A': 2, 'B': 4},\n",
    "    {'A': 3, 'B': 8},\n",
    "    {'A': 4, 'B': 16},\n",
    "    {'A': 5, 'B': 32},\n",
    "])\n",
    "\n",
    "sel.fit_transform(df)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}