# Cheat sheet

## Table of contents:

- Pipeline
- FeatureUnion
- Custom transformer

## Pipeline
- Create

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier

# A pipeline is an ordered list of ('step_name', estimator_object) pairs;
# here a single DummyClassifier is registered under the name 'pred'.
steps = [('pred', DummyClassifier())]
pipe = Pipeline(steps=steps)

- Fit

In [None]:
from sklearn.datasets import load_iris
# Load the iris features and labels together as one (data, target) pair.
iris_pair = load_iris(return_X_y=True)
data, target = iris_pair

# Fit every step of the pipeline, then name it on the cell's last line so
# the notebook displays the fitted object.
pipe.fit(X=data, y=target)
pipe

- Predict

In [None]:
# Take the last sample with a slice (not a plain index) so the input keeps
# its row dimension, then ask the fitted pipeline for its prediction.
last_sample = data[-1:]
pipe.predict(last_sample)

- Get parameters of pipeline steps

In [None]:
# Show the pipeline's parameters, including those of its nested steps
# (deep=True is the default, spelled out here for clarity).
pipe.get_params(deep=True)

- Set parameters of pipeline steps (addressed as `stepname__parametername`)

In [None]:
# A step's parameter is addressed as <step name>__<parameter name>=<new value>.
pipe.set_params(pred__random_state=42)

In [None]:
# Collect the "<step name>__<parameter name>" -> new value pairs in a dict ...
newvalues = dict(pred__random_state=42)
# ... and expand that dict into keyword arguments with the ** operator.
pipe.set_params(**newvalues)

## FeatureUnion
- Create

In [3]:
from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

# Two feature extractors that will be applied side by side:
# a 2-component PCA and a univariate selector keeping the single best feature.
pca = PCA(n_components=2)
univ_select = SelectKBest(k=1)

# Register each transformer under a name, as ('name', transformer) pairs.
union = FeatureUnion(transformer_list=[
    ('pca', pca),
    ('univ_select', univ_select),
])

- Fit and transform

In [7]:
from sklearn.datasets import load_iris
# Load the iris features and labels together as one (data, target) pair.
iris_pair = load_iris(return_X_y=True)
data, target = iris_pair

# Fit the union and transform the same data in one call, then preview the
# first five rows of the combined feature matrix.
transformed = union.fit_transform(X=data, y=target)
transformed[:5]

array([[-2.68420713, 0.32660731, 1.4 ],
 [-2.71539062, -0.16955685, 1.4 ],
 [-2.88981954, -0.13734561, 1.3 ],
 [-2.7464372 , -0.31112432, 1.5 ],
 [-2.72859298, 0.33392456, 1.4 ]])

## Custom transformer
- Define

In [10]:
from sklearn.base import BaseEstimator, TransformerMixin

class ItemSelector(BaseEstimator, TransformerMixin):
    """Transformer that picks item(s) out of its input by key.

    ``transform`` returns ``X[self.keys]``, so ``keys`` may be anything the
    input container accepts as an index (for example a DataFrame column
    name).
    """

    def __init__(self, keys):
        """Store the key(s) that ``transform`` will look up."""
        self.keys = keys

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to learn from ``X``."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` indexed by the stored key(s)."""
        selected = X[self.keys]
        return selected

- Create

In [11]:
# Selector that will pull out whatever is stored under the key 'A'.
sel = ItemSelector(keys='A')

- Fit and transform

In [12]:
import pandas as pd
# Small two-column demo frame, built column by column.
df = pd.DataFrame({
    'A': [1, 2, 3, 4, 5],
    'B': [2, 4, 8, 16, 32],
})

# Fit and transform in one call; the result is the cell's displayed output.
sel.fit_transform(df)

0 1
1 2
2 3
3 4
4 5
Name: A, dtype: int64