In [None]:
#hide
#skip
! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on colab

In [None]:
#export
from fastai.torch_basics import *
from fastai.tabular.core import *

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#default_exp tabular.model

# Tabular model

> A basic model that can be used on tabular data

## Embeddings

In [None]:
#export
def emb_sz_rule(n_cat):
    "Rule-of-thumb embedding size for a variable with `n_cat` categories, capped at 600"
    suggested = round(1.6 * n_cat**0.56)
    # Keep the suggestion only while it stays below the cap
    return suggested if suggested < 600 else 600

In [None]:
#export
def _one_emb_sz(classes, n, sz_dict=None):
    "Return `(n_cat, sz)` for variable `n`: its number of classes and the size from `sz_dict`, else the rule of thumb."
    overrides = ifnone(sz_dict, {})
    cardinality = len(classes[n])
    default_sz = int(emb_sz_rule(cardinality))  # rule of thumb
    return cardinality, overrides.get(n, default_sz)

Through trial and error, this general rule (`emb_sz_rule`) takes the lower of two values:
* A dimension space of 600
* A dimension space equal to 1.6 times the cardinality of the variable raised to the power 0.56, rounded to the nearest integer.

This provides a good starting point for the embedding size of your variables. More advanced users who wish to lean into this practice can tweak these values at their discretion. It is not uncommon for slight adjustments to this general formula to give better results.

In [None]:
#export
def get_emb_sz(to, sz_dict=None):
    "Get the `(n_cat, sz)` embedding size of every name in `to.cat_names`, using `sz_dict` where it has an entry"
    emb_szs = []
    for cat_name in to.cat_names:
        emb_szs.append(_one_emb_sz(to.classes, cat_name, sz_dict))
    return emb_szs

In [None]:
#export
class TabularModel(Module):
    "Basic model for tabular data."
    def __init__(self, emb_szs, n_cont, out_sz, layers, ps=None, embed_p=0.,
                 y_range=None, use_bn=True, bn_final=False, bn_cont=True, act_cls=nn.ReLU(inplace=True),
                 lin_first=True):
        # `emb_szs`: `(ni, nf)` pairs handed to `Embedding`, e.g. the `(n_cat, sz)` pairs from `get_emb_sz`
        # `n_cont`: number of continuous variables, `out_sz`: size of the last linear block
        # `layers`: hidden sizes, `ps`: dropout for the hidden blocks (one value, or one per entry of `layers`)
        # Normalise to plain lists: tuples pass `is_listy` but cannot be concatenated with the lists below
        layers = list(layers)
        ps = ifnone(ps, [0]*len(layers))
        ps = list(ps) if is_listy(ps) else [ps]*len(layers)
        self.embeds = nn.ModuleList([Embedding(ni, nf) for ni,nf in emb_szs])
        self.emb_drop = nn.Dropout(embed_p)
        self.bn_cont = nn.BatchNorm1d(n_cont) if bn_cont else None
        n_emb = sum(e.embedding_dim for e in self.embeds)
        self.n_emb,self.n_cont = n_emb,n_cont
        # Input width is all embeddings plus the continuous variables
        sizes = [n_emb + n_cont] + layers + [out_sz]
        # Every block except the last one gets the activation
        actns = [act_cls for _ in range(len(sizes)-2)] + [None]
        # The last block has no dropout and only gets batchnorm when `bn_final` is set.
        # `zip` stops at the shorter input, so `ps` should hold one value per entry of `layers`.
        _layers = [LinBnDrop(sizes[i], sizes[i+1], bn=use_bn and (i!=len(actns)-1 or bn_final), p=p, act=a, lin_first=lin_first)
                   for i,(p,a) in enumerate(zip(ps+[0.],actns))]
        if y_range is not None: _layers.append(SigmoidRange(*y_range))
        self.layers = nn.Sequential(*_layers)

    def forward(self, x_cat, x_cont=None):
        if self.n_emb != 0:
            # Column `i` of `x_cat` feeds embedding `i`; results are concatenated along dim 1
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            if self.bn_cont is not None: x_cont = self.bn_cont(x_cont)
            # Without embeddings the continuous variables are the whole input
            x = torch.cat([x, x_cont], 1) if self.n_emb != 0 else x_cont
        return self.layers(x)

This model expects your `cat` and `cont` variables separated. `cat` is passed through an `Embedding` layer and potential `Dropout`, while `cont` is passed through potential `BatchNorm1d`. Afterwards both are concatenated and passed through a series of `LinBnDrop`, before a final `Linear` layer corresponding to the expected outputs.

In [None]:
# Smoke test: two categorical variables (embeddings 4->2 and 17->8) plus two continuous ones
emb_szs = [(4,2), (17,8)]
m = TabularModel(emb_szs, n_cont=2, out_sz=2, layers=[200,100]).eval()
x_cat = torch.tensor([[2,12]]).long()
x_cont = torch.tensor([[0.7633, -0.1887]]).float()
out = m(x_cat, x_cont)
# One row in, `out_sz` activations out
assert out.shape == (1, 2)

In [None]:
#export
@delegates(TabularModel.__init__)
def tabular_config(**kwargs):
    "Convenience function that gathers keyword arguments for `TabularModel` into a config dict"
    config = kwargs
    return config

Any direct setup of `TabularModel`'s internals should be passed through here:

In [None]:
config = tabular_config(embed_p=0.6, use_bn=False)
config

{'embed_p': 0.6, 'use_bn': False}

## Export -

In [None]:
#hide
# Run nbdev's notebook-to-script export; the recorded output lists each notebook it converted
from nbdev.export import notebook2script
notebook2script()

Converted 00_torch_core.ipynb.
Converted 01_layers.ipynb.
Converted 01a_losses.ipynb.
Converted 02_data.load.ipynb.
Converted 03_data.core.ipynb.
Converted 04_data.external.ipynb.
Converted 05_data.transforms.ipynb.
Converted 06_data.block.ipynb.
Converted 07_vision.core.ipynb.
Converted 08_vision.data.ipynb.
Converted 09_vision.augment.ipynb.
Converted 09b_vision.utils.ipynb.
Converted 09c_vision.widgets.ipynb.
Converted 10_tutorial.pets.ipynb.
Converted 10b_tutorial.albumentations.ipynb.
Converted 11_vision.models.xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_callback.core.ipynb.
Converted 13a_learner.ipynb.
Converted 13b_metrics.ipynb.
Converted 14_callback.schedule.ipynb.
Converted 14a_callback.data.ipynb.
Converted 15_callback.hook.ipynb.
Converted 15a_vision.models.unet.ipynb.
Converted 16_callback.progress.ipynb.
Converted 17_callback.tracker.ipynb.
Converted 18_callback.fp16.ipynb.
Converted 18a_callback.training.ipynb.
Converted 18b_callback.preds.ipynb.
Converted 