{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from nb_007a import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# IMDB" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fine-tuning the LM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Data has been prepared in csv files at the beginning 007a, we will use it know." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PATH = Path('../data/aclImdb/')\n", "CLAS_PATH = PATH/'clas'\n", "LM_PATH = PATH/'lm'\n", "MODEL_PATH = LM_PATH/'models'\n", "os.makedirs(CLAS_PATH, exist_ok=True)\n", "os.makedirs(LM_PATH, exist_ok=True)\n", "os.makedirs(MODEL_PATH, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tokenizer = Tokenizer(rules=default_rules, special_cases=[BOS, FLD, UNK, PAD])\n", "bs,bptt = 50,70\n", "data = data_from_textcsv(LM_PATH, tokenizer, data_func=lm_data, max_vocab=60000, bs=bs, bptt=bptt)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Adapt the pre-trained weights to the new vocabulary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Download the pretrained model and the corresponding itos dictionary [here](http://files.fast.ai/models/wt103_v1/) and put them in the MODEL_PATH folder." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "itos_wt = pickle.load(open(MODEL_PATH/'itos_wt103.pkl', 'rb'))\n", "stoi_wt = {v:k for k,v in enumerate(itos_wt)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "\n", "Weights = Dict[str,Tensor]\n", "\n", "def convert_weights(wgts:Weights, stoi_wgts:Dict[str,int], itos_new:Collection[str]) -> Weights:\n", " \"Converts the model weights to go with a new vocabulary.\"\n", " dec_bias, enc_wgts = wgts['1.decoder.bias'], wgts['0.encoder.weight']\n", " bias_m, wgts_m = dec_bias.mean(0), enc_wgts.mean(0)\n", " new_w = enc_wgts.new_zeros((len(itos_new),enc_wgts.size(1))).zero_()\n", " new_b = dec_bias.new_zeros((len(itos_new),)).zero_()\n", " for i,w in enumerate(itos_new):\n", " r = stoi_wgts[w] if w in stoi_wgts else -1\n", " new_w[i] = enc_wgts[r] if r>=0 else wgts_m\n", " new_b[i] = dec_bias[r] if r>=0 else bias_m\n", " wgts['0.encoder.weight'] = new_w\n", " wgts['0.encoder_dp.emb.weight'] = new_w.clone()\n", " wgts['1.decoder.weight'] = new_w.clone()\n", " wgts['1.decoder.bias'] = new_b\n", " return wgts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts = torch.load(MODEL_PATH/'lstm_wt103.pth', map_location=lambda storage, loc: storage)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts['1.decoder.bias'][:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "itos_wt[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts = convert_weights(wgts, stoi_wt, data.train_ds.vocab.itos)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts['1.decoder.bias'][:10]" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.train_ds.vocab.itos[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def lm_split(model:Model) -> List[Model]:\n", " \"Splits a RNN model in groups for differential learning rates.\"\n", " groups = [nn.Sequential(rnn, dp) for rnn, dp in zip(model[0].rnns, model[0].hidden_dps)] \n", " groups.append(nn.Sequential(model[0].encoder, model[0].encoder_dp, model[1]))\n", " return groups" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "SplitFunc = Callable[[Model], List[Model]]\n", "OptSplitFunc = Optional[SplitFunc]\n", "OptStrTuple = Optional[Tuple[str,str]]\n", "\n", "class RNNLearner(Learner):\n", " \"Basic class for a Learner in RNN\"\n", " def __init__(self, data:DataBunch, model:Model, bptt:int=70, split_func:OptSplitFunc=None, clip:float=None, \n", " adjust:bool=False, alpha:float=2., beta:float=1., **kwargs):\n", " super().__init__(data, model)\n", " self.callbacks.append(RNNTrainer(self, bptt, alpha=alpha, beta=beta, adjust=adjust))\n", " if clip: self.callback_fns.append(partial(GradientClipping, clip=clip))\n", " if split_func: self.split(split_func)\n", " self.metrics = [accuracy]\n", " \n", " def save_encoder(self, name:str):\n", " \"Saves the encoder to the model directory\"\n", " torch.save(self.model[0].state_dict(), self.path/self.model_dir/f'{name}.pth')\n", " \n", " def load_encoder(self, name:srt):\n", " \"Loads the encoder from the model directory\"\n", " self.model[0].load_state_dict(torch.load(self.path/self.model_dir/f'{name}.pth'))\n", " \n", " def load_pretrained(self, wgts_fname:str, itos_fname:str):\n", " \"Loads a pretrained model and adapts it to the data vocabulary.\"\n", " old_itos = pickle.load(open(self.path/self.model_dir/f'{itos_fname}.pkl', 'rb'))\n", " old_stoi = {v:k for k,v in enumerate(old_itos)}\n", " wgts = torch.load(self.path/self.model_dir/f'{wgts_fname}.pth', map_location=lambda storage, loc: storage)\n", " wgts = convert_weights(wgts, old_stoi, self.data.train_ds.vocab.itos)\n", " self.model.load_state_dict(wgts)\n", " \n", " @classmethod\n", " def language_model(cls, data:DataBunch, bptt:int=70, emb_sz:int=400, nh:int=1150, nl:int=3, pad_token:int=1, \n", " drop_mult:float=1., tie_weights:bool=True, bias:bool=True, qrnn:bool=False, \n", " pretrained_fnames:OptStrTuple=None, **kwargs) -> 'RNNLearner':\n", " \"Creates a `Learner` with a language model.\"\n", " dps = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * drop_mult\n", " vocab_size = len(data.train_ds.vocab.itos)\n", " model = get_language_model(vocab_size, emb_sz, nh, nl, pad_token, input_p=dps[0], output_p=dps[1], \n", " weight_p=dps[2], embed_p=dps[3], hidden_p=dps[4], tie_weights=tie_weights, bias=bias, qrnn=qrnn)\n", " learn = cls(data, model, bptt, split_func=lm_split, **kwargs)\n", " if pretrained_fnames is not None: learn.load_pretrained(*pretrained_fnames)\n", " return learn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data_from_textcsv(LM_PATH, Tokenizer(), data_func=lm_data, bs=bs)\n", "learn = RNNLearner.language_model(data, drop_mul=0.3, pretrained_fnames=['lstm_wt103', 'itos_wt103'])\n", "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr_find(learn)" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.recorder.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(1, 1e-2, moms=(0.8,0.7), wd=0.03)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('fit_head')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('fit_head')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(10, 1e-3, moms=(0.8,0.7), wd=0.03 pct_start=0.25)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('fine_tuned60kb')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save_encoder('fine_tuned_enc60kb')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from torch.utils.data import Sampler, BatchSampler\n", "\n", "NPArrayList = Collection[np.ndarray]\n", "KeyFunc = Callable[[int], int]\n", "\n", "class SortSampler(Sampler):\n", " \"Go through the text data by order of length\"\n", " \n", " def __init__(self, data_source:NPArrayList, key:KeyFunc): self.data_source,self.key = data_source,key\n", " def __len__(self) -> int: return len(self.data_source)\n", " def __iter__(self):\n", " return iter(sorted(range(len(self.data_source)), key=self.key, reverse=True))\n", "\n", "\n", "class SortishSampler(Sampler):\n", " \"Go through the text data by order of length with a bit of randomness\"\n", " \n", " def __init__(self, data_source:NPArrayList, key:KeyFunc, bs:int):\n", " self.data_source,self.key,self.bs = data_source,key,bs\n", "\n", " def __len__(self) -> int: return len(self.data_source)\n", "\n", " def __iter__(self):\n", " idxs = np.random.permutation(len(self.data_source))\n", " sz = self.bs*50\n", " ck_idx = [idxs[i:i+sz] for i in range(0, len(idxs), sz)]\n", " sort_idx = np.concatenate([sorted(s, key=self.key, reverse=True) for s in ck_idx])\n", " sz = self.bs\n", " ck_idx = [sort_idx[i:i+sz] for i in range(0, len(sort_idx), sz)]\n", " max_ck = np.argmax([self.key(ck[0]) for ck in ck_idx]) # find the chunk with the largest key,\n", " ck_idx[0],ck_idx[max_ck] = ck_idx[max_ck],ck_idx[0] # then make sure it goes first.\n", " sort_idx = np.concatenate(np.random.permutation(ck_idx[1:]))\n", " sort_idx = np.concatenate((ck_idx[0], sort_idx))\n", " return iter(sort_idx)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "BatchSamples = Collection[Tuple[Collection[int], int]]\n", "\n", "def pad_collate(samples:BatchSamples, pad_idx:int=1, pad_first:bool=True) -> Tuple[LongTensor, LongTensor]:\n", " \"Function that collect samples and adds padding\"\n", " max_len = max([len(s[0]) for s in samples])\n", " res = torch.zeros(max_len, len(samples)).long() + pad_idx\n", " for i,s in enumerate(samples): res[-len(s[0]):,i] = LongTensor(s[0])\n", " return res, LongTensor([s[1] for s in samples]).squeeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def classifier_data(datasets:Collection[TextDataset], path:PathOrStr, **kwargs) -> DataBunch:\n", " \"Function that transform the `datasets` in a `DataBunch` for 
classification\"\n", " bs = kwargs.pop('bs') if 'bs' in kwargs else 64\n", " pad_idx = kwargs.pop('pad_idx') if 'pad_idx' in kwargs else 1\n", " train_sampler = SortishSampler(datasets[0].ids, key=lambda x: len(datasets[0].ids[x]), bs=bs//2)\n", " train_dl = DeviceDataLoader.create(datasets[0], bs//2, sampler=train_sampler, collate_fn=pad_collate)\n", " dataloaders = [train_dl]\n", " for ds in datasets[1:]:\n", " sampler = SortSampler(ds.ids, key=lambda x: len(ds.ids[x]))\n", " dataloaders.append(DeviceDataLoader.create(ds, bs, sampler=sampler, collate_fn=pad_collate))\n", " return DataBunch(*dataloaders, path=path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to use the same vocab as for the LM." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "vocab = Vocab(LM_PATH/'tmp')\n", "data = data_from_textcsv(CLAS_PATH, Tokenizer(), vocab=vocab, data_func=classifier_data, bs=50)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.train_ds.vocab.itos[40:60]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "vocab.itos[40:60]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x,y = next(iter(data.train_dl))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "vocab.textify(x[:,15]), y[2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class MultiBatchRNNCore(RNNCore):\n", " \"Creates a RNNCore module that can process a full sentence.\"\n", " \n", " def __init__(self, bptt:int, max_seq:int, *args, **kwargs):\n", " self.max_seq,self.bptt = max_seq,bptt\n", " super().__init__(*args, **kwargs)\n", "\n", " def concat(self, arrs:Collection[Tensor]) -> Tensor:\n", " \"Concatenates the arrays along the batch dimension.\"\n", " return [torch.cat([l[si] for l in arrs]) for si in range(len(arrs[0]))]\n", "\n", " def forward(self, input:LongTensor) -> Tuple[Tensor,Tensor]:\n", " sl,bs = input.size()\n", " self.reset()\n", " raw_outputs, outputs = [],[]\n", " for i in range(0, sl, self.bptt):\n", " r, o = super().forward(input[i: min(i+self.bptt, sl)])\n", " if i>(sl-self.max_seq):\n", " raw_outputs.append(r)\n", " outputs.append(o)\n", " return self.concat(raw_outputs), self.concat(outputs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class PoolingLinearClassifier(nn.Module):\n", " \"Creates a linear classifier with pooling.\"\n", " \n", " def __init__(self, layers:Collection[int], drops:Collection[float]):\n", " super().__init__()\n", " mod_layers = []\n", " activs = [nn.ReLU(inplace=True)] * (len(layers) - 2) + [None]\n", " for n_in,n_out,p,actn in zip(layers[:-1],layers[1:], drops, activs):\n", " mod_layers += bn_drop_lin(n_in, n_out, p=p, actn=actn) \n", " self.layers = nn.Sequential(*mod_layers)\n", "\n", " def pool(self, x:Tensor, bs:int, is_max:bool):\n", " \"Pools the tensor along the seq_len dimension.\"\n", " f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d\n", " return f(x.permute(1,2,0), (1,)).view(bs,-1)\n", "\n", " def forward(self, input:Tuple[Tensor,Tensor]) -> Tuple[Tensor,Tensor,Tensor]:\n", " raw_outputs, outputs = input\n", " output = outputs[-1]\n", " sl,bs,_ = output.size()\n", " avgpool = self.pool(output, bs, False)\n", " mxpool = self.pool(output, bs, True)\n", " x = 
torch.cat([output[-1], mxpool, avgpool], 1)\n", " x = self.layers(x)\n", " return x, raw_outputs, outputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def rnn_classifier_split(model:Model) -> List[Model]:\n", " \"Splits a RNN model in groups.\"\n", " groups = [nn.Sequential(model[0].encoder, model[0].encoder_dp)]\n", " groups += [nn.Sequential(rnn, dp) for rnn, dp in zip(model[0].rnns, model[0].hidden_dps)] \n", " groups.append(model[1])\n", " return groups" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def get_rnn_classifier(bptt:int, max_seq:int, n_class:int, vocab_sz:int, emb_sz:int, n_hid:int, n_layers:int, \n", " pad_token:int, layers:Collection[int], drops:Collection[float], bidir:bool=False, qrnn:bool=False, \n", " hidden_p:float=0.2, input_p:float=0.6, embed_p:float=0.1, weight_p:float=0.5) -> Model:\n", " \"Creates a RNN classifier model\"\n", " rnn_enc = MultiBatchRNNCore(bptt, max_seq, vocab_sz, emb_sz, n_hid, n_layers, pad_token=pad_token, bidir=bidir,\n", " qrnn=qrnn, hidden_p=hidden_p, input_p=input_p, embed_p=embed_p, weight_p=weight_p)\n", " return SequentialRNN(rnn_enc, PoolingLinearClassifier(layers, drops))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "SplitFunc = Callable[[Model], List[Model]]\n", "OptSplitFunc = Optional[SplitFunc]\n", "OptStrTuple = Optional[Tuple[str,str]]\n", "\n", "class RNNLearner(Learner):\n", " \"Basic class for a Learner in RNN\"\n", " def __init__(self, data:DataBunch, model:Model, bptt:int=70, split_func:OptSplitFunc=None, clip:float=None, \n", " adjust:bool=False, alpha:float=2., beta:float=1., **kwargs):\n", " super().__init__(data, model)\n", " self.callbacks.append(RNNTrainer(self, bptt, alpha=alpha, beta=beta, adjust=adjust))\n", " if clip: self.callback_fns.append(partial(GradientClipping, clip=clip))\n", " if split_func: self.split(split_func)\n", " self.metrics = [accuracy]\n", " \n", " def save_encoder(self, name:str):\n", " \"Saves the encoder to the model directory\"\n", " torch.save(self.model[0].state_dict(), self.path/self.model_dir/f'{name}.pth')\n", " \n", " def load_encoder(self, name:str):\n", " \"Loads the encoder from the model directory\"\n", " self.model[0].load_state_dict(torch.load(self.path/self.model_dir/f'{name}.pth'))\n", " \n", " def load_pretrained(self, wgts_fname:str, itos_fname:str):\n", " \"Loads a pretrained model and adapts it to the data vocabulary.\"\n", " old_itos = pickle.load(open(self.path/self.model_dir/f'{itos_fname}.pkl', 'rb'))\n", " old_stoi = {v:k for k,v in enumerate(old_itos)}\n", " wgts = torch.load(self.path/self.model_dir/f'{wgts_fname}.pth', map_location=lambda storage, loc: storage)\n", " wgts = convert_weights(wgts, old_stoi, self.data.train_ds.vocab.itos)\n", " self.model.load_state_dict(wgts)\n", " \n", " @classmethod\n", " def language_model(cls, data:DataBunch, bptt:int=70, emb_sz:int=400, nh:int=1150, nl:int=3, pad_token:int=1, \n", " drop_mult:float=1., tie_weights:bool=True, bias:bool=True, qrnn:bool=False, \n", " pretrained_fnames:OptStrTuple=None, **kwargs) -> 'RNNLearner':\n", " \"Creates a `Learner` with a language model.\"\n", " dps = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * drop_mult\n", " vocab_size = len(data.train_ds.vocab.itos)\n", " model = get_language_model(vocab_size, emb_sz, nh, nl, pad_token, input_p=dps[0], output_p=dps[1], \n", " weight_p=dps[2], embed_p=dps[3], 
hidden_p=dps[4], tie_weights=tie_weights, bias=bias, qrnn=qrnn)\n", " learn = cls(data, model, bptt, split_func=lm_split, **kwargs)\n", " if pretrained_fnames is not None: learn.load_pretrained(*pretrained_fnames)\n", " return learn\n", " \n", " @classmethod\n", " def classifier(cls, data:DataBunch, bptt:int=70, max_len:int=70*20, emb_sz:int=400, nh:int=1150, nl:int=3, \n", " layers:Collection[int]=None, drops:Collection[float]=None, pad_token:int=1, \n", " drop_mult:float=1., qrnn:bool=False, **kwargs) -> 'RNNLearner':\n", " \"Creates a RNN classifier.\"\n", " dps = np.array([0.4,0.5,0.05,0.3,0.4]) * drop_mult\n", " if layers is None: layers = [50]\n", " if drops is None: drops = [0.1]\n", " vocab_size = len(data.train_ds.vocab.itos)\n", " n_class = len(data.train_ds.classes)\n", " layers = [emb_sz*3] + layers + [n_class]\n", " drops = [dps[4]] + drops\n", " model = get_rnn_classifier(bptt, max_len, n_class, vocab_size, emb_sz, nh, nl, pad_token, \n", " layers, drops, input_p=dps[0], weight_p=dps[1], embed_p=dps[2], hidden_p=dps[3], qrnn=qrnn)\n", " learn = cls(data, model, bptt, split_func=rnn_classifier_split, **kwargs)\n", " return learn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data_from_textcsv(CLAS_PATH, Tokenizer(), vocab=Vocab(LM_PATH/'tmp'), data_func=classifier_data, bs=50)\n", "learn = RNNLearner.classifier(data, drop_mult=0.5)\n", "learn.load_encoder('fine_tuned_enc60kb')\n", "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.lr_find()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.recorder.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(1, 2e-2, moms=(0.8,0.7))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('first')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('first')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-2)\n", "learn.fit_one_cycle(1, slice(1e-2/2.6,1e-2), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('second')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('second')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-3)\n", "learn.fit_one_cycle(1, slice(5e-3/(2.6**2),5e-3), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('third')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('third')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(2, slice(1e-3/(2.6**4),1e-3), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }