{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from nb_007a import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# IMDB" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fine-tuning the LM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Data has been prepared in csv files at the beginning 007a, we will use it know." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PATH = Path('../data/aclImdb/')\n", "CLAS_PATH = PATH/'clas'\n", "LM_PATH = PATH/'lm'\n", "MODEL_PATH = LM_PATH/'models'\n", "os.makedirs(CLAS_PATH, exist_ok=True)\n", "os.makedirs(LM_PATH, exist_ok=True)\n", "os.makedirs(MODEL_PATH, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tokenizer = Tokenizer(rules=default_rules, special_cases=[BOS, FLD, UNK, PAD])\n", "bs,bptt = 50,70\n", "data = data_from_textcsv(LM_PATH, tokenizer, data_func=lm_data, max_vocab=60000, bs=bs, bptt=bptt)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Adapt the pre-trained weights to the new vocabulary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Download the pretrained model and the corresponding itos dictionary [here](http://files.fast.ai/models/wt103_v1/) and put them in the MODEL_PATH folder." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "itos_wt = pickle.load(open(MODEL_PATH/'itos_wt103.pkl', 'rb'))\n", "stoi_wt = {v:k for k,v in enumerate(itos_wt)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "\n", "Weights = Dict[str,Tensor]\n", "\n", "def convert_weights(wgts:Weights, stoi_wgts:Dict[str,int], itos_new:Collection[str]) -> Weights:\n", " \"Converts the model weights to go with a new vocabulary.\"\n", " dec_bias, enc_wgts = wgts['1.decoder.bias'], wgts['0.encoder.weight']\n", " bias_m, wgts_m = dec_bias.mean(0), enc_wgts.mean(0)\n", " new_w = enc_wgts.new_zeros((len(itos_new),enc_wgts.size(1))).zero_()\n", " new_b = dec_bias.new_zeros((len(itos_new),)).zero_()\n", " for i,w in enumerate(itos_new):\n", " r = stoi_wgts[w] if w in stoi_wgts else -1\n", " new_w[i] = enc_wgts[r] if r>=0 else wgts_m\n", " new_b[i] = dec_bias[r] if r>=0 else bias_m\n", " wgts['0.encoder.weight'] = new_w\n", " wgts['0.encoder_dp.emb.weight'] = new_w.clone()\n", " wgts['1.decoder.weight'] = new_w.clone()\n", " wgts['1.decoder.bias'] = new_b\n", " return wgts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts = torch.load(MODEL_PATH/'lstm_wt103.pth', map_location=lambda storage, loc: storage)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts['1.decoder.bias'][:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "itos_wt[:10]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts = convert_weights(wgts, stoi_wt, data.train_ds.vocab.itos)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wgts['1.decoder.bias'][:10]" ] }, { "cell_type": 
"code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.train_ds.vocab.itos[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def lm_split(model:Model) -> List[Model]:\n", " \"Splits a RNN model in groups for differential learning rates.\"\n", " groups = [nn.Sequential(rnn, dp) for rnn, dp in zip(model[0].rnns, model[0].hidden_dps)] \n", " groups.append(nn.Sequential(model[0].encoder, model[0].encoder_dp, model[1]))\n", " return groups" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "SplitFunc = Callable[[Model], List[Model]]\n", "OptSplitFunc = Optional[SplitFunc]\n", "OptStrTuple = Optional[Tuple[str,str]]\n", "\n", "class RNNLearner(Learner):\n", " \"Basic class for a Learner in RNN\"\n", " def __init__(self, data:DataBunch, model:Model, bptt:int=70, split_func:OptSplitFunc=None, clip:float=None, \n", " adjust:bool=False, alpha:float=2., beta:float=1., **kwargs):\n", " super().__init__(data, model)\n", " self.callbacks.append(RNNTrainer(self, bptt, alpha=alpha, beta=beta, adjust=adjust))\n", " if clip: self.callback_fns.append(partial(GradientClipping, clip=clip))\n", " if split_func: self.split(split_func)\n", " self.metrics = [accuracy]\n", " \n", " def save_encoder(self, name:str):\n", " \"Saves the encoder to the model directory\"\n", " torch.save(self.model[0].state_dict(), self.path/self.model_dir/f'{name}.pth')\n", " \n", " def load_encoder(self, name:srt):\n", " \"Loads the encoder from the model directory\"\n", " self.model[0].load_state_dict(torch.load(self.path/self.model_dir/f'{name}.pth'))\n", " \n", " def load_pretrained(self, wgts_fname:str, itos_fname:str):\n", " \"Loads a pretrained model and adapts it to the data vocabulary.\"\n", " old_itos = pickle.load(open(self.path/self.model_dir/f'{itos_fname}.pkl', 'rb'))\n", " old_stoi = {v:k for k,v in enumerate(old_itos)}\n", " wgts = torch.load(self.path/self.model_dir/f'{wgts_fname}.pth', map_location=lambda storage, loc: storage)\n", " wgts = convert_weights(wgts, old_stoi, self.data.train_ds.vocab.itos)\n", " self.model.load_state_dict(wgts)\n", " \n", " @classmethod\n", " def language_model(cls, data:DataBunch, bptt:int=70, emb_sz:int=400, nh:int=1150, nl:int=3, pad_token:int=1, \n", " drop_mult:float=1., tie_weights:bool=True, bias:bool=True, qrnn:bool=False, \n", " pretrained_fnames:OptStrTuple=None, **kwargs) -> 'RNNLearner':\n", " \"Creates a `Learner` with a language model.\"\n", " dps = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * drop_mult\n", " vocab_size = len(data.train_ds.vocab.itos)\n", " model = get_language_model(vocab_size, emb_sz, nh, nl, pad_token, input_p=dps[0], output_p=dps[1], \n", " weight_p=dps[2], embed_p=dps[3], hidden_p=dps[4], tie_weights=tie_weights, bias=bias, qrnn=qrnn)\n", " learn = cls(data, model, bptt, split_func=lm_split, **kwargs)\n", " if pretrained_fnames is not None: learn.load_pretrained(*pretrained_fnames)\n", " return learn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data_from_textcsv(LM_PATH, Tokenizer(), data_func=lm_data, bs=bs)\n", "learn = RNNLearner.language_model(data, drop_mul=0.3, pretrained_fnames=['lstm_wt103', 'itos_wt103'])\n", "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr_find(learn)" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.recorder.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(1, 1e-2, moms=(0.8,0.7), wd=0.03)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('fit_head')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('fit_head')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(10, 1e-3, moms=(0.8,0.7), wd=0.03 pct_start=0.25)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('fine_tuned60kb')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save_encoder('fine_tuned_enc60kb')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from torch.utils.data import Sampler, BatchSampler\n", "\n", "NPArrayList = Collection[np.ndarray]\n", "KeyFunc = Callable[[int], int]\n", "\n", "class SortSampler(Sampler):\n", " \"Go through the text data by order of length\"\n", " \n", " def __init__(self, data_source:NPArrayList, key:KeyFunc): self.data_source,self.key = data_source,key\n", " def __len__(self) -> int: return len(self.data_source)\n", " def __iter__(self):\n", " return iter(sorted(range(len(self.data_source)), key=self.key, reverse=True))\n", "\n", "\n", "class SortishSampler(Sampler):\n", " \"Go through the text data by order of length with a bit of randomness\"\n", " \n", " def __init__(self, data_source:NPArrayList, key:KeyFunc, bs:int):\n", " self.data_source,self.key,self.bs = data_source,key,bs\n", "\n", " def __len__(self) -> int: return len(self.data_source)\n", "\n", " def __iter__(self):\n", " idxs = np.random.permutation(len(self.data_source))\n", " sz = self.bs*50\n", " ck_idx = [idxs[i:i+sz] for i in range(0, len(idxs), sz)]\n", " sort_idx = np.concatenate([sorted(s, key=self.key, reverse=True) for s in ck_idx])\n", " sz = self.bs\n", " ck_idx = [sort_idx[i:i+sz] for i in range(0, len(sort_idx), sz)]\n", " max_ck = np.argmax([self.key(ck[0]) for ck in ck_idx]) # find the chunk with the largest key,\n", " ck_idx[0],ck_idx[max_ck] = ck_idx[max_ck],ck_idx[0] # then make sure it goes first.\n", " sort_idx = np.concatenate(np.random.permutation(ck_idx[1:]))\n", " sort_idx = np.concatenate((ck_idx[0], sort_idx))\n", " return iter(sort_idx)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "BatchSamples = Collection[Tuple[Collection[int], int]]\n", "\n", "def pad_collate(samples:BatchSamples, pad_idx:int=1, pad_first:bool=True) -> Tuple[LongTensor, LongTensor]:\n", " \"Function that collect samples and adds padding\"\n", " max_len = max([len(s[0]) for s in samples])\n", " res = torch.zeros(max_len, len(samples)).long() + pad_idx\n", " for i,s in enumerate(samples): res[-len(s[0]):,i] = LongTensor(s[0])\n", " return res, LongTensor([s[1] for s in samples]).squeeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def classifier_data(datasets:Collection[TextDataset], path:PathOrStr, **kwargs) -> DataBunch:\n", " \"Function that transform the `datasets` in a `DataBunch` for 
classification\"\n", " bs = kwargs.pop('bs') if 'bs' in kwargs else 64\n", " pad_idx = kwargs.pop('pad_idx') if 'pad_idx' in kwargs else 1\n", " train_sampler = SortishSampler(datasets[0].ids, key=lambda x: len(datasets[0].ids[x]), bs=bs//2)\n", " train_dl = DeviceDataLoader.create(datasets[0], bs//2, sampler=train_sampler, collate_fn=pad_collate)\n", " dataloaders = [train_dl]\n", " for ds in datasets[1:]:\n", " sampler = SortSampler(ds.ids, key=lambda x: len(ds.ids[x]))\n", " dataloaders.append(DeviceDataLoader.create(ds, bs, sampler=sampler, collate_fn=pad_collate))\n", " return DataBunch(*dataloaders, path=path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to use the same vocab as for the LM." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "vocab = Vocab(LM_PATH/'tmp')\n", "data = data_from_textcsv(CLAS_PATH, Tokenizer(), vocab=vocab, data_func=classifier_data, bs=50)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data.train_ds.vocab.itos[40:60]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "vocab.itos[40:60]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "x,y = next(iter(data.train_dl))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "vocab.textify(x[:,15]), y[2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class MultiBatchRNNCore(RNNCore):\n", " \"Creates a RNNCore module that can process a full sentence.\"\n", " \n", " def __init__(self, bptt:int, max_seq:int, *args, **kwargs):\n", " self.max_seq,self.bptt = max_seq,bptt\n", " super().__init__(*args, **kwargs)\n", "\n", " def concat(self, arrs:Collection[Tensor]) -> Tensor:\n", " \"Concatenates the arrays along the batch dimension.\"\n", " return [torch.cat([l[si] for l in arrs]) for si in range(len(arrs[0]))]\n", "\n", " def forward(self, input:LongTensor) -> Tuple[Tensor,Tensor]:\n", " sl,bs = input.size()\n", " self.reset()\n", " raw_outputs, outputs = [],[]\n", " for i in range(0, sl, self.bptt):\n", " r, o = super().forward(input[i: min(i+self.bptt, sl)])\n", " if i>(sl-self.max_seq):\n", " raw_outputs.append(r)\n", " outputs.append(o)\n", " return self.concat(raw_outputs), self.concat(outputs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "class PoolingLinearClassifier(nn.Module):\n", " \"Creates a linear classifier with pooling.\"\n", " \n", " def __init__(self, layers:Collection[int], drops:Collection[float]):\n", " super().__init__()\n", " mod_layers = []\n", " activs = [nn.ReLU(inplace=True)] * (len(layers) - 2) + [None]\n", " for n_in,n_out,p,actn in zip(layers[:-1],layers[1:], drops, activs):\n", " mod_layers += bn_drop_lin(n_in, n_out, p=p, actn=actn) \n", " self.layers = nn.Sequential(*mod_layers)\n", "\n", " def pool(self, x:Tensor, bs:int, is_max:bool):\n", " \"Pools the tensor along the seq_len dimension.\"\n", " f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d\n", " return f(x.permute(1,2,0), (1,)).view(bs,-1)\n", "\n", " def forward(self, input:Tuple[Tensor,Tensor]) -> Tuple[Tensor,Tensor,Tensor]:\n", " raw_outputs, outputs = input\n", " output = outputs[-1]\n", " sl,bs,_ = output.size()\n", " avgpool = self.pool(output, bs, False)\n", " mxpool = self.pool(output, bs, True)\n", " x = 
torch.cat([output[-1], mxpool, avgpool], 1)\n", " x = self.layers(x)\n", " return x, raw_outputs, outputs" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def rnn_classifier_split(model:Model) -> List[Model]:\n", " \"Splits a RNN model in groups.\"\n", " groups = [nn.Sequential(model[0].encoder, model[0].encoder_dp)]\n", " groups += [nn.Sequential(rnn, dp) for rnn, dp in zip(model[0].rnns, model[0].hidden_dps)] \n", " groups.append(model[1])\n", " return groups" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "def get_rnn_classifier(bptt:int, max_seq:int, n_class:int, vocab_sz:int, emb_sz:int, n_hid:int, n_layers:int, \n", " pad_token:int, layers:Collection[int], drops:Collection[float], bidir:bool=False, qrnn:bool=False, \n", " hidden_p:float=0.2, input_p:float=0.6, embed_p:float=0.1, weight_p:float=0.5) -> Model:\n", " \"Creates a RNN classifier model\"\n", " rnn_enc = MultiBatchRNNCore(bptt, max_seq, vocab_sz, emb_sz, n_hid, n_layers, pad_token=pad_token, bidir=bidir,\n", " qrnn=qrnn, hidden_p=hidden_p, input_p=input_p, embed_p=embed_p, weight_p=weight_p)\n", " return SequentialRNN(rnn_enc, PoolingLinearClassifier(layers, drops))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "SplitFunc = Callable[[Model], List[Model]]\n", "OptSplitFunc = Optional[SplitFunc]\n", "OptStrTuple = Optional[Tuple[str,str]]\n", "\n", "class RNNLearner(Learner):\n", " \"Basic class for a Learner in RNN\"\n", " def __init__(self, data:DataBunch, model:Model, bptt:int=70, split_func:OptSplitFunc=None, clip:float=None, \n", " adjust:bool=False, alpha:float=2., beta:float=1., **kwargs):\n", " super().__init__(data, model)\n", " self.callbacks.append(RNNTrainer(self, bptt, alpha=alpha, beta=beta, adjust=adjust))\n", " if clip: self.callback_fns.append(partial(GradientClipping, clip=clip))\n", " if split_func: self.split(split_func)\n", " self.metrics = [accuracy]\n", " \n", " def save_encoder(self, name:str):\n", " \"Saves the encoder to the model directory\"\n", " torch.save(self.model[0].state_dict(), self.path/self.model_dir/f'{name}.pth')\n", " \n", " def load_encoder(self, name:str):\n", " \"Loads the encoder from the model directory\"\n", " self.model[0].load_state_dict(torch.load(self.path/self.model_dir/f'{name}.pth'))\n", " \n", " def load_pretrained(self, wgts_fname:str, itos_fname:str):\n", " \"Loads a pretrained model and adapts it to the data vocabulary.\"\n", " old_itos = pickle.load(open(self.path/self.model_dir/f'{itos_fname}.pkl', 'rb'))\n", " old_stoi = {v:k for k,v in enumerate(old_itos)}\n", " wgts = torch.load(self.path/self.model_dir/f'{wgts_fname}.pth', map_location=lambda storage, loc: storage)\n", " wgts = convert_weights(wgts, old_stoi, self.data.train_ds.vocab.itos)\n", " self.model.load_state_dict(wgts)\n", " \n", " @classmethod\n", " def language_model(cls, data:DataBunch, bptt:int=70, emb_sz:int=400, nh:int=1150, nl:int=3, pad_token:int=1, \n", " drop_mult:float=1., tie_weights:bool=True, bias:bool=True, qrnn:bool=False, \n", " pretrained_fnames:OptStrTuple=None, **kwargs) -> 'RNNLearner':\n", " \"Creates a `Learner` with a language model.\"\n", " dps = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * drop_mult\n", " vocab_size = len(data.train_ds.vocab.itos)\n", " model = get_language_model(vocab_size, emb_sz, nh, nl, pad_token, input_p=dps[0], output_p=dps[1], \n", " weight_p=dps[2], embed_p=dps[3], 
hidden_p=dps[4], tie_weights=tie_weights, bias=bias, qrnn=qrnn)\n", " learn = cls(data, model, bptt, split_func=lm_split, **kwargs)\n", " if pretrained_fnames is not None: learn.load_pretrained(*pretrained_fnames)\n", " return learn\n", " \n", " @classmethod\n", " def classifier(cls, data:DataBunch, bptt:int=70, max_len:int=70*20, emb_sz:int=400, nh:int=1150, nl:int=3, \n", " layers:Collection[int]=None, drops:Collection[float]=None, pad_token:int=1, \n", " drop_mult:float=1., qrnn:bool=False, **kwargs) -> 'RNNLearner':\n", " \"Creates a RNN classifier.\"\n", " dps = np.array([0.4,0.5,0.05,0.3,0.4]) * drop_mult\n", " if layers is None: layers = [50]\n", " if drops is None: drops = [0.1]\n", " vocab_size = len(data.train_ds.vocab.itos)\n", " n_class = len(data.train_ds.classes)\n", " layers = [emb_sz*3] + layers + [n_class]\n", " drops = [dps[4]] + drops\n", " model = get_rnn_classifier(bptt, max_len, n_class, vocab_size, emb_sz, nh, nl, pad_token, \n", " layers, drops, input_p=dps[0], weight_p=dps[1], embed_p=dps[2], hidden_p=dps[3], qrnn=qrnn)\n", " learn = cls(data, model, bptt, split_func=rnn_classifier_split, **kwargs)\n", " return learn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data_from_textcsv(CLAS_PATH, Tokenizer(), vocab=Vocab(LM_PATH/'tmp'), data_func=classifier_data, bs=50)\n", "learn = RNNLearner.classifier(data, drop_mult=0.5)\n", "learn.load_encoder('fine_tuned_enc60kb')\n", "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.lr_find()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.recorder.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(1, 2e-2, moms=(0.8,0.7))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('first')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('first')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-2)\n", "learn.fit_one_cycle(1, slice(1e-2/2.6,1e-2), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('second')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('second')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-3)\n", "learn.fit_one_cycle(1, slice(5e-3/(2.6**2),5e-3), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('third')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('third')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(2, slice(1e-3/(2.6**4),1e-3), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }