{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#export\n", "from nb_007b import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# IMDB" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fine-tuning the LM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Data has been prepared in csv files at the beginning 007a, we will use it know." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "PATH = Path('../data/aclImdb/')\n", "CLAS_PATH = PATH/'clas'\n", "LM_PATH = PATH/'lm'\n", "MODEL_PATH = PATH/'models'\n", "os.makedirs(CLAS_PATH, exist_ok=True)\n", "os.makedirs(LM_PATH, exist_ok=True)\n", "os.makedirs(MODEL_PATH, exist_ok=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data_from_textcsv(LM_PATH, Tokenizer(), data_func=lm_data, bs=50)\n", "learn = RNNLearner.language_model(data, drop_mult=0.3, pretrained_fnames=['lstm_wt103', 'itos_wt103'])\n", "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(1, 1e-2, moms=(0.8,0.7))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('fit_head')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('fit_head')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(10, 1e-3, moms=(0.8,0.7), pct_start=0.25)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('fine_tuned')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save_encoder('fine_tuned_enc')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to use the same vocab as for the LM." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "shutil.copy(LM_PATH/'models'/'fine_tuned_enc.pth', CLAS_PATH/'models'/'fine_tuned_enc.pth')\n", "shutil.copy(LM_PATH/'tmp'/'itos.pkl', CLAS_PATH/'tmp'/'itos.pkl')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = data_from_textcsv(CLAS_PATH, Tokenizer(), vocab=Vocab(LM_PATH/'tmp'), data_func=classifier_data, bs=50)\n", "learn = RNNLearner.classifier(data, drop_mult=0.5)\n", "learn.load_encoder('fine_tuned_enc')\n", "learn.freeze()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.fit_one_cycle(1, 2e-2, moms=(0.8,0.7))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-2)\n", "learn.fit_one_cycle(1, slice(1e-2/2.6,1e-2), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(2, slice(1e-2/(2.6)**4,1e-2), moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('first')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr=1e-2\n", "lrm = 2.6\n", "lrs = np.array([lr/(lrm**4), lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-2)\n", "learn.fit_one_cycle(1, lrs, moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('second')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('second')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr=5e-3\n", "lrm = 2.6\n", "lrs = np.array([lr/(lrm**4), lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.freeze_to(-3)\n", "learn.fit_one_cycle(1, lrs, moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.save('third')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.load('third')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lr=1e-3\n", "lrm = 2.6\n", "lrs = np.array([lr/(lrm**4), lr/(lrm**3), lr/(lrm**2), lr/lrm, lr])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn.unfreeze()\n", "learn.fit_one_cycle(2, lrs, moms=(0.8,0.7), pct_start=0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }