{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## CIFAR 10" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import argparse\n", "import os\n", "import shutil\n", "import time\n", "\n", "from fastai.transforms import *\n", "from fastai.dataset import *\n", "from fastai.fp16 import *\n", "from fastai.conv_learner import *\n", "from pathlib import *\n", "\n", "import torch\n", "from torch.autograd import Variable\n", "import torch.nn as nn\n", "import torch.nn.parallel\n", "import torch.backends.cudnn as cudnn\n", "import torch.distributed as dist\n", "import torch.optim\n", "import torch.utils.data\n", "import torch.utils.data.distributed\n", "import torchvision.transforms as transforms\n", "import torchvision.datasets as datasets\n", "import models\n", "import models.cifar10 as cifar10models\n", "from distributed import DistributedDataParallel as DDP\n", "\n", "# print(models.cifar10.__dict__)\n", "model_names = sorted(name for name in models.__dict__\n", " if name.islower() and not name.startswith(\"__\")\n", " and callable(models.__dict__[name]))\n", "\n", "cifar10_names = sorted(name for name in cifar10models.__dict__\n", " if name.islower() and not name.startswith(\"__\")\n", " and callable(cifar10models.__dict__[name]))\n", "\n", "model_names = cifar10_names + model_names" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "\n", "# print(model_names)\n", "\n", "# Example usage: python run_fastai.py /home/paperspace/ILSVRC/Data/CLS-LOC/ -a resnext_50_32x4d --epochs 1 -j 4 -b 64 --fp16\n", "\n", "parser = argparse.ArgumentParser(description='PyTorch Cifar10 Training')\n", "parser.add_argument('data', metavar='DIR',\n", " help='path to dataset')\n", "parser.add_argument('--save-dir', type=str, default=Path.home()/'imagenet_training',\n", " help='Directory to save logs and models.')\n", "parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet56',\n", " choices=model_names,\n", " help='model architecture: ' +\n", " ' | '.join(model_names) +\n", " ' (default: resnet56)')\n", "parser.add_argument('-j', '--workers', default=7, type=int, metavar='N',\n", " help='number of data loading workers (default: 4)')\n", "parser.add_argument('--epochs', default=1, type=int, metavar='N',\n", " help='number of total epochs to run')\n", "parser.add_argument('--cycle-len', default=95, type=float, metavar='N',\n", " help='Length of cycle to run')\n", "# parser.add_argument('--start-epoch', default=0, type=int, metavar='N',\n", "# help='manual epoch number (useful on restarts)')\n", "parser.add_argument('-b', '--batch-size', default=512, type=int,\n", " metavar='N', help='mini-batch size (default: 256)')\n", "parser.add_argument('--lr', '--learning-rate', default=0.8, type=float,\n", " metavar='LR', help='initial learning rate')\n", "parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum')\n", "parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,\n", " metavar='W', help='weight decay (default: 1e-4)')\n", "# parser.add_argument('--print-freq', '-p', default=10, type=int,\n", "# metavar='N', help='print frequency (default: 10)')\n", "# parser.add_argument('--resume', default='', type=str, metavar='PATH',\n", "# help='path to latest checkpoint (default: none)')\n", "# parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',\n", "# help='evaluate model on validation set')\n", "parser.add_argument('--pretrained', dest='pretrained', action='store_true', help='use pre-trained model')\n", "parser.add_argument('--fp16', action='store_true', help='Run model fp16 mode.')\n", "parser.add_argument('--use-tta', default=True, type=bool, help='Validate model with TTA at the end of traiing.')\n", "parser.add_argument('--train-half', action='store_true', help='Train model on half images. TODO: allow custom epochs and LR')\n", "parser.add_argument('--sz', default=32, type=int, help='Size of transformed image.')\n", "# parser.add_argument('--decay-int', default=30, type=int, help='Decay LR by 10 every decay-int epochs')\n", "parser.add_argument('--use-clr', default='10,13.68,0.95,0.85', type=str, \n", " help='div,pct,max_mom,min_mom. Pass in a string delimited by commas. Ex: \"20,2,0.95,0.85\"')\n", "parser.add_argument('--loss-scale', type=float, default=128,\n", " help='Loss scaling, positive power of 2 values can improve fp16 convergence.')\n", "parser.add_argument('--prof', dest='prof', action='store_true', help='Only run a few iters for profiling.')\n", "\n", "parser.add_argument('--dist-url', default='file://sync.file', type=str,\n", " help='url used to set up distributed training')\n", "parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')\n", "\n", "parser.add_argument('--world-size', default=1, type=int,\n", " help='Number of GPUs to use. Can either be manually set ' +\n", " 'or automatically set by using \\'python -m multiproc\\'.')\n", "parser.add_argument('--rank', default=0, type=int,\n", " help='Used for multi-process training. Can either be manually set ' +\n", " 'or automatically set by using \\'python -m multiproc\\'.')\n", "\n", "class TorchModelData(ModelData):\n", " def __init__(self, path, trn_dl, val_dl, aug_dl=None):\n", " super().__init__(path, trn_dl, val_dl)\n", " self.aug_dl = aug_dl\n", "\n", "def torch_loader(data_path, size):\n", " # Data loading code\n", " traindir = os.path.join(data_path, 'train')\n", " valdir = os.path.join(data_path, 'test')\n", " normalize = transforms.Normalize(mean=[0.4914 , 0.48216, 0.44653], std=[0.24703, 0.24349, 0.26159])\n", "\n", " train_tfms = transforms.Compose([\n", " # transforms.RandomResizedCrop(size),\n", " transforms.ColorJitter(.3,.3,.3),\n", " transforms.RandomRotation(3),\n", " transforms.RandomHorizontalFlip(),\n", " transforms.ToTensor(),\n", " normalize,\n", " ])\n", " train_dataset = datasets.ImageFolder(traindir, train_tfms)\n", " train_sampler = (torch.utils.data.distributed.DistributedSampler(train_dataset)\n", " if args.distributed else None)\n", " train_loader = torch.utils.data.DataLoader(\n", " train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),\n", " num_workers=args.workers, pin_memory=True, sampler=train_sampler)\n", "\n", " val_tfms = transforms.Compose([\n", " # transforms.Resize(int(size*1.14)),\n", " # transforms.CenterCrop(size),\n", " transforms.ToTensor(),\n", " normalize,\n", " ])\n", " val_loader = torch.utils.data.DataLoader(\n", " datasets.ImageFolder(valdir, val_tfms),\n", " batch_size=args.batch_size, shuffle=False,\n", " num_workers=args.workers, pin_memory=True)\n", "\n", "\n", " aug_loader = torch.utils.data.DataLoader(\n", " datasets.ImageFolder(valdir, train_tfms),\n", " batch_size=args.batch_size, shuffle=False,\n", " num_workers=args.workers, pin_memory=True)\n", "\n", " train_loader = DataPrefetcher(train_loader)\n", " val_loader = DataPrefetcher(val_loader)\n", " aug_loader = DataPrefetcher(aug_loader)\n", " if args.prof:\n", " train_loader.stop_after = 200\n", " val_loader.stop_after = 0\n", "\n", " data = TorchModelData(data_path, train_loader, val_loader, aug_loader)\n", " return data, train_sampler\n", "\n", "\n", "# Seems to speed up training by ~2%\n", "class DataPrefetcher():\n", " def __init__(self, loader, stop_after=None):\n", " self.loader = loader\n", " self.dataset = loader.dataset\n", " self.stream = torch.cuda.Stream()\n", " self.stop_after = stop_after\n", " self.next_input = None\n", " self.next_target = None\n", "\n", " def __len__(self):\n", " return len(self.loader)\n", " \n", " def preload(self):\n", " try:\n", " self.next_input, self.next_target = next(self.loaditer)\n", " except StopIteration:\n", " self.next_input = None\n", " self.next_target = None\n", " return\n", " with torch.cuda.stream(self.stream):\n", " self.next_input = self.next_input.cuda(async=True)\n", " self.next_target = self.next_target.cuda(async=True)\n", "\n", " def __iter__(self):\n", " count = 0\n", " self.loaditer = iter(self.loader)\n", " self.preload()\n", " while self.next_input is not None:\n", " torch.cuda.current_stream().wait_stream(self.stream)\n", " input = self.next_input\n", " target = self.next_target\n", " self.preload()\n", " count += 1\n", " yield input, target\n", " if type(self.stop_after) is int and (count > self.stop_after):\n", " break\n", " \n", "def top5(output, target):\n", " \"\"\"Computes the precision@k for the specified values of k\"\"\"\n", " top5 = 5\n", " batch_size = target.size(0)\n", " _, pred = output.topk(top5, 1, True, True)\n", " pred = pred.t()\n", " correct = pred.eq(target.view(1, -1).expand_as(pred))\n", " correct_k = correct[:top5].view(-1).float().sum(0, keepdim=True)\n", " return correct_k.mul_(1.0 / batch_size)\n", "\n", "\n", "class ImagenetLoggingCallback(Callback):\n", " def __init__(self, save_path, print_every=50):\n", " super().__init__()\n", " self.save_path=save_path\n", " self.print_every=print_every\n", " def on_train_begin(self):\n", " self.batch = 0\n", " self.epoch = 0\n", " self.f = open(self.save_path, \"a\", 1)\n", " self.log(\"\\ton_train_begin\")\n", " def on_epoch_end(self, metrics):\n", " log_str = f'\\tEpoch:{self.epoch}\\ttrn_loss:{self.last_loss}'\n", " for (k,v) in zip(['val_loss', 'acc', 'top5', ''], metrics): log_str += f'\\t{k}:{v}'\n", " self.log(log_str)\n", " self.epoch += 1\n", " def on_batch_end(self, metrics):\n", " self.last_loss = metrics\n", " self.batch += 1\n", " if self.batch % self.print_every == 0:\n", " self.log(f'Epoch: {self.epoch} Batch: {self.batch} Metrics: {metrics}')\n", " def on_train_end(self):\n", " self.log(\"\\ton_train_end\")\n", " self.f.close()\n", " def log(self, string):\n", " self.f.write(time.strftime(\"%Y-%m-%dT%H:%M:%S\")+\"\\t\"+string+\"\\n\")\n", "\n", "# Logging + saving models\n", "def save_args(name, save_dir):\n", " if (args.rank != 0) or not args.save_dir: return {}\n", "\n", " log_dir = f'{save_dir}/training_logs'\n", " os.makedirs(log_dir, exist_ok=True)\n", " return {\n", " 'best_save_name': f'{name}_best_model',\n", " 'cycle_save_name': f'{name}',\n", " 'callbacks': [\n", " ImagenetLoggingCallback(f'{log_dir}/{name}_log.txt')\n", " ]\n", " }\n", "\n", "def save_sched(sched, save_dir):\n", " if (args.rank != 0) or not args.save_dir: return {}\n", " log_dir = f'{save_dir}/training_logs'\n", " sched.save_path = log_dir\n", " sched.plot_loss()\n", " sched.plot_lr()\n", "\n", "def update_model_dir(learner, base_dir):\n", " learner.tmp_path = f'{base_dir}/tmp'\n", " os.makedirs(learner.tmp_path, exist_ok=True)\n", " learner.models_path = f'{base_dir}/models'\n", " os.makedirs(learner.models_path, exist_ok=True)\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Resnet block" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "args_input = [\n", " '/home/paperspace/imagenet-fast/fp16/data/cifar10', \n", " '--save-dir', '/home/paperspace/data/cifar_training/test1', \n", "# '-a', 'resnet56', \n", "# '-j', '6', \n", "# '--prof', \n", "# '-b', '512', \n", "# '--sz', '32',\n", "# '--loss-scale', '128',\n", " '--fp16',\n", "# '--cycle-len', '95',\n", "# '--epochs', '1',\n", "# '--use-clr', '10,13.68,0.95,0.85',\n", " '--wd', '2e-4',\n", " '--lr', '1',\n", "# '--train-half' # With fp16, iterations are so fast this doesn't matter\n", "]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "\n", "# This is important for speed\n", "cudnn.benchmark = True\n", "global arg\n", "args = parser.parse_args(args_input); args\n", "if args.cycle_len > 1: args.cycle_len = int(args.cycle_len)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "args.distributed = args.world_size > 1\n", "args.gpu = 0\n", "if args.distributed:\n", " args.gpu = args.rank % torch.cuda.device_count()\n", "\n", "if args.distributed:\n", " torch.cuda.set_device(args.gpu)\n", " dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,\n", " world_size=args.world_size)\n", "\n", "if args.fp16:\n", " assert torch.backends.cudnn.enabled, \"fp16 mode requires cudnn backend to be enabled.\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "=> creating model 'resnet56'\n" ] } ], "source": [ "# create model\n", "model = cifar10models.__dict__[args.arch] if args.arch in cifar10_names else models.__dict__[args.arch] \n", "if args.pretrained:\n", " print(\"=> using pre-trained model '{}'\".format(args.arch))\n", " model = model(pretrained=True)\n", "else:\n", " print(\"=> creating model '{}'\".format(args.arch))\n", " model = model()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "model = model.cuda()\n", "if args.distributed:\n", " model = DDP(model)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "if args.train_half:\n", " data, train_sampler = torch_loader(args.data, 16)\n", "else:\n", " data, train_sampler = torch_loader(args.data, args.sz)\n", "\n", "learner = Learner.from_model_data(model, data)\n", "# learner.crit = F.nll_loss\n", "learner.crit = F.cross_entropy\n", "learner.metrics = [accuracy]\n", "if args.fp16: learner.half()\n", "\n", "if args.prof:\n", " args.epochs = 1\n", " args.cycle_len=.01\n", "if args.use_clr:\n", " args.use_clr = tuple(map(float, args.use_clr.split(',')))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "x,y = next(iter(data.trn_dl))" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([512, 3, 32, 32])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAENxJREFUeJzt3X+QVfV5x/H3Iz+yLUu7IAorYFBCpjJOBboy1hi1WqMxdpBMk2qblk6drGnCtI7aGYqm0mo6JqM4dpIxWSMTjAYwagoTzVRLk8GmrbKC/FCioiEBWVgQtoKWKPD0j3N2ZiH3e+7dc889d+H7ec0we/c895zz7Nn9cH987/kec3dEJD6nNLsBEWkOhV8kUgq/SKQUfpFIKfwikVL4RSKl8ItESuEXiZTCLxKp4fWsbGZXAfcDw4Bvu/vdVe4f/jjhiIwVP8jVnkiU3N1quZ/l/XivmQ0DXgOuAHYAa4Hr3f2VjHXCO5uUsbMduVoUiVKt4a/naf9sYKu7v+nu7wPLgTl1bE9ESlRP+CcC2wd8vyNdJiIngHpe81d6avFrT+vNrBPorGM/ItIA9YR/BzB5wPeTgJ3H38ndu4AuqPKaX0RKVc/T/rXANDM7y8xGAtcBq4ppS0QaLfcjv7sfNrP5wL+RDPUtcfeXc3eid/RFSpV7qC/XzvS0X6ThyhjqE5ETmMIvEimFXyRSCr9IpBR+kUjVdVafDEXXBZa/lrHOukY0IkOcHvlFIqXwi0RK4ReJlMIvEimFXyRSerf/pPPhwPLpGevo3f4Y6ZFfJFIKv0ikFH6RSCn8IpFS+EUipfCLREpDfSeZSyacWXH5lGlTg+ssfa5R3chQpkd+kUgp/CKRUvhFIqXwi0RK4ReJlMIvEqm6hvrMbBtwADgCHHb3jiKakvym3PDfFZd/93sLS+5Ehroixvn/wN33FrAdESmRnvaLRKre8DvwjJm9aGadRTQkIuWo92n/x9x9p5mdDjxrZj9z9zUD75D+p6D/GESGmLoe+d19Z/q1F/gBMLvCfbrcvUNvBooMLbnDb2ajzGx0/23gE8DmohoTkcYyd8+3otnZJI/2kLx8+J67f6XKOvl2JiI1c3er5X65w5+Hwi/SeLWGX0N9IpFS+EUipfCLRErhF4mUwi8SKYVfJFIKv0ikFH6RSCn8IpFS+EUipfCLREqX6yrAJV+8IVjb+6O3g7Vxe04N1lpawvvbNTy8ze1Uru3bpWtyybH0yC8SKYVfJFIKv0ikFH6RSCn8IpFS+EUipaG+ArT1tQVrTz/+7WCtZXt4m6ecE671bgvXxl9Z0wxOInrkF4mVwi8SKYVfJFIKv0ikFH6RSCn8IpGqOtRnZkuAa4Bedz83XTYWWAFMAbYBn3X3/Y1rc2hbeNvNwdpvTs9YcVa49M6acO1vvnBn9aZEqqjlkf87wFXHLVsArHb3acDq9HsROYFUDb+7rwH2Hbd4DrA0vb0UuLbgvkSkwfK+5h/v7j0A6dfTi2tJRMrQ8I/3mlkn0Nno/YjI4OR95N9tZu0A6dfe0B3dvcvdO9y9I+e+RKQB8oZ/FTAvvT0PWFlMOyJSllqG+pYBlwLjzGwHcAdwN/CYmd0A/BL4TCObHOrWr30lWJsx/YxgbeT/hbc55pLzgrWjbMzoJjTz56GMdSRGVcPv7tcHSpcX3IuIlEif8BOJlMIvEimFXyRSCr9IpBR+kUhpAs8CLLt/ebB2ePuRYG3K5DODtXMmfDRYm9R+RbA299PHn4OV+MKXPxdcB3Zn1ORkpUd+kUgp/CKRUvhFIqXwi0RK4ReJlMIvEilz9/J2Zlbezhqi8hlzE/l4cI09vB2svc8bwdrEttnB2sOPfCtYu+xTZwVrIWZD5Pp+raeGawfDx1GO5e41/UL1yC8SKYVfJFIKv0ikFH6RSCn8IpHSiT2D8tsVl77Fs4Xv6dwLwyf25HlHP8vuDeH5/cafF5oTsAH0jn6p9MgvEimFXyRSCr9IpBR+kUgp/CKRUvhFIlX1xB4zWwJcA/S6+7npskXA54E96d0WuvvTVXd2gp/YM2fcworLV+7951zbu/zqW4K1p5+6J1gbmWtv+ax89N+DtWs/F55LUJqnyBN7vgNUmhXyPnefkf6rGnwRGVqqht/d1wD7SuhFREpUz2v++Wa20cyWmNmYwjoSkVLkDf8DwFRgBtAD3Bu6o5l1mlm3mXXn3JeINECu8Lv7bnc/4u5HgQeB4LQz7t7l7h3u3pG3SREpXq7wm1n7gG/nApuLaUdEylLLUN8y4FJgHMl1ne5Iv58BOLANuNHde6ru7AQf6lv0+0srLj935szgOi0XtgVrn/qzyXX31Ex/d3tXsHbPV24ssRMZqNahvqqn9Lr79RUWPzTojkRkSNEn/EQipfCLRErhF4mUwi8SKYVfJFK6XNegVL6clL+6N7xKeB7Ok1rRlwC7/c7Hg7XFq5YHa++tDa93stLlukQkk8IvEimFXyRSCr9IpBR+kUgp/CKR0lBfAW7/YnAuE+78xs0ldjJ0FD3UN37SJ4O1j8wOn1X50yfzTa56ItNQn4hkUvhFIqXwi0RK4ReJlMIvEim921+IUcHKW28cDNbOOC28xY1rdwZrrW2twdqUWb9VcfnhnEd+ZM437Vc+tb7i8uEjwr1fc2WkZ0EVTO/2i0gmhV8kUgq/SKQUfpFIKfwikVL4RSJVy+W6JgMPAxOAo0CXu99vZmOBFcAUkkt2fdbd91fZ1kk61Bf21qsZP/KucOlHK34SXu3Qu8Fay5jKw47nf3xicJ2L50wLN5LT0cBfwt7t4XX+9C/nB2ur13+jzo7iUeRQ32HgFnc/B7gA+JKZTQcWAKvdfRqwOv1eRE4QVcPv7j3uvi69fQDYAkwE5gD9V65cClzbqCZFpHiDes1vZlOAmcDzwPj+K/OmX08vujkRaZyqV+ntZ2atwBPATe7+Tq2TNZhZJ9CZrz0RaZSaHvnNbARJ8B919yfTxbvNrD2ttwO9ldZ19y5373D3jiIaFpFiVA2/JQ/xDwFb3H3xgNIqYF56ex6wsvj2RKRRahnquwh4DthEMtQHsJDkdf9jwJnAL4HPuPu+KtuKbqjvpy/vDtYunB5+m6TjtPCw14t7Bz/s9XtcF6x1+7JBb6+a3rsqL9/2evhsxUPTjgRrl3z5zHpbikatQ31VX/O7+38CoY1dPpimRGTo0Cf8RCKl8ItESuEXiZTCLxIphV8kUprAs4nezTj2owq+3FUWfznj1zI9XNq5Ilz72cOVT9/7nWlnBNe5ecU/BWsrdoVrcixN4CkimRR+kUgp/CKRUvhFIqXwi0RK4ReJVM2TeUjx7nhqSbB2yszZwdrR9S8Mel9fv+1fw8WM4bxR9kfB2nuVp3AAYB5/UnH5X316bnCdFbuWBmtSPD3yi0RK4ReJlMIvEimFXyRSCr9IpPRufxPt3xSe3+9f7g6fyDL/yqsGva/zL5416HUA3uOHudZbSuURic628FyC8Itc+5J89MgvEimFXyRSCr9IpBR+kUgp/CKRUvhFIlXL5bomAw8DE0gu19Xl7veb2SLg88Ce9K4L3f3pKtvSHH4D3NUZPpHltq//RbBmI8ub3694WaPLh0vr4mRW2OW6SH4jt7j7OjMbDbxoZs+mtfvc/Z68TYpI89Ryrb4eoCe9fcDMtgATG92YiDTWoF7zm9kUYCbJFXoB5pvZRjNbYmZjCu5NRBqo5vCbWSvwBHCTu78DPABMBWaQPDO4N7Bep5l1m1l3Af2KSEFqCr+ZjSAJ/qPu/iSAu+929yPufhR4EKg49Yy7d7l7h7t3FNW0iNSvavjNzICHgC3uvnjA8vYBd5sLbC6+PRFplFqG+i4CngM2kQz1ASwErid5yu/ANuDG9M3BrG1pqE+kwWod6tO1+kROMrpWn4hkUvhFIqXwi0RK4ReJlMIvEimFXyRSCr9IpBR+kUgp/CKRUvhFIqXwi0RK4ReJlMIvEimFXyRSCr9IpBR+kUgp/CKRUvhFIqXwi0Sqlst1FWZq60e5d+a3Ao20Btc7zMEc64QdOtgXXm94xiFpqbzVltHhPv7nvzYGa/f03RjeV4Zb2yofQ4BLLzu/4vK+Pb8KrtN38H+DtV09vwjW9u6t/HsB+ObhrwUq4XVubVscrA0PH2IOhzfJwcD+2oa3hbd3OPzX8/O+8PEYzpFgrY/w39wO3gqsE/699ARqR3k9uM7x9MgvEimFXyRSCr9IpBR+kUgp/CKRquVyXS3AGuBDJKMDj7v7HWZ2FrAcGAusA/7c3d/P2lbH9A7vfiRwsd7JGSu2BJZnvAOcqabrmRTk7nDJ/j7cyFiuCNbe3vBMeKO/W0tTJViXY52ZGbUyf2cfZNQO5aztz6iFRisyRjEIXBivY2EH3W90F3bFnl8Bl7n7eSTX5rvKzC4Avgrc5+7TSH60G2rZoYgMDVXD74n+/4NGpP8cuAx4PF2+FLi2IR2KSEPU9JrfzIaZ2UtAL/As8AbQ5+79n4bYAUxsTIsi0gg1hd/dj7j7DGASMBs4p9LdKq1rZp1m1m1m3Xv278nfqYgUalDv9rt7H/AT4AKgzcz6Pws7CdgZWKfL3TvcveO0MafV06uIFKhq+M3sNDNrS2//BvCHwBbgx8Afp3ebB6xsVJMiUrxaTuxpB5aa2TCS/ywec/cfmtkrwHIzuwtYDzxUbUOH3obXHq588sOkCcOC67WMrqHL45yS9ZOFhg4h84ygo4HlBzOGePr2hmuPnL82WGtvPzVYe/8/wtscGdpk1s+cJesMqSyh82ayfi/rc+4rh6MZP1fWj3wwY8hu6897w+v1vRusHeirfNLP8IyDNbq18jj3wX3hE7h+fftVuPtGKozAuvubJK//ReQEpE/4iURK4ReJlMIvEimFXyRSCr9IpKqe1Vfozsz2AP2ToI0DMgbCSqM+jqU+jnWi9fFhd6/p03Slhv+YHZt1u3tHU3auPtSH+tDTfpFYKfwikWpm+LuauO+B1Mex1MexTto+mvaaX0SaS0/7RSLVlPCb2VVm9qqZbTWzBc3oIe1jm5ltMrOXzCwws2hD9rvEzHrNbPOAZWPN7Fkzez39OqZJfSwys7fSY/KSmV1dQh+TzezHZrbFzF42s79Nl5d6TDL6KPWYmFmLmb1gZhvSPv4xXX6WmT2fHo8VZjayrh25e6n/gGEk04CdDYwENgDTy+4j7WUbMK4J+70YmAVsHrDsa8CC9PYC4KtN6mMRcGvJx6MdmJXeHg28Bkwv+5hk9FHqMSGZq7g1vT0CeJ5kAp3HgOvS5d8E/rqe/TTjkX82sNXd3/Rkqu/lwJwm9NE07r4G2Hfc4jkkE6FCSROiBvoonbv3uPu69PYBksliJlLyMcnoo1SeaPikuc0I/0Rg+4Dvmzn5pwPPmNmLZtbZpB76jXf3Hkj+CIHTm9jLfDPbmL4saPjLj4HMbArJ/BHP08RjclwfUPIxKWPS3GaEv9IFBZo15PAxd58FfBL4kpld3KQ+hpIHgKkk12joAe4ta8dm1go8Adzk7u+Utd8a+ij9mHgdk+bWqhnh38Gx1+cJTv7ZaO6+M/3aC/yA5s5MtNvM2gHSr+E5oRrI3Xenf3hHgQcp6ZiY2QiSwD3q7k+mi0s/JpX6aNYxSfc96Elza9WM8K8FpqXvXI4ErgNWld2EmY0ys9H9t4FPAJuz12qoVSQToUITJ0TtD1tqLiUcEzMzkjkgt7j74gGlUo9JqI+yj0lpk+aW9Q7mce9mXk3yTuobwG1N6uFskpGGDcDLZfYBLCN5+vgByTOhG4BTgdXA6+nXsU3q47vAJmAjSfjaS+jjIpKnsBuBl9J/V5d9TDL6KPWYkFxxcX26v83APwz4m30B2Ap8H/hQPfvRJ/xEIqVP+IlESuEXiZTCLxIphV8kUgq/SKQUfpFIKfwikVL4RSL1//OPmSnfPBJTAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(np.transpose(x[50], (1, 2, 0)))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# %pdb off" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# 128x128\n", "if args.train_half: \n", " save_dir = args.save_dir+'/128'\n", " update_model_dir(learner, save_dir)\n", " sargs = save_args('first_run_128', save_dir)\n", " learner.fit(args.lr,args.epochs, cycle_len=45,\n", " train_sampler=train_sampler,\n", " wds=args.weight_decay,\n", " use_clr_beta=args.use_clr,\n", " loss_scale=args.loss_scale,\n", " **sargs\n", " )\n", " save_sched(learner.sched, save_dir)\n", " data, train_sampler = torch(args.data, args.sz)\n", " learner.set_data(data)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": false }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "07db130cc2ea4fbbacceeeb514fbc6d0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=95), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss accuracy \n", " 0 1.964649 2.29585 0.2413 \n", " 1 1.629196 1.670711 0.4153 \n", " 2 1.279893 1.212244 0.5794 \n", " 3 1.022324 1.081357 0.621 \n", " 4 0.87207 0.947416 0.6629 \n", " 5 0.7623 1.424813 0.5834 \n", " 6 0.711834 0.880024 0.6867 \n", " 7 0.673645 0.882425 0.7036 \n", " 8 0.624824 0.75577 0.7406 \n", " 9 0.591317 0.977343 0.6912 \n", " 10 0.576992 0.738988 0.7422 \n", " 11 0.54849 0.760248 0.7389 \n", " 12 0.537716 0.806278 0.7491 \n", " 13 0.521323 0.857931 0.7177 \n", " 14 0.520798 0.709312 0.7678 \n", " 15 0.501003 1.247033 0.6533 \n", " 16 0.488959 0.970011 0.7197 \n", " 17 0.474377 0.710652 0.7681 \n", " 18 0.476366 0.686354 0.7693 \n", " 19 0.480246 1.048503 0.6718 \n", " 20 0.460859 0.705917 0.7693 \n", " 21 0.4575 0.928597 0.7142 \n", " 22 0.451474 1.079432 0.6969 \n", " 23 0.457522 0.714242 0.7713 \n", " 24 0.454815 0.958381 0.7166 \n", " 25 0.454017 0.721306 0.7626 \n", " 26 0.440028 0.671276 0.783 \n", " 27 0.432509 0.704762 0.7709 \n", " 28 0.434946 0.848601 0.7411 \n", " 29 0.436465 0.878166 0.7304 \n", " 30 0.427199 0.884603 0.7291 \n", " 31 0.434507 1.089957 0.6863 \n", " 32 0.417676 0.861309 0.7223 \n", " 33 0.421785 0.723549 0.7686 \n", " 34 0.422014 0.76699 0.7494 \n", " 35 0.414088 0.737529 0.7734 \n", " 36 0.42199 0.867438 0.7532 \n", " 37 0.410439 0.731212 0.7665 \n", " 38 0.419615 0.893294 0.722 \n", " 39 0.410767 0.783612 0.7611 \n", " 40 0.417583 0.834628 0.7361 \n", " 41 0.412891 0.63415 0.7936 \n", " 42 0.409541 0.940022 0.7131 \n", " 43 0.406647 0.683275 0.7813 \n", " 44 0.404456 0.787411 0.7438 \n", " 45 0.396082 0.752332 0.7505 \n", " 46 0.393867 0.762795 0.7638 \n", " 47 0.399886 0.699477 0.7797 \n", " 48 0.39515 0.909923 0.71 \n", " 49 0.393156 0.672227 0.7759 \n", " 50 0.378291 0.74518 0.7671 \n", " 51 0.379188 1.026341 0.7154 \n", " 52 0.384426 0.720828 0.7656 \n", " 53 0.376072 0.84426 0.7349 \n", " 54 0.367509 0.724659 0.771 \n", " 55 0.376669 0.787541 0.7382 \n", " 56 0.366169 0.679759 0.7823 \n", " 57 0.366301 0.766295 0.7787 \n", " 58 0.347275 0.569886 0.8152 \n", " 59 0.360935 0.670394 0.7923 \n", " 60 0.347934 0.603666 0.8008 \n", " 61 0.336586 0.581869 0.8179 \n", " 62 0.342905 0.801929 0.7625 \n", " 63 0.343652 0.529446 0.8312 \n", " 64 0.335459 0.678714 0.7988 \n", " 65 0.329838 0.817373 0.7575 \n", " 66 0.322143 0.718153 0.7865 \n", " 67 0.324445 0.632962 0.7979 \n", " 68 0.312053 0.57795 0.8145 \n", " 69 0.301993 0.502879 0.8408 \n", " 70 0.30651 0.496823 0.8404 \n", " 71 0.29968 0.694079 0.7911 \n", " 72 0.289324 0.570044 0.8077 \n", " 73 0.284152 0.791094 0.758 \n", " 74 0.27619 0.637103 0.8159 \n", " 75 0.262262 0.5558 0.823 \n", " 76 0.250873 0.483756 0.8489 \n", " 77 0.231588 0.470876 0.8502 \n", " 78 0.226033 0.506693 0.8422 \n", " 79 0.208242 0.403554 0.8717 \n", " 80 0.177001 0.499644 0.8528 \n", " 81 0.171563 0.455609 0.8591 \n", " 82 0.146025 0.644057 0.8225 \n", " 83 0.13099 0.407798 0.8785 \n", " 84 0.127202 0.411514 0.879 \n", " 85 0.109014 0.399722 0.8855 \n", " 86 0.102309 0.395437 0.8841 \n", " 87 0.082008 0.409852 0.8869 \n", " 88 0.072423 0.396025 0.8919 \n", " 89 0.059635 0.389407 0.8953 \n", " 90 0.044886 0.398666 0.8958 \n", " 91 0.036124 0.377657 0.9034 \n", " 92 0.027008 0.373316 0.9056 \n", " 93 0.02094 0.370854 0.9058 \n", " 94 0.018163 0.369598 0.9068 \n", "\n", "Finished!\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Full size\n", "update_model_dir(learner, args.save_dir)\n", "sargs = save_args('first_run', args.save_dir)\n", "learner.fit(args.lr,args.epochs, cycle_len=args.cycle_len,\n", " sampler=train_sampler,\n", " wds=args.weight_decay,\n", " use_clr_beta=args.use_clr,\n", " loss_scale=args.loss_scale,\n", " **sargs\n", " )\n", "save_sched(learner.sched, args.save_dir)\n", "\n", "print('Finished!')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "learner.save('cifar10-wd4e4-lr1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learner.sched.plot()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cfcb452e653f4e7489e2939c6a4a0e6f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " 81%|████████ | 79/98 [00:06<00:01, 12.36it/s, loss=0.0398] \n", " \r" ] } ], "source": [ "learner.lr_find()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learner.sched.plot()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dbed3f506e8245bda149bd62be36ed77", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss accuracy \n", " 0 0.006505 0.322454 0.911 \n", " 1 0.006266 0.323212 0.9128 \n", " 2 0.007029 0.323482 0.9125 \n", " 3 0.006095 0.321533 0.9118 \n", " 4 0.005952 0.323129 0.9117 \n", " 5 0.005498 0.322257 0.9112 \n", " 6 0.005841 0.323728 0.9117 \n", " 7 0.005956 0.322007 0.9119 \n", " 8 0.006588 0.321131 0.9119 \n", " 64%|██████▍ | 63/98 [00:05<00:02, 12.17it/s, loss=0.00652]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Process Process-1468:\n", "Process Process-1465:\n", "Process Process-1467:\n", "Process Process-1470:\n", "Process Process-1469:\n", "Process Process-1464:\n", "Process Process-1466:\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 50, in _worker_loop\n", " r = index_queue.get()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 50, in _worker_loop\n", " r = index_queue.get()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 55, in _worker_loop\n", " samples = collate_fn([dataset[i] for i in batch_indices])\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 55, in _worker_loop\n", " samples = collate_fn([dataset[i] for i in batch_indices])\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 50, in _worker_loop\n", " r = index_queue.get()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 258, in _bootstrap\n", " self.run()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 50, in _worker_loop\n", " r = index_queue.get()\n", "KeyboardInterrupt\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 55, in \n", " samples = collate_fn([dataset[i] for i in batch_indices])\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 55, in \n", " samples = collate_fn([dataset[i] for i in batch_indices])\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py\", line 93, in run\n", " self._target(*self._args, **self._kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py\", line 335, in get\n", " res = self._reader.recv_bytes()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/datasets/folder.py\", line 124, in __getitem__\n", " img = self.transform(img)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/datasets/folder.py\", line 124, in __getitem__\n", " img = self.transform(img)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", "KeyboardInterrupt\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 50, in _worker_loop\n", " r = index_queue.get()\n", "KeyboardInterrupt\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/transforms.py\", line 42, in __call__\n", " img = t(img)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/transforms.py\", line 42, in __call__\n", " img = t(img)\n", "KeyboardInterrupt\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/transforms.py\", line 61, in __call__\n", " return F.to_tensor(pic)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py\", line 334, in get\n", " with self._rlock:\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/transforms.py\", line 61, in __call__\n", " return F.to_tensor(pic)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/functional.py\", line 71, in to_tensor\n", " img = img.view(pic.size[1], pic.size[0], nchannel)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision/transforms/functional.py\", line 63, in to_tensor\n", " img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/synchronize.py\", line 96, in __enter__\n", " return self._semlock.__enter__()\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/PIL/Image.py\", line 721, in tobytes\n", " e = _getencoder(self.mode, encoder_name, args)\n", "ERROR:root:Internal Python error in the inspect module.\n", "Below is the traceback from this internal error.\n", "\n", "KeyboardInterrupt\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/PIL/Image.py\", line 453, in _getencoder\n", " encoder = getattr(core, encoder_name + \"_encoder\")\n", "KeyboardInterrupt\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/connection.py\", line 216, in recv_bytes\n", " buf = self._recv_bytes(maxlength)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/connection.py\", line 407, in _recv_bytes\n", " buf = self._recv(4)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/multiprocessing/connection.py\", line 379, in _recv\n", " chunk = read(handle, remaining)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 2963, in run_code\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n", " File \"\", line 5, in \n", " **sargs\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/learner.py\", line 251, in fit\n", " return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/learner.py\", line 198, in fit_gen\n", " metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, fp16=self.fp16, **kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/model.py\", line 115, in fit\n", " loss = stepper.step(V(x),V(y), epoch)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/model.py\", line 47, in step\n", " output = self.m(*xs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py\", line 357, in __call__\n", " result = self.forward(*input, **kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/fastai/fp16.py\", line 11, in forward\n", " return self.module(input.half())\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py\", line 357, in __call__\n", " result = self.forward(*input, **kwargs)\n", " File \"/home/paperspace/imagenet-fast/cifar10/models/cifar10/clr_resnet.py\", line 48, in forward\n", " out = self.layer2(out)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py\", line 357, in __call__\n", " result = self.forward(*input, **kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/container.py\", line 67, in forward\n", " input = module(input)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/modules/module.py\", line 357, in __call__\n", " result = self.forward(*input, **kwargs)\n", " File \"/home/paperspace/imagenet-fast/cifar10/models/cifar10/clr_resnet.py\", line 24, in forward\n", " out = self.bn2(self.conv2(F.relu(self.bn1(out))))\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/nn/functional.py\", line 583, in relu\n", " return threshold(input, 0, 0, inplace)\n", "KeyboardInterrupt\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/interactiveshell.py\", line 1863, in showtraceback\n", " stb = value._render_traceback_()\n", "AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/ultratb.py\", line 1095, in get_records\n", " return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/ultratb.py\", line 311, in wrapped\n", " return f(*args, **kwargs)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/ultratb.py\", line 345, in _fixed_getinnerframes\n", " records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/inspect.py\", line 1483, in getinnerframes\n", " frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/inspect.py\", line 1441, in getframeinfo\n", " filename = getsourcefile(frame) or getfile(frame)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/inspect.py\", line 696, in getsourcefile\n", " if getattr(getmodule(object, filename), '__loader__', None) is not None:\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/inspect.py\", line 742, in getmodule\n", " os.path.realpath(f)] = module.__name__\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/posixpath.py\", line 388, in realpath\n", " path, ok = _joinrealpath(filename[:0], filename, {})\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/posixpath.py\", line 422, in _joinrealpath\n", " if not islink(newpath):\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/posixpath.py\", line 171, in islink\n", " st = os.lstat(path)\n", " File \"/home/paperspace/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py\", line 175, in handler\n", " _error_if_any_worker_fails()\n", "RuntimeError: DataLoader worker (pid 17315) exited unexpectedly with exit code 1.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "KeyboardInterrupt\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\r", " 64%|██████▍ | 63/98 [00:19<00:11, 3.15it/s, loss=0.00652]" ] } ], "source": [ "learner.fit(1e-5,1, cycle_len=15,\n", " sampler=train_sampler,\n", " wds=args.weight_decay,\n", " loss_scale=args.loss_scale,\n", " **sargs\n", " )" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TTA acc: 0.9226 \n" ] } ], "source": [ "\n", "if args.use_tta:\n", " log_preds,y = learner.TTA()\n", " preds = np.mean(np.exp(log_preds),0)\n", " acc = accuracy(torch.FloatTensor(preds),torch.LongTensor(y))\n", " print('TTA acc:', acc)\n", " \n", " with open(args.save_dir+'/tta_accuracy.txt', \"a\", 1) as f:\n", " f.write(time.strftime(\"%Y-%m-%dT%H:%M:%S\")+f\"\\tTTA accuracty: {acc}\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "toc": { "nav_menu": { "height": "266px", "width": "252px" }, "number_sections": true, "sideBar": true, "skip_h1_title": false, "toc_cell": false, "toc_position": {}, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }