{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import argparse\n", "import logging \n", "import time\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "from torch.utils.data import DataLoader\n", "import torchvision.datasets as datasets\n", "import torchvision.transforms as transforms\n", "from tqdm import tqdm_notebook as tqdm\n", "\n", "from fixed_grid import Euler, Midpoint, RK4\n", "from dopri5 import Dopri5Solver\n", "\n", "from misc import _decreasing, _check_inputs, _flatten, _flatten_convert_none_to_zeros" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**ODEINT и odeint_adjoint**\n", "\n", "Функции ODEINT и odeint_adjoint применяют численные методы для вычисления выхода модели. Причем в odeint_adjoint используются сопряженные переменные, поэтому объем его используемой памяти - константа O(1), в отличие от ODEINT." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def ODEINT(func, y0, t, rtol=1e-7, atol=1e-9, method=None, options=None):\n", " \"\"\"Integrate a system of ordinary differential equations.\n", " Solves the initial value problem for a non-stiff system of first order ODEs:\n", " ```\n", " dy/dt = func(t, y), y(t[0]) = y0\n", " ```\n", " where y is a Tensor of any shape.\n", " Output dtypes and numerical precision are based on the dtypes of the inputs `y0`.\n", " Args:\n", " func: Function that maps a Tensor holding the state `y` and a scalar Tensor\n", " `t` into a Tensor of state derivatives with respect to time.\n", " y0: N-D Tensor giving starting value of `y` at time point `t[0]`. May\n", " have any floating point or complex dtype.\n", " t: 1-D Tensor holding a sequence of time points for which to solve for\n", " `y`. The initial time point should be the first element of this sequence,\n", " and each time must be larger than the previous time. May have any floating\n", " point dtype. Converted to a Tensor with float64 dtype.\n", " rtol: optional float64 Tensor specifying an upper bound on relative error,\n", " per element of `y`.\n", " atol: optional float64 Tensor specifying an upper bound on absolute error,\n", " per element of `y`.\n", " method: optional string indicating the integration method to use.\n", " options: optional dict of configuring options for the indicated integration\n", " method. Can only be provided if a `method` is explicitly set.\n", " name: Optional name for this operation.\n", " Returns:\n", " y: Tensor, where the first dimension corresponds to different\n", " time points. Contains the solved value of y for each desired time point in\n", " `t`, with the initial value `y0` being the first element along the first\n", " dimension.\n", " Raises:\n", " ValueError: if an invalid `method` is provided.\n", " TypeError: if `options` is supplied without `method`, or if `t` or `y0` has\n", " an invalid dtype.\n", " \"\"\"\n", "\n", " tensor_input, func, y0, t = _check_inputs(func, y0, t)\n", "\n", " if options is None:\n", " options = {}\n", " elif method is None:\n", " raise ValueError('cannot supply `options` without specifying `method`')\n", "\n", " if method is None:\n", " method = 'dopri5' # ЧМЫ\n", "\n", " solver = SOLVERS[method](func, y0, rtol=rtol, atol=atol, **options)\n", " solution = solver.integrate(t)\n", "\n", " if tensor_input:\n", " solution = solution[0]\n", " return solution\n", "\n", "class OdeintAdjointMethod(torch.autograd.Function):\n", "\n", " @staticmethod\n", " def forward(ctx, *args):\n", " assert len(args) >= 8, 'Internal error: all arguments required.'\n", " y0, func, t, flat_params, rtol, atol, method, options = \\\n", " args[:-7], args[-7], args[-6], args[-5], args[-4], args[-3], args[-2], args[-1]\n", "\n", " ctx.func, ctx.rtol, ctx.atol, ctx.method, ctx.options = func, rtol, atol, method, options\n", "\n", " with torch.no_grad():\n", " ans = ODEINT(func, y0, t, rtol=rtol, atol=atol, method=method, options=options)\n", " ctx.save_for_backward(t, flat_params, *ans)\n", " return ans\n", "\n", " @staticmethod\n", " def backward(ctx, *grad_output):\n", "\n", " t, flat_params, *ans = ctx.saved_tensors\n", " ans = tuple(ans)\n", " func, rtol, atol, method, options = ctx.func, ctx.rtol, ctx.atol, ctx.method, ctx.options\n", " n_tensors = len(ans)\n", " f_params = tuple(func.parameters())\n", "\n", " # TODO: use a nn.Module and call odeint_adjoint to implement higher order derivatives.\n", " def augmented_dynamics(t, y_aug):\n", " # Dynamics of the original system augmented with\n", " # the adjoint wrt y, and an integrator wrt t and args.\n", " y, adj_y = y_aug[:n_tensors], y_aug[n_tensors:2 * n_tensors] # Ignore adj_time and adj_params.\n", "\n", " with torch.set_grad_enabled(True):\n", " t = t.to(y[0].device).detach().requires_grad_(True)\n", " y = tuple(y_.detach().requires_grad_(True) for y_ in y)\n", " func_eval = func(t, y)\n", " vjp_t, *vjp_y_and_params = torch.autograd.grad(\n", " func_eval, (t,) + y + f_params,\n", " tuple(-adj_y_ for adj_y_ in adj_y), allow_unused=True, retain_graph=True\n", " )\n", " vjp_y = vjp_y_and_params[:n_tensors]\n", " vjp_params = vjp_y_and_params[n_tensors:]\n", "\n", " # autograd.grad returns None if no gradient, set to zero.\n", " vjp_t = torch.zeros_like(t) if vjp_t is None else vjp_t\n", " vjp_y = tuple(torch.zeros_like(y_) if vjp_y_ is None else vjp_y_ for vjp_y_, y_ in zip(vjp_y, y))\n", " vjp_params = _flatten_convert_none_to_zeros(vjp_params, f_params)\n", "\n", " if len(f_params) == 0:\n", " vjp_params = torch.tensor(0.).to(vjp_y[0])\n", " return (*func_eval, *vjp_y, vjp_t, vjp_params)\n", "\n", " T = ans[0].shape[0]\n", " with torch.no_grad():\n", " adj_y = tuple(grad_output_[-1] for grad_output_ in grad_output)\n", " adj_params = torch.zeros_like(flat_params)\n", " adj_time = torch.tensor(0.).to(t)\n", " time_vjps = []\n", " for i in range(T - 1, 0, -1):\n", "\n", " ans_i = tuple(ans_[i] for ans_ in ans)\n", " grad_output_i = tuple(grad_output_[i] for grad_output_ in grad_output)\n", " func_i = func(t[i], ans_i)\n", "\n", " # Compute the effect of moving the current time measurement point.\n", " dLd_cur_t = sum(\n", " torch.dot(func_i_.reshape(-1), grad_output_i_.reshape(-1)).reshape(1)\n", " for func_i_, grad_output_i_ in zip(func_i, grad_output_i)\n", " )\n", " adj_time = adj_time - dLd_cur_t\n", " time_vjps.append(dLd_cur_t)\n", "\n", " # Run the augmented system backwards in time.\n", " if adj_params.numel() == 0:\n", " adj_params = torch.tensor(0.).to(adj_y[0])\n", " aug_y0 = (*ans_i, *adj_y, adj_time, adj_params)\n", " aug_ans = ODEINT(\n", " augmented_dynamics, aug_y0,\n", " torch.tensor([t[i], t[i - 1]]), rtol=rtol, atol=atol, method=method, options=options\n", " )\n", "\n", " # Unpack aug_ans.\n", " adj_y = aug_ans[n_tensors:2 * n_tensors]\n", " adj_time = aug_ans[2 * n_tensors]\n", " adj_params = aug_ans[2 * n_tensors + 1]\n", "\n", " adj_y = tuple(adj_y_[1] if len(adj_y_) > 0 else adj_y_ for adj_y_ in adj_y)\n", " if len(adj_time) > 0: adj_time = adj_time[1]\n", " if len(adj_params) > 0: adj_params = adj_params[1]\n", "\n", " adj_y = tuple(adj_y_ + grad_output_[i - 1] for adj_y_, grad_output_ in zip(adj_y, grad_output))\n", "\n", " del aug_y0, aug_ans\n", "\n", " time_vjps.append(adj_time)\n", " time_vjps = torch.cat(time_vjps[::-1])\n", "\n", " return (*adj_y, None, time_vjps, adj_params, None, None, None, None, None)\n", "\n", "\n", "def odeint_adjoint(func, y0, t, rtol=1e-6, atol=1e-12, method=None, options=None):\n", "\n", " # We need this in order to access the variables inside this module,\n", " # since we have no other way of getting variables along the execution path.\n", " if not isinstance(func, nn.Module):\n", " raise ValueError('func is required to be an instance of nn.Module.')\n", "\n", " tensor_input = False\n", " if torch.is_tensor(y0):\n", "\n", " class TupleFunc(nn.Module):\n", "\n", " def __init__(self, base_func):\n", " super(TupleFunc, self).__init__()\n", " self.base_func = base_func\n", "\n", " def forward(self, t, y):\n", " return (self.base_func(t, y[0]),)\n", "\n", " tensor_input = True\n", " y0 = (y0,)\n", " func = TupleFunc(func)\n", "\n", " flat_params = _flatten(func.parameters())\n", " ys = OdeintAdjointMethod.apply(*y0, func, t, flat_params, rtol, atol, method, options)\n", "\n", " if tensor_input:\n", " ys = ys[0]\n", " return ys" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Вспомогательные функции" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "class Flatten(nn.Module):\n", "\n", " def __init__(self):\n", " super(Flatten, self).__init__()\n", "\n", " def forward(self, x):\n", " shape = torch.prod(torch.tensor(x.shape[1:])).item()\n", " return x.view(-1, shape)\n", "\n", "\n", "class RunningAverageMeter(object):\n", " \"\"\"Computes and stores the average and current value\"\"\"\n", "\n", " def __init__(self, momentum=0.99):\n", " self.momentum = momentum\n", " self.reset()\n", "\n", " def reset(self):\n", " self.val = None\n", " self.avg = 0\n", "\n", " def update(self, val):\n", " if self.val is None:\n", " self.avg = val\n", " else:\n", " self.avg = self.avg * self.momentum + val * (1 - self.momentum)\n", " self.val = val\n", "\n", "\n", "def get_mnist_loaders(data_aug=False, batch_size=128, test_batch_size=1000, perc=1.0): \n", " # загружает данные\n", " if data_aug:\n", " transform_train = transforms.Compose([\n", " transforms.RandomCrop(28, padding=4),\n", " transforms.ToTensor(),\n", " ])\n", " else:\n", " transform_train = transforms.Compose([\n", " transforms.ToTensor(),\n", " ])\n", "\n", " transform_test = transforms.Compose([\n", " transforms.ToTensor(),\n", " ])\n", "\n", " train_loader = DataLoader(\n", " datasets.MNIST(root='data/mnist', train=True, download=True, transform=transform_train), batch_size=batch_size,\n", " shuffle=True, num_workers=0, drop_last=True\n", " )\n", "\n", " train_eval_loader = DataLoader(\n", " datasets.MNIST(root='data/mnist', train=True, download=True, transform=transform_test),\n", " batch_size=test_batch_size, shuffle=False, num_workers=0, drop_last=True\n", " )\n", "\n", " test_loader = DataLoader(\n", " datasets.MNIST(root='data/mnist', train=False, download=True, transform=transform_test),\n", " batch_size=test_batch_size, shuffle=False, num_workers=0, drop_last=True\n", " )\n", "\n", " return train_loader, test_loader, train_eval_loader #loader\n", "\n", "\n", "def inf_generator(iterable): \n", " \"\"\"Allows training with DataLoaders in a single infinite loop:\n", " for i, (x, y) in enumerate(inf_generator(train_loader)):\n", " \"\"\"\n", " iterator = iterable.__iter__()\n", " while True:\n", " try:\n", " yield iterator.__next__()\n", " except StopIteration:\n", " iterator = iterable.__iter__()\n", "\n", "\n", "\n", " \n", "def learning_rate_with_decay(batch_size, batch_denom, batches_per_epoch, boundary_epochs, decay_rates):\n", " # реализует затухание lr\n", " initial_learning_rate = LR * batch_size / batch_denom\n", "\n", " boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]\n", " vals = [initial_learning_rate * decay for decay in decay_rates]\n", "\n", " def learning_rate_fn(itr):\n", " lt = [itr < b for b in boundaries] + [True]\n", " i = np.argmax(lt)\n", " return vals[i]\n", "\n", " return learning_rate_fn\n", "\n", "\n", "def one_hot(x, K):\n", " #one hot кодирование\n", " return np.array(x[:, None] == np.arange(K)[None, :], dtype=int)\n", "\n", "\n", "def accuracy(model, dataset_loader):\n", " total_correct = 0\n", " for x, y in dataset_loader:\n", " x = x.to(device)\n", " y = one_hot(np.array(y.numpy()), 10)\n", "\n", " target_class = np.argmax(y, axis=1)\n", " predicted_class = np.argmax(model(x).cpu().detach().numpy(), axis=1)\n", " total_correct += np.sum(predicted_class == target_class)\n", " return (total_correct / len(dataset_loader.dataset)) * 100 # просто точность\n", "\n", "\n", "def count_parameters(model):\n", " return sum(p.numel() for p in model.parameters() if p.requires_grad) # число параметров модели,что показывает её сложность\n", "\n", "\n", "def makedirs(dirname): # создаёт дирректорию\n", " if not os.path.exists(dirname):\n", " os.makedirs(dirname)\n", "\n", "def conv3x3(in_planes, out_planes, stride=1):\n", " \"\"\"3x3 convolution with padding\"\"\"\n", " return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)\n", "\n", "\n", "def conv1x1(in_planes, out_planes, stride=1):\n", " \"\"\"1x1 convolution\"\"\"\n", " return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)\n", "\n", "\n", "def norm(dim):\n", " \"\"\"нормализация: https://arxiv.org/pdf/1803.08494.pdf\"\"\"\n", " return nn.GroupNorm(min(32, dim), dim)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Res** блоки задают архитектуру остаточной сети" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "class ResBlock(nn.Module):\n", " \"\"\"Блок для ResNet\"\"\"\n", " expansion = 1\n", "\n", " def __init__(self, inplanes, planes, stride=1, downsample=None):\n", " super(ResBlock, self).__init__()\n", " self.norm1 = norm(inplanes)\n", " self.relu = nn.ReLU(inplace=True)\n", " self.downsample = downsample \n", " self.conv1 = conv3x3(inplanes, planes, stride)\n", " self.norm2 = norm(planes)\n", " self.conv2 = conv3x3(planes, planes)\n", "\n", " def forward(self, x):\n", " shortcut = x\n", "\n", " out = self.relu(self.norm1(x))\n", "\n", " if self.downsample is not None:\n", " shortcut = self.downsample(out) \n", "\n", " out = self.conv1(out)\n", " out = self.norm2(out)\n", " out = self.relu(out)\n", " out = self.conv2(out)\n", "\n", " return out + shortcut # F(x) + x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**ODE** блоки Задают архитектуру ODE сети" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "class ConcatConv2d(nn.Module):\n", " \"\"\"Особая свёртка\"\"\"\n", " def __init__(self, dim_in, dim_out, ksize=3, stride=1, padding=0, dilation=1, groups=1, bias=True, transpose=False):\n", " super(ConcatConv2d, self).__init__()\n", " module = nn.ConvTranspose2d if transpose else nn.Conv2d\n", " self._layer = module(\n", " dim_in + 1, dim_out, kernel_size=ksize, stride=stride, padding=padding, dilation=dilation, groups=groups,\n", " bias=bias\n", " )\n", "\n", " def forward(self, t, x):\n", " tt = torch.ones_like(x[:, :1, :, :]) * t \n", " ttx = torch.cat([tt, x], 1)\n", " return self._layer(ttx)\n", " \n", "\n", "class ODEfunc(nn.Module):\n", "\n", " def __init__(self, dim): \n", " super(ODEfunc, self).__init__()\n", " self.norm1 = norm(dim)\n", " self.relu = nn.ReLU(inplace=True)\n", " self.conv1 = ConcatConv2d(dim, dim, 3, 1, 1)\n", " self.norm2 = norm(dim)\n", " self.conv2 = ConcatConv2d(dim, dim, 3, 1, 1)\n", " self.norm3 = norm(dim)\n", " self.nfe = 0 # number of function evaluations \n", " # объем вычислений, зависит от размера сетки в численном методе\n", "\n", " def forward(self, t, x):\n", " self.nfe += 1 \n", " out = self.norm1(x)\n", " out = self.relu(out)\n", " out = self.conv1(t, out) # t участвует в свертках, тех самых concat conv\n", " out = self.norm2(out)\n", " out = self.relu(out)\n", " out = self.conv2(t, out)\n", " out = self.norm3(out)\n", " return out\n", "\n", "\n", "class ODEBlock(nn.Module):\n", "\n", " def __init__(self, odefunc):\n", " super(ODEBlock, self).__init__()\n", " self.odefunc = odefunc # класс ODEfunc описан выше\n", " self.integration_time = torch.tensor([0, 1]).float()\n", "\n", " def forward(self, x):\n", " self.integration_time = self.integration_time.type_as(x)\n", " out = odeint(self.odefunc, x, self.integration_time, rtol= TOL, atol = TOL, method = METHOD)\n", " return out[1]\n", "\n", " @property\n", " def nfe(self):\n", " return self.odefunc.nfe\n", "\n", " @nfe.setter\n", " def nfe(self, value):\n", " self.odefunc.nfe = value\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Параметры эксперимента\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**TOL**(tolernce), параметр для функций **ODEINT** и **odeint_adjoint**. Этот параметр позволяет адаптировать модель, изменяя его, можно получить более быструю но менее точную модель" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**METHOD**, параметр для функций **ODEINT** и **odeint_adjoint**. Этот параметр определяет какой именно численный метод будет использоваться. От него зависит точность и время работы метода." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Параметры эксперимента**\n", "\n", "1) Чтобы удостовериться в влиянии **TOL** можно его увеличить и уменьшить и посмотреть соответствеено результат.\n", "\n", "\n", "2) Также можно поменять параметр **METHOD**, выбрав его из **SOLVERS**.\n", "\n", "\n", "3) Можно сравнить **ODE Net** с **Res Net**, посмотрев на число параметров, на скорость сходимости, для этого следует переключать параметр **is_odenet**.\n", "\n", "4) Для того чтобы Проверить утверждение о крнстантной памяти при использовании odeint_adjoint, нужно изменить параметр **odeint** на **odeint_adjoint** и значительно увеличить **BTCH_SZ** например до 200. При таком размере **BTCH_SZ** и спользовании метода **odeint**, ноутбук скорее всего упадёт.\n", "\n", "5) Если хочется провести точный эксперимент такой же как в статье то можно выставить число эпох (**NEPOCHS**) на 128, но это будет довольно долго." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "TOL = 1e-3 # испоьзуется в odeint\n", "SOLVERS = {'dopri5': Dopri5Solver, 'euler': Euler, 'midpoint': Midpoint, 'rk4': RK4} # Численные методы\n", "METHOD = 'rk4'# Выбрать из 'dopri5' 'euler' 'midpoint' 'rk4'\n", "LR = 0.1 # используется в learning_rate_with_decay\n", "odeint = ODEINT# ODEINT или odeint_adjoint\n", "is_odenet = True # тут может быть False, тогда будет в эксперименте участвовать resnet а не ODE-Net\n", "BTCH_SZ = 50\n", "NEPOCHS = 5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Эксперимент" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "makedirs(\"./experiment\")\n", "device = 'cpu'" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "downsampling_layers = [\n", " nn.Conv2d(1, 64, 3, 1),\n", " norm(64),\n", " nn.ReLU(inplace=True),\n", " nn.Conv2d(64, 64, 4, 2, 1),\n", " norm(64),\n", " nn.ReLU(inplace=True),\n", " nn.Conv2d(64, 64, 4, 2, 1),\n", "]\n", "\n", "feature_layers = [ODEBlock(ODEfunc(64))] if is_odenet else [ResBlock(64, 64) for _ in range(6)]\n", "fc_layers = [norm(64), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)), Flatten(), nn.Linear(64, 10)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "инициализация модели" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n", "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n", "Processing...\n", "Done!\n" ] } ], "source": [ "# сама модель\n", "model = nn.Sequential(*downsampling_layers, *feature_layers, *fc_layers).to(device)\n", "\n", "# Для оптимизации\n", "criterion = nn.CrossEntropyLoss().to(device)\n", "\n", "# поучаем loader-ы\n", "train_loader, test_loader, train_eval_loader = get_mnist_loaders(\n", " data_aug=True, batch_size = BTCH_SZ, test_batch_size=1000)\n", "\n", "# Получаем бесконечный итератор\n", "\n", "data_gen = inf_generator(train_loader)\n", "batches_per_epoch = len(train_loader)\n", "\n", "# Уменьшение lr от итерации\n", "\n", "lr_fn = learning_rate_with_decay(\n", " batch_size = BTCH_SZ, batch_denom=128, batches_per_epoch=batches_per_epoch, boundary_epochs=[60, 100, 140],\n", " decay_rates=[1, 0.1, 0.01, 0.001]\n", " )\n", "\n", "# Оптимизатор\n", "\n", "optimizer = torch.optim.SGD(model.parameters(), lr= LR, momentum=0.9)\n", "\n", "# Инициализируем 3 различных RunningAverageMeter\n", "best_acc = 0\n", "batch_time_meter = RunningAverageMeter()\n", "f_nfe_meter = RunningAverageMeter()\n", "b_nfe_meter = RunningAverageMeter()\n", "end = time.time()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**число параметров** \n", "\n", "можно заметить что у ResNet их больше " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "132096 75392 778\n", "Number of parameters: 208266\n" ] } ], "source": [ "print(count_parameters(nn.Sequential(*downsampling_layers)),\n", " count_parameters(nn.Sequential(*feature_layers)), count_parameters(nn.Sequential(*fc_layers)))\n", "\n", "print('Number of parameters: {}'.format(count_parameters(model)) )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**запуск эксперимента**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7a3703b99cc048a0b57e67b08752f912", "version_major": 2, "version_minor": 0 }, "text/html": [ "
Failed to display Jupyter Widget of type HBox
.
\n", " If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n", " that the widgets JavaScript is still loading. If this message persists, it\n", " likely means that the widgets JavaScript library is either not installed or\n", " not enabled. See the Jupyter\n", " Widgets Documentation for setup instructions.\n", "
\n", "\n", " If you're reading this message in another frontend (for example, a static\n", " rendering on GitHub or NBViewer),\n", " it may mean that your frontend doesn't currently support widgets.\n", "
\n" ], "text/plain": [ "HBox(children=(IntProgress(value=0, max=6000), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch: 1.0 te_err (%) : 2.969999999999999\n", "epoch: 2.0 te_err (%) : 0.9399999999999977\n", "epoch: 3.0 te_err (%) : 1.2199999999999989\n", "epoch: 4.0 te_err (%) : 0.7600000000000051\n", "epoch: 5.0 te_err (%) : 1.0100000000000051\n", "\n" ] } ], "source": [ "# Главный цикл\n", "for itr in tqdm(range(1, NEPOCHS * batches_per_epoch + 1)):\n", "\n", " for param_group in optimizer.param_groups:\n", " param_group['lr'] = lr_fn(itr)\n", "\n", " optimizer.zero_grad()\n", " x, y = data_gen.__next__()\n", " x = x.to(device)\n", " y = y.to(device)\n", " logits = model(x)\n", " loss = criterion(logits, y)\n", "\n", " if is_odenet:\n", " nfe_forward = feature_layers[0].nfe\n", " feature_layers[0].nfe = 0\n", " \n", " loss.backward()\n", " optimizer.step()\n", " \n", " if is_odenet:\n", " nfe_backward = feature_layers[0].nfe\n", " feature_layers[0].nfe = 0\n", "\n", " batch_time_meter.update(time.time() - end)\n", " if is_odenet:\n", " f_nfe_meter.update(nfe_forward)\n", " b_nfe_meter.update(nfe_backward)\n", " end = time.time()\n", " \n", " if itr % batches_per_epoch == 0:\n", " with torch.no_grad():\n", " val_acc = accuracy(model, test_loader)\n", " print(\"epoch: \", itr/ batches_per_epoch,\"te_err (%) : \", 100 - val_acc)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }