{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FP32 vs FP16 training-step benchmark\n",
    "\n",
    "Times a full SGD training step (zero gradients, forward, loss, backward, optimizer update) on random `3 x 224 x 224` batches for several torchvision models, first in FP32 and then with the model and inputs converted to FP16 via `.half()`.\n",
    "\n",
    "A CUDA-capable GPU is required: every model and batch is moved to the GPU with `.cuda()`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from time import perf_counter, time\n",
    "\n",
    "import torch\n",
    "import torch.backends.cudnn as cudnn\n",
    "import torch.nn as nn\n",
    "import torch.optim\n",
    "import torchvision.models as models\n",
    "from torch.autograd import Variable\n",
    "from torchvision.models import vgg16,densenet121,resnet152"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enable cuDNN benchmark mode for the whole session.\n",
    "torch.backends.cudnn.benchmark=True\n",
    "\n",
    "# Names of all lowercase, non-dunder callables exposed by torchvision.models.\n",
    "model_names = sorted(name for name in models.__dict__\n",
    "                     if name.islower() and not name.startswith(\"__\")\n",
    "                     and callable(models.__dict__[name]))\n",
    "\n",
    "ARCHS = ['densenet121', 'vgg16', 'resnet152']  # architectures to benchmark\n",
    "BATCH_SIZE = 16        # images per step\n",
    "IMAGE_SIZE = 224       # height and width of the random input images\n",
    "NUM_RUNS = 100         # timed steps per (architecture, precision) pair\n",
    "NUM_WARMUP = 1         # untimed steps executed before timing starts\n",
    "LEARNING_RATE = 0.001\n",
    "MOMENTUM = 0.9\n",
    "WEIGHT_DECAY = 1e-5\n",
    "TARGET_CLASS = 1       # every sample is labelled with this class index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Benchmark helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def benchmark_training_step(arch, half=False, num_runs=NUM_RUNS,\n",
    "                            num_warmup=NUM_WARMUP, batch_size=BATCH_SIZE):\n",
    "    \"\"\"Time SGD training steps of a torchvision model on the GPU.\n",
    "\n",
    "    A step is: zero the gradients, forward pass, cross-entropy loss,\n",
    "    backward pass, optimizer update. Each step uses a newly generated random\n",
    "    batch; creating the batch and copying it to the GPU is not timed.\n",
    "\n",
    "    The model and optimizer are locals, so they are dropped when the function\n",
    "    returns and do not stay referenced while the next configuration is built.\n",
    "\n",
    "    Args:\n",
    "        arch: name of a model constructor in ``torchvision.models``.\n",
    "        half: if True, the model and the inputs are converted with ``.half()``.\n",
    "        num_runs: number of timed steps; must be at least 1.\n",
    "        num_warmup: number of untimed steps run first; must not be negative.\n",
    "        batch_size: number of images per step.\n",
    "\n",
    "    Returns:\n",
    "        A list with one duration in seconds per timed step.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``num_runs`` < 1 or ``num_warmup`` < 0.\n",
    "        KeyError: if ``arch`` is not an attribute of ``torchvision.models``.\n",
    "    \"\"\"\n",
    "    if num_runs < 1:\n",
    "        raise ValueError('num_runs must be >= 1, got {}'.format(num_runs))\n",
    "    if num_warmup < 0:\n",
    "        raise ValueError('num_warmup must be >= 0, got {}'.format(num_warmup))\n",
    "\n",
    "    model = models.__dict__[arch]().cuda()\n",
    "    if half:\n",
    "        model = model.half()\n",
    "    loss = nn.CrossEntropyLoss().cuda()\n",
    "    optimizer = torch.optim.SGD(model.parameters(), LEARNING_RATE,\n",
    "                                momentum=MOMENTUM,\n",
    "                                weight_decay=WEIGHT_DECAY)\n",
    "\n",
    "    durations = []\n",
    "    for i in range(num_warmup + num_runs):\n",
    "        x = torch.rand(batch_size, 3, IMAGE_SIZE, IMAGE_SIZE)\n",
    "        x_var = Variable(x).cuda()\n",
    "        if half:\n",
    "            x_var = x_var.half()\n",
    "        target = Variable(torch.LongTensor(batch_size).fill_(TARGET_CLASS).cuda())\n",
    "\n",
    "        # Wait for pending GPU work (including the copies above) so that it\n",
    "        # is not attributed to the timed step.\n",
    "        torch.cuda.synchronize()\n",
    "        t1 = perf_counter()\n",
    "        # Without this, gradients from every earlier step would be summed\n",
    "        # into the current ones before the update.\n",
    "        optimizer.zero_grad()\n",
    "        out = model(x_var)\n",
    "        err = loss(out, target)\n",
    "        err.backward()\n",
    "        optimizer.step()\n",
    "        # Wait for the GPU to finish the step before reading the clock.\n",
    "        torch.cuda.synchronize()\n",
    "        t2 = perf_counter()\n",
    "\n",
    "        # treat the initial run(s) as warm up and don't count them\n",
    "        if i >= num_warmup:\n",
    "            durations.append(t2 - t1)\n",
    "\n",
    "    return durations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('cuda version=', torch.version.cuda)\n",
    "print('cudnn version=', torch.backends.cudnn.version())\n",
    "\n",
    "for arch in ARCHS:\n",
    "    for half in (False, True):\n",
    "        durations = benchmark_training_step(arch, half=half)\n",
    "        print('{} FP {} avg over {} runs: {} ms'.format(\n",
    "            arch, 16 if half else 32, len(durations),\n",
    "            sum(durations) / len(durations) * 1000))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Previously recorded results\n",
    "\n",
    "The output below was recorded from an earlier version of this notebook, which did not call `optimizer.zero_grad()` between steps and timed with `time.time()`. Re-run the cell above to obtain numbers for the current code and hardware.\n",
    "\n",
    "```\n",
    "cuda version= 9.0.176\n",
    "cudnn version= 7005\n",
    "densenet121 FP 32 avg over 100 runs: 71.4666223526001 ms\n",
    "densenet121 FP 16 avg over 100 runs: 59.28621768951416 ms\n",
    "vgg16 FP 32 avg over 100 runs: 87.8125810623169 ms\n",
    "vgg16 FP 16 avg over 100 runs: 58.735389709472656 ms\n",
    "resnet152 FP 32 avg over 100 runs: 143.52030992507935 ms\n",
    "resnet152 FP 16 avg over 100 runs: 92.21737623214722 ms\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}