from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# NOTE(review): the rest of this file references `argparse`, `np` (numpy),
# `ModelHelper`, `brew`, `resnet`, and `workspace`, but their import lines
# are missing from this mangled copy. Restore them from the original file
# (presumably `import argparse`, `import numpy as np`, and the
# `caffe2.python` imports) -- TODO confirm exact module paths.
"""Benchmark for common convnets.

Speed on Titan X, with 10 warmup steps and 10 main steps and with different
versions of cudnn, are as follows (time reported below is per-batch time,
forward / forward+backward):

AlexNet         32.5 / 108.0    27.4 / 90.1
OverFeat       113.0 / 342.3    91.7 / 276.5
Inception      134.5 / 485.8   125.7 / 450.6
VGG (batch 64) 200.8 / 650.0   164.1 / 551.7

Speed on Inception with varied batch sizes and CuDNN v4 is as follows:

Batch Size   Speed per batch   Speed per image
16             22.8 / 72.7       1.43 / 4.54
32             38.0 / 127.5      1.19 / 3.98
64             67.2 / 233.6      1.05 / 3.65
128           125.7 / 450.6      0.98 / 3.52

Speed on Tesla M40, which 10 warmup steps and 10 main steps and with cudnn
v4, is as follows:

OverFeat       210.5 / 630.3
Inception      300.2 / 1122.2
VGG (batch 64) 405.8 / 1327.7

(Note that these numbers involve a "full" backprop, i.e. the gradient
with respect to the input image is also computed.)

To get the numbers, simply run:

for MODEL in AlexNet OverFeat Inception; do
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size 128 --model $MODEL --forward_only True
done
for MODEL in AlexNet OverFeat Inception; do
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size 128 --model $MODEL
done
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  --batch_size 64 --model VGGA --forward_only True
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  --batch_size 64 --model VGGA

for BS in 16 32 64 128; do
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size $BS --model Inception --forward_only True
  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
    --batch_size $BS --model Inception
done

Note that VGG needs to be run at batch 64 due to memory limit on the backward
pass.
"""


def MLP(order, cudnn_ws, mkl):
    """Build a deep multi-layer-perceptron benchmark model.

    Builds ``width`` parallel columns of ``depth`` fully-connected layers,
    sums the columns, and classifies into 1000 classes with a cross-entropy
    loss. Returns ``(model, input_dimension)``.

    NOTE(review): reconstructed from a mangled source. The layer-size
    constants below were not visible -- TODO confirm against the original.
    """
    model = ModelHelper(name="benchmark")
    d = 256     # TODO confirm: hidden dimension not visible in source
    depth = 20  # TODO confirm: layer count not visible in source
    width = 3   # TODO confirm: column count not visible in source

    # Layer 0 of every column reads the shared "data" blob; deeper layers
    # chain through per-column "fc_{layer}_{column}" blobs.
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(
                model,
                current,
                next_,
                dim_in=d,
                dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}),
            )
    # Merge the parallel columns and classify.
    brew.sum(
        model, ["fc_{}_{}".format(depth, j) for j in range(width)], ["sum"]
    )
    brew.fc(
        model,
        "sum",
        "last",
        dim_in=d,
        dim_out=1000,
        weight_init=('XavierFill', {}),
        bias_init=('XavierFill', {}),
    )
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, d
def ResNet50(order, cudnn_ws, mkl):
    """Build a ResNet-50 benchmark model via the caffe2 resnet helper.

    Returns ``(model, input_image_size)``.
    """
    my_arg_scope = {'order': order, 'use_cudnn': True,
                    'cudnn_exhaustive_search': True,
                    'ws_nbytes_limit': str(cudnn_ws)}
    # NOTE(review): the model name "alexnet" is copied verbatim from the
    # source; it looks like a copy/paste slip but is preserved because it is
    # a runtime string (net names appear in dumped protos / benchmark output).
    model = ModelHelper(name="alexnet", arg_scope=my_arg_scope)
    # 3 input channels, 1000 output classes; is_test=True disables
    # training-only ops such as dropout inside the helper.
    resnet.create_resnet50(model, "data", 3, 1000, is_test=True)
    # TODO confirm: any trailing create_resnet50 kwargs and the returned
    # input size were not visible in the mangled source.
    return model, 224
def AlexNet(order, cudnn_ws, mkl):
    """Build an AlexNet benchmark model; returns ``(model, input_size)``.

    NOTE(review): reconstructed from a mangled source. The relu/pool wiring,
    the fc dimensions (256*6*6 -> 4096 -> 4096 -> 1000), and the pool
    kernel/stride values are visible in the source; the conv channel /
    kernel / stride / pad arguments were not and follow the standard
    AlexNet layout -- TODO confirm against the original file.
    """
    my_arg_scope = {'order': order, 'use_cudnn': True,
                    'cudnn_exhaustive_search': True,
                    'ws_nbytes_limit': str(cudnn_ws)}
    model = ModelHelper(name="alexnet", arg_scope=my_arg_scope)
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,       # TODO confirm conv args from here on (not visible in source)
        64,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4,
        pad=2
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2)
    conv2 = brew.conv(
        model,
        pool1,
        "conv2",
        64,
        192,
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2)
    conv3 = brew.conv(
        model,
        pool2,
        "conv3",
        192,
        384,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model,
        relu3,
        "conv4",
        384,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(
        model,
        relu4,
        "conv5",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
    fc6 = brew.fc(
        model, pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(
        model, relu6, "fc7", 4096, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(
        model, relu7, "fc8", 4096, 1000, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    pred = brew.softmax(model, fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 224  # TODO confirm: input size not visible in source
def OverFeat(order, cudnn_ws, mkl):
    """Build an OverFeat benchmark model; returns ``(model, input_size)``.

    NOTE(review): reconstructed from a mangled source. conv2 (96->256, k5),
    the fc dimensions (1024*6*6 -> 3072 -> 4096 -> 1000), and the pool
    kernel/stride values are visible; the remaining conv arguments follow
    the standard OverFeat layout -- TODO confirm against the original file.
    """
    my_arg_scope = {'order': order, 'use_cudnn': True,
                    'cudnn_exhaustive_search': True,
                    'ws_nbytes_limit': str(cudnn_ws)}
    model = ModelHelper(name='overfeat', arg_scope=my_arg_scope)
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,       # TODO confirm conv1/conv3/conv4/conv5 args (not visible)
        96,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model, pool1, "conv2", 96, 256, 5, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model,
        pool2,
        "conv3",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model,
        relu3,
        "conv4",
        512,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = brew.relu(model, conv4, "conv4")
    conv5 = brew.conv(
        model,
        relu4,
        "conv5",
        1024,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = brew.relu(model, conv5, "conv5")
    pool5 = brew.max_pool(model, relu5, "pool5", kernel=2, stride=2)
    fc6 = brew.fc(
        model, pool5, "fc6", 1024 * 6 * 6, 3072, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = brew.relu(model, fc6, "fc6")
    fc7 = brew.fc(
        model, relu6, "fc7", 3072, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu7 = brew.relu(model, fc7, "fc7")
    fc8 = brew.fc(
        model, relu7, "fc8", 4096, 1000, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    pred = brew.softmax(model, fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 231  # TODO confirm: input size not visible in source
def VGGA(order, cudnn_ws, mkl):
    """Build a VGG-A benchmark model; returns ``(model, input_size)``.

    NOTE(review): reconstructed from a mangled source. The relu/pool wiring,
    the fc dimensions (512*7*7 -> 4096 -> 4096 -> 1000), and the pool
    kernel/stride values are visible; the conv channel arguments follow the
    standard VGG-A layout -- TODO confirm against the original file.
    """
    my_arg_scope = {'order': order, 'use_cudnn': True,
                    'cudnn_exhaustive_search': True,
                    'ws_nbytes_limit': str(cudnn_ws)}
    model = ModelHelper(name='vgg-a', arg_scope=my_arg_scope)
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,       # TODO confirm conv args throughout (not visible in source)
        64,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
    conv2 = brew.conv(
        model,
        pool1,
        "conv2",
        64,
        128,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
    conv3 = brew.conv(
        model,
        pool2,
        "conv3",
        128,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = brew.relu(model, conv3, "conv3")
    conv4 = brew.conv(
        model,
        relu3,
        "conv4",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = brew.relu(model, conv4, "conv4")
    pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)
    conv5 = brew.conv(
        model,
        pool4,
        "conv5",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = brew.relu(model, conv5, "conv5")
    conv6 = brew.conv(
        model,
        relu5,
        "conv6",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu6 = brew.relu(model, conv6, "conv6")
    pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)
    conv7 = brew.conv(
        model,
        pool6,
        "conv7",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu7 = brew.relu(model, conv7, "conv7")
    conv8 = brew.conv(
        model,
        relu7,
        "conv8",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu8 = brew.relu(model, conv8, "conv8")
    pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)
    fcix = brew.fc(
        model, pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    reluix = brew.relu(model, fcix, "fcix")
    fcx = brew.fc(
        model, reluix, "fcx", 4096, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relux = brew.relu(model, fcx, "fcx")
    fcxi = brew.fc(
        model, relux, "fcxi", 4096, 1000, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    pred = brew.softmax(model, fcxi, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 231  # TODO confirm: input size not visible in source
def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    """Build one GoogLeNet inception module and return the concat output.

    Four parallel paths over ``input_blob`` (1x1 conv; 1x1 reduce + 3x3
    conv; 1x1 reduce + 5x5 conv; max-pool + 1x1 projection) are concatenated
    along the channel axis into ``output_name``.

    NOTE(review): the 3x3/5x5 kernel+pad values and the pool parameters were
    not visible in the mangled source and follow the standard inception
    layout -- TODO confirm against the original file.
    """
    # path 1: 1x1 conv
    conv1 = brew.conv(
        model, input_blob, output_name + ":conv1", input_depth, conv1_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    conv1 = brew.relu(model, conv1, conv1)
    # path 2: 1x1 reduce followed by 3x3 conv
    conv3_reduce = brew.conv(
        model, input_blob, output_name + ":conv3_reduce", input_depth,
        conv3_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv3_reduce = brew.relu(model, conv3_reduce, conv3_reduce)
    conv3 = brew.conv(
        model,
        conv3_reduce,
        output_name + ":conv3",
        conv3_depths[0],
        conv3_depths[1],
        3,      # TODO confirm kernel/pad (not visible in source)
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    conv3 = brew.relu(model, conv3, conv3)
    # path 3: 1x1 reduce followed by 5x5 conv
    conv5_reduce = brew.conv(
        model, input_blob, output_name + ":conv5_reduce", input_depth,
        conv5_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv5_reduce = brew.relu(model, conv5_reduce, conv5_reduce)
    conv5 = brew.conv(
        model,
        conv5_reduce,
        output_name + ":conv5",
        conv5_depths[0],
        conv5_depths[1],
        5,      # TODO confirm kernel/pad (not visible in source)
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    conv5 = brew.relu(model, conv5, conv5)
    # path 4: max-pool followed by 1x1 projection
    pool = brew.max_pool(
        model,
        input_blob,
        output_name + ":pool",
        kernel=3,   # TODO confirm pool params (not visible in source)
        stride=1,
        pad=1
    )
    pool_proj = brew.conv(
        model, pool, output_name + ":pool_proj", input_depth, pool_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    pool_proj = brew.relu(model, pool_proj, pool_proj)
    output = brew.concat(model, [conv1, conv3, conv5, pool_proj], output_name)
    return output
def Inception(order, cudnn_ws, mkl):
    """Build a GoogLeNet/Inception benchmark model.

    Returns ``(model, input_size)``. The inception-module depths and all
    pool parameters are visible in the source; the stem conv1/conv2
    arguments are reconstructed -- TODO confirm against the original file.
    """
    my_arg_scope = {'order': order, 'use_cudnn': True,
                    'cudnn_exhaustive_search': True,
                    'ws_nbytes_limit': str(cudnn_ws)}
    model = ModelHelper(name="inception", arg_scope=my_arg_scope)
    conv1 = brew.conv(
        model,
        "data",
        "conv1",
        3,      # TODO confirm conv1 args (not visible in source)
        64,
        7,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=2,
        pad=3
    )
    relu1 = brew.relu(model, conv1, "conv1")
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = brew.conv(
        model, pool1, "conv2a", 64, 64, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    conv2a = brew.relu(model, conv2a, conv2a)
    conv2 = brew.conv(
        model,
        conv2a,
        "conv2",
        64,     # TODO confirm conv2 args (not visible in source)
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = brew.relu(model, conv2, "conv2")
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2, pad=1)
    # Stacked inception modules; depths as visible in the source.
    inc3 = _InceptionModule(
        model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
    )
    inc4 = _InceptionModule(
        model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
    )
    pool5 = brew.max_pool(model, inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(
        model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
    )
    inc6 = _InceptionModule(
        model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
    )
    inc7 = _InceptionModule(
        model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
    )
    inc8 = _InceptionModule(
        model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
    )
    inc9 = _InceptionModule(
        model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
    )
    pool9 = brew.max_pool(model, inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(
        model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
    )
    inc11 = _InceptionModule(
        model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
    )
    pool11 = brew.average_pool(model, inc11, "pool11", kernel=7, stride=1)
    fc = brew.fc(
        model, pool11, "fc", 1024, 1000,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    # Softmax + loss on top so the backward pass is well defined.
    pred = brew.softmax(model, fc, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss(xent, "loss")
    return model, 224  # TODO confirm: input size not visible in source
549 """ Simple plain SGD update -- not tuned to actually train the models """ 550 ITER = brew.iter(model,
"iter")
551 LR = model.LearningRate(
552 ITER,
"LR", base_lr=-1e-8, policy=
"step", stepsize=10000, gamma=0.999)
553 ONE = model.param_init_net.ConstantFill([],
"ONE", shape=[1], value=1.0)
554 for param
in model.params:
555 param_grad = model.param_to_grad[param]
556 model.WeightedSum([param, ONE, param_grad, LR], param)
def Benchmark(model_gen, arg):
    """Instantiate a model from ``model_gen`` and time it with BenchmarkNet.

    ``arg`` is the parsed argparse namespace. Feeds random data/label blobs,
    optionally adds gradients and a parameter update, moves nets to
    GPU/MKL, optionally dumps protos, then runs the benchmark.

    NOTE(review): reconstructed from a mangled source; the branch conditions
    (forward_only / mkl / cpu / engine / dump_model) are inferred from the
    visible statements they guard -- TODO confirm against the original.
    """
    model, input_size = model_gen(arg.order, arg.cudnn_ws, arg.mkl)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # Feed synthetic input so the benchmark needs no dataset.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    if arg.model == "MLP":
        input_shape = [arg.batch_size, input_size]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,  # TODO confirm fill params (not visible in source)
        std=1.0,
    )

    if arg.mkl:
        # MKL path: feed an int32 numpy label directly into the workspace.
        label = np.random.randint(low=0, high=1000, size=(arg.batch_size,)).astype(np.int32)
        workspace.FeedBlob("label", label)
    else:
        model.param_init_net.UniformIntFill(
            [],
            "label",
            shape=[arg.batch_size, ],
            min=0,    # TODO confirm fill params (not visible in source)
            max=999,
        )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        if arg.mkl:
            print('forward-backward not supported yet in MKL, so exiting')
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)
        if arg.order == 'NHWC':
            print(
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.'  # TODO confirm full warning text
            )

    if not arg.cpu:
        if arg.mkl:
            model.param_init_net.RunAllOnMKL()
            model.net.RunAllOnMKL()
        else:
            model.param_init_net.RunAllOnGPU()
            model.net.RunAllOnGPU()

    if arg.engine:
        for op in model.net.Proto().op:
            op.engine = arg.engine

    if arg.dump_model:
        # Dump the nets as pbtxt (e.g. for running benchmarks on-device).
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        # NOTE(review): the format string below has one placeholder but two
        # args, so batch_size is silently dropped -- preserved verbatim from
        # the source (str.format ignores extra positional args).
        with open("{0}.pbtxt".format(arg.model, arg.batch_size), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    workspace.BenchmarkNet(
        model.net.Proto().name, arg.warmup_iterations, arg.iterations,
        arg.layer_wise_benchmark)
def GetArgumentParser():
    """Build the argparse parser for the benchmark CLI.

    NOTE(review): reconstructed from a mangled source. Option names, help
    strings, and the defaults for --net_type/--num_workers/--use-nvtx are
    visible; other defaults and flag styles are marked TODO. The module
    docstring's usage "--forward_only True" hints the original may have
    used `type=bool` for the boolean options rather than `store_true` --
    TODO confirm against the original file.
    """
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    parser.add_argument(
        "--batch_size",
        type=int,
        help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order",
        type=str,
        default="NCHW",  # TODO confirm default (not visible in source)
        help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws",
        type=int,
        help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=10,  # TODO confirm default (not visible in source)
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations",
        type=int,
        default=10,  # TODO confirm default (not visible in source)
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only",
        action='store_true',  # TODO confirm flag style (see docstring note)
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark",
        action='store_true',  # TODO confirm flag style
        help="If True, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu",
        action='store_true',  # TODO confirm flag style
        help="If True, run testing on CPU instead of GPU."
    )
    parser.add_argument(
        "--mkl",
        action='store_true',  # TODO confirm flag style
        help="If True, run testing on CPU-MKL instead of GPU."
    )
    parser.add_argument(
        "--engine",
        type=str,
        default="",  # TODO confirm default (not visible in source)
        help="If set, blindly prefer the given engine(s) for every op.")
    parser.add_argument(
        "--dump_model",
        action='store_true',  # TODO confirm flag style
        help="If True, dump the model prototxts to disk."
    )
    parser.add_argument("--net_type", type=str, default="simple")
    parser.add_argument("--num_workers", type=int, default=2)
    parser.add_argument("--use-nvtx", default=False, action='store_true')
    parser.add_argument("--htrace_span_log_path", type=str)
    return parser
if __name__ == '__main__':
    args, extra_args = GetArgumentParser().parse_known_args()
    # batch_size, model, and order are all required to run a benchmark.
    if (
        not args.batch_size or not args.model or not args.order
    ):
        GetArgumentParser().print_help()
    else:
        # Forward nvtx/htrace options to caffe2's global init as flags.
        workspace.GlobalInit(
            ['caffe2', '--caffe2_log_level=0'] + extra_args +
            (['--caffe2_use_nvtx'] if args.use_nvtx else []) +
            (['--caffe2_htrace_span_log_path=' + args.htrace_span_log_path]
             if args.htrace_span_log_path else []))
        model_map = {
            'AlexNet': AlexNet,      # TODO confirm: entry not visible
            'OverFeat': OverFeat,
            'VGGA': VGGA,            # TODO confirm: entry not visible
            'Inception': Inception,
            'ResNet50': ResNet50,
            'MLP': MLP,              # TODO confirm: entry not visible
        }
        Benchmark(model_map[args.model], args)
# NOTE(review): a stray dangling fragment "def AddParameterUpdate(model)"
# sat at the end of the mangled source; the full definition belongs above
# Benchmark, so the fragment is dropped here.