# Python 2/3 compatibility imports; must precede all other statements.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
8 Benchmark for common convnets. 10 (NOTE: Numbers below prior with missing parameter=update step, TODO to update) 12 Speed on Titan X, with 10 warmup steps and 10 main steps and with different 13 versions of cudnn, are as follows (time reported below is per-batch time, 14 forward / forward+backward): 17 AlexNet 32.5 / 108.0 27.4 / 90.1 18 OverFeat 113.0 / 342.3 91.7 / 276.5 19 Inception 134.5 / 485.8 125.7 / 450.6 20 VGG (batch 64) 200.8 / 650.0 164.1 / 551.7 22 Speed on Inception with varied batch sizes and CuDNN v4 is as follows: 24 Batch Size Speed per batch Speed per image 25 16 22.8 / 72.7 1.43 / 4.54 26 32 38.0 / 127.5 1.19 / 3.98 27 64 67.2 / 233.6 1.05 / 3.65 28 128 125.7 / 450.6 0.98 / 3.52 30 Speed on Tesla M40, which 10 warmup steps and 10 main steps and with cudnn 34 OverFeat 210.5 / 630.3 35 Inception 300.2 / 1122.2 36 VGG (batch 64) 405.8 / 1327.7 38 (Note that these numbers involve a "full" backprop, i.e. the gradient 39 with respect to the input image is also computed.) 41 To get the numbers, simply run: 43 for MODEL in AlexNet OverFeat Inception; do 44 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 45 --batch_size 128 --model $MODEL --forward_only True 47 for MODEL in AlexNet OverFeat Inception; do 48 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 49 --batch_size 128 --model $MODEL 51 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 52 --batch_size 64 --model VGGA --forward_only True 53 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 54 --batch_size 64 --model VGGA 56 for BS in 16 32 64 128; do 57 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 58 --batch_size $BS --model Inception --forward_only True 59 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 60 --batch_size $BS --model Inception 63 Note that VGG needs to be run at batch 64 due to memory limit on the backward 72 import caffe2.python.SparseTransformer
as SparseTransformer
76 model = cnn.CNNModelHelper()
80 for i
in range(depth):
81 for j
in range(width):
82 current =
"fc_{}_{}".format(i, j)
if i > 0
else "data" 83 next_ =
"fc_{}_{}".format(i + 1, j)
87 weight_init=model.XavierInit,
88 bias_init=model.XavierInit)
89 model.Sum([
"fc_{}_{}".format(depth, j)
90 for j
in range(width)], [
"sum"])
91 model.FC(
"sum",
"last",
92 dim_in=d, dim_out=1000,
93 weight_init=model.XavierInit,
94 bias_init=model.XavierInit)
95 xent = model.LabelCrossEntropy([
"last",
"label"],
"xent")
96 model.AveragedLoss(xent,
"loss")
101 model = cnn.CNNModelHelper(order, name=
"alexnet",
102 use_cudnn=
True, cudnn_exhaustive_search=
True)
110 (
'ConstantFill', {}),
115 relu1 = model.Relu(conv1,
"conv1")
116 pool1 = model.MaxPool(relu1,
"pool1", kernel=3, stride=2)
124 (
'ConstantFill', {}),
127 relu2 = model.Relu(conv2,
"conv2")
128 pool2 = model.MaxPool(relu2,
"pool2", kernel=3, stride=2)
136 (
'ConstantFill', {}),
139 relu3 = model.Relu(conv3,
"conv3")
147 (
'ConstantFill', {}),
150 relu4 = model.Relu(conv4,
"conv4")
158 (
'ConstantFill', {}),
161 relu5 = model.Relu(conv5,
"conv5")
162 pool5 = model.MaxPool(relu5,
"pool5", kernel=3, stride=2)
164 pool5,
"fc6", 256 * 6 * 6, 4096, (
'XavierFill', {}),
167 relu6 = model.Relu(fc6,
"fc6")
169 relu6,
"fc7", 4096, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
171 relu7 = model.Relu(fc7,
"fc7")
173 relu7,
"fc8", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
175 pred = model.Softmax(fc8,
"pred")
176 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
177 model.AveragedLoss(xent,
"loss")
182 model = cnn.CNNModelHelper(order, name=
"overfeat",
183 use_cudnn=
True, cudnn_exhaustive_search=
True)
191 (
'ConstantFill', {}),
194 relu1 = model.Relu(conv1,
"conv1")
195 pool1 = model.MaxPool(relu1,
"pool1", kernel=2, stride=2)
197 pool1,
"conv2", 96, 256, 5, (
'XavierFill', {}), (
'ConstantFill', {})
199 relu2 = model.Relu(conv2,
"conv2")
200 pool2 = model.MaxPool(relu2,
"pool2", kernel=2, stride=2)
208 (
'ConstantFill', {}),
211 relu3 = model.Relu(conv3,
"conv3")
219 (
'ConstantFill', {}),
222 relu4 = model.Relu(conv4,
"conv4")
230 (
'ConstantFill', {}),
233 relu5 = model.Relu(conv5,
"conv5")
234 pool5 = model.MaxPool(relu5,
"pool5", kernel=2, stride=2)
236 pool5,
"fc6", 1024 * 6 * 6, 3072, (
'XavierFill', {}),
239 relu6 = model.Relu(fc6,
"fc6")
241 relu6,
"fc7", 3072, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
243 relu7 = model.Relu(fc7,
"fc7")
245 relu7,
"fc8", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
247 pred = model.Softmax(fc8,
"pred")
248 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
249 model.AveragedLoss(xent,
"loss")
254 model = cnn.CNNModelHelper(order, name=
'vgg-a',
255 use_cudnn=
True, cudnn_exhaustive_search=
True)
263 (
'ConstantFill', {}),
266 relu1 = model.Relu(conv1,
"conv1")
267 pool1 = model.MaxPool(relu1,
"pool1", kernel=2, stride=2)
275 (
'ConstantFill', {}),
278 relu2 = model.Relu(conv2,
"conv2")
279 pool2 = model.MaxPool(relu2,
"pool2", kernel=2, stride=2)
287 (
'ConstantFill', {}),
290 relu3 = model.Relu(conv3,
"conv3")
298 (
'ConstantFill', {}),
301 relu4 = model.Relu(conv4,
"conv4")
302 pool4 = model.MaxPool(relu4,
"pool4", kernel=2, stride=2)
310 (
'ConstantFill', {}),
313 relu5 = model.Relu(conv5,
"conv5")
321 (
'ConstantFill', {}),
324 relu6 = model.Relu(conv6,
"conv6")
325 pool6 = model.MaxPool(relu6,
"pool6", kernel=2, stride=2)
333 (
'ConstantFill', {}),
336 relu7 = model.Relu(conv7,
"conv7")
344 (
'ConstantFill', {}),
347 relu8 = model.Relu(conv8,
"conv8")
348 pool8 = model.MaxPool(relu8,
"pool8", kernel=2, stride=2)
351 pool8,
"fcix", 512 * 7 * 7, 4096, (
'XavierFill', {}),
354 reluix = model.Relu(fcix,
"fcix")
356 reluix,
"fcx", 4096, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
358 relux = model.Relu(fcx,
"fcx")
360 relux,
"fcxi", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
362 pred = model.Softmax(fcxi,
"pred")
363 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
364 model.AveragedLoss(xent,
"loss")
368 def net_DAG_Builder(model):
369 print(
"====================================================")
370 print(
" Start Building DAG ")
371 print(
"====================================================")
376 def _InceptionModule(
377 model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
378 conv5_depths, pool_depth
382 input_blob, output_name +
":conv1", input_depth, conv1_depth, 1,
383 (
'XavierFill', {}), (
'ConstantFill', {})
385 conv1 = model.Relu(conv1, conv1)
387 conv3_reduce = model.Conv(
388 input_blob, output_name +
389 ":conv3_reduce", input_depth, conv3_depths[0],
390 1, (
'XavierFill', {}), (
'ConstantFill', {})
392 conv3_reduce = model.Relu(conv3_reduce, conv3_reduce)
395 output_name +
":conv3",
400 (
'ConstantFill', {}),
403 conv3 = model.Relu(conv3, conv3)
405 conv5_reduce = model.Conv(
406 input_blob, output_name +
407 ":conv5_reduce", input_depth, conv5_depths[0],
408 1, (
'XavierFill', {}), (
'ConstantFill', {})
410 conv5_reduce = model.Relu(conv5_reduce, conv5_reduce)
413 output_name +
":conv5",
418 (
'ConstantFill', {}),
421 conv5 = model.Relu(conv5, conv5)
423 pool = model.MaxPool(
425 output_name +
":pool",
430 pool_proj = model.Conv(
431 pool, output_name +
":pool_proj", input_depth, pool_depth, 1,
432 (
'XavierFill', {}), (
'ConstantFill', {})
434 pool_proj = model.Relu(pool_proj, pool_proj)
435 output = model.Concat([conv1, conv3, conv5, pool_proj], output_name)
439 def Inception(order):
440 model = cnn.CNNModelHelper(order, name=
"inception",
441 use_cudnn=
True, cudnn_exhaustive_search=
True)
449 (
'ConstantFill', {}),
453 relu1 = model.Relu(conv1,
"conv1")
454 pool1 = model.MaxPool(relu1,
"pool1", kernel=3, stride=2, pad=1)
456 pool1,
"conv2a", 64, 64, 1, (
'XavierFill', {}), (
'ConstantFill', {})
458 conv2a = model.Relu(conv2a, conv2a)
466 (
'ConstantFill', {}),
469 relu2 = model.Relu(conv2,
"conv2")
470 pool2 = model.MaxPool(relu2,
"pool2", kernel=3, stride=2, pad=1)
472 inc3 = _InceptionModule(
473 model, pool2, 192,
"inc3", 64, [96, 128], [16, 32], 32
475 inc4 = _InceptionModule(
476 model, inc3, 256,
"inc4", 128, [128, 192], [32, 96], 64
478 pool5 = model.MaxPool(inc4,
"pool5", kernel=3, stride=2, pad=1)
479 inc5 = _InceptionModule(
480 model, pool5, 480,
"inc5", 192, [96, 208], [16, 48], 64
482 inc6 = _InceptionModule(
483 model, inc5, 512,
"inc6", 160, [112, 224], [24, 64], 64
485 inc7 = _InceptionModule(
486 model, inc6, 512,
"inc7", 128, [128, 256], [24, 64], 64
488 inc8 = _InceptionModule(
489 model, inc7, 512,
"inc8", 112, [144, 288], [32, 64], 64
491 inc9 = _InceptionModule(
492 model, inc8, 528,
"inc9", 256, [160, 320], [32, 128], 128
494 pool9 = model.MaxPool(inc9,
"pool9", kernel=3, stride=2, pad=1)
495 inc10 = _InceptionModule(
496 model, pool9, 832,
"inc10", 256, [160, 320], [32, 128], 128
498 inc11 = _InceptionModule(
499 model, inc10, 832,
"inc11", 384, [192, 384], [48, 128], 128
501 pool11 = model.AveragePool(inc11,
"pool11", kernel=7, stride=1)
503 pool11,
"fc", 1024, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
508 pred = model.Softmax(fc,
"pred")
509 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
510 model.AveragedLoss(xent,
"loss")
515 """Adds the data input part.""" 516 data_uint8, label = model.TensorProtosDBInput(
517 [], [
"data_uint8",
"label"], batch_size=batch_size,
518 db=db, db_type=db_type
520 data = model.Cast(data_uint8,
"data_nhwc", to=core.DataType.FLOAT)
521 data = model.NHWC2NCHW(data,
"data")
522 data = model.Scale(data, data, scale=float(1. / 256))
523 data = model.StopGradient(data, data)
528 """ Simple plain SGD update -- not tuned to actually train the models """ 529 ITER = model.Iter(
"iter")
530 LR = model.LearningRate(
531 ITER,
"LR", base_lr=-1e-8, policy=
"step", stepsize=10000, gamma=0.999)
532 ONE = model.param_init_net.ConstantFill([],
"ONE", shape=[1], value=1.0)
533 for param
in model.params:
534 param_grad = model.param_to_grad[param]
535 model.WeightedSum([param, ONE, param_grad, LR], param)
538 def Benchmark(model_gen, arg):
539 model, input_size = model_gen(arg.order)
540 model.Proto().type = arg.net_type
541 model.Proto().num_workers = arg.num_workers
546 if arg.order ==
"NCHW":
547 input_shape = [arg.batch_size, 3, input_size, input_size]
549 input_shape = [arg.batch_size, input_size, input_size, 3]
550 if arg.model ==
"MLP":
551 input_shape = [arg.batch_size, input_size]
553 model.param_init_net.GaussianFill(
560 model.param_init_net.UniformIntFill(
563 shape=[arg.batch_size, ],
569 print(
'{}: running forward only.'.format(arg.model))
571 print(
'{}: running forward-backward.'.format(arg.model))
572 model.AddGradientOperators([
"loss"])
575 if arg.order ==
'NHWC':
578 'NHWC order with CuDNN may not be supported yet, so I might\n' 583 model.param_init_net.RunAllOnGPU()
584 model.net.RunAllOnGPU()
589 "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size),
"w" 591 fid.write(str(model.param_init_net.Proto()))
592 with open(
"{0}.pbtxt".format(arg.model,
593 arg.batch_size),
"w")
as fid:
594 fid.write(str(model.net.Proto()))
596 workspace.RunNetOnce(model.param_init_net)
597 workspace.CreateNet(model.net)
598 for i
in range(arg.warmup_iterations):
599 workspace.RunNet(model.net.Proto().name)
601 plan = core.Plan(
"plan")
602 plan.AddStep(core.ExecutionStep(
"run", model.net, arg.iterations))
604 workspace.RunPlan(plan)
605 print(
'Spent: {}'.format((time.time() - start) / arg.iterations))
606 if arg.layer_wise_benchmark:
607 print(
'Layer-wise benchmark.')
608 workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations,
True)
611 def GetArgumentParser():
612 parser = argparse.ArgumentParser(description=
"Caffe2 benchmark.")
617 help=
"The batch size." 619 parser.add_argument(
"--model", type=str, help=
"The model to benchmark.")
624 help=
"The order to evaluate." 630 help=
"The cudnn workspace size." 636 help=
"Number of iterations to run the network." 639 "--warmup_iterations",
642 help=
"Number of warm-up iterations before benchmarking." 647 help=
"If set, only run the forward pass." 650 "--layer_wise_benchmark",
652 help=
"If True, run the layer-wise benchmark as well." 657 help=
"If True, run testing on CPU instead of GPU." 662 help=
"If True, dump the model prototxts to disk." 664 parser.add_argument(
"--net_type", type=str, default=
"dag")
665 parser.add_argument(
"--num_workers", type=int, default=2)
669 if __name__ ==
'__main__':
670 args = GetArgumentParser().parse_args()
672 not args.batch_size
or not args.model
or not args.order
or 675 GetArgumentParser().print_help()
677 workspace.GlobalInit([
'caffe2',
'--caffe2_log_level=0'])
680 'OverFeat': OverFeat,
682 'Inception': Inception,
685 Benchmark(model_map[args.model], args)
def AddInput(model, batch_size, db, db_type)
def AddParameterUpdate(model)