Caffe2 - Python API
A deep learning, cross platform ML framework
convnet_benchmarks.py
## @package convnet_benchmarks
# Module caffe2.experiments.python.convnet_benchmarks
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
7 """
8 Benchmark for common convnets.
9 
10 (NOTE: Numbers below prior with missing parameter=update step, TODO to update)
11 
12 Speed on Titan X, with 10 warmup steps and 10 main steps and with different
13 versions of cudnn, are as follows (time reported below is per-batch time,
14 forward / forward+backward):
15 
16  CuDNN V3 CuDNN v4
17  AlexNet 32.5 / 108.0 27.4 / 90.1
18  OverFeat 113.0 / 342.3 91.7 / 276.5
19  Inception 134.5 / 485.8 125.7 / 450.6
20  VGG (batch 64) 200.8 / 650.0 164.1 / 551.7
21 
22 Speed on Inception with varied batch sizes and CuDNN v4 is as follows:
23 
24 Batch Size Speed per batch Speed per image
25 16 22.8 / 72.7 1.43 / 4.54
26 32 38.0 / 127.5 1.19 / 3.98
27 64 67.2 / 233.6 1.05 / 3.65
28 128 125.7 / 450.6 0.98 / 3.52
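
(Per-image time is simply per-batch time divided by batch size; for
example, 125.7 / 128 ~= 0.98 for the forward pass at batch 128. Times are
assumed to be in milliseconds, matching the convention of the original
convnet-benchmarks suite.)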

Speed on Tesla M40, with 10 warmup steps and 10 main steps and with cudnn
v4, is as follows:

    AlexNet         68.4 /  218.1
    OverFeat       210.5 /  630.3
    Inception      300.2 / 1122.2
    VGG (batch 64) 405.8 / 1327.7

(Note that these numbers involve a "full" backprop, i.e. the gradient
with respect to the input image is also computed.)

To get the numbers, simply run:

    for MODEL in AlexNet OverFeat Inception; do
        PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
            --batch_size 128 --model $MODEL --forward_only
    done
    for MODEL in AlexNet OverFeat Inception; do
        PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
            --batch_size 128 --model $MODEL
    done
    PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
        --batch_size 64 --model VGGA --forward_only
    PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
        --batch_size 64 --model VGGA

    for BS in 16 32 64 128; do
        PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
            --batch_size $BS --model Inception --forward_only
        PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
            --batch_size $BS --model Inception
    done

(--forward_only is a flag and takes no value, since it is declared with
action='store_true' below.)

Note that VGG needs to be run at batch 64 due to the memory limit on the
backward pass.
"""

import argparse
import time

from caffe2.python import cnn, workspace, core

import caffe2.python.SparseTransformer as SparseTransformer


def MLP(order):
    model = cnn.CNNModelHelper()
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            model.FC(
                current, next_,
                dim_in=d, dim_out=d,
                weight_init=model.XavierInit,
                bias_init=model.XavierInit)
    model.Sum(["fc_{}_{}".format(depth, j)
               for j in range(width)], ["sum"])
    model.FC("sum", "last",
             dim_in=d, dim_out=1000,
             weight_init=model.XavierInit,
             bias_init=model.XavierInit)
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, d
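
# Each model generator returns a pair (model, input_size). For MLP,
# input_size is the feature dimension d; for the convnets below it is the
# input image side length, which Benchmark() uses to synthesize data of
# shape [batch_size, 3, size, size] (in NCHW order). MLP itself is a
# depth x width grid of 256-d FC layers: `width` parallel stacks of
# `depth` layers, summed before the final 1000-way classifier.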


def AlexNet(order):
    model = cnn.CNNModelHelper(order, name="alexnet",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4,
        pad=2
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        192,
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        192,
        384,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        384,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224


def OverFeat(order):
    model = cnn.CNNModelHelper(order, name="overfeat",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        96,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    conv2 = model.Conv(
        pool1, "conv2", 96, 256, 5, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=2, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        512,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        1024,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=2, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 1024 * 6 * 6, 3072, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 3072, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231


def VGGA(order):
    model = cnn.CNNModelHelper(order, name='vgg-a',
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        128,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=2, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        128,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    pool4 = model.MaxPool(relu4, "pool4", kernel=2, stride=2)
    conv5 = model.Conv(
        pool4,
        "conv5",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    conv6 = model.Conv(
        relu5,
        "conv6",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu6 = model.Relu(conv6, "conv6")
    pool6 = model.MaxPool(relu6, "pool6", kernel=2, stride=2)
    conv7 = model.Conv(
        pool6,
        "conv7",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu7 = model.Relu(conv7, "conv7")
    conv8 = model.Conv(
        relu7,
        "conv8",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu8 = model.Relu(conv8, "conv8")
    pool8 = model.MaxPool(relu8, "pool8", kernel=2, stride=2)

    fcix = model.FC(
        pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    reluix = model.Relu(fcix, "fcix")
    fcx = model.FC(
        reluix, "fcx", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relux = model.Relu(fcx, "fcx")
    fcxi = model.FC(
        relux, "fcxi", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fcxi, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231


def net_DAG_Builder(model):
    print("====================================================")
    print("               Start Building DAG                   ")
    print("====================================================")
    net_root = SparseTransformer.netbuilder(model)
    return net_root


def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    # path 1: 1x1 conv
    conv1 = model.Conv(
        input_blob, output_name + ":conv1", input_depth, conv1_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    conv1 = model.Relu(conv1, conv1)
    # path 2: 1x1 conv + 3x3 conv
    conv3_reduce = model.Conv(
        input_blob, output_name + ":conv3_reduce", input_depth,
        conv3_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv3_reduce = model.Relu(conv3_reduce, conv3_reduce)
    conv3 = model.Conv(
        conv3_reduce,
        output_name + ":conv3",
        conv3_depths[0],
        conv3_depths[1],
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    conv3 = model.Relu(conv3, conv3)
    # path 3: 1x1 conv + 5x5 conv
    conv5_reduce = model.Conv(
        input_blob, output_name + ":conv5_reduce", input_depth,
        conv5_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv5_reduce = model.Relu(conv5_reduce, conv5_reduce)
    conv5 = model.Conv(
        conv5_reduce,
        output_name + ":conv5",
        conv5_depths[0],
        conv5_depths[1],
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    conv5 = model.Relu(conv5, conv5)
    # path 4: pool + 1x1 conv
    pool = model.MaxPool(
        input_blob,
        output_name + ":pool",
        kernel=3,
        stride=1,
        pad=1
    )
    pool_proj = model.Conv(
        pool, output_name + ":pool_proj", input_depth, pool_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    pool_proj = model.Relu(pool_proj, pool_proj)
    output = model.Concat([conv1, conv3, conv5, pool_proj], output_name)
    return output
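
# The depth of an inception module's output is the sum of its four branch
# depths: conv1_depth + conv3_depths[1] + conv5_depths[1] + pool_depth.
# That is what lets the modules below chain together: e.g. inc3 produces
# 64 + 128 + 32 + 32 = 256 channels, which is exactly the input_depth
# that inc4 declares.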


def Inception(order):
    model = cnn.CNNModelHelper(order, name="inception",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        7,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=2,
        pad=3
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = model.Conv(
        pool1, "conv2a", 64, 64, 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv2a = model.Relu(conv2a, conv2a)
    conv2 = model.Conv(
        conv2a,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2, pad=1)
    # Inception modules
    inc3 = _InceptionModule(
        model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
    )
    inc4 = _InceptionModule(
        model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
    )
    pool5 = model.MaxPool(inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(
        model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
    )
    inc6 = _InceptionModule(
        model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
    )
    inc7 = _InceptionModule(
        model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
    )
    inc8 = _InceptionModule(
        model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
    )
    inc9 = _InceptionModule(
        model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
    )
    pool9 = model.MaxPool(inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(
        model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
    )
    inc11 = _InceptionModule(
        model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
    )
    pool11 = model.AveragePool(inc11, "pool11", kernel=7, stride=1)
    fc = model.FC(
        pool11, "fc", 1024, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We will add it anyway so we can have a proper
    # backward pass.
    pred = model.Softmax(fc, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224


def AddInput(model, batch_size, db, db_type):
    """Adds the data input part."""
    data_uint8, label = model.TensorProtosDBInput(
        [], ["data_uint8", "label"], batch_size=batch_size,
        db=db, db_type=db_type
    )
    data = model.Cast(data_uint8, "data_nhwc", to=core.DataType.FLOAT)
    data = model.NHWC2NCHW(data, "data")
    data = model.Scale(data, data, scale=float(1. / 256))
    data = model.StopGradient(data, data)
    return data, label
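
# AddInput is not used by Benchmark(), which feeds synthetic Gaussian data
# instead; it is kept around for running the same nets against a real
# database. A minimal sketch of how it might be wired up, assuming an LMDB
# at the hypothetical path /data/imagenet-train:
#
#     model = cnn.CNNModelHelper(order="NCHW", name="train")
#     data, label = AddInput(model, batch_size=128,
#                            db="/data/imagenet-train", db_type="lmdb")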


def AddParameterUpdate(model):
    """Simple plain SGD update -- not tuned to actually train the models."""
    ITER = model.Iter("iter")
    LR = model.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)
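
# The WeightedSum above computes param <- 1.0 * param + LR * param_grad in
# place; since base_lr is negative, adding LR * grad steps the parameters
# down the gradient, i.e. plain SGD.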


def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as
    # well.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    if arg.model == "MLP":
        input_shape = [arg.batch_size, input_size]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,
        std=1.0
    )
    model.param_init_net.UniformIntFill(
        [],
        "label",
        shape=[arg.batch_size, ],
        min=0,
        max=999
    )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)

    if arg.order == 'NHWC':
        print(
            '==WARNING==\n'
            'NHWC order with CuDNN may not be supported yet, so I might\n'
            'exit suddenly.'
        )

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.dump_model:
        # Writes out the pbtxt for benchmarks on e.g. Android
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        with open("{0}.pbtxt".format(arg.model), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    for i in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("run", model.net, arg.iterations))
    start = time.time()
    workspace.RunPlan(plan)
    print('Spent: {}'.format((time.time() - start) / arg.iterations))
    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)
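
# Benchmark() can also be driven programmatically rather than from the
# shell; a small sketch (argparse accepts an explicit argv list):
#
#     args = GetArgumentParser().parse_args(
#         ["--model", "AlexNet", "--batch_size", "64", "--forward_only"])
#     Benchmark(AlexNet, args)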


def GetArgumentParser():
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=128,
        help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order",
        type=str,
        default="NCHW",
        help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws",
        type=int,
        default=-1,
        help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=10,
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations",
        type=int,
        default=10,
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only",
        action='store_true',
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark",
        action='store_true',
        help="If set, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu",
        action='store_true',
        help="If set, run testing on CPU instead of GPU."
    )
    parser.add_argument(
        "--dump_model",
        action='store_true',
        help="If set, dump the model prototxts to disk."
    )
    parser.add_argument("--net_type", type=str, default="dag")
    parser.add_argument("--num_workers", type=int, default=2)
    return parser


if __name__ == '__main__':
    args = GetArgumentParser().parse_args()
    if (
        not args.batch_size or not args.model or not args.order or
        not args.cudnn_ws
    ):
        GetArgumentParser().print_help()
    else:
        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        model_map = {
            'AlexNet': AlexNet,
            'OverFeat': OverFeat,
            'VGGA': VGGA,
            'Inception': Inception,
            'MLP': MLP,
        }
        Benchmark(model_map[args.model], args)