# Training configuration for a grid-based detection network:
# 448x448x3 input, 7x7 cell grid, 2 boxes per cell, 20 classes.
# One key per line; comments always stand on their own line.

[net]
# number of images per batch (the loss is averaged over one batch)
batch=1
# number of sub-batches each batch is split into; the images of one
# sub-batch are processed in parallel
subdivisions=1

# input size of the network
height=448
width=448
channels=3

# learning parameters
momentum=0.9
decay=0.0005
b_debug=0

# base learning rate
learning_rate=0.001
# the learning rate is changed once each of the listed steps is reached
policy=steps
# "steps" and "scales" must have the same number of entries
steps=200,400,600,20000,30000
# factor the current learning rate is multiplied with at the matching step
scales=2.5,2,2,.1,.1

# maximum number of "iterations"
max_batches=40000

# snapshot the learned weights after every k "iterations"
i_snapshot_iteration=1000

# NOTE(review): the collapsed source does not show whether the following key
# was active or commented out; it is kept disabled here -- confirm against
# the original file.
# c_ending_gt_files=.txt

#######
# The following layers are identical to the first layers of the
# classification network specified in extraction.cfg, so the filters learned
# by that network can initialize the training of this detection network.
#######

[convolutional]
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
filters=1024
size=3
stride=1
pad=1
activation=leaky

#######
# End of the part copied from extraction.cfg (that network only appends
# 1x avgpool, 1x fully connected and a softmax afterwards).
#
# Start of the part added specifically for the detection network:
#   4 conv layers to learn "detection-specific" high-level patterns
#   2 fully connected layers
#   1 detection layer (basically also fully connected, but its outputs
#     have a specific interpretation)
#######

[convolutional]
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=3
stride=2
pad=1
filters=1024
activation=leaky

[convolutional]
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=3
stride=1
pad=1
filters=1024
activation=leaky

[connected]
output=4096
activation=leaky

[connected]
# output must equal side*side*((1 + l.coords)*l.n + l.classes), using the
# values set in the [detection] section below:
#   l.coords -- 4 coordinates to specify a bounding box
#   l.n      -- how many bounding boxes are predicted from each cell
# e.g., 20 classes, side 7 -> 1470
# e.g., 20 classes, side 9 -> 2430
output=1470
activation=linear

[detection]
# number of classes (l.classes); 20 for Pascal VOC
classes=20
# parameters per bounding box (l.coords)
coords=4
rescore=1
b_debug=1
# number of cells in x and y direction
side=7
# number of predicted boxes per cell (l.n)
num=2
softmax=0
# adapt the loss function on bounding box width and height values
sqrt=1
jitter=.2

# relative weights for the combined loss function
object_scale=1
noobject_scale=.5
class_scale=1
coord_scale=5