# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
# predictor and focal loss (a mobile version of Retinanet).
# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 22.2 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 90
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        # Matched and unmatched thresholds are equal, so no IoU band lies
        # between them.
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        # Same level range (3-7) as feature_extractor.fpn below.
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 320
        width: 320
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 128
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        share_prediction_tower: true
        use_depthwise: true
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_mobilenet_v2_fpn_keras'
      use_depthwise: true
      fpn {
        # Same level range (3-7) as the multiscale_anchor_generator above.
        min_level: 3
        max_level: 7
        additional_layer_depth: 128
      }
      min_depth: 16
      depth_multiplier: 1.0
      # Same hyperparameter values as the box predictor's conv_hyperparams.
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          random_normal_initializer {
            stddev: 0.01
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  # PATH_TO_BE_CONFIGURED is a placeholder prefix used for every path in this
  # file.
  fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/mobilenet_v2.ckpt-1"
  fine_tune_checkpoint_type: "classification"
  batch_size: 128
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  num_steps: 50000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .08
          # Equal to num_steps above.
          total_steps: 50000
          warmup_learning_rate: .026666
          warmup_steps: 1000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
  }
}