From 66fac57b2945c4f75aa7efebabf5e6ea12db29bf Mon Sep 17 00:00:00 2001 From: Lin Xie Date: Wed, 27 Feb 2019 07:32:27 +0800 Subject: [PATCH] Refine features of IE filters * Enable do inference with specific intervals * Change detection label index starting from 0 Change-Id: I619ba4fe301fba5b0fd077b8f6bb2d5c714f137e --- libavfilter/inference.c | 28 ++++++++++++++++++++-------- libavfilter/inference.h | 2 -- libavfilter/vf_inference_classify.c | 30 +++++++++++++++++------------- libavfilter/vf_inference_detect.c | 17 +++++++++-------- libavformat/iemetadataenc.c | 3 +-- 5 files changed, 47 insertions(+), 33 deletions(-) diff --git a/libavfilter/inference.c b/libavfilter/inference.c index e569620..20934fc 100644 --- a/libavfilter/inference.c +++ b/libavfilter/inference.c @@ -37,8 +37,6 @@ struct InferenceBaseContext { char *infer_type; int batch_size; - int every_nth_frame; - float threshold; DNNModule *module; DNNModel *model; @@ -186,14 +184,30 @@ void av_split(char *str, const char *delim, char **array, int *num, int max) if (!str || !delim || !array || !num) return; - p = strtok(str, delim); - while (p != NULL) { - array[i++] = p; + while (p = strtok(str, delim)) { + int j = 0; + char *s; + size_t end; + /* remove head blanks */ + while (p[j] == '\n' || p[j] == ' ') + j++; + + if (!p[j]) continue; + + /* remove tail blanks */ + s = p + j; + end = strlen(s) - 1; + while (s[end] == '\n' || s[end] == ' ') + s[end--] = '\0'; + + array[i++] = s; av_assert0 (i < max); - p = strtok(NULL, delim); + /* string is cached */ + str = NULL; } + *num = i; } @@ -285,8 +299,6 @@ int ff_inference_base_create(AVFilterContext *ctx, DNN_ERR_CHECK(ctx); s->batch_size = param->batch_size; - s->every_nth_frame = param->every_nth_frame; - s->threshold = param->threshold; s->preprocess = param->preprocess; ret = s->model->create_model(s->model->model); diff --git a/libavfilter/inference.h b/libavfilter/inference.h index 843e05c..33de54b 100644 --- a/libavfilter/inference.h +++ b/libavfilter/inference.h @@ -36,8 +36,6 @@ typedef struct InferenceParam { char *gpu_extension; int batch_size; - int every_nth_frame; - float threshold; // TODO: inputs attributes are different int input_layout; diff --git a/libavfilter/vf_inference_classify.c b/libavfilter/vf_inference_classify.c index cba547a..b2e435b 100644 --- a/libavfilter/vf_inference_classify.c +++ b/libavfilter/vf_inference_classify.c @@ -43,7 +43,6 @@ #define PI 3.1415926 #define MAX_MODEL_NUM 8 #define FACE_FEATURE_VECTOR_LEN 256 -#define THRESHOLD_RECOGNITION 70 static char string_age[] = "age"; static char string_gender[] = "gender"; @@ -65,11 +64,13 @@ typedef struct InferenceClassifyContext { char *model_file; char *feature_file; ///< binary feature file for face identification int feature_num; ///< identification face feature number + double feature_angle; ///< face identification threshold angle value int loaded_num; int backend_type; int device_type; int batch_size; + int frame_number; int every_nth_frame; void *priv[MAX_MODEL_NUM]; @@ -375,7 +376,7 @@ static int face_identify_result_process(AVFilterContext *ctx, angles[i] = acos((dot_product - 0.0001f) / (f->norm_std[i] * norm_feature)) / PI * 180.0; - if (angles[i] < THRESHOLD_RECOGNITION && angles[i] < min_angle) { + if (angles[i] < s->feature_angle && angles[i] < min_angle) { label_id = i; min_angle = angles[i]; } @@ -450,7 +451,6 @@ static av_cold int classify_init(AVFilterContext *ctx) p.backend_type = s->backend_type; p.device_type = s->device_type; p.batch_size = s->batch_size; - p.every_nth_frame = s->every_nth_frame; p.input_precision = DNN_DATA_PRECISION_U8; p.input_layout = DNN_DATA_LAYOUT_NCHW; p.input_is_image = 1; @@ -486,7 +486,6 @@ static av_cold int classify_init(AVFilterContext *ctx) n = fread(buffer, sizeof(buffer), 1, fp); fclose(fp); - buffer[strcspn(buffer, "\n")] = 0; av_split(buffer, ",", _labels, &labels_num, 100); larray = av_mallocz(sizeof(*larray)); @@ -565,6 +564,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) ClassifyArray *c_array; InferClassificationMeta *c_meta; + if (s->frame_number % s->every_nth_frame != 0) + goto done; + sd = av_frame_get_side_data(in, AV_FRAME_DATA_INFERENCE_DETECTION); if (!sd) goto done; @@ -640,6 +642,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } done: + s->frame_number++; return ff_filter_frame(outlink, in); fail: av_frame_free(&in); @@ -691,15 +694,16 @@ static av_cold int config_output(AVFilterLink *outlink) } static const AVOption inference_classify_options[] = { - { "dnn_backend", "DNN backend for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = DNN_INTEL_IE }, 0, 2, FLAGS, "engine" }, - { "model", "path to model files for network", OFFSET(model_file), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, - { "label", "labels for classify", OFFSET(labels), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, - { "name", "classify type names", OFFSET(names), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, - { "device", "running on device type", OFFSET(device_type), AV_OPT_TYPE_FLAGS, { .i64 = DNN_TARGET_DEVICE_CPU }, 0, 12, FLAGS }, - { "interval", "do infer every Nth frame", OFFSET(every_nth_frame), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 15, FLAGS }, - { "batch_size", "batch size per infer", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1024, FLAGS }, - { "feature_file", "registered face feature data", OFFSET(feature_file), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS, "face_identify" }, - { "feature_num", "registered face number", OFFSET(feature_num), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 1024, FLAGS, "face_identify" }, + { "dnn_backend", "DNN backend for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = DNN_INTEL_IE }, 0, 2, FLAGS, "engine" }, + { "model", "path to model files for network", OFFSET(model_file), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, + { "label", "labels for classify", OFFSET(labels), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, + { "name", "classify type names", OFFSET(names), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, + { "device", "running on device type", OFFSET(device_type), AV_OPT_TYPE_FLAGS, { .i64 = DNN_TARGET_DEVICE_CPU }, 0, 12, FLAGS }, + { "interval", "do infer every Nth frame", OFFSET(every_nth_frame), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1024, FLAGS }, + { "batch_size", "batch size per infer", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1024, FLAGS }, + { "feature_file", "registered face feature data", OFFSET(feature_file), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS, "face_identify" }, + { "feature_num", "registered face number", OFFSET(feature_num), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 1024, FLAGS, "face_identify" }, + { "identify_angle", "face identify threshold angle", OFFSET(feature_angle), AV_OPT_TYPE_DOUBLE, { .dbl = 70}, 0, 90, FLAGS, "face_identify" }, { NULL } }; diff --git a/libavfilter/vf_inference_detect.c b/libavfilter/vf_inference_detect.c index 73d77ab..624cb47 100644 --- a/libavfilter/vf_inference_detect.c +++ b/libavfilter/vf_inference_detect.c @@ -55,6 +55,7 @@ typedef struct InferenceDetectContext { int device_type; int batch_size; + int frame_number; int every_nth_frame; float threshold; @@ -63,7 +64,6 @@ typedef struct InferenceDetectContext { int input_is_image; char *name; - char *params; AVBufferRef *label_buf; } InferenceDetectContext; @@ -308,7 +308,6 @@ static av_cold int detect_init(AVFilterContext *ctx) n = fread(buffer, sizeof(buffer), 1, fp); fclose(fp); - buffer[strcspn(buffer, "\n")] = 0; av_split(buffer, ",", _labels, &labels_num, 100); larray = av_mallocz(sizeof(*larray)); @@ -329,8 +328,6 @@ static av_cold int detect_init(AVFilterContext *ctx) p.backend_type = s->backend_type; p.device_type = s->device_type; p.batch_size = s->batch_size; - p.every_nth_frame = s->every_nth_frame; - p.threshold = s->threshold; p.input_precision = DNN_DATA_PRECISION_U8; p.input_layout = DNN_DATA_LAYOUT_NCHW; p.input_is_image = 1; @@ -362,6 +359,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) AVFilterLink *outlink = inlink->dst->outputs[0]; InferTensorMeta tensor_meta = { }; + if (s->frame_number % s->every_nth_frame != 0) + goto done; + ret = ff_inference_base_filter_frame(s->base, in); if (ret < 0) goto fail; @@ -372,6 +372,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) detect_postprocess(ctx, &tensor_meta, in); +done: + s->frame_number++; return ff_filter_frame(outlink, in); fail: av_frame_free(&in); @@ -383,12 +385,11 @@ static const AVOption inference_detect_options[] = { { "model", "path to model file for network", OFFSET(model_file), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, { "device", "running on device type", OFFSET(device_type), AV_OPT_TYPE_FLAGS, { .i64 = DNN_TARGET_DEVICE_CPU }, 0, 12, FLAGS }, { "label", "label file path for detection", OFFSET(label_file), AV_OPT_TYPE_STRING, { .str = NULL}, 0, 0, FLAGS }, - { "interval", "detect every Nth frame", OFFSET(every_nth_frame), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 15, FLAGS}, - { "batch_size", "batch size per infer", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1024, FLAGS}, - { "threshold", "threshod to filter output data", OFFSET(threshold), AV_OPT_TYPE_FLOAT, { .dbl = 0.5}, 0, 1, FLAGS}, + { "interval", "detect every Nth frame", OFFSET(every_nth_frame), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1024, FLAGS}, + { "batch_size", "batch size per infer", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1024, FLAGS}, + { "threshold", "threshod to filter output data", OFFSET(threshold), AV_OPT_TYPE_FLOAT, { .dbl = 0.5}, 0, 1, FLAGS}, { "name", "detection type name", OFFSET(name), AV_OPT_TYPE_STRING, .flags = FLAGS, "detection" }, - { "filter_params", NULL, OFFSET(params), AV_OPT_TYPE_STRING, .flags = FLAGS, "detection" }, { NULL } }; diff --git a/libavformat/iemetadataenc.c b/libavformat/iemetadataenc.c index 8539e7f..506b82a 100644 --- a/libavformat/iemetadataenc.c +++ b/libavformat/iemetadataenc.c @@ -241,8 +241,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt) if (!bboxes->bbox[i]->label_buf) { sprintf(tmp_str, "%s", "face"); } else { - // object detection label index start from 1 - int label_id = bboxes->bbox[i]->label_id - 1; + int label_id = bboxes->bbox[i]->label_id; LabelsArray *array = (LabelsArray*)(bboxes->bbox[i]->label_buf->data); sprintf(tmp_str, "%s", array->label[label_id]); } -- 2.7.4