From cbeac2d05ecc3b5c0809540404850269cea5cf38 Mon Sep 17 00:00:00 2001 From: "Wang, Shaofei" Date: Wed, 23 Jan 2019 15:59:51 -0500 Subject: [PATCH] iemetadata convertor muxer to convert meta data from Intel inference engine including detection and classification with -f iemetadata Change-Id: I79dbda78ec7da929ccf3062325212f22ea110c3c --- libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/iemetadataenc.c | 283 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 285 insertions(+) create mode 100644 libavformat/iemetadataenc.c diff --git a/libavformat/Makefile b/libavformat/Makefile index e4d997c..f7e1698 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -171,6 +171,7 @@ OBJS-$(CONFIG_EAC3_MUXER) += rawenc.o OBJS-$(CONFIG_EPAF_DEMUXER) += epafdec.o pcm.o OBJS-$(CONFIG_FFMETADATA_DEMUXER) += ffmetadec.o OBJS-$(CONFIG_FFMETADATA_MUXER) += ffmetaenc.o +OBJS-$(CONFIG_IEMETADATA_MUXER) += iemetadataenc.o OBJS-$(CONFIG_FIFO_MUXER) += fifo.o OBJS-$(CONFIG_FIFO_TEST_MUXER) += fifo_test.o OBJS-$(CONFIG_FILMSTRIP_DEMUXER) += filmstripdec.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index 498077e..39b435d 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -133,6 +133,7 @@ extern AVInputFormat ff_epaf_demuxer; extern AVOutputFormat ff_f4v_muxer; extern AVInputFormat ff_ffmetadata_demuxer; extern AVOutputFormat ff_ffmetadata_muxer; +extern AVOutputFormat ff_iemetadata_muxer; extern AVOutputFormat ff_fifo_muxer; extern AVOutputFormat ff_fifo_test_muxer; extern AVInputFormat ff_filmstrip_demuxer; diff --git a/libavformat/iemetadataenc.c b/libavformat/iemetadataenc.c new file mode 100644 index 0000000..eb332ef --- /dev/null +++ b/libavformat/iemetadataenc.c @@ -0,0 +1,283 @@ +/* + * IE meta data muxer + * Copyright (c) 2019 Shaofei Wang + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avformat.h" +#include "internal.h" +#include "libavutil/dict.h" +#include "libavfilter/avfilter.h" +#include "libavfilter/inference.h" + + +#define JSON_HEAD "{\n" +#define JSON_TAIL "},\n" +#define JSON_ESCAPE " " +#define JSON_ELEMENT_HEAD(name) #name ": {\n" +#define JSON_ARRAY_HEAD(name) #name ": [\n" +#define JSON_ARRAY_TAIL "]\n" +#define JSON_FVALUE(str, name, value) sprintf(str, "\"%s\": %.1f,\n", name, value) +#define JSON_IVALUE(str, name, value) sprintf(str, "\"%s\": %ld,\n", name, value) +#define JSON_STRING(str, name, value) sprintf(str, "\"%s\": \"%s\",\n", name, value) + +#define BUFFER_SIZE 256 + +typedef struct IeMetaDataMuxer { + char *meta_data_strings; + size_t meta_data_length; + unsigned int current_escape_num; +} IeMetaDataMuxer; + +static int fill_content(AVFormatContext *s, const char *str, int flush) +{ + IeMetaDataMuxer *md = s->priv_data; + unsigned int len = strlen(str); + + if (str[len] == '\0') + len++; + if (((len + md->meta_data_length) > BUFFER_SIZE)) { + avio_write(s->pb, md->meta_data_strings, md->meta_data_length); + avio_flush(s->pb); + md->meta_data_length = 0; + } + memcpy(md->meta_data_strings + md->meta_data_length, str, len); + md->meta_data_length += len - 1; + if (flush) { + avio_write(s->pb, md->meta_data_strings, md->meta_data_length); + avio_flush(s->pb); + md->meta_data_length = 0; + } + return md->meta_data_length; +} + +static int escape(AVFormatContext *s, unsigned int n) +{ + unsigned int i; + for (i = 0; i < n; i++) + fill_content(s, JSON_ESCAPE, 1); + return 0; +} + +static int fill_line(AVFormatContext *s, const char *str, unsigned int num_esp, int flush) +{ + escape(s, num_esp); + fill_content(s, str, flush); + return 0; +} + +static int init(AVFormatContext *s) +{ + IeMetaDataMuxer *md = s->priv_data; + + md->meta_data_strings = av_mallocz(BUFFER_SIZE); + if (!md->meta_data_strings) { + av_log(s, AV_LOG_ERROR, "fail to alloc buffer for meta data\n"); + } + md->meta_data_length = 0; + md->current_escape_num = 1; + return 0; +} + +static void deinit(AVFormatContext *s) +{ + IeMetaDataMuxer *md = s->priv_data; + av_free(md->meta_data_strings); +} + +static int write_header(AVFormatContext *s) +{ + //fill_content(s, JSON_HEAD, sizeof(JSON_HEAD), 1); + return 0; +} + +static int write_trailer(AVFormatContext *s) +{ + //fill_content(s, JSON_TAIL, sizeof(JSON_TAIL), 1); + return 0; +} + +static int jhead_write(AVFormatContext *s, AVFrame *frm_data) +{ + char tmp_str[80]; + IeMetaDataMuxer *md = s->priv_data; + + fill_content(s, JSON_HEAD, 0); + md->current_escape_num = 1; + JSON_IVALUE(tmp_str, "timestamp", frm_data->best_effort_timestamp); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_STRING(tmp_str, "source", "string"); + fill_line(s, tmp_str, md->current_escape_num, 0); + sprintf(tmp_str, "\"resolution\":{\"width\":%d,\"height\":%d},\n", + frm_data->width, frm_data->height); + fill_line(s, tmp_str, md->current_escape_num, 0); + sprintf(tmp_str, "\"tag\":{\"custom_key\":\"custom_value\"},\n"); + fill_line(s, tmp_str, md->current_escape_num, 0); + + fill_line(s, JSON_ARRAY_HEAD("objects"), md->current_escape_num, 1); + + return 0; +} + +static int jtail_write(AVFormatContext *s) +{ + IeMetaDataMuxer *md = s->priv_data; + + fill_line(s, JSON_ARRAY_TAIL, md->current_escape_num, 0); + md->current_escape_num = 0; + fill_line(s, JSON_TAIL, md->current_escape_num, 1); + return 0; +} + +static int write_packet(AVFormatContext *s, AVPacket *pkt) +{ + IeMetaDataMuxer *md = s->priv_data; + int i, j, head_written = 0; + char tmp_str[80]; + AVFrame *frm_data = (AVFrame *)pkt->data; + if ( !frm_data ) + return 0; + AVFrameSideData *sd; + InferDetectionMeta *meta; + LabelsArray *labels; + BBoxesArray *bboxes; + AVFrameSideData *c_sd; + InferClassificationMeta *cmeta; + ClassifyArray *c_array; + + sd = av_frame_get_side_data(frm_data, AV_FRAME_DATA_INFERENCE_DETECTION); + c_sd = av_frame_get_side_data(frm_data, AV_FRAME_DATA_INFERENCE_CLASSIFICATION); + if (sd) { + meta = (InferDetectionMeta *)sd->data; + + if (meta) { + bboxes = meta->bboxes; + labels = meta->labels; + if (bboxes) { + if (bboxes->num > 0) { + jhead_write(s, frm_data); + head_written = 1; + } + + md->current_escape_num++; + for (i = 0; i < bboxes->num; i++) { + fill_line(s, JSON_HEAD, md->current_escape_num, 0); + + md->current_escape_num++; + fill_line(s, JSON_ELEMENT_HEAD("detection"), md->current_escape_num, 0); + md->current_escape_num++; + fill_line(s, JSON_ELEMENT_HEAD("bounding_box"), md->current_escape_num, 0); + + md->current_escape_num++; + JSON_FVALUE(tmp_str, "x_min", bboxes->bbox[i]->x_min); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_FVALUE(tmp_str, "y_min", bboxes->bbox[i]->y_min); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_FVALUE(tmp_str, "x_max", bboxes->bbox[i]->x_max); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_FVALUE(tmp_str, "y_max", bboxes->bbox[i]->y_max); + fill_line(s, tmp_str, md->current_escape_num, 0); + + md->current_escape_num--; + fill_line(s, JSON_TAIL, md->current_escape_num, 0); + + JSON_IVALUE(tmp_str, "object_id", bboxes->bbox[i]->object_id); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_STRING(tmp_str, "label", ""); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_IVALUE(tmp_str, "label_id", bboxes->bbox[i]->label_id); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_FVALUE(tmp_str, "confidence", bboxes->bbox[i]->confidence); + fill_line(s, tmp_str, md->current_escape_num, 0); + sprintf(tmp_str, "\"model\":{\"name\":\"\", \"version\":1},\n"); + fill_line(s, tmp_str, md->current_escape_num, 0); + + md->current_escape_num--; + fill_line(s, JSON_TAIL, md->current_escape_num, 0); + + //emotion, age, gender + if (c_sd) { + cmeta = (InferClassificationMeta *)c_sd->data; + if (cmeta) { + c_array = cmeta->c_array; + if (c_array) { + for (j = 0; j < c_array->num; j++) { + if (c_array->classifications[j]->detect_id == i) { + char *name = c_array->classifications[j]->name; + sprintf(tmp_str, "\"%s\": {\n", name); + fill_line(s, tmp_str, md->current_escape_num, 0); + if (strncmp(name, "emotion", strlen("emotion")) == 0 || + strncmp(name, "gender", strlen("gender")) == 0 || + strncmp(name, "face_id", strlen("face_id")) == 0) { + md->current_escape_num++; + JSON_STRING(tmp_str, "label", + ((LabelsArray*)c_array->classifications[j]->label_buf->data)->label[c_array->classifications[j]->label_id]); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_IVALUE(tmp_str, "label_id", c_array->classifications[j]->label_id); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_FVALUE(tmp_str, "confidence", c_array->classifications[j]->confidence); + fill_line(s, tmp_str, md->current_escape_num, 0); + sprintf(tmp_str, "\"model\":{\"name\":\"\", \"version\":1},\n"); + fill_line(s, tmp_str, md->current_escape_num, 0); + + md->current_escape_num--; + fill_line(s, JSON_TAIL, md->current_escape_num, 0); + } else if (strncmp(name, "age", strlen("age")) == 0) { + md->current_escape_num++; + JSON_FVALUE(tmp_str, "value", c_array->classifications[j]->value); + fill_line(s, tmp_str, md->current_escape_num, 0); + JSON_FVALUE(tmp_str, "confidence", c_array->classifications[j]->confidence); + fill_line(s, tmp_str, md->current_escape_num, 0); + sprintf(tmp_str, "\"model\":{\"name\":\"\", \"version\":1},\n"); + fill_line(s, tmp_str, md->current_escape_num, 0); + + md->current_escape_num--; + fill_line(s, JSON_TAIL, md->current_escape_num, 0); + } + } + } + } + } + } + md->current_escape_num--; + fill_line(s, JSON_TAIL, md->current_escape_num, 1); + } + } + } + } + + md->current_escape_num--; + if (head_written) + jtail_write(s); + + return 0; +} + +AVOutputFormat ff_iemetadata_muxer = { + .name = "iemetadata", + .long_name = NULL_IF_CONFIG_SMALL("Inference engine meta data muxer"), + .extensions = "json", + .priv_data_size = sizeof(IeMetaDataMuxer), + .init = init, + .deinit = deinit, + .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, + .write_header = write_header, + .write_packet = write_packet, + .write_trailer = write_trailer, + .flags = AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, +}; -- 2.7.4