* [FFmpeg-devel] [PATCH 1/3] libavfilter/dnn: add layout option to openvino backend
@ 2023-09-20 2:26 wenbin.chen-at-intel.com
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 2/3] libavfilter/dnn: Add scale and mean preprocess " wenbin.chen-at-intel.com
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables wenbin.chen-at-intel.com
0 siblings, 2 replies; 5+ messages in thread
From: wenbin.chen-at-intel.com @ 2023-09-20 2:26 UTC (permalink / raw)
To: ffmpeg-devel
From: Wenbin Chen <wenbin.chen@intel.com>
DNN models have different input layouts (NCHW or NHWC), so a
"layout" option is added
Use openvino's API to do layout conversion for input data. Use swscale
to do layout conversion for output data as openvino doesn't have
similar C API for output.
Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
---
libavfilter/dnn/dnn_backend_openvino.c | 47 +++++++-
libavfilter/dnn/dnn_io_proc.c | 151 ++++++++++++++++++++++---
libavfilter/dnn_interface.h | 7 ++
3 files changed, 185 insertions(+), 20 deletions(-)
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 4922833b07..3ba5f5331a 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -45,6 +45,7 @@ typedef struct OVOptions{
uint8_t async;
int batch_size;
int input_resizable;
+ DNNLayout layout;
} OVOptions;
typedef struct OVContext {
@@ -100,6 +101,10 @@ static const AVOption dnn_openvino_options[] = {
DNN_BACKEND_COMMON_OPTIONS
{ "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
{ "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+ { "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, "layout" },
+ { "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"},
+ { "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"},
+ { "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"},
{ NULL }
};
@@ -231,9 +236,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
avpriv_report_missing_feature(ctx, "Do not support dynamic model.");
return AVERROR(ENOSYS);
}
- input.height = dims[2];
- input.width = dims[3];
- input.channels = dims[1];
+ input.height = dims[1];
+ input.width = dims[2];
+ input.channels = dims[3];
input.dt = precision_to_datatype(precision);
input.data = av_malloc(input.height * input.width * input.channels * get_datatype_size(input.dt));
if (!input.data)
@@ -403,6 +408,7 @@ static void infer_completion_callback(void *args)
av_assert0(request->lltask_count <= dims.dims[0]);
#endif
output.dt = precision_to_datatype(precision);
+ output.layout = ctx->options.layout;
av_assert0(request->lltask_count >= 1);
for (int i = 0; i < request->lltask_count; ++i) {
@@ -521,11 +527,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
OVContext *ctx = &ov_model->ctx;
#if HAVE_OPENVINO2
ov_status_e status;
- ov_preprocess_input_tensor_info_t* input_tensor_info;
- ov_preprocess_output_tensor_info_t* output_tensor_info;
+ ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
+ ov_preprocess_output_tensor_info_t* output_tensor_info = NULL;
+ ov_preprocess_input_model_info_t* input_model_info = NULL;
ov_model_t *tmp_ov_model;
ov_layout_t* NHWC_layout = NULL;
+ ov_layout_t* NCHW_layout = NULL;
const char* NHWC_desc = "NHWC";
+ const char* NCHW_desc = "NCHW";
const char* device = ctx->options.device_type;
#else
IEStatusCode status;
@@ -570,6 +579,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
//set input layout
status = ov_layout_create(NHWC_desc, &NHWC_layout);
+ status |= ov_layout_create(NCHW_desc, &NCHW_layout);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to create layout for input.\n");
ret = ov2_map_error(status, NULL);
@@ -583,6 +593,22 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
+ status = ov_preprocess_input_info_get_model_info(ov_model->input_info, &input_model_info);
+ if (status != OK) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to get input model info\n");
+ ret = ov2_map_error(status, NULL);
+ goto err;
+ }
+ if (ctx->options.layout == DL_NCHW)
+ status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout);
+ else if (ctx->options.layout == DL_NHWC)
+ status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout);
+ if (status != OK) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
+ ret = ov2_map_error(status, NULL);
+ goto err;
+ }
+
if (ov_model->model->func_type != DFT_PROCESS_FRAME)
//set precision only for detect and classify
status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
@@ -618,6 +644,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ret = ov2_map_error(status, NULL);
goto err;
}
+ ov_preprocess_input_model_info_free(input_model_info);
+ ov_layout_free(NCHW_layout);
+ ov_layout_free(NHWC_layout);
#else
if (ctx->options.batch_size > 1) {
input_shapes_t input_shapes;
@@ -762,6 +791,14 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
return 0;
err:
+#if HAVE_OPENVINO2
+ if (NCHW_layout)
+ ov_layout_free(NCHW_layout);
+ if (NHWC_layout)
+ ov_layout_free(NHWC_layout);
+ if (input_model_info)
+ ov_preprocess_input_model_info_free(input_model_info);
+#endif
dnn_free_model_ov(&ov_model->model);
return ret;
}
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index 7961bf6b95..dfa0d5e5da 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -27,6 +27,12 @@
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
{
struct SwsContext *sws_ctx;
+ int ret = 0;
+ int linesize[4] = { 0 };
+ void **dst_data = NULL;
+ void *middle_data = NULL;
+ uint8_t *planar_data[4] = { 0 };
+ int plane_size = frame->width * frame->height * sizeof(uint8_t);
int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
if (bytewidth < 0) {
return AVERROR(EINVAL);
@@ -35,6 +41,17 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
return AVERROR(ENOSYS);
}
+ dst_data = (void **)frame->data;
+ linesize[0] = frame->linesize[0];
+ if (output->layout == DL_NCHW) {
+ middle_data = av_malloc(plane_size * output->channels);
+ if (!middle_data) {
+ ret = AVERROR(ENOMEM);
+ goto err;
+ }
+ dst_data = &middle_data;
+ linesize[0] = frame->width * 3;
+ }
switch (frame->format) {
case AV_PIX_FMT_RGB24:
@@ -51,18 +68,52 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width * 3, frame->height,
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);
- return AVERROR(EINVAL);
+ ret = AVERROR(EINVAL);
+ goto err;
}
sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
(const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0}, 0, frame->height,
- (uint8_t * const*)frame->data, frame->linesize);
+ (uint8_t * const*)dst_data, linesize);
sws_freeContext(sws_ctx);
- return 0;
+ // convert data from planar to packed
+ if (output->layout == DL_NCHW) {
+ sws_ctx = sws_getContext(frame->width,
+ frame->height,
+ AV_PIX_FMT_GBRP,
+ frame->width,
+ frame->height,
+ frame->format,
+ 0, NULL, NULL, NULL);
+ if (!sws_ctx) {
+ av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
+ "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
+ av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,
+ av_get_pix_fmt_name(frame->format),frame->width, frame->height);
+ ret = AVERROR(EINVAL);
+ goto err;
+ }
+ if (frame->format == AV_PIX_FMT_RGB24) {
+ planar_data[0] = (uint8_t *)middle_data + plane_size;
+ planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
+ planar_data[2] = (uint8_t *)middle_data;
+ } else if (frame->format == AV_PIX_FMT_BGR24) {
+ planar_data[0] = (uint8_t *)middle_data + plane_size;
+ planar_data[1] = (uint8_t *)middle_data;
+ planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
+ }
+ sws_scale(sws_ctx, (const uint8_t * const *)planar_data,
+ (const int [4]){frame->width * sizeof(uint8_t),
+ frame->width * sizeof(uint8_t),
+ frame->width * sizeof(uint8_t), 0},
+ 0, frame->height, frame->data, frame->linesize);
+ sws_freeContext(sws_ctx);
+ }
+ break;
case AV_PIX_FMT_GRAYF32:
av_image_copy_plane(frame->data[0], frame->linesize[0],
output->data, bytewidth,
bytewidth, frame->height);
- return 0;
+ break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUV422P:
case AV_PIX_FMT_YUV444P:
@@ -82,24 +133,34 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width, frame->height,
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);
- return AVERROR(EINVAL);
+ ret = AVERROR(EINVAL);
+ goto err;
}
sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
(const int[4]){frame->width * sizeof(float), 0, 0, 0}, 0, frame->height,
(uint8_t * const*)frame->data, frame->linesize);
sws_freeContext(sws_ctx);
- return 0;
+ break;
default:
avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
- return AVERROR(ENOSYS);
+ ret = AVERROR(ENOSYS);
+ goto err;
}
- return 0;
+err:
+ av_free(middle_data);
+ return ret;
}
int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
{
struct SwsContext *sws_ctx;
+ int ret = 0;
+ int linesize[4] = { 0 };
+ void **src_data = NULL;
+ void *middle_data = NULL;
+ uint8_t *planar_data[4] = { 0 };
+ int plane_size = frame->width * frame->height * sizeof(uint8_t);
int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
if (bytewidth < 0) {
return AVERROR(EINVAL);
@@ -109,9 +170,54 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
return AVERROR(ENOSYS);
}
+ src_data = (void **)frame->data;
+ linesize[0] = frame->linesize[0];
+ if (input->layout == DL_NCHW) {
+ middle_data = av_malloc(plane_size * input->channels);
+ if (!middle_data) {
+ ret = AVERROR(ENOMEM);
+ goto err;
+ }
+ src_data = &middle_data;
+ linesize[0] = frame->width * 3;
+ }
+
switch (frame->format) {
case AV_PIX_FMT_RGB24:
case AV_PIX_FMT_BGR24:
+ // convert data from planar to packed
+ if (input->layout == DL_NCHW) {
+ sws_ctx = sws_getContext(frame->width,
+ frame->height,
+ frame->format,
+ frame->width,
+ frame->height,
+ AV_PIX_FMT_GBRP,
+ 0, NULL, NULL, NULL);
+ if (!sws_ctx) {
+ av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
+ "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
+ av_get_pix_fmt_name(frame->format), frame->width, frame->height,
+ av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height);
+ ret = AVERROR(EINVAL);
+ goto err;
+ }
+ if (frame->format == AV_PIX_FMT_RGB24) {
+ planar_data[0] = (uint8_t *)middle_data + plane_size;
+ planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
+ planar_data[2] = (uint8_t *)middle_data;
+ } else if (frame->format == AV_PIX_FMT_BGR24) {
+ planar_data[0] = (uint8_t *)middle_data + plane_size;
+ planar_data[1] = (uint8_t *)middle_data;
+ planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
+ }
+ sws_scale(sws_ctx, (const uint8_t * const *)frame->data,
+ frame->linesize, 0, frame->height, planar_data,
+ (const int [4]){frame->width * sizeof(uint8_t),
+ frame->width * sizeof(uint8_t),
+ frame->width * sizeof(uint8_t), 0});
+ sws_freeContext(sws_ctx);
+ }
sws_ctx = sws_getContext(frame->width * 3,
frame->height,
AV_PIX_FMT_GRAY8,
@@ -124,10 +230,11 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width * 3, frame->height);
- return AVERROR(EINVAL);
+ ret = AVERROR(EINVAL);
+ goto err;
}
- sws_scale(sws_ctx, (const uint8_t **)frame->data,
- frame->linesize, 0, frame->height,
+ sws_scale(sws_ctx, (const uint8_t **)src_data,
+ linesize, 0, frame->height,
(uint8_t * const [4]){input->data, 0, 0, 0},
(const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
sws_freeContext(sws_ctx);
@@ -156,7 +263,8 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,
av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width, frame->height);
- return AVERROR(EINVAL);
+ ret = AVERROR(EINVAL);
+ goto err;
}
sws_scale(sws_ctx, (const uint8_t **)frame->data,
frame->linesize, 0, frame->height,
@@ -166,10 +274,12 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
break;
default:
avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));
- return AVERROR(ENOSYS);
+ ret = AVERROR(ENOSYS);
+ goto err;
}
-
- return 0;
+err:
+ av_free(middle_data);
+ return ret;
}
static enum AVPixelFormat get_pixel_format(DNNData *data)
@@ -205,6 +315,11 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index
AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
av_assert0(sd);
+ if (input->layout == DL_NCHW) {
+ av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
+ return AVERROR(ENOSYS);
+ }
+
header = (const AVDetectionBBoxHeader *)sd->data;
bbox = av_get_detection_bbox(header, bbox_index);
@@ -257,6 +372,12 @@ int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
int linesizes[4];
int ret = 0;
enum AVPixelFormat fmt = get_pixel_format(input);
+
+ if (input->layout == DL_NCHW) {
+ av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
+ return AVERROR(ENOSYS);
+ }
+
sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
input->width, input->height, fmt,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 20c6a0a896..956a63443a 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -56,12 +56,19 @@ typedef enum {
DFT_ANALYTICS_CLASSIFY, // classify for each bounding box
}DNNFunctionType;
+typedef enum {
+ DL_NONE,
+ DL_NCHW,
+ DL_NHWC,
+} DNNLayout;
+
typedef struct DNNData{
void *data;
int width, height, channels;
// dt and order together decide the color format
DNNDataType dt;
DNNColorOrder order;
+ DNNLayout layout;
} DNNData;
typedef struct DNNExecBaseParams {
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 2/3] libavfilter/dnn: Add scale and mean preprocess to openvino backend
2023-09-20 2:26 [FFmpeg-devel] [PATCH 1/3] libavfilter/dnn: add layout option to openvino backend wenbin.chen-at-intel.com
@ 2023-09-20 2:26 ` wenbin.chen-at-intel.com
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables wenbin.chen-at-intel.com
1 sibling, 0 replies; 5+ messages in thread
From: wenbin.chen-at-intel.com @ 2023-09-20 2:26 UTC (permalink / raw)
To: ffmpeg-devel
From: Wenbin Chen <wenbin.chen@intel.com>
DNN models have different data preprocessing requirements. Scale and mean
parameters are added to preprocess input data.
Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
---
libavfilter/dnn/dnn_backend_openvino.c | 43 ++++++++++++--
libavfilter/dnn/dnn_io_proc.c | 82 +++++++++++++++++++++-----
libavfilter/dnn_interface.h | 2 +
3 files changed, 108 insertions(+), 19 deletions(-)
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 3ba5f5331a..4224600f94 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -46,6 +46,8 @@ typedef struct OVOptions{
int batch_size;
int input_resizable;
DNNLayout layout;
+ float scale;
+ float mean;
} OVOptions;
typedef struct OVContext {
@@ -105,6 +107,8 @@ static const AVOption dnn_openvino_options[] = {
{ "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, "layout"},
{ "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, "layout"},
{ "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, "layout"},
+ { "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
+ { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
{ NULL }
};
@@ -209,6 +213,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
ie_blob_t *input_blob = NULL;
#endif
+ memset(&input, 0, sizeof(input));
lltask = ff_queue_peek_front(ov_model->lltask_queue);
av_assert0(lltask);
task = lltask->task;
@@ -274,6 +279,9 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
// all models in openvino open model zoo use BGR as input,
// change to be an option when necessary.
input.order = DCO_BGR;
+ // We use preprocess_steps to scale input data, so disable scale and mean here.
+ input.scale = 1;
+ input.mean = 0;
for (int i = 0; i < ctx->options.batch_size; ++i) {
lltask = ff_queue_pop_front(ov_model->lltask_queue);
@@ -343,6 +351,7 @@ static void infer_completion_callback(void *args)
ov_shape_t output_shape = {0};
ov_element_type_e precision;
+ memset(&output, 0, sizeof(output));
status = ov_infer_request_get_output_tensor_by_index(request->infer_request, 0, &output_tensor);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR,
@@ -409,6 +418,8 @@ static void infer_completion_callback(void *args)
#endif
output.dt = precision_to_datatype(precision);
output.layout = ctx->options.layout;
+ output.scale = ctx->options.scale;
+ output.mean = ctx->options.mean;
av_assert0(request->lltask_count >= 1);
for (int i = 0; i < request->lltask_count; ++i) {
@@ -542,7 +553,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ie_config_t config = {NULL, NULL, NULL};
char *all_dev_names = NULL;
#endif
-
+ // We scale pixel by default when do frame processing.
+ if (fabsf(ctx->options.scale) < 1e-6f)
+ ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
// batch size
if (ctx->options.batch_size <= 0) {
ctx->options.batch_size = 1;
@@ -609,15 +622,37 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
+ status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
if (ov_model->model->func_type != DFT_PROCESS_FRAME)
- //set precision only for detect and classify
- status = ov_preprocess_input_tensor_info_set_element_type(input_tensor_info, U8);
- status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
+ status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
+ else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f)
+ status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
+ else
+ status |= ov_preprocess_output_set_element_type(output_tensor_info, U8);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to set input/output element type\n");
ret = ov2_map_error(status, NULL);
goto err;
}
+ // set preprocess steps.
+ if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) {
+ ov_preprocess_preprocess_steps_t* input_process_steps = NULL;
+ status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps);
+ if (status != OK) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to get preprocess steps\n");
+ ret = ov2_map_error(status, NULL);
+ goto err;
+ }
+ status = ov_preprocess_preprocess_steps_convert_element_type(input_process_steps, F32);
+ status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->options.mean);
+ status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->options.scale);
+ if (status != OK) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to set preprocess steps\n");
+ ret = ov2_map_error(status, NULL);
+ goto err;
+ }
+ ov_preprocess_preprocess_steps_free(input_process_steps);
+ }
//update model
if(ov_model->ov_model)
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index dfa0d5e5da..ab656e8ed7 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -24,6 +24,20 @@
#include "libavutil/avassert.h"
#include "libavutil/detection_bbox.h"
+static int get_datatype_size(DNNDataType dt)
+{
+ switch (dt)
+ {
+ case DNN_FLOAT:
+ return sizeof(float);
+ case DNN_UINT8:
+ return sizeof(uint8_t);
+ default:
+ av_assert0(!"not supported yet.");
+ return 1;
+ }
+}
+
int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
{
struct SwsContext *sws_ctx;
@@ -33,14 +47,26 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
void *middle_data = NULL;
uint8_t *planar_data[4] = { 0 };
int plane_size = frame->width * frame->height * sizeof(uint8_t);
+ enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;
+ int src_datatype_size = get_datatype_size(output->dt);
+
int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
if (bytewidth < 0) {
return AVERROR(EINVAL);
}
- if (output->dt != DNN_FLOAT) {
- avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
+ /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
+ if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)
+ src_fmt = AV_PIX_FMT_GRAY8;
+ /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
+ else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&
+ fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)
+ src_fmt = AV_PIX_FMT_GRAYF32;
+ else {
+ av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "
+ "scale: %f, mean: %f\n", output->scale, output->mean);
return AVERROR(ENOSYS);
}
+
dst_data = (void **)frame->data;
linesize[0] = frame->linesize[0];
if (output->layout == DL_NCHW) {
@@ -58,7 +84,7 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
case AV_PIX_FMT_BGR24:
sws_ctx = sws_getContext(frame->width * 3,
frame->height,
- AV_PIX_FMT_GRAYF32,
+ src_fmt,
frame->width * 3,
frame->height,
AV_PIX_FMT_GRAY8,
@@ -66,13 +92,13 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
if (!sws_ctx) {
av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
- av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width * 3, frame->height,
+ av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);
ret = AVERROR(EINVAL);
goto err;
}
sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
- (const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0}, 0, frame->height,
+ (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,
(uint8_t * const*)dst_data, linesize);
sws_freeContext(sws_ctx);
// convert data from planar to packed
@@ -131,13 +157,13 @@ int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
if (!sws_ctx) {
av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
- av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32), frame->width, frame->height,
+ av_get_pix_fmt_name(src_fmt), frame->width, frame->height,
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);
ret = AVERROR(EINVAL);
goto err;
}
sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},
- (const int[4]){frame->width * sizeof(float), 0, 0, 0}, 0, frame->height,
+ (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,
(uint8_t * const*)frame->data, frame->linesize);
sws_freeContext(sws_ctx);
break;
@@ -161,12 +187,22 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
void *middle_data = NULL;
uint8_t *planar_data[4] = { 0 };
int plane_size = frame->width * frame->height * sizeof(uint8_t);
+ enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;
+ int dst_datatype_size = get_datatype_size(input->dt);
int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
if (bytewidth < 0) {
return AVERROR(EINVAL);
}
- if (input->dt != DNN_FLOAT) {
- avpriv_report_missing_feature(log_ctx, "data type rather than DNN_FLOAT");
+ /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
+ if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)
+ dst_fmt = AV_PIX_FMT_GRAY8;
+ /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
+ else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&
+ fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)
+ dst_fmt = AV_PIX_FMT_GRAYF32;
+ else {
+ av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "
+ "scale: %f, mean: %f\n", input->scale, input->mean);
return AVERROR(ENOSYS);
}
@@ -223,20 +259,20 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
AV_PIX_FMT_GRAY8,
frame->width * 3,
frame->height,
- AV_PIX_FMT_GRAYF32,
+ dst_fmt,
0, NULL, NULL, NULL);
if (!sws_ctx) {
av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,
- av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width * 3, frame->height);
+ av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);
ret = AVERROR(EINVAL);
goto err;
}
sws_scale(sws_ctx, (const uint8_t **)src_data,
linesize, 0, frame->height,
(uint8_t * const [4]){input->data, 0, 0, 0},
- (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
+ (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});
sws_freeContext(sws_ctx);
break;
case AV_PIX_FMT_GRAYF32:
@@ -256,20 +292,20 @@ int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
AV_PIX_FMT_GRAY8,
frame->width,
frame->height,
- AV_PIX_FMT_GRAYF32,
+ dst_fmt,
0, NULL, NULL, NULL);
if (!sws_ctx) {
av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
"fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,
- av_get_pix_fmt_name(AV_PIX_FMT_GRAYF32),frame->width, frame->height);
+ av_get_pix_fmt_name(dst_fmt),frame->width, frame->height);
ret = AVERROR(EINVAL);
goto err;
}
sws_scale(sws_ctx, (const uint8_t **)frame->data,
frame->linesize, 0, frame->height,
(uint8_t * const [4]){input->data, 0, 0, 0},
- (const int [4]){frame->width * sizeof(float), 0, 0, 0});
+ (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});
sws_freeContext(sws_ctx);
break;
default:
@@ -315,6 +351,14 @@ int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index
AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);
av_assert0(sd);
+ /* (scale != 1 and scale != 0) or mean != 0 */
+ if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
+ fabsf(input->mean) > 1e-6f) {
+ av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "
+ "scale: %f, mean: %f\n", input->scale, input->mean);
+ return AVERROR(ENOSYS);
+ }
+
if (input->layout == DL_NCHW) {
av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");
return AVERROR(ENOSYS);
@@ -373,6 +417,14 @@ int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)
int ret = 0;
enum AVPixelFormat fmt = get_pixel_format(input);
+ /* (scale != 1 and scale != 0) or mean != 0 */
+ if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||
+ fabsf(input->mean) > 1e-6f) {
+ av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "
+ "scale: %f, mean: %f\n", input->scale, input->mean);
+ return AVERROR(ENOSYS);
+ }
+
if (input->layout == DL_NCHW) {
av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");
return AVERROR(ENOSYS);
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 956a63443a..183d8418b2 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -69,6 +69,8 @@ typedef struct DNNData{
DNNDataType dt;
DNNColorOrder order;
DNNLayout layout;
+ float scale;
+ float mean;
} DNNData;
typedef struct DNNExecBaseParams {
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables
2023-09-20 2:26 [FFmpeg-devel] [PATCH 1/3] libavfilter/dnn: add layout option to openvino backend wenbin.chen-at-intel.com
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 2/3] libavfilter/dnn: Add scale and mean preprocess " wenbin.chen-at-intel.com
@ 2023-09-20 2:26 ` wenbin.chen-at-intel.com
2023-09-20 9:26 ` "zhilizhao(赵志立)"
1 sibling, 1 reply; 5+ messages in thread
From: wenbin.chen-at-intel.com @ 2023-09-20 2:26 UTC (permalink / raw)
To: ffmpeg-devel
From: Wenbin Chen <wenbin.chen@intel.com>
Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
---
libavfilter/dnn/dnn_backend_tf.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index b521de7fbe..e1e8cef0d2 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -629,6 +629,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
TFContext *ctx = &tf_model->ctx;
int ret = 0;
+ memset(&input, 0, sizeof(input));
lltask = ff_queue_pop_front(tf_model->lltask_queue);
av_assert0(lltask);
task = lltask->task;
@@ -724,7 +725,7 @@ static void infer_completion_callback(void *args) {
TFModel *tf_model = task->model;
TFContext *ctx = &tf_model->ctx;
- outputs = av_malloc_array(task->nb_output, sizeof(*outputs));
+ outputs = av_calloc(task->nb_output, sizeof(*outputs));
if (!outputs) {
av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n");
goto err;
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables wenbin.chen-at-intel.com
@ 2023-09-20 9:26 ` "zhilizhao(赵志立)"
2023-09-21 1:28 ` Chen, Wenbin
0 siblings, 1 reply; 5+ messages in thread
From: "zhilizhao(赵志立)" @ 2023-09-20 9:26 UTC (permalink / raw)
To: FFmpeg development discussions and patches
> On Sep 20, 2023, at 10:26, wenbin.chen-at-intel.com@ffmpeg.org wrote:
>
> From: Wenbin Chen <wenbin.chen@intel.com>
>
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
> libavfilter/dnn/dnn_backend_tf.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
> index b521de7fbe..e1e8cef0d2 100644
> --- a/libavfilter/dnn/dnn_backend_tf.c
> +++ b/libavfilter/dnn/dnn_backend_tf.c
> @@ -629,6 +629,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
> TFContext *ctx = &tf_model->ctx;
> int ret = 0;
>
> + memset(&input, 0, sizeof(input));
Can be simplified with DNNData input = { 0 };
> lltask = ff_queue_pop_front(tf_model->lltask_queue);
> av_assert0(lltask);
> task = lltask->task;
> @@ -724,7 +725,7 @@ static void infer_completion_callback(void *args) {
> TFModel *tf_model = task->model;
> TFContext *ctx = &tf_model->ctx;
>
> - outputs = av_malloc_array(task->nb_output, sizeof(*outputs));
> + outputs = av_calloc(task->nb_output, sizeof(*outputs));
> if (!outputs) {
> av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for *outputs\n");
> goto err;
> --
> 2.34.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables
2023-09-20 9:26 ` "zhilizhao(赵志立)"
@ 2023-09-21 1:28 ` Chen, Wenbin
0 siblings, 0 replies; 5+ messages in thread
From: Chen, Wenbin @ 2023-09-21 1:28 UTC (permalink / raw)
To: FFmpeg development discussions and patches
> > On Sep 20, 2023, at 10:26, wenbin.chen-at-intel.com@ffmpeg.org wrote:
> >
> > From: Wenbin Chen <wenbin.chen@intel.com>
> >
> > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > ---
> > libavfilter/dnn/dnn_backend_tf.c | 3 ++-
> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavfilter/dnn/dnn_backend_tf.c
> b/libavfilter/dnn/dnn_backend_tf.c
> > index b521de7fbe..e1e8cef0d2 100644
> > --- a/libavfilter/dnn/dnn_backend_tf.c
> > +++ b/libavfilter/dnn/dnn_backend_tf.c
> > @@ -629,6 +629,7 @@ static int fill_model_input_tf(TFModel *tf_model,
> TFRequestItem *request) {
> > TFContext *ctx = &tf_model->ctx;
> > int ret = 0;
> >
> > + memset(&input, 0, sizeof(input));
>
> Can be simplified with DNNData input = { 0 };
Thanks for your advice. I update it in patch v2.
>
> > lltask = ff_queue_pop_front(tf_model->lltask_queue);
> > av_assert0(lltask);
> > task = lltask->task;
> > @@ -724,7 +725,7 @@ static void infer_completion_callback(void *args) {
> > TFModel *tf_model = task->model;
> > TFContext *ctx = &tf_model->ctx;
> >
> > - outputs = av_malloc_array(task->nb_output, sizeof(*outputs));
> > + outputs = av_calloc(task->nb_output, sizeof(*outputs));
> > if (!outputs) {
> > av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for
> *outputs\n");
> > goto err;
> > --
> > 2.34.1
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-09-21 1:28 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-20 2:26 [FFmpeg-devel] [PATCH 1/3] libavfilter/dnn: add layout option to openvino backend wenbin.chen-at-intel.com
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 2/3] libavfilter/dnn: Add scale and mean preprocess " wenbin.chen-at-intel.com
2023-09-20 2:26 ` [FFmpeg-devel] [PATCH 3/3] libavfilter/dnn: Initialize DNNData variables wenbin.chen-at-intel.com
2023-09-20 9:26 ` "zhilizhao(赵志立)"
2023-09-21 1:28 ` Chen, Wenbin
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git