Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
@ 2022-02-18 15:24 Victoria Zhislina
  2022-02-18 15:48 ` James Almer
  0 siblings, 1 reply; 17+ messages in thread
From: Victoria Zhislina @ 2022-02-18 15:24 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Victoria Zhislina

Add slice-based threading through FFmpeg's filter threading support, and
call zimg_filter_graph_build (which used to take 30-60% of the processing
time of each frame) only when necessary, i.e. when some parameters have
changed. Compared to the original version, video downscaling and color
conversion are more than 4x faster on a 64-core Intel Xeon and 3x faster
on an i7-6700K (4 cores with HT).

Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
---
 libavfilter/vf_zscale.c | 787 ++++++++++++++++++++++++----------------
 1 file changed, 475 insertions(+), 312 deletions(-)

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..ea2565025f 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2015 Paul B Mahol
- *
+ * 2022 Victoria Zhislina, Intel
+ 
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@
 #include "libavutil/imgutils.h"
 
 #define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
 
 static const char *const var_names[] = {
     "in_w",   "iw",
@@ -113,13 +116,17 @@ typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
+    void *tmp[MAX_THREADS]; //separate for each thread;
+    int nb_threads;
+    int slice_h;
 
     zimg_image_format src_format, dst_format;
     zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_image_format src_format_tmp, dst_format_tmp;
+    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
     zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    zimg_graph_builder_params alpha_params_tmp, params_tmp;
+    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,181 @@ typedef struct ZScaleContext {
     enum AVChromaLocation in_chromal, out_chromal;
 } ZScaleContext;
 
+
+typedef struct ThreadData { /* per-frame arguments read by each filter_slice() job */
+    const AVPixFmtDescriptor *desc, *odesc; /* pixel format descriptors of in / out */
+    AVFrame *in, *out; /* source frame and destination frame */
+} ThreadData;
+
+static int convert_chroma_location(enum AVChromaLocation chroma_location)
+{
+    switch (chroma_location) {
+    case AVCHROMA_LOC_UNSPECIFIED:
+    case AVCHROMA_LOC_LEFT:
+        return ZIMG_CHROMA_LEFT;
+    case AVCHROMA_LOC_CENTER:
+        return ZIMG_CHROMA_CENTER;
+    case AVCHROMA_LOC_TOPLEFT:
+        return ZIMG_CHROMA_TOP_LEFT;
+    case AVCHROMA_LOC_TOP:
+        return ZIMG_CHROMA_TOP;
+    case AVCHROMA_LOC_BOTTOMLEFT:
+        return ZIMG_CHROMA_BOTTOM_LEFT;
+    case AVCHROMA_LOC_BOTTOM:
+        return ZIMG_CHROMA_BOTTOM;
+    }
+    return ZIMG_CHROMA_LEFT;
+}
+
+static int convert_matrix(enum AVColorSpace colorspace)
+{
+    switch (colorspace) {
+    case AVCOL_SPC_RGB:
+        return ZIMG_MATRIX_RGB;
+    case AVCOL_SPC_BT709:
+        return ZIMG_MATRIX_709;
+    case AVCOL_SPC_UNSPECIFIED:
+        return ZIMG_MATRIX_UNSPECIFIED;
+    case AVCOL_SPC_FCC:
+        return ZIMG_MATRIX_FCC;
+    case AVCOL_SPC_BT470BG:
+        return ZIMG_MATRIX_470BG;
+    case AVCOL_SPC_SMPTE170M:
+        return ZIMG_MATRIX_170M;
+    case AVCOL_SPC_SMPTE240M:
+        return ZIMG_MATRIX_240M;
+    case AVCOL_SPC_YCGCO:
+        return ZIMG_MATRIX_YCGCO;
+    case AVCOL_SPC_BT2020_NCL:
+        return ZIMG_MATRIX_2020_NCL;
+    case AVCOL_SPC_BT2020_CL:
+        return ZIMG_MATRIX_2020_CL;
+    case AVCOL_SPC_CHROMA_DERIVED_NCL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
+    case AVCOL_SPC_CHROMA_DERIVED_CL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
+    case AVCOL_SPC_ICTCP:
+        return ZIMG_MATRIX_ICTCP;
+    }
+    return ZIMG_MATRIX_UNSPECIFIED;
+}
+
+static int convert_trc(enum AVColorTransferCharacteristic color_trc)
+{
+    switch (color_trc) {
+    case AVCOL_TRC_UNSPECIFIED:
+        return ZIMG_TRANSFER_UNSPECIFIED;
+    case AVCOL_TRC_BT709:
+        return ZIMG_TRANSFER_709;
+    case AVCOL_TRC_GAMMA22:
+        return ZIMG_TRANSFER_470_M;
+    case AVCOL_TRC_GAMMA28:
+        return ZIMG_TRANSFER_470_BG;
+    case AVCOL_TRC_SMPTE170M:
+        return ZIMG_TRANSFER_601;
+    case AVCOL_TRC_SMPTE240M:
+        return ZIMG_TRANSFER_240M;
+    case AVCOL_TRC_LINEAR:
+        return ZIMG_TRANSFER_LINEAR;
+    case AVCOL_TRC_LOG:
+        return ZIMG_TRANSFER_LOG_100;
+    case AVCOL_TRC_LOG_SQRT:
+        return ZIMG_TRANSFER_LOG_316;
+    case AVCOL_TRC_IEC61966_2_4:
+        return ZIMG_TRANSFER_IEC_61966_2_4;
+    case AVCOL_TRC_BT2020_10:
+        return ZIMG_TRANSFER_2020_10;
+    case AVCOL_TRC_BT2020_12:
+        return ZIMG_TRANSFER_2020_12;
+    case AVCOL_TRC_SMPTE2084:
+        return ZIMG_TRANSFER_ST2084;
+    case AVCOL_TRC_ARIB_STD_B67:
+        return ZIMG_TRANSFER_ARIB_B67;
+    case AVCOL_TRC_IEC61966_2_1:
+        return ZIMG_TRANSFER_IEC_61966_2_1;
+    }
+    return ZIMG_TRANSFER_UNSPECIFIED;
+}
+
+static int convert_primaries(enum AVColorPrimaries color_primaries)
+{
+    switch (color_primaries) {
+    case AVCOL_PRI_UNSPECIFIED:
+        return ZIMG_PRIMARIES_UNSPECIFIED;
+    case AVCOL_PRI_BT709:
+        return ZIMG_PRIMARIES_709;
+    case AVCOL_PRI_BT470M:
+        return ZIMG_PRIMARIES_470_M;
+    case AVCOL_PRI_BT470BG:
+        return ZIMG_PRIMARIES_470_BG;
+    case AVCOL_PRI_SMPTE170M:
+        return ZIMG_PRIMARIES_170M;
+    case AVCOL_PRI_SMPTE240M:
+        return ZIMG_PRIMARIES_240M;
+    case AVCOL_PRI_FILM:
+        return ZIMG_PRIMARIES_FILM;
+    case AVCOL_PRI_BT2020:
+        return ZIMG_PRIMARIES_2020;
+    case AVCOL_PRI_SMPTE428:
+        return ZIMG_PRIMARIES_ST428;
+    case AVCOL_PRI_SMPTE431:
+        return ZIMG_PRIMARIES_ST431_2;
+    case AVCOL_PRI_SMPTE432:
+        return ZIMG_PRIMARIES_ST432_1;
+    case AVCOL_PRI_JEDEC_P22:
+        return ZIMG_PRIMARIES_EBU3213_E;
+    }
+    return ZIMG_PRIMARIES_UNSPECIFIED;
+}
+
+static int convert_range(enum AVColorRange color_range)
+{
+    switch (color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_MPEG:
+        return ZIMG_RANGE_LIMITED;
+    case AVCOL_RANGE_JPEG:
+        return ZIMG_RANGE_FULL;
+    }
+    return ZIMG_RANGE_LIMITED;
+}
+
+static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
+{
+    switch (color_range) {
+    case ZIMG_RANGE_LIMITED:
+        return AVCOL_RANGE_MPEG;
+    case ZIMG_RANGE_FULL:
+        return AVCOL_RANGE_JPEG;
+    }
+    return AVCOL_RANGE_UNSPECIFIED;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
     int ret;
+    int i;
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->tmp[i] = NULL;
+        s->graph[i] = NULL;
+        s->alpha_graph[i] = NULL;
+    }
+    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
 
     if (s->size_str && (s->w_expr || s->h_expr)) {
         av_log(ctx, AV_LOG_ERROR,
@@ -158,7 +336,6 @@ static av_cold int init(AVFilterContext *ctx)
         av_opt_set(s, "w", "iw", 0);
     if (!s->h_expr)
         av_opt_set(s, "h", "ih", 0);
-
     return 0;
 }
 
@@ -194,6 +371,153 @@ static int query_formats(AVFilterContext *ctx)
     return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
 }
 
+/* Field-by-field comparison of two zimg image formats (active_region is not examined); returns 0 if all compared fields match and 1 otherwise */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
+{
+    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
+#if ZIMG_API_VERSION >= 0x204
+        (img_fmt0->alpha != img_fmt1->alpha) ||
+#endif
+        (img_fmt0->color_family != img_fmt1->color_family) ||
+        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
+        (img_fmt0->depth != img_fmt1->depth) ||
+        (img_fmt0->field_parity != img_fmt1->field_parity) ||
+        (img_fmt0->height != img_fmt1->height) ||
+        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
+        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
+        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
+        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
+        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
+        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
+        (img_fmt0->width != img_fmt1->width));
+}
+
+/* returns 0 if the compared graph builder parameters are the same and 1 otherwise */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
+{
+    /* Only the parameters that can change within a single ffmpeg zscale invocation are checked.
+    A floating-point parameter is skipped when it is NaN (the default for some params) in both structs. */
+    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
+        (parm0->dither_type != parm1->dither_type) ||
+        (parm0->resample_filter != parm1->resample_filter) ||
+        (parm0->resample_filter_uv != parm1->resample_filter_uv);
+
+    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
+        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
+    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
+        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
+    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
+        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
+    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
+        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
+    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
+        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
+
+    return ret;
+}
+
+static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
+    int colorspace, int primaries, int transfer, int range, int location)
+{
+    format->width = frame->width;
+    format->height = frame->height;
+    format->subsample_w = desc->log2_chroma_w;
+    format->subsample_h = desc->log2_chroma_h;
+    format->depth = desc->comp[0].depth;
+    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
+    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
+    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
+    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
+    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
+    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
+}
+
+static int print_zimg_error(AVFilterContext *ctx)
+{
+    char err_msg[1024];
+    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
+
+    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
+
+    return AVERROR_EXTERNAL;
+}
+
+/* Build the zimg graph(s) and the scratch buffer used by slice job_nr.
+ * The input slice is selected through active_region; the output format describes one tile.
+ * Returns 0 on success, AVERROR(ENOMEM), or AVERROR_EXTERNAL after logging a zimg error. */
+static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
+    ZScaleContext *s, int job_nr)
+{
+    int ret;
+    size_t size, alpha_size = 0;
+    zimg_image_format src_format;
+    zimg_image_format dst_format;
+    zimg_image_format alpha_src_format;
+    zimg_image_format alpha_dst_format;
+
+    src_format = s->src_format;
+    dst_format = s->dst_format;
+    /* The input slice is specified through the active_region field, unlike the output slice.
+    According to zimg requirements, input and output slices should have even dimensions. */
+    src_format.active_region.width = in->width;
+    src_format.active_region.height = s->slice_h;
+    src_format.active_region.left = 0;
+    src_format.active_region.top = job_nr * src_format.active_region.height;
+    /* dst_format describes a single output tile, not the whole frame */
+    dst_format.width = out->width;
+    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+    /* the last slice also takes the rows left over by the division into slices */
+    if (job_nr == (s->nb_threads - 1)) {
+        src_format.active_region.height = src_format.height - src_format.active_region.top;
+        dst_format.height = out->height - job_nr * dst_format.height;
+    }
+
+    /* release the previous graph and scratch buffer; both calls accept NULL */
+    zimg_filter_graph_free(s->graph[job_nr]);
+    av_freep(&s->tmp[job_nr]);
+    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
+    if (!s->graph[job_nr])
+        return print_zimg_error(NULL);
+
+    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
+    if (ret)
+        return print_zimg_error(NULL);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        alpha_src_format = s->alpha_src_format;
+        alpha_dst_format = s->alpha_dst_format;
+        /* the alpha plane is sliced exactly like the main planes */
+        alpha_src_format.active_region.width = in->width;
+        alpha_src_format.active_region.height = s->slice_h;
+        alpha_src_format.active_region.left = 0;
+        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
+        alpha_dst_format.width = out->width;
+        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+        if (job_nr == (s->nb_threads - 1)) {
+            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
+            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
+        }
+
+        zimg_filter_graph_free(s->alpha_graph[job_nr]);
+        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
+        if (!s->alpha_graph[job_nr])
+            return print_zimg_error(NULL);
+        ret = zimg_filter_graph_get_tmp_size(s->alpha_graph[job_nr], &alpha_size);
+        if (ret)
+            return print_zimg_error(NULL);
+    }
+
+    /* both graphs of this job run with the same scratch buffer: size it for the larger one */
+    s->tmp[job_nr] = av_malloc(FFMAX(size, alpha_size));
+    if (!s->tmp[job_nr])
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static int config_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -317,212 +641,15 @@ fail:
     return ret;
 }
 
-static int print_zimg_error(AVFilterContext *ctx)
-{
-    char err_msg[1024];
-    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
-
-    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
-
-    return AVERROR_EXTERNAL;
-}
-
-static int convert_chroma_location(enum AVChromaLocation chroma_location)
-{
-    switch (chroma_location) {
-    case AVCHROMA_LOC_UNSPECIFIED:
-    case AVCHROMA_LOC_LEFT:
-        return ZIMG_CHROMA_LEFT;
-    case AVCHROMA_LOC_CENTER:
-        return ZIMG_CHROMA_CENTER;
-    case AVCHROMA_LOC_TOPLEFT:
-        return ZIMG_CHROMA_TOP_LEFT;
-    case AVCHROMA_LOC_TOP:
-        return ZIMG_CHROMA_TOP;
-    case AVCHROMA_LOC_BOTTOMLEFT:
-        return ZIMG_CHROMA_BOTTOM_LEFT;
-    case AVCHROMA_LOC_BOTTOM:
-        return ZIMG_CHROMA_BOTTOM;
-    }
-    return ZIMG_CHROMA_LEFT;
-}
-
-static int convert_matrix(enum AVColorSpace colorspace)
-{
-    switch (colorspace) {
-    case AVCOL_SPC_RGB:
-        return ZIMG_MATRIX_RGB;
-    case AVCOL_SPC_BT709:
-        return ZIMG_MATRIX_709;
-    case AVCOL_SPC_UNSPECIFIED:
-        return ZIMG_MATRIX_UNSPECIFIED;
-    case AVCOL_SPC_FCC:
-        return ZIMG_MATRIX_FCC;
-    case AVCOL_SPC_BT470BG:
-        return ZIMG_MATRIX_470BG;
-    case AVCOL_SPC_SMPTE170M:
-        return ZIMG_MATRIX_170M;
-    case AVCOL_SPC_SMPTE240M:
-        return ZIMG_MATRIX_240M;
-    case AVCOL_SPC_YCGCO:
-        return ZIMG_MATRIX_YCGCO;
-    case AVCOL_SPC_BT2020_NCL:
-        return ZIMG_MATRIX_2020_NCL;
-    case AVCOL_SPC_BT2020_CL:
-        return ZIMG_MATRIX_2020_CL;
-    case AVCOL_SPC_CHROMA_DERIVED_NCL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
-    case AVCOL_SPC_CHROMA_DERIVED_CL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
-    case AVCOL_SPC_ICTCP:
-        return ZIMG_MATRIX_ICTCP;
-    }
-    return ZIMG_MATRIX_UNSPECIFIED;
-}
-
-static int convert_trc(enum AVColorTransferCharacteristic color_trc)
-{
-    switch (color_trc) {
-    case AVCOL_TRC_UNSPECIFIED:
-        return ZIMG_TRANSFER_UNSPECIFIED;
-    case AVCOL_TRC_BT709:
-        return ZIMG_TRANSFER_709;
-    case AVCOL_TRC_GAMMA22:
-        return ZIMG_TRANSFER_470_M;
-    case AVCOL_TRC_GAMMA28:
-        return ZIMG_TRANSFER_470_BG;
-    case AVCOL_TRC_SMPTE170M:
-        return ZIMG_TRANSFER_601;
-    case AVCOL_TRC_SMPTE240M:
-        return ZIMG_TRANSFER_240M;
-    case AVCOL_TRC_LINEAR:
-        return ZIMG_TRANSFER_LINEAR;
-    case AVCOL_TRC_LOG:
-        return ZIMG_TRANSFER_LOG_100;
-    case AVCOL_TRC_LOG_SQRT:
-        return ZIMG_TRANSFER_LOG_316;
-    case AVCOL_TRC_IEC61966_2_4:
-        return ZIMG_TRANSFER_IEC_61966_2_4;
-    case AVCOL_TRC_BT2020_10:
-        return ZIMG_TRANSFER_2020_10;
-    case AVCOL_TRC_BT2020_12:
-        return ZIMG_TRANSFER_2020_12;
-    case AVCOL_TRC_SMPTE2084:
-        return ZIMG_TRANSFER_ST2084;
-    case AVCOL_TRC_ARIB_STD_B67:
-        return ZIMG_TRANSFER_ARIB_B67;
-    case AVCOL_TRC_IEC61966_2_1:
-        return ZIMG_TRANSFER_IEC_61966_2_1;
-    }
-    return ZIMG_TRANSFER_UNSPECIFIED;
-}
-
-static int convert_primaries(enum AVColorPrimaries color_primaries)
-{
-    switch (color_primaries) {
-    case AVCOL_PRI_UNSPECIFIED:
-        return ZIMG_PRIMARIES_UNSPECIFIED;
-    case AVCOL_PRI_BT709:
-        return ZIMG_PRIMARIES_709;
-    case AVCOL_PRI_BT470M:
-        return ZIMG_PRIMARIES_470_M;
-    case AVCOL_PRI_BT470BG:
-        return ZIMG_PRIMARIES_470_BG;
-    case AVCOL_PRI_SMPTE170M:
-        return ZIMG_PRIMARIES_170M;
-    case AVCOL_PRI_SMPTE240M:
-        return ZIMG_PRIMARIES_240M;
-    case AVCOL_PRI_FILM:
-        return ZIMG_PRIMARIES_FILM;
-    case AVCOL_PRI_BT2020:
-        return ZIMG_PRIMARIES_2020;
-    case AVCOL_PRI_SMPTE428:
-        return ZIMG_PRIMARIES_ST428;
-    case AVCOL_PRI_SMPTE431:
-        return ZIMG_PRIMARIES_ST431_2;
-    case AVCOL_PRI_SMPTE432:
-        return ZIMG_PRIMARIES_ST432_1;
-    case AVCOL_PRI_JEDEC_P22:
-        return ZIMG_PRIMARIES_EBU3213_E;
-    }
-    return ZIMG_PRIMARIES_UNSPECIFIED;
-}
-
-static int convert_range(enum AVColorRange color_range)
-{
-    switch (color_range) {
-    case AVCOL_RANGE_UNSPECIFIED:
-    case AVCOL_RANGE_MPEG:
-        return ZIMG_RANGE_LIMITED;
-    case AVCOL_RANGE_JPEG:
-        return ZIMG_RANGE_FULL;
-    }
-    return ZIMG_RANGE_LIMITED;
-}
-
-static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
-{
-    switch (color_range) {
-    case ZIMG_RANGE_LIMITED:
-        return AVCOL_RANGE_MPEG;
-    case ZIMG_RANGE_FULL:
-        return AVCOL_RANGE_JPEG;
-    }
-    return AVCOL_RANGE_UNSPECIFIED;
-}
-
-static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
-                        int colorspace, int primaries, int transfer, int range, int location)
-{
-    format->width = frame->width;
-    format->height = frame->height;
-    format->subsample_w = desc->log2_chroma_w;
-    format->subsample_h = desc->log2_chroma_h;
-    format->depth = desc->comp[0].depth;
-    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
-    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
-    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
-    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
-    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
-    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
-    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
-}
-
-static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
-                       zimg_image_format *src_format, zimg_image_format *dst_format,
-                       void **tmp, size_t *tmp_size)
-{
-    int ret;
-    size_t size;
-
-    zimg_filter_graph_free(*graph);
-    *graph = zimg_filter_graph_build(src_format, dst_format, params);
-    if (!*graph)
-        return print_zimg_error(NULL);
-
-    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
-    if (ret)
-        return print_zimg_error(NULL);
-
-    if (size > *tmp_size) {
-        av_freep(tmp);
-        *tmp = av_malloc(size);
-        if (!*tmp)
-            return AVERROR(ENOMEM);
-
-        *tmp_size = size;
-    }
-
-    return 0;
-}
 
 static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
 {
     AVFrame *aligned = NULL;
-    int ret = 0, plane;
+    int ret = 0, plane, planes;
 
     /* Realign any unaligned input frame. */
-    for (plane = 0; plane < 3; plane++) {
+    planes = av_pix_fmt_count_planes(desc->nb_components);
+    for (plane = 0; plane < planes; plane++) {
         int p = desc->comp[plane].plane;
         if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
             if (!(aligned = av_frame_alloc())) {
@@ -554,6 +681,7 @@ fail:
     return ret;
 }
 
+
 static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
 {
     if (s->colorspace != -1)
@@ -572,20 +700,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
         frame->chroma_location = (int)s->dst_format.chroma_location + 1;
 }
 
+/* Slice worker: converts tile job_nr of td->in into td->out with this job's zimg graphs,
+ * rebuilding them first when the current formats/params differ from their *_tmp copies.
+ * Returns 0 on success or a negative AVERROR code. */
+static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
+{
+    ThreadData *td = data;
+    ZScaleContext *s = ctx->priv;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    int dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
+    int ret = 0, p, vsub, need_gb;
+
+    /* (re)build this job's zimg filter graphs only if they were not built earlier
+     or some frame or filter parameter has changed since then */
+    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
+        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
+        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
+    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
+        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
+            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
+            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
+
+    if (need_gb) {
+        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
+        if (ret < 0)
+            return ret; /* graphs_build() has already logged any zimg error */
+    }
+    for (int i = 0; i < 3; i++) {
+        p = td->desc->comp[i].plane;
+        src_buf.plane[i].data = td->in->data[p];
+        src_buf.plane[i].stride = td->in->linesize[p];
+        src_buf.plane[i].mask = -1;
+
+        p = td->odesc->comp[i].plane;
+        /* components 1 and 2 are chroma and have (tile height >> log2_chroma_h) rows per tile */
+        vsub = (i == 1 || i == 2) ? td->odesc->log2_chroma_h : 0;
+        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * (dst_tile_height >> vsub) * job_nr;
+        dst_buf.plane[i].stride = td->out->linesize[p];
+        dst_buf.plane[i].mask = -1;
+    }
+    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+    if (ret)
+        return print_zimg_error(ctx);
+
+    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        src_buf.plane[0].data = td->in->data[3];
+        src_buf.plane[0].stride = td->in->linesize[3];
+        src_buf.plane[0].mask = -1;
+
+        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height * job_nr;
+        dst_buf.plane[0].stride = td->out->linesize[3];
+        dst_buf.plane[0].mask = -1;
+
+        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+        if (ret)
+            return print_zimg_error(ctx);
+    }
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
     AVFrame *out = NULL;
-
-    if ((ret = realign_frame(desc, &in)) < 0)
-        goto fail;
+    ThreadData td;
 
     if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
         ret =  AVERROR(ENOMEM);
@@ -596,35 +781,60 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     out->width  = outlink->w;
     out->height = outlink->h;
 
-    if(   in->width  != link->w
-       || in->height != link->h
-       || in->format != link->format
-       || s->in_colorspace != in->colorspace
-       || s->in_trc  != in->color_trc
-       || s->in_primaries != in->color_primaries
-       || s->in_range != in->color_range
-       || s->out_colorspace != out->colorspace
-       || s->out_trc  != out->color_trc
-       || s->out_primaries != out->color_primaries
-       || s->out_range != out->color_range
-       || s->in_chromal != in->chroma_location
-       || s->out_chromal != out->chroma_location) {
+    // The filter only needs to run when the input and the output differ in some way;
+    // otherwise the input frame is just copied to the output.
+    if ((link->w != outlink->w) ||
+        (link->h != outlink->h) ||
+        (s->src_format.chroma_location != s->dst_format.chroma_location) ||
+        (s->src_format.color_family !=s->dst_format.color_family) ||
+        (s->src_format.color_primaries !=s->dst_format.color_primaries) ||
+        (s->src_format.depth !=s->dst_format.depth) ||
+        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients) ||
+        (s->src_format.field_parity !=s->dst_format.field_parity) ||
+        (s->src_format.pixel_range !=s->dst_format.pixel_range) ||
+        (s->src_format.pixel_type !=s->dst_format.pixel_type) ||
+        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
+    ){
+        if ((ret = realign_frame(desc, &in)) < 0)
+            goto fail;
+
         snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
         av_opt_set(s, "w", buf, 0);
         snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
         av_opt_set(s, "h", buf, 0);
 
+ 
         link->dst->inputs[0]->format = in->format;
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
-        if ((ret = config_props(outlink)) < 0)
-            goto fail;
+        update_output_color_information(s, out);
+    
+        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
+        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
+        s->in_colorspace = in->colorspace;
+        s->in_trc = in->color_trc;
+        s->in_primaries = in->color_primaries;
+        s->in_range = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc = out->color_trc;
+        s->out_primaries = out->color_primaries;
+        s->out_range = out->color_range;
+    
+        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+                  INT_MAX);
 
         zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
         zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
         zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
 
+        format_init(&s->src_format, in, desc, s->colorspace_in,
+            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
+        format_init(&s->dst_format, out, odesc, s->colorspace,
+            s->primaries, s->trc, s->range, s->chromal);
+
         s->params.dither_type = s->dither;
         s->params.cpu_type = ZIMG_CPU_AUTO;
         s->params.resample_filter = s->filter;
@@ -634,27 +844,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
         s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
 
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
-
-        update_output_color_information(s, out);
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
-
-        s->in_colorspace  = in->colorspace;
-        s->in_trc         = in->color_trc;
-        s->in_primaries   = in->color_primaries;
-        s->in_range       = in->color_range;
-        s->out_colorspace = out->colorspace;
-        s->out_trc        = out->color_trc;
-        s->out_primaries  = out->color_primaries;
-        s->out_range      = out->color_range;
-
         if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
             zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
             zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
@@ -670,76 +859,48 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
 
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
             s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
         }
-    }
 
-    update_output_color_information(s, out);
+        td.in = in;
+        td.out = out;
+        td.desc = desc;
+        td.odesc = odesc;
 
-    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
-              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
-              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
-              INT_MAX);
-
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
-
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
+        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
 
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
-
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
-
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
+        s->src_format_tmp = s->src_format;
+        s->dst_format_tmp = s->dst_format;
+        s->params_tmp = s->params;
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            s->alpha_src_format_tmp = s->alpha_src_format;
+            s->alpha_dst_format_tmp = s->alpha_dst_format;
+            s->alpha_params_tmp = s->alpha_params;
         }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
+
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
+            int x, y;
+            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+                for (y = 0; y < out->height; y++) {
+                    for (x = 0; x < out->width; x++) {
+                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                                av_float2int(1.0f));
+                    }
                 }
+            } else {
+                for (y = 0; y < outlink->h; y++)
+                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
             }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
         }
     }
-
+    else {
+        /*no need for any filtering */
+        ret = av_frame_copy(out, in);
+        if (ret < 0)
+            return ret;
+    }
 fail:
     av_frame_free(&in);
     if (ret) {
@@ -753,11 +914,12 @@ fail:
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
-
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    int i;
+    for (i = 0; i < s->nb_threads; i++) {
+        if (s->tmp[i]) av_freep(&s->tmp[i]);
+        zimg_filter_graph_free(s->graph[i]);
+        zimg_filter_graph_free(s->alpha_graph[i]);
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -941,4 +1103,5 @@ const AVFilter ff_vf_zscale = {
     FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
     FILTER_QUERY_FUNC(query_formats),
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SLICE_THREADS,
 };
-- 
2.31.1.windows.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-18 15:24 [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x Victoria Zhislina
@ 2022-02-18 15:48 ` James Almer
  2022-02-18 16:00   ` Paul B Mahol
  0 siblings, 1 reply; 17+ messages in thread
From: James Almer @ 2022-02-18 15:48 UTC (permalink / raw)
  To: ffmpeg-devel



On 2/18/2022 12:24 PM, Victoria Zhislina wrote:
> By ffmpeg threading support implementation via frame slicing and doing
> zimg_filter_graph_build that used to take 30-60% of each frame processing
> only if necessary (some parameters changed)
> the performance increase vs original version
> in video downscale and color conversion  >4x is seen
> on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> 
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> ---
>   libavfilter/vf_zscale.c | 787 ++++++++++++++++++++++++----------------
>   1 file changed, 475 insertions(+), 312 deletions(-)
> 
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..ea2565025f 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
>   /*
>    * Copyright (c) 2015 Paul B Mahol
> - *
> + * 2022 Victoria Zhislina, Intel
> +
>    * This file is part of FFmpeg.
>    *
>    * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
>   #include "libavutil/imgutils.h"
>   
>   #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>   
>   static const char *const var_names[] = {
>       "in_w",   "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>   
>       int force_original_aspect_ratio;
>   
> -    void *tmp;
> -    size_t tmp_size;
> +    void *tmp[MAX_THREADS]; //separate for each thread;
> +    int nb_threads;
> +    int slice_h;
>   
>       zimg_image_format src_format, dst_format;
>       zimg_image_format alpha_src_format, alpha_dst_format;
> +    zimg_image_format src_format_tmp, dst_format_tmp;
> +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
>       zimg_graph_builder_params alpha_params, params;
> -    zimg_filter_graph *alpha_graph, *graph;
> +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>   
>       enum AVColorSpace in_colorspace, out_colorspace;
>       enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
>       enum AVChromaLocation in_chromal, out_chromal;
>   } ZScaleContext;
>   
> +
> +typedef struct ThreadData {
> +    const AVPixFmtDescriptor *desc, *odesc;
> +    AVFrame *in, *out;
> +} ThreadData;
> +
> +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> +{
> +    switch (chroma_location) {
> +    case AVCHROMA_LOC_UNSPECIFIED:
> +    case AVCHROMA_LOC_LEFT:
> +        return ZIMG_CHROMA_LEFT;
> +    case AVCHROMA_LOC_CENTER:
> +        return ZIMG_CHROMA_CENTER;
> +    case AVCHROMA_LOC_TOPLEFT:
> +        return ZIMG_CHROMA_TOP_LEFT;
> +    case AVCHROMA_LOC_TOP:
> +        return ZIMG_CHROMA_TOP;
> +    case AVCHROMA_LOC_BOTTOMLEFT:
> +        return ZIMG_CHROMA_BOTTOM_LEFT;
> +    case AVCHROMA_LOC_BOTTOM:
> +        return ZIMG_CHROMA_BOTTOM;
> +    }
> +    return ZIMG_CHROMA_LEFT;
> +}
> +
> +static int convert_matrix(enum AVColorSpace colorspace)
> +{
> +    switch (colorspace) {
> +    case AVCOL_SPC_RGB:
> +        return ZIMG_MATRIX_RGB;
> +    case AVCOL_SPC_BT709:
> +        return ZIMG_MATRIX_709;
> +    case AVCOL_SPC_UNSPECIFIED:
> +        return ZIMG_MATRIX_UNSPECIFIED;
> +    case AVCOL_SPC_FCC:
> +        return ZIMG_MATRIX_FCC;
> +    case AVCOL_SPC_BT470BG:
> +        return ZIMG_MATRIX_470BG;
> +    case AVCOL_SPC_SMPTE170M:
> +        return ZIMG_MATRIX_170M;
> +    case AVCOL_SPC_SMPTE240M:
> +        return ZIMG_MATRIX_240M;
> +    case AVCOL_SPC_YCGCO:
> +        return ZIMG_MATRIX_YCGCO;
> +    case AVCOL_SPC_BT2020_NCL:
> +        return ZIMG_MATRIX_2020_NCL;
> +    case AVCOL_SPC_BT2020_CL:
> +        return ZIMG_MATRIX_2020_CL;
> +    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> +    case AVCOL_SPC_CHROMA_DERIVED_CL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> +    case AVCOL_SPC_ICTCP:
> +        return ZIMG_MATRIX_ICTCP;
> +    }
> +    return ZIMG_MATRIX_UNSPECIFIED;
> +}
> +
> +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> +{
> +    switch (color_trc) {
> +    case AVCOL_TRC_UNSPECIFIED:
> +        return ZIMG_TRANSFER_UNSPECIFIED;
> +    case AVCOL_TRC_BT709:
> +        return ZIMG_TRANSFER_709;
> +    case AVCOL_TRC_GAMMA22:
> +        return ZIMG_TRANSFER_470_M;
> +    case AVCOL_TRC_GAMMA28:
> +        return ZIMG_TRANSFER_470_BG;
> +    case AVCOL_TRC_SMPTE170M:
> +        return ZIMG_TRANSFER_601;
> +    case AVCOL_TRC_SMPTE240M:
> +        return ZIMG_TRANSFER_240M;
> +    case AVCOL_TRC_LINEAR:
> +        return ZIMG_TRANSFER_LINEAR;
> +    case AVCOL_TRC_LOG:
> +        return ZIMG_TRANSFER_LOG_100;
> +    case AVCOL_TRC_LOG_SQRT:
> +        return ZIMG_TRANSFER_LOG_316;
> +    case AVCOL_TRC_IEC61966_2_4:
> +        return ZIMG_TRANSFER_IEC_61966_2_4;
> +    case AVCOL_TRC_BT2020_10:
> +        return ZIMG_TRANSFER_2020_10;
> +    case AVCOL_TRC_BT2020_12:
> +        return ZIMG_TRANSFER_2020_12;
> +    case AVCOL_TRC_SMPTE2084:
> +        return ZIMG_TRANSFER_ST2084;
> +    case AVCOL_TRC_ARIB_STD_B67:
> +        return ZIMG_TRANSFER_ARIB_B67;
> +    case AVCOL_TRC_IEC61966_2_1:
> +        return ZIMG_TRANSFER_IEC_61966_2_1;
> +    }
> +    return ZIMG_TRANSFER_UNSPECIFIED;
> +}
> +
> +static int convert_primaries(enum AVColorPrimaries color_primaries)
> +{
> +    switch (color_primaries) {
> +    case AVCOL_PRI_UNSPECIFIED:
> +        return ZIMG_PRIMARIES_UNSPECIFIED;
> +    case AVCOL_PRI_BT709:
> +        return ZIMG_PRIMARIES_709;
> +    case AVCOL_PRI_BT470M:
> +        return ZIMG_PRIMARIES_470_M;
> +    case AVCOL_PRI_BT470BG:
> +        return ZIMG_PRIMARIES_470_BG;
> +    case AVCOL_PRI_SMPTE170M:
> +        return ZIMG_PRIMARIES_170M;
> +    case AVCOL_PRI_SMPTE240M:
> +        return ZIMG_PRIMARIES_240M;
> +    case AVCOL_PRI_FILM:
> +        return ZIMG_PRIMARIES_FILM;
> +    case AVCOL_PRI_BT2020:
> +        return ZIMG_PRIMARIES_2020;
> +    case AVCOL_PRI_SMPTE428:
> +        return ZIMG_PRIMARIES_ST428;
> +    case AVCOL_PRI_SMPTE431:
> +        return ZIMG_PRIMARIES_ST431_2;
> +    case AVCOL_PRI_SMPTE432:
> +        return ZIMG_PRIMARIES_ST432_1;
> +    case AVCOL_PRI_JEDEC_P22:
> +        return ZIMG_PRIMARIES_EBU3213_E;
> +    }
> +    return ZIMG_PRIMARIES_UNSPECIFIED;
> +}
> +
> +static int convert_range(enum AVColorRange color_range)
> +{
> +    switch (color_range) {
> +    case AVCOL_RANGE_UNSPECIFIED:
> +    case AVCOL_RANGE_MPEG:
> +        return ZIMG_RANGE_LIMITED;
> +    case AVCOL_RANGE_JPEG:
> +        return ZIMG_RANGE_FULL;
> +    }
> +    return ZIMG_RANGE_LIMITED;
> +}
> +
> +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> +{
> +    switch (color_range) {
> +    case ZIMG_RANGE_LIMITED:
> +        return AVCOL_RANGE_MPEG;
> +    case ZIMG_RANGE_FULL:
> +        return AVCOL_RANGE_JPEG;
> +    }
> +    return AVCOL_RANGE_UNSPECIFIED;
> +}
> +
>   static av_cold int init(AVFilterContext *ctx)
>   {
>       ZScaleContext *s = ctx->priv;
>       int ret;
> +    int i;
> +
> +    for (i = 0; i < MAX_THREADS; i++) {
> +        s->tmp[i] = NULL;
> +        s->graph[i] = NULL;
> +        s->alpha_graph[i] = NULL;
> +    }
> +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>   
>       if (s->size_str && (s->w_expr || s->h_expr)) {
>           av_log(ctx, AV_LOG_ERROR,
> @@ -158,7 +336,6 @@ static av_cold int init(AVFilterContext *ctx)
>           av_opt_set(s, "w", "iw", 0);
>       if (!s->h_expr)
>           av_opt_set(s, "h", "ih", 0);
> -
>       return 0;
>   }
>   
> @@ -194,6 +371,153 @@ static int query_formats(AVFilterContext *ctx)
>       return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
>   }
>   
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> +        (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> +        (img_fmt0->color_family != img_fmt1->color_family) ||
> +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> +        (img_fmt0->depth != img_fmt1->depth) ||
> +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> +        (img_fmt0->height != img_fmt1->height) ||
> +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> +        (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> +    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
> +    and NaN values that are default for some params are treated properly*/
> +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> +        (parm0->dither_type != parm1->dither_type) ||
> +        (parm0->resample_filter != parm1->resample_filter) ||
> +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> +    return ret;
> +}
> +
> +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> +    int colorspace, int primaries, int transfer, int range, int location)
> +{
> +    format->width = frame->width;
> +    format->height = frame->height;
> +    format->subsample_w = desc->log2_chroma_w;
> +    format->subsample_h = desc->log2_chroma_h;
> +    format->depth = desc->comp[0].depth;
> +    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> +    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> +    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> +    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> +    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> +    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> +}

Why are you moving all these functions up in the file? They make the 
patch much harder to read.

If moving them is necessary, then please split this patch in two. One 
moving the functions, then one applying the actual changes to them and 
the rest of the file. It will make reviewing much easier.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-18 15:48 ` James Almer
@ 2022-02-18 16:00   ` Paul B Mahol
  0 siblings, 0 replies; 17+ messages in thread
From: Paul B Mahol @ 2022-02-18 16:00 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Fri, Feb 18, 2022 at 12:48:10PM -0300, James Almer wrote:
> 
> 
> On 2/18/2022 12:24 PM, Victoria Zhislina wrote:
> > By ffmpeg threading support implementation via frame slicing and doing
> > zimg_filter_graph_build that used to take 30-60% of each frame processing
> > only if necessary (some parameters changed)
> > the performance increase vs original version
> > in video downscale and color conversion  >4x is seen
> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> > 
> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> > ---
> >   libavfilter/vf_zscale.c | 787 ++++++++++++++++++++++++----------------
> >   1 file changed, 475 insertions(+), 312 deletions(-)
> > 
> > diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> > index 1288c5efc1..ea2565025f 100644
> > --- a/libavfilter/vf_zscale.c
> > +++ b/libavfilter/vf_zscale.c
> > @@ -1,6 +1,7 @@
> >   /*
> >    * Copyright (c) 2015 Paul B Mahol
> > - *
> > + * 2022 Victoria Zhislina, Intel
> > +
> >    * This file is part of FFmpeg.
> >    *
> >    * FFmpeg is free software; you can redistribute it and/or
> > @@ -44,6 +45,8 @@
> >   #include "libavutil/imgutils.h"
> >   #define ZIMG_ALIGNMENT 32
> > +#define MIN_TILESIZE 64
> > +#define MAX_THREADS 64
> >   static const char *const var_names[] = {
> >       "in_w",   "iw",
> > @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
> >       int force_original_aspect_ratio;
> > -    void *tmp;
> > -    size_t tmp_size;
> > +    void *tmp[MAX_THREADS]; //separate for each thread;
> > +    int nb_threads;
> > +    int slice_h;
> >       zimg_image_format src_format, dst_format;
> >       zimg_image_format alpha_src_format, alpha_dst_format;
> > +    zimg_image_format src_format_tmp, dst_format_tmp;
> > +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
> >       zimg_graph_builder_params alpha_params, params;
> > -    zimg_filter_graph *alpha_graph, *graph;
> > +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> > +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
> >       enum AVColorSpace in_colorspace, out_colorspace;
> >       enum AVColorTransferCharacteristic in_trc, out_trc;
> > @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
> >       enum AVChromaLocation in_chromal, out_chromal;
> >   } ZScaleContext;
> > +
> > +typedef struct ThreadData {
> > +    const AVPixFmtDescriptor *desc, *odesc;
> > +    AVFrame *in, *out;
> > +} ThreadData;
> > +
> > +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> > +{
> > +    switch (chroma_location) {
> > +    case AVCHROMA_LOC_UNSPECIFIED:
> > +    case AVCHROMA_LOC_LEFT:
> > +        return ZIMG_CHROMA_LEFT;
> > +    case AVCHROMA_LOC_CENTER:
> > +        return ZIMG_CHROMA_CENTER;
> > +    case AVCHROMA_LOC_TOPLEFT:
> > +        return ZIMG_CHROMA_TOP_LEFT;
> > +    case AVCHROMA_LOC_TOP:
> > +        return ZIMG_CHROMA_TOP;
> > +    case AVCHROMA_LOC_BOTTOMLEFT:
> > +        return ZIMG_CHROMA_BOTTOM_LEFT;
> > +    case AVCHROMA_LOC_BOTTOM:
> > +        return ZIMG_CHROMA_BOTTOM;
> > +    }
> > +    return ZIMG_CHROMA_LEFT;
> > +}
> > +
> > +static int convert_matrix(enum AVColorSpace colorspace)
> > +{
> > +    switch (colorspace) {
> > +    case AVCOL_SPC_RGB:
> > +        return ZIMG_MATRIX_RGB;
> > +    case AVCOL_SPC_BT709:
> > +        return ZIMG_MATRIX_709;
> > +    case AVCOL_SPC_UNSPECIFIED:
> > +        return ZIMG_MATRIX_UNSPECIFIED;
> > +    case AVCOL_SPC_FCC:
> > +        return ZIMG_MATRIX_FCC;
> > +    case AVCOL_SPC_BT470BG:
> > +        return ZIMG_MATRIX_470BG;
> > +    case AVCOL_SPC_SMPTE170M:
> > +        return ZIMG_MATRIX_170M;
> > +    case AVCOL_SPC_SMPTE240M:
> > +        return ZIMG_MATRIX_240M;
> > +    case AVCOL_SPC_YCGCO:
> > +        return ZIMG_MATRIX_YCGCO;
> > +    case AVCOL_SPC_BT2020_NCL:
> > +        return ZIMG_MATRIX_2020_NCL;
> > +    case AVCOL_SPC_BT2020_CL:
> > +        return ZIMG_MATRIX_2020_CL;
> > +    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> > +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> > +    case AVCOL_SPC_CHROMA_DERIVED_CL:
> > +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> > +    case AVCOL_SPC_ICTCP:
> > +        return ZIMG_MATRIX_ICTCP;
> > +    }
> > +    return ZIMG_MATRIX_UNSPECIFIED;
> > +}
> > +
> > +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> > +{
> > +    switch (color_trc) {
> > +    case AVCOL_TRC_UNSPECIFIED:
> > +        return ZIMG_TRANSFER_UNSPECIFIED;
> > +    case AVCOL_TRC_BT709:
> > +        return ZIMG_TRANSFER_709;
> > +    case AVCOL_TRC_GAMMA22:
> > +        return ZIMG_TRANSFER_470_M;
> > +    case AVCOL_TRC_GAMMA28:
> > +        return ZIMG_TRANSFER_470_BG;
> > +    case AVCOL_TRC_SMPTE170M:
> > +        return ZIMG_TRANSFER_601;
> > +    case AVCOL_TRC_SMPTE240M:
> > +        return ZIMG_TRANSFER_240M;
> > +    case AVCOL_TRC_LINEAR:
> > +        return ZIMG_TRANSFER_LINEAR;
> > +    case AVCOL_TRC_LOG:
> > +        return ZIMG_TRANSFER_LOG_100;
> > +    case AVCOL_TRC_LOG_SQRT:
> > +        return ZIMG_TRANSFER_LOG_316;
> > +    case AVCOL_TRC_IEC61966_2_4:
> > +        return ZIMG_TRANSFER_IEC_61966_2_4;
> > +    case AVCOL_TRC_BT2020_10:
> > +        return ZIMG_TRANSFER_2020_10;
> > +    case AVCOL_TRC_BT2020_12:
> > +        return ZIMG_TRANSFER_2020_12;
> > +    case AVCOL_TRC_SMPTE2084:
> > +        return ZIMG_TRANSFER_ST2084;
> > +    case AVCOL_TRC_ARIB_STD_B67:
> > +        return ZIMG_TRANSFER_ARIB_B67;
> > +    case AVCOL_TRC_IEC61966_2_1:
> > +        return ZIMG_TRANSFER_IEC_61966_2_1;
> > +    }
> > +    return ZIMG_TRANSFER_UNSPECIFIED;
> > +}
> > +
> > +static int convert_primaries(enum AVColorPrimaries color_primaries)
> > +{
> > +    switch (color_primaries) {
> > +    case AVCOL_PRI_UNSPECIFIED:
> > +        return ZIMG_PRIMARIES_UNSPECIFIED;
> > +    case AVCOL_PRI_BT709:
> > +        return ZIMG_PRIMARIES_709;
> > +    case AVCOL_PRI_BT470M:
> > +        return ZIMG_PRIMARIES_470_M;
> > +    case AVCOL_PRI_BT470BG:
> > +        return ZIMG_PRIMARIES_470_BG;
> > +    case AVCOL_PRI_SMPTE170M:
> > +        return ZIMG_PRIMARIES_170M;
> > +    case AVCOL_PRI_SMPTE240M:
> > +        return ZIMG_PRIMARIES_240M;
> > +    case AVCOL_PRI_FILM:
> > +        return ZIMG_PRIMARIES_FILM;
> > +    case AVCOL_PRI_BT2020:
> > +        return ZIMG_PRIMARIES_2020;
> > +    case AVCOL_PRI_SMPTE428:
> > +        return ZIMG_PRIMARIES_ST428;
> > +    case AVCOL_PRI_SMPTE431:
> > +        return ZIMG_PRIMARIES_ST431_2;
> > +    case AVCOL_PRI_SMPTE432:
> > +        return ZIMG_PRIMARIES_ST432_1;
> > +    case AVCOL_PRI_JEDEC_P22:
> > +        return ZIMG_PRIMARIES_EBU3213_E;
> > +    }
> > +    return ZIMG_PRIMARIES_UNSPECIFIED;
> > +}
> > +
> > +static int convert_range(enum AVColorRange color_range)
> > +{
> > +    switch (color_range) {
> > +    case AVCOL_RANGE_UNSPECIFIED:
> > +    case AVCOL_RANGE_MPEG:
> > +        return ZIMG_RANGE_LIMITED;
> > +    case AVCOL_RANGE_JPEG:
> > +        return ZIMG_RANGE_FULL;
> > +    }
> > +    return ZIMG_RANGE_LIMITED;
> > +}
> > +
> > +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> > +{
> > +    switch (color_range) {
> > +    case ZIMG_RANGE_LIMITED:
> > +        return AVCOL_RANGE_MPEG;
> > +    case ZIMG_RANGE_FULL:
> > +        return AVCOL_RANGE_JPEG;
> > +    }
> > +    return AVCOL_RANGE_UNSPECIFIED;
> > +}
> > +
> >   static av_cold int init(AVFilterContext *ctx)
> >   {
> >       ZScaleContext *s = ctx->priv;
> >       int ret;
> > +    int i;
> > +
> > +    for (i = 0; i < MAX_THREADS; i++) {
> > +        s->tmp[i] = NULL;
> > +        s->graph[i] = NULL;
> > +        s->alpha_graph[i] = NULL;
> > +    }
> > +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> > +
> > +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> > +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> > +
> > +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> > +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> > +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> > +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
> >       if (s->size_str && (s->w_expr || s->h_expr)) {
> >           av_log(ctx, AV_LOG_ERROR,
> > @@ -158,7 +336,6 @@ static av_cold int init(AVFilterContext *ctx)
> >           av_opt_set(s, "w", "iw", 0);
> >       if (!s->h_expr)
> >           av_opt_set(s, "h", "ih", 0);
> > -
> >       return 0;
> >   }
> > @@ -194,6 +371,153 @@ static int query_formats(AVFilterContext *ctx)
> >       return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
> >   }
> > +/* returns 0 if image formats are the same and 1 otherwise */
> > +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> > +{
> > +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> > +#if ZIMG_API_VERSION >= 0x204
> > +        (img_fmt0->alpha != img_fmt1->alpha) ||
> > +#endif
> > +        (img_fmt0->color_family != img_fmt1->color_family) ||
> > +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> > +        (img_fmt0->depth != img_fmt1->depth) ||
> > +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> > +        (img_fmt0->height != img_fmt1->height) ||
> > +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> > +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> > +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> > +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> > +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> > +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> > +        (img_fmt0->width != img_fmt1->width));
> > +}
> > +
> > +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> > +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> > +{
> > +    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
> > +    and NaN values that are default for some params are treated properly*/
> > +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> > +        (parm0->dither_type != parm1->dither_type) ||
> > +        (parm0->resample_filter != parm1->resample_filter) ||
> > +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> > +
> > +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> > +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> > +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> > +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> > +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> > +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> > +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> > +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> > +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> > +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> > +
> > +    return ret;
> > +}
> > +
> > +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> > +    int colorspace, int primaries, int transfer, int range, int location)
> > +{
> > +    format->width = frame->width;
> > +    format->height = frame->height;
> > +    format->subsample_w = desc->log2_chroma_w;
> > +    format->subsample_h = desc->log2_chroma_h;
> > +    format->depth = desc->comp[0].depth;
> > +    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> > +    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> > +    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> > +    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> > +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> > +    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> > +    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> > +}
> 
> Why are you moving all these functions up in the file? They make the patch
> much harder to read.
> 
> If moving them is necessary, then please split this patch in two. One moving
> the functions, then one applying the actual changes to them and the rest of
> the file. It will make reviewing much easier.

Also, please remove the trailing whitespace in the patch.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-03-10 18:47         ` Paul B Mahol
@ 2022-03-11 16:42           ` Victoria Zhislina
  0 siblings, 0 replies; 17+ messages in thread
From: Victoria Zhislina @ 2022-03-11 16:42 UTC (permalink / raw)
  To: Paul B Mahol; +Cc: FFmpeg development discussions and patches

Awesome, thanks!

On Thu, Mar 10, 2022 at 9:45 PM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Thu, Mar 10, 2022 at 7:41 PM Victoria Zhislina <niva213@gmail.com>
> wrote:
>
>> Paul and all, do you have any chances to view my patch from Feb,19? I
>> assume I'\ve fixed all you've kindly pointed out and even more. Please
>> correct me if I'm wrong. The only question remaining is - are you ok
>> with the combination of threading and conditional filter operation (= do
>> something if it is really required only) or you prefer to split it to 2
>> separate corresponding patches. I'd prefer the first option because it
>> makes git ffmpeg repo and ffmpeg development cleaner not dirtier...
>>
>
> Patch was already applied and some found issues fixed.
>
>
>>
>> On Tue, Feb 22, 2022 at 11:15 AM Paul B Mahol <onemda@gmail.com> wrote:
>>
>>> On Tue, Feb 22, 2022 at 9:15 AM Paul B Mahol <onemda@gmail.com> wrote:
>>>
>>> >
>>> >
>>> > On Tue, Feb 22, 2022 at 6:25 AM Lynne <dev@lynne.ee> wrote:
>>> >
>>> >> 19 Feb 2022, 14:58 by niva213@gmail.com:
>>> >>
>>> >> > By ffmpeg threading support implementation via frame slicing and
>>> doing
>>> >> > zimg_filter_graph_build that used to take 30-60% of each frame
>>> processig
>>> >> > only if necessary (some parameters changed)
>>> >> > the performance increase vs original version
>>> >> > in video downscale and color conversion  >4x is seen
>>> >> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
>>> >> >
>>> >> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
>>> >> >
>>> >>
>>> >> Can't you patch such a feature into the upstream instead?
>>> >>
>>> >
>>> > zscale already have own threading ability, but is very hard to use it,
>>> > last time i tried.
>>> >
>>>
>>> I mean zimg.
>>>
>>>
>>> >
>>> >
>>> >> _______________________________________________
>>> >> ffmpeg-devel mailing list
>>> >> ffmpeg-devel@ffmpeg.org
>>> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>> >>
>>> >> To unsubscribe, visit link above, or email
>>> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>> >>
>>> >
>>> _______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>> To unsubscribe, visit link above, or email
>>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>
>>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-03-10 18:41       ` Victoria Zhislina
@ 2022-03-10 18:47         ` Paul B Mahol
  2022-03-11 16:42           ` Victoria Zhislina
  0 siblings, 1 reply; 17+ messages in thread
From: Paul B Mahol @ 2022-03-10 18:47 UTC (permalink / raw)
  To: Victoria Zhislina; +Cc: FFmpeg development discussions and patches

On Thu, Mar 10, 2022 at 7:41 PM Victoria Zhislina <niva213@gmail.com> wrote:

> Paul and all, do you have any chances to view my patch from Feb,19? I
> assume I'\ve fixed all you've kindly pointed out and even more. Please
> correct me if I'm wrong. The only question remaining is - are you ok
> with the combination of threading and conditional filter operation (= do
> something if it is really required only) or you prefer to split it to 2
> separate corresponding patches. I'd prefer the first option because it
> makes git ffmpeg repo and ffmpeg development cleaner not dirtier...
>

The patch was already applied, and some issues that were found have been fixed.


>
> On Tue, Feb 22, 2022 at 11:15 AM Paul B Mahol <onemda@gmail.com> wrote:
>
>> On Tue, Feb 22, 2022 at 9:15 AM Paul B Mahol <onemda@gmail.com> wrote:
>>
>> >
>> >
>> > On Tue, Feb 22, 2022 at 6:25 AM Lynne <dev@lynne.ee> wrote:
>> >
>> >> 19 Feb 2022, 14:58 by niva213@gmail.com:
>> >>
>> >> > By ffmpeg threading support implementation via frame slicing and
>> doing
>> >> > zimg_filter_graph_build that used to take 30-60% of each frame
>> processig
>> >> > only if necessary (some parameters changed)
>> >> > the performance increase vs original version
>> >> > in video downscale and color conversion  >4x is seen
>> >> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
>> >> >
>> >> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
>> >> >
>> >>
>> >> Can't you patch such a feature into the upstream instead?
>> >>
>> >
>> > zscale already have own threading ability, but is very hard to use it,
>> > last time i tried.
>> >
>>
>> I mean zimg.
>>
>>
>> >
>> >
>> >> _______________________________________________
>> >> ffmpeg-devel mailing list
>> >> ffmpeg-devel@ffmpeg.org
>> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>> >>
>> >> To unsubscribe, visit link above, or email
>> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>> >>
>> >
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-22  8:16     ` Paul B Mahol
  2022-02-22  9:34       ` Victoria Zhislina
@ 2022-03-10 18:41       ` Victoria Zhislina
  2022-03-10 18:47         ` Paul B Mahol
  1 sibling, 1 reply; 17+ messages in thread
From: Victoria Zhislina @ 2022-03-10 18:41 UTC (permalink / raw)
  To: FFmpeg development discussions and patches, onemda

Paul and all, have you had a chance to review my patch from Feb 19? I
believe I've fixed everything you kindly pointed out, and even more. Please
correct me if I'm wrong. The only remaining question is: are you OK
with combining threading and the conditional filter operation (= doing
something only if it is really required), or would you prefer to split it
into 2 separate corresponding patches? I'd prefer the first option because it
keeps the ffmpeg git repo and ffmpeg development cleaner, not dirtier...

On Tue, Feb 22, 2022 at 11:15 AM Paul B Mahol <onemda@gmail.com> wrote:

> On Tue, Feb 22, 2022 at 9:15 AM Paul B Mahol <onemda@gmail.com> wrote:
>
> >
> >
> > On Tue, Feb 22, 2022 at 6:25 AM Lynne <dev@lynne.ee> wrote:
> >
> >> 19 Feb 2022, 14:58 by niva213@gmail.com:
> >>
> >> > By ffmpeg threading support implementation via frame slicing and doing
> >> > zimg_filter_graph_build that used to take 30-60% of each frame
> processig
> >> > only if necessary (some parameters changed)
> >> > the performance increase vs original version
> >> > in video downscale and color conversion  >4x is seen
> >> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> >> >
> >> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> >> >
> >>
> >> Can't you patch such a feature into the upstream instead?
> >>
> >
> > zscale already have own threading ability, but is very hard to use it,
> > last time i tried.
> >
>
> I mean zimg.
>
>
> >
> >
> >> _______________________________________________
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >>
> >> To unsubscribe, visit link above, or email
> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >>
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-22  8:16     ` Paul B Mahol
@ 2022-02-22  9:34       ` Victoria Zhislina
  2022-03-10 18:41       ` Victoria Zhislina
  1 sibling, 0 replies; 17+ messages in thread
From: Victoria Zhislina @ 2022-02-22  9:34 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Paul, I had exactly the same feeling about using zimg's threading myself and
decided to go along the standard ffmpeg threading route.
It looks more consistent here.
Many thanks for reviewing my patch; I've fixed everything you mentioned
and even more.

On Tue, Feb 22, 2022 at 11:15 AM Paul B Mahol <onemda@gmail.com> wrote:

> On Tue, Feb 22, 2022 at 9:15 AM Paul B Mahol <onemda@gmail.com> wrote:
>
> >
> >
> > On Tue, Feb 22, 2022 at 6:25 AM Lynne <dev@lynne.ee> wrote:
> >
> >> 19 Feb 2022, 14:58 by niva213@gmail.com:
> >>
> >> > By ffmpeg threading support implementation via frame slicing and doing
> >> > zimg_filter_graph_build that used to take 30-60% of each frame
> processig
> >> > only if necessary (some parameters changed)
> >> > the performance increase vs original version
> >> > in video downscale and color conversion  >4x is seen
> >> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> >> >
> >> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> >> >
> >>
> >> Can't you patch such a feature into the upstream instead?
> >>
> >
> > zscale already have own threading ability, but is very hard to use it,
> > last time i tried.
> >
>
> I mean zimg.
>
>
> >
> >
> >> _______________________________________________
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >>
> >> To unsubscribe, visit link above, or email
> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >>
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-22  8:15   ` Paul B Mahol
@ 2022-02-22  8:16     ` Paul B Mahol
  2022-02-22  9:34       ` Victoria Zhislina
  2022-03-10 18:41       ` Victoria Zhislina
  0 siblings, 2 replies; 17+ messages in thread
From: Paul B Mahol @ 2022-02-22  8:16 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Tue, Feb 22, 2022 at 9:15 AM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Tue, Feb 22, 2022 at 6:25 AM Lynne <dev@lynne.ee> wrote:
>
>> 19 Feb 2022, 14:58 by niva213@gmail.com:
>>
>> > By ffmpeg threading support implementation via frame slicing and doing
>> > zimg_filter_graph_build that used to take 30-60% of each frame processig
>> > only if necessary (some parameters changed)
>> > the performance increase vs original version
>> > in video downscale and color conversion  >4x is seen
>> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
>> >
>> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
>> >
>>
>> Can't you patch such a feature into the upstream instead?
>>
>
> zscale already have own threading ability, but is very hard to use it,
> last time i tried.
>

I mean zimg.


>
>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-22  5:25 ` Lynne
@ 2022-02-22  8:15   ` Paul B Mahol
  2022-02-22  8:16     ` Paul B Mahol
  0 siblings, 1 reply; 17+ messages in thread
From: Paul B Mahol @ 2022-02-22  8:15 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Tue, Feb 22, 2022 at 6:25 AM Lynne <dev@lynne.ee> wrote:

> 19 Feb 2022, 14:58 by niva213@gmail.com:
>
> > By ffmpeg threading support implementation via frame slicing and doing
> > zimg_filter_graph_build that used to take 30-60% of each frame processig
> > only if necessary (some parameters changed)
> > the performance increase vs original version
> > in video downscale and color conversion  >4x is seen
> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> >
> > Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> >
>
> Can't you patch such a feature into the upstream instead?
>

zscale already has its own threading ability, but it was very hard to use the
last time I tried.


> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-19 13:58 Victoria Zhislina
  2022-02-20 18:51 ` Paul B Mahol
@ 2022-02-22  5:25 ` Lynne
  2022-02-22  8:15   ` Paul B Mahol
  1 sibling, 1 reply; 17+ messages in thread
From: Lynne @ 2022-02-22  5:25 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

19 Feb 2022, 14:58 by niva213@gmail.com:

> By ffmpeg threading support implementation via frame slicing and doing
> zimg_filter_graph_build that used to take 30-60% of each frame processig
> only if necessary (some parameters changed)
> the performance increase vs original version
> in video downscale and color conversion  >4x is seen
> on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
>
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
>

Can't you patch such a feature into the upstream instead?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-21 11:22 ` Anton Khirnov
@ 2022-02-21 15:22   ` Victoria Zhislina
  0 siblings, 0 replies; 17+ messages in thread
From: Victoria Zhislina @ 2022-02-21 15:22 UTC (permalink / raw)
  To: FFmpeg development discussions and patches, Victoria Zhislina

Hi, Anton. Thanks for your input. But the patch does the single thing
described in the commit message - it improves performance >4x :)  Sorry.
This patch is based on real experience and on real measurements.
Please note that I don't write ">40x", so it is not an advertisement :).
You are right: under the hood it does 2 main things and one small
additional one, combined to achieve the performance gain mentioned in the
commit message :)
However, the changes are extremely local - they cover just a couple of
functions in a single file, and it doesn't make sense to split them. It
seems to me that a split would make the ffmpeg-devel mailing list and the
ffmpeg git log dirtier, not cleaner.
So let's wait for Paul B Mahol's opinion - it is his code that I've
modified.

On Mon, Feb 21, 2022 at 2:22 PM Anton Khirnov <anton@khirnov.net> wrote:

> > libavfilter: zscale performance optimization >4x
>
> This reads like an advertisement rather than a useful description. It
> should say what the patch does, performance improvement numbers should
> be mentioned in the commit message body.
>
> Quoting Victoria Zhislina (2022-02-21 09:20:55)
> > By ffmpeg threading support implementation via frame slicing and doing
> > zimg_filter_graph_build that used to take 30-60% of each frame processig
> > only if necessary (some parameters changed)
> > the performance increase vs original version
> > in video downscale and color conversion  >4x is seen
> > on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
>
> This implies the patch does multiple unrelated things. Then it should be
> split in multiple patches, unless some important factor prevents that
> (then that factor should be described in the commit message).
>
> --
> Anton Khirnov
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-21  8:20 Victoria Zhislina
@ 2022-02-21 11:22 ` Anton Khirnov
  2022-02-21 15:22   ` Victoria Zhislina
  0 siblings, 1 reply; 17+ messages in thread
From: Anton Khirnov @ 2022-02-21 11:22 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Victoria Zhislina

> libavfilter: zscale performance optimization >4x

This reads like an advertisement rather than a useful description. It
should say what the patch does, performance improvement numbers should
be mentioned in the commit message body.

Quoting Victoria Zhislina (2022-02-21 09:20:55)
> By ffmpeg threading support implementation via frame slicing and doing
> zimg_filter_graph_build that used to take 30-60% of each frame processig
> only if necessary (some parameters changed)
> the performance increase vs original version
> in video downscale and color conversion  >4x is seen
> on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)

This implies the patch does multiple unrelated things. Then it should be
split in multiple patches, unless some important factor prevents that
(then that factor should be described in the commit message).

-- 
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
@ 2022-02-21  8:20 Victoria Zhislina
  2022-02-21 11:22 ` Anton Khirnov
  0 siblings, 1 reply; 17+ messages in thread
From: Victoria Zhislina @ 2022-02-21  8:20 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Victoria Zhislina

By implementing ffmpeg threading support via frame slicing and by calling
zimg_filter_graph_build (which used to take 30-60% of each frame's processing
time) only when necessary (i.e. when some parameters have changed),
a performance increase of >4x vs. the original version
in video downscale and color conversion is seen
on a 64-core Intel Xeon, and 3x on an i7-6700K (4 cores with HT)

Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
---
 libavfilter/vf_zscale.c | 413 +++++++++++++++++++++++++++-------------
 1 file changed, 284 insertions(+), 129 deletions(-)

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..dd0017607e 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2015 Paul B Mahol
- *
+ * 2022 Victoria Zhislina, Intel
+
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@
 #include "libavutil/imgutils.h"
 
 #define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
 
 static const char *const var_names[] = {
     "in_w",   "iw",
@@ -113,13 +116,17 @@ typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
+    void *tmp[MAX_THREADS]; //separate for each thread;
+    int nb_threads;
+    int slice_h;
 
     zimg_image_format src_format, dst_format;
     zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_image_format src_format_tmp, dst_format_tmp;
+    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
     zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    zimg_graph_builder_params alpha_params_tmp, params_tmp;
+    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,35 @@ typedef struct ZScaleContext {
     enum AVChromaLocation in_chromal, out_chromal;
 } ZScaleContext;
 
+typedef struct ThreadData {
+    const AVPixFmtDescriptor *desc, *odesc;
+    AVFrame *in, *out;
+} ThreadData;
+
 static av_cold int init(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
     int ret;
+    int i;
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->tmp[i] = NULL;
+        s->graph[i] = NULL;
+        s->alpha_graph[i] = NULL;
+    }
+    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
 
     if (s->size_str && (s->w_expr || s->h_expr)) {
         av_log(ctx, AV_LOG_ERROR,
@@ -158,7 +190,6 @@ static av_cold int init(AVFilterContext *ctx)
         av_opt_set(s, "w", "iw", 0);
     if (!s->h_expr)
         av_opt_set(s, "h", "ih", 0);
-
     return 0;
 }
 
@@ -471,6 +502,51 @@ static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_r
     return AVCOL_RANGE_UNSPECIFIED;
 }
 
+/* returns 0 if image formats are the same and 1 otherwise */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
+{
+    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
+#if ZIMG_API_VERSION >= 0x204
+        (img_fmt0->alpha != img_fmt1->alpha) ||
+#endif
+        (img_fmt0->color_family != img_fmt1->color_family) ||
+        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
+        (img_fmt0->depth != img_fmt1->depth) ||
+        (img_fmt0->field_parity != img_fmt1->field_parity) ||
+        (img_fmt0->height != img_fmt1->height) ||
+        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
+        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
+        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
+        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
+        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
+        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
+        (img_fmt0->width != img_fmt1->width));
+}
+
+/* returns 0 if graph builder parameters are the same and 1 otherwise */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
+{
+    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
+    and NaN values that are default for some params are treated properly*/
+    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
+        (parm0->dither_type != parm1->dither_type) ||
+        (parm0->resample_filter != parm1->resample_filter) ||
+        (parm0->resample_filter_uv != parm1->resample_filter_uv);
+
+    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
+        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
+    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
+        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
+    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
+        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
+    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
+        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
+    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
+        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
+
+    return ret;
+}
+
 static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
                         int colorspace, int primaries, int transfer, int range, int location)
 {
@@ -483,46 +559,94 @@ static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFm
     format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
     format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
     format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
-    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
+    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
     format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
     format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
 }
 
-static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
-                       zimg_image_format *src_format, zimg_image_format *dst_format,
-                       void **tmp, size_t *tmp_size)
+static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
+                        ZScaleContext *s, int job_nr)
 {
     int ret;
     size_t size;
+    zimg_image_format src_format;
+    zimg_image_format dst_format;
+    zimg_image_format alpha_src_format;
+    zimg_image_format alpha_dst_format;
+
+    src_format = s->src_format;
+    dst_format = s->dst_format;
+    /* The input slice is specified through the active_region field,
+    unlike the output slice.
+    according to zimg requirements input and output slices should have even dimentions */
+    src_format.active_region.width = in->width;
+    src_format.active_region.height = s->slice_h;
+    src_format.active_region.left = 0;
+    src_format.active_region.top = job_nr * src_format.active_region.height;
+    //dst now is the single tile only!!
+    dst_format.width = out->width;
+    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+    //the last slice could differ from the previous ones due to the slices division "tail"
+    if (job_nr == (s->nb_threads - 1)) {
+        src_format.active_region.height = src_format.height - src_format.active_region.top;
+        dst_format.height = out->height - job_nr * dst_format.height;
+    }
 
-    zimg_filter_graph_free(*graph);
-    *graph = zimg_filter_graph_build(src_format, dst_format, params);
-    if (!*graph)
+    if (s->graph[job_nr]) {
+        zimg_filter_graph_free(s->graph[job_nr]);
+    }
+    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
+    if (!s->graph[job_nr])
         return print_zimg_error(NULL);
 
-    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
+    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
     if (ret)
         return print_zimg_error(NULL);
 
-    if (size > *tmp_size) {
-        av_freep(tmp);
-        *tmp = av_malloc(size);
-        if (!*tmp)
-            return AVERROR(ENOMEM);
-
-        *tmp_size = size;
-    }
+    if (s->tmp[job_nr])
+        av_freep(&s->tmp[job_nr]);
+    s->tmp[job_nr] = av_malloc(size);
+    if (!s->tmp[job_nr])
+        return AVERROR(ENOMEM);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        alpha_src_format = s->alpha_src_format;
+        alpha_dst_format = s->alpha_dst_format;
+        /* The input slice is specified through the active_region field, unlike the output slice.
+        according to zimg requirements input and output slices should have even dimentions */
+        alpha_src_format.active_region.width = in->width;
+        alpha_src_format.active_region.height = s->slice_h;
+        alpha_src_format.active_region.left = 0;
+        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
+        //dst now is the single tile only!!
+        alpha_dst_format.width = out->width;
+        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+        //the last slice could differ from the previous ones due to the slices division "tail"
+        if (job_nr == (s->nb_threads - 1)) {
+            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
+            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
+        }
 
+        if (s->alpha_graph[job_nr]) {
+            zimg_filter_graph_free(s->alpha_graph[job_nr]);
+        }
+        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
+        if (!s->alpha_graph[job_nr])
+            return print_zimg_error(NULL);
+     }
     return 0;
 }
 
 static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
 {
     AVFrame *aligned = NULL;
-    int ret = 0, plane;
+    int ret = 0, plane, planes;
 
     /* Realign any unaligned input frame. */
-    for (plane = 0; plane < 3; plane++) {
+    planes = av_pix_fmt_count_planes(desc->nb_components);
+    for (plane = 0; plane < planes; plane++) {
         int p = desc->comp[plane].plane;
         if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
             if (!(aligned = av_frame_alloc())) {
@@ -572,20 +696,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
         frame->chroma_location = (int)s->dst_format.chroma_location + 1;
 }
 
+static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
+{
+    ThreadData *td = data;
+    int ret = 0;
+    int p;
+    int out_sampl;
+    int need_gb;
+    ZScaleContext *s = ctx->priv;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    int  dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
+
+    /* create zimg filter graphs for each thread
+     only if not created earlier or there is some change in frame parameters */
+    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
+        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
+        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
+    if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
+        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
+            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
+            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
+
+    if (need_gb){
+        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
+        if (ret < 0)
+            return print_zimg_error(ctx);
+    }
+    out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
+    for (int i = 0; i < 3; i++) {
+        p = td->desc->comp[i].plane;
+
+        src_buf.plane[i].data = td->in->data[p];
+        src_buf.plane[i].stride = td->in->linesize[p];
+        src_buf.plane[i].mask = -1;
+
+        p = td->odesc->comp[i].plane;
+        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
+        dst_buf.plane[i].stride = td->out->linesize[p];
+        dst_buf.plane[i].mask = -1;
+    }
+    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+    if (ret)
+        return  print_zimg_error(ctx);
+
+    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        src_buf.plane[0].data = td->in->data[3];
+        src_buf.plane[0].stride = td->in->linesize[3];
+        src_buf.plane[0].mask = -1;
+
+        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height  * job_nr;
+        dst_buf.plane[0].stride = td->out->linesize[3];
+        dst_buf.plane[0].mask = -1;
+
+        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+        if (ret)
+            return print_zimg_error(ctx);
+    }
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
     AVFrame *out = NULL;
-
-    if ((ret = realign_frame(desc, &in)) < 0)
-        goto fail;
+    ThreadData td;
 
     if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
         ret =  AVERROR(ENOMEM);
@@ -596,19 +777,23 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     out->width  = outlink->w;
     out->height = outlink->h;
 
-    if(   in->width  != link->w
-       || in->height != link->h
-       || in->format != link->format
-       || s->in_colorspace != in->colorspace
-       || s->in_trc  != in->color_trc
-       || s->in_primaries != in->color_primaries
-       || s->in_range != in->color_range
-       || s->out_colorspace != out->colorspace
-       || s->out_trc  != out->color_trc
-       || s->out_primaries != out->color_primaries
-       || s->out_range != out->color_range
-       || s->in_chromal != in->chroma_location
-       || s->out_chromal != out->chroma_location) {
+    //we only need to run this filter if something differs between the input and the output;
+    //otherwise, just pass the input frame through to the output
+    if ((link->w != outlink->w) ||
+        (link->h != outlink->h) ||
+        (s->src_format.chroma_location != s->dst_format.chroma_location) ||
+        (s->src_format.color_family !=s->dst_format.color_family) ||
+        (s->src_format.color_primaries !=s->dst_format.color_primaries) ||
+        (s->src_format.depth !=s->dst_format.depth) ||
+        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients) ||
+        (s->src_format.field_parity !=s->dst_format.field_parity) ||
+        (s->src_format.pixel_range !=s->dst_format.pixel_range) ||
+        (s->src_format.pixel_type !=s->dst_format.pixel_type) ||
+        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
+    ){
+        if ((ret = realign_frame(desc, &in)) < 0)
+            goto fail;
+
         snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
         av_opt_set(s, "w", buf, 0);
         snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
@@ -618,13 +803,26 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
-        if ((ret = config_props(outlink)) < 0)
-            goto fail;
+        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
+        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
+        s->in_colorspace = in->colorspace;
+        s->in_trc = in->color_trc;
+        s->in_primaries = in->color_primaries;
+        s->in_range = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc = out->color_trc;
+        s->out_primaries = out->color_primaries;
+        s->out_range = out->color_range;
 
         zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
         zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
         zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
 
+        format_init(&s->src_format, in, desc, s->colorspace_in,
+            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
+        format_init(&s->dst_format, out, odesc, s->colorspace,
+            s->primaries, s->trc, s->range, s->chromal);
+
         s->params.dither_type = s->dither;
         s->params.cpu_type = ZIMG_CPU_AUTO;
         s->params.resample_filter = s->filter;
@@ -634,27 +832,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
         s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
 
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
-
-        update_output_color_information(s, out);
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
-
-        s->in_colorspace  = in->colorspace;
-        s->in_trc         = in->color_trc;
-        s->in_primaries   = in->color_primaries;
-        s->in_range       = in->color_range;
-        s->out_colorspace = out->colorspace;
-        s->out_trc        = out->color_trc;
-        s->out_primaries  = out->color_primaries;
-        s->out_range      = out->color_range;
-
         if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
             zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
             zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
@@ -670,76 +847,52 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
 
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
             s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
         }
-    }
-
-    update_output_color_information(s, out);
-
-    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
-              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
-              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
-              INT_MAX);
-
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
 
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
+        update_output_color_information(s, out);
+        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+                  INT_MAX);
 
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
+        td.in = in;
+        td.out = out;
+        td.desc = desc;
+        td.odesc = odesc;
 
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
+        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
 
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
+        s->src_format_tmp = s->src_format;
+        s->dst_format_tmp = s->dst_format;
+        s->params_tmp = s->params;
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            s->alpha_src_format_tmp = s->alpha_src_format;
+            s->alpha_dst_format_tmp = s->alpha_dst_format;
+            s->alpha_params_tmp = s->alpha_params;
         }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
+
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
+            int x, y;
+            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+                for (y = 0; y < out->height; y++) {
+                    for (x = 0; x < out->width; x++) {
+                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                                av_float2int(1.0f));
+                    }
                 }
+            } else {
+                for (y = 0; y < outlink->h; y++)
+                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
             }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
         }
     }
-
+    else {
+        /*no need for any filtering */
+        return ff_filter_frame(outlink, in);
+    }
 fail:
     av_frame_free(&in);
     if (ret) {
@@ -753,11 +906,12 @@ fail:
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
-
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    int i;
+    for (i = 0; i < s->nb_threads; i++) {
+        av_freep(&s->tmp[i]);
+        zimg_filter_graph_free(s->graph[i]);
+        zimg_filter_graph_free(s->alpha_graph[i]);
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -941,4 +1095,5 @@ const AVFilter ff_vf_zscale = {
     FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
     FILTER_QUERY_FUNC(query_formats),
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SLICE_THREADS,
 };
-- 
2.31.1.windows.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-19 13:58 Victoria Zhislina
@ 2022-02-20 18:51 ` Paul B Mahol
  2022-02-22  5:25 ` Lynne
  1 sibling, 0 replies; 17+ messages in thread
From: Paul B Mahol @ 2022-02-20 18:51 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Sat, Feb 19, 2022 at 04:58:09PM +0300, Victoria Zhislina wrote:
> By ffmpeg threading support implementation via frame slicing and doing
> zimg_filter_graph_build that used to take 30-60% of each frame processing
> only if necessary (some parameters changed)
> the performance increase vs original version
> in video downscale and color conversion  >4x is seen
> on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> 
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> ---
>  libavfilter/vf_zscale.c | 417 +++++++++++++++++++++++++++-------------
>  1 file changed, 288 insertions(+), 129 deletions(-)
> 
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..61418d4a4a 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (c) 2015 Paul B Mahol
> - *
> + * 2022 Victoria Zhislina, Intel
> +
>   * This file is part of FFmpeg.
>   *
>   * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
>  #include "libavutil/imgutils.h"
>  
>  #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>  
>  static const char *const var_names[] = {
>      "in_w",   "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>  
>      int force_original_aspect_ratio;
>  
> -    void *tmp;
> -    size_t tmp_size;
> +    void *tmp[MAX_THREADS]; //separate for each thread;
> +    int nb_threads;
> +    int slice_h;
>  
>      zimg_image_format src_format, dst_format;
>      zimg_image_format alpha_src_format, alpha_dst_format;
> +    zimg_image_format src_format_tmp, dst_format_tmp;
> +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
>      zimg_graph_builder_params alpha_params, params;
> -    zimg_filter_graph *alpha_graph, *graph;
> +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>  
>      enum AVColorSpace in_colorspace, out_colorspace;
>      enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,36 @@ typedef struct ZScaleContext {
>      enum AVChromaLocation in_chromal, out_chromal;
>  } ZScaleContext;
>  
> +typedef struct ThreadData {
> +    const AVPixFmtDescriptor *desc, *odesc;
> +    AVFrame *in, *out;
> +} ThreadData;
> +
>  static av_cold int init(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
>      int ret;
> +    int i;

remove that line above

> +
> +    for (i = 0; i < MAX_THREADS; i++) {

for (int i ...

> +        s->tmp[i] = NULL;
> +        s->graph[i] = NULL;
> +        s->alpha_graph[i] = NULL;
> +    }
> +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>  
>      if (s->size_str && (s->w_expr || s->h_expr)) {
>          av_log(ctx, AV_LOG_ERROR,
> @@ -158,7 +191,6 @@ static av_cold int init(AVFilterContext *ctx)
>          av_opt_set(s, "w", "iw", 0);
>      if (!s->h_expr)
>          av_opt_set(s, "h", "ih", 0);
> -
>      return 0;
>  }
>  
> @@ -471,6 +503,51 @@ static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_r
>      return AVCOL_RANGE_UNSPECIFIED;
>  }
>  
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> +        (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> +        (img_fmt0->color_family != img_fmt1->color_family) ||
> +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> +        (img_fmt0->depth != img_fmt1->depth) ||
> +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> +        (img_fmt0->height != img_fmt1->height) ||
> +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> +        (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> +    /* Only the parameters that could change within a single ffmpeg zscale invocation are checked,
> +    and NaN values, which are the default for some params, are treated properly. */
> +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> +        (parm0->dither_type != parm1->dither_type) ||
> +        (parm0->resample_filter != parm1->resample_filter) ||
> +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> +    return ret;
> +}
> +
>  static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
>                          int colorspace, int primaries, int transfer, int range, int location)
>  {
> @@ -483,46 +560,94 @@ static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFm
>      format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
>      format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
>      format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> -    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
> +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
>      format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
>      format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
>  }
>  
> -static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
> -                       zimg_image_format *src_format, zimg_image_format *dst_format,
> -                       void **tmp, size_t *tmp_size)
> +static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
> +                        ZScaleContext *s, int job_nr)
>  {
>      int ret;
>      size_t size;
> +    zimg_image_format src_format;
> +    zimg_image_format dst_format;
> +    zimg_image_format alpha_src_format;
> +    zimg_image_format alpha_dst_format;
> +
> +    src_format = s->src_format;
> +    dst_format = s->dst_format;
> +    /* The input slice is specified through the active_region field,
> +    unlike the output slice.
> +    According to zimg requirements, input and output slices should have even dimensions. */
> +    src_format.active_region.width = in->width;
> +    src_format.active_region.height = s->slice_h;
> +    src_format.active_region.left = 0;
> +    src_format.active_region.top = job_nr * src_format.active_region.height;
> +    //dst now is the single tile only!!
> +    dst_format.width = out->width;
> +    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +    //the last slice could differ from the previous ones due to the slices division "tail"
> +    if (job_nr == (s->nb_threads - 1)) {
> +        src_format.active_region.height = src_format.height - src_format.active_region.top;
> +        dst_format.height = out->height - job_nr * dst_format.height;
> +    }
>  
> -    zimg_filter_graph_free(*graph);
> -    *graph = zimg_filter_graph_build(src_format, dst_format, params);
> -    if (!*graph)
> +    if (s->graph[job_nr]) {
> +        zimg_filter_graph_free(s->graph[job_nr]);
> +    }
> +    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
> +    if (!s->graph[job_nr])
>          return print_zimg_error(NULL);
>  
> -    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
> +    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
>      if (ret)
>          return print_zimg_error(NULL);
>  
> -    if (size > *tmp_size) {
> -        av_freep(tmp);
> -        *tmp = av_malloc(size);
> -        if (!*tmp)
> -            return AVERROR(ENOMEM);
> -
> -        *tmp_size = size;
> -    }
> +    if (s->tmp[job_nr])

    not needed, remove line above

> +        av_freep(&s->tmp[job_nr]);
> +    s->tmp[job_nr] = av_malloc(size);
> +    if (!s->tmp[job_nr])
> +        return AVERROR(ENOMEM);
> +
> +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        alpha_src_format = s->alpha_src_format;
> +        alpha_dst_format = s->alpha_dst_format;
> +        /* The input slice is specified through the active_region field, unlike the output slice.
> +        According to zimg requirements, input and output slices should have even dimensions. */
> +        alpha_src_format.active_region.width = in->width;
> +        alpha_src_format.active_region.height = s->slice_h;
> +        alpha_src_format.active_region.left = 0;
> +        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
> +        //dst now is the single tile only!!
> +        alpha_dst_format.width = out->width;
> +        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +        //the last slice could differ from the previous ones due to the slices division "tail"
> +        if (job_nr == (s->nb_threads - 1)) {
> +            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
> +            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
> +        }
>  
> +        if (s->alpha_graph[job_nr]) {
> +            zimg_filter_graph_free(s->alpha_graph[job_nr]);
> +        }
> +        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
> +        if (!s->alpha_graph[job_nr])
> +            return print_zimg_error(NULL);
> +     }
>      return 0;
>  }
>  
>  static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
>  {
>      AVFrame *aligned = NULL;
> -    int ret = 0, plane;
> +    int ret = 0, plane, planes;
>  
>      /* Realign any unaligned input frame. */
> -    for (plane = 0; plane < 3; plane++) {
> +    planes = av_pix_fmt_count_planes(desc->nb_components);
> +    for (plane = 0; plane < planes; plane++) {
>          int p = desc->comp[plane].plane;
>          if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
>              if (!(aligned = av_frame_alloc())) {
> @@ -572,20 +697,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
>          frame->chroma_location = (int)s->dst_format.chroma_location + 1;
>  }
>  
> +static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
> +{
> +    ThreadData *td = data;
> +    int ret = 0;
> +    int p;
> +    int out_sampl;
> +    int need_gb;
> +    ZScaleContext *s = ctx->priv;
> +    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> +    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
> +    int  dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
> +
> +    /* create zimg filter graphs for each thread
> +     only if not created earlier or there is some change in frame parameters */
> +    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
> +        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
> +        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
> +    if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
> +        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
> +            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
> +            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
> +
> +    if (need_gb){
> +        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
> +        if (ret < 0)
> +            return print_zimg_error(ctx);
> +    }
> +    out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
> +    for (int i = 0; i < 3; i++) {
> +        p = td->desc->comp[i].plane;
> +
> +        src_buf.plane[i].data = td->in->data[p];
> +        src_buf.plane[i].stride = td->in->linesize[p];
> +        src_buf.plane[i].mask = -1;
> +
> +        p = td->odesc->comp[i].plane;
> +        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
> +        dst_buf.plane[i].stride = td->out->linesize[p];
> +        dst_buf.plane[i].mask = -1;
> +    }
> +    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +    if (ret)
> +        return  print_zimg_error(ctx);
> +
> +    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        src_buf.plane[0].data = td->in->data[3];
> +        src_buf.plane[0].stride = td->in->linesize[3];
> +        src_buf.plane[0].mask = -1;
> +
> +        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height  * job_nr;
> +        dst_buf.plane[0].stride = td->out->linesize[3];
> +        dst_buf.plane[0].mask = -1;
> +
> +        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +        if (ret)
> +            return print_zimg_error(ctx);
> +    }
> +    return 0;
> +}
> +
>  static int filter_frame(AVFilterLink *link, AVFrame *in)
>  {
> -    ZScaleContext *s = link->dst->priv;
> -    AVFilterLink *outlink = link->dst->outputs[0];
> +    AVFilterContext *ctx = link->dst;
> +    ZScaleContext *s = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
>      const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> -    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> -    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
>      char buf[32];
> -    int ret = 0, plane;
> +    int ret = 0;
>      AVFrame *out = NULL;
> -
> -    if ((ret = realign_frame(desc, &in)) < 0)
> -        goto fail;
> +    ThreadData td;
>  
>      if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
>          ret =  AVERROR(ENOMEM);
> @@ -596,35 +778,53 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      out->width  = outlink->w;
>      out->height = outlink->h;
>  
> -    if(   in->width  != link->w
> -       || in->height != link->h
> -       || in->format != link->format
> -       || s->in_colorspace != in->colorspace
> -       || s->in_trc  != in->color_trc
> -       || s->in_primaries != in->color_primaries
> -       || s->in_range != in->color_range
> -       || s->out_colorspace != out->colorspace
> -       || s->out_trc  != out->color_trc
> -       || s->out_primaries != out->color_primaries
> -       || s->out_range != out->color_range
> -       || s->in_chromal != in->chroma_location
> -       || s->out_chromal != out->chroma_location) {
> +    //we need to use this filter if something is different for an input and output only
> +    //otherwise - just copy the input frame to the output
> +    if ((link->w != outlink->w) ||
> +        (link->h != outlink->h) ||
> +        (s->src_format.chroma_location != s->dst_format.chroma_location) ||
> +        (s->src_format.color_family !=s->dst_format.color_family) ||
> +        (s->src_format.color_primaries !=s->dst_format.color_primaries) ||
> +        (s->src_format.depth !=s->dst_format.depth) ||
> +        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients) ||
> +        (s->src_format.field_parity !=s->dst_format.field_parity) ||
> +        (s->src_format.pixel_range !=s->dst_format.pixel_range) ||
> +        (s->src_format.pixel_type !=s->dst_format.pixel_type) ||
> +        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
> +    ){
> +        if ((ret = realign_frame(desc, &in)) < 0)
> +            goto fail;
> +
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
>          av_opt_set(s, "w", buf, 0);
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
>          av_opt_set(s, "h", buf, 0);
>  
> +

Remove the unneeded blank line added above.

>          link->dst->inputs[0]->format = in->format;
>          link->dst->inputs[0]->w      = in->width;
>          link->dst->inputs[0]->h      = in->height;
>  
> -        if ((ret = config_props(outlink)) < 0)
> -            goto fail;
> +        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
> +        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
> +        s->in_colorspace = in->colorspace;
> +        s->in_trc = in->color_trc;
> +        s->in_primaries = in->color_primaries;
> +        s->in_range = in->color_range;
> +        s->out_colorspace = out->colorspace;
> +        s->out_trc = out->color_trc;
> +        s->out_primaries = out->color_primaries;
> +        s->out_range = out->color_range;
>  
>          zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
>          zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
>          zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
>  
> +        format_init(&s->src_format, in, desc, s->colorspace_in,
> +            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> +        format_init(&s->dst_format, out, odesc, s->colorspace,
> +            s->primaries, s->trc, s->range, s->chromal);
> +
>          s->params.dither_type = s->dither;
>          s->params.cpu_type = ZIMG_CPU_AUTO;
>          s->params.resample_filter = s->filter;
> @@ -634,27 +834,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>          s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
>          s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
>  
> -        format_init(&s->src_format, in, desc, s->colorspace_in,
> -                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> -        format_init(&s->dst_format, out, odesc, s->colorspace,
> -                    s->primaries, s->trc, s->range, s->chromal);
> -
> -        update_output_color_information(s, out);
> -
> -        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
> -                          &s->tmp, &s->tmp_size);
> -        if (ret < 0)
> -            goto fail;
> -
> -        s->in_colorspace  = in->colorspace;
> -        s->in_trc         = in->color_trc;
> -        s->in_primaries   = in->color_primaries;
> -        s->in_range       = in->color_range;
> -        s->out_colorspace = out->colorspace;
> -        s->out_trc        = out->color_trc;
> -        s->out_primaries  = out->color_primaries;
> -        s->out_range      = out->color_range;
> -
>          if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
>              zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
>              zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> @@ -670,76 +849,54 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>              s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
>  
> -            s->alpha_dst_format.width = out->width;
> -            s->alpha_dst_format.height = out->height;
>              s->alpha_dst_format.depth = odesc->comp[0].depth;
>              s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
> -
> -            zimg_filter_graph_free(s->alpha_graph);
> -            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
> -            if (!s->alpha_graph) {
> -                ret = print_zimg_error(link->dst);
> -                goto fail;
> -            }
>          }
> -    }
> -
> -    update_output_color_information(s, out);
> -
> -    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> -              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> -              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> -              INT_MAX);
> -
> -    for (plane = 0; plane < 3; plane++) {
> -        int p = desc->comp[plane].plane;
> -        src_buf.plane[plane].data   = in->data[p];
> -        src_buf.plane[plane].stride = in->linesize[p];
> -        src_buf.plane[plane].mask   = -1;
> -
> -        p = odesc->comp[plane].plane;
> -        dst_buf.plane[plane].data   = out->data[p];
> -        dst_buf.plane[plane].stride = out->linesize[p];
> -        dst_buf.plane[plane].mask   = -1;
> -    }
>  
> -    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -    if (ret) {
> -        ret = print_zimg_error(link->dst);
> -        goto fail;
> -    }
> +        update_output_color_information(s, out);
> +        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> +                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> +                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> +                  INT_MAX);
>  
> -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        src_buf.plane[0].data   = in->data[3];
> -        src_buf.plane[0].stride = in->linesize[3];
> -        src_buf.plane[0].mask   = -1;
> +        td.in = in;
> +        td.out = out;
> +        td.desc = desc;
> +        td.odesc = odesc;
>  
> -        dst_buf.plane[0].data   = out->data[3];
> -        dst_buf.plane[0].stride = out->linesize[3];
> -        dst_buf.plane[0].mask   = -1;
> +        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
>  
> -        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -        if (ret) {
> -            ret = print_zimg_error(link->dst);
> -            goto fail;
> +        s->src_format_tmp = s->src_format;
> +        s->dst_format_tmp = s->dst_format;
> +        s->params_tmp = s->params;
> +        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +            s->alpha_src_format_tmp = s->alpha_src_format;
> +            s->alpha_dst_format_tmp = s->alpha_dst_format;
> +            s->alpha_params_tmp = s->alpha_params;
>          }
> -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        int x, y;
> -
> -        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> -            for (y = 0; y < out->height; y++) {
> -                for (x = 0; x < out->width; x++) {
> -                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> -                            av_float2int(1.0f));
> +
> +        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
> +            int x, y;
> +            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> +                for (y = 0; y < out->height; y++) {
> +                    for (x = 0; x < out->width; x++) {
> +                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> +                                av_float2int(1.0f));
> +                    }
>                  }
> +            } else {
> +                for (y = 0; y < outlink->h; y++)
> +                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>              }
> -        } else {
> -            for (y = 0; y < outlink->h; y++)
> -                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>          }
>      }
> -
> +    else {
> +        /*no need for any filtering */
> +        ret = av_frame_copy(out, in);

You could just return the 'in' frame; there is no need to do a full frame copy in the no-op scenario.

> +        if (ret < 0)
> +            return ret;
> +    }
>  fail:
>      av_frame_free(&in);
>      if (ret) {
> @@ -753,11 +910,12 @@ fail:
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
> -
> -    zimg_filter_graph_free(s->graph);
> -    zimg_filter_graph_free(s->alpha_graph);
> -    av_freep(&s->tmp);
> -    s->tmp_size = 0;
> +    int i;
> +    for (i = 0; i < s->nb_threads; i++) {

    for (int i = 0; i < s->nb_threads; i++) {

> +        if (s->tmp[i]) av_freep(&s->tmp[i]);

use just:
           av_freep(&s->tmp[i]);

> +        zimg_filter_graph_free(s->graph[i]);
> +        zimg_filter_graph_free(s->alpha_graph[i]);
> +    }
>  }
>  
>  static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
> @@ -941,4 +1099,5 @@ const AVFilter ff_vf_zscale = {
>      FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
>      FILTER_QUERY_FUNC(query_formats),
>      .process_command = process_command,
> +    .flags           = AVFILTER_FLAG_SLICE_THREADS,
>  };
> -- 
> 2.31.1.windows.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
@ 2022-02-19 13:58 Victoria Zhislina
  2022-02-20 18:51 ` Paul B Mahol
  2022-02-22  5:25 ` Lynne
  0 siblings, 2 replies; 17+ messages in thread
From: Victoria Zhislina @ 2022-02-19 13:58 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Victoria Zhislina

Implement ffmpeg slice threading support (frame slicing) for zscale and call
zimg_filter_graph_build, which used to take 30-60% of each frame's processing
time, only when necessary (i.e. when some parameters have changed).
Compared with the original version, the performance of video downscaling
and color conversion increases by >4x on a 64-core Intel Xeon
and by 3x on an i7-6700K (4 cores with HT).

Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
---
 libavfilter/vf_zscale.c | 417 +++++++++++++++++++++++++++-------------
 1 file changed, 288 insertions(+), 129 deletions(-)

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..61418d4a4a 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2015 Paul B Mahol
- *
+ * 2022 Victoria Zhislina, Intel
+
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@
 #include "libavutil/imgutils.h"
 
 #define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
 
 static const char *const var_names[] = {
     "in_w",   "iw",
@@ -113,13 +116,17 @@ typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
+    void *tmp[MAX_THREADS]; //separate for each thread;
+    int nb_threads;
+    int slice_h;
 
     zimg_image_format src_format, dst_format;
     zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_image_format src_format_tmp, dst_format_tmp;
+    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
     zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    zimg_graph_builder_params alpha_params_tmp, params_tmp;
+    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,36 @@ typedef struct ZScaleContext {
     enum AVChromaLocation in_chromal, out_chromal;
 } ZScaleContext;
 
+typedef struct ThreadData {
+    const AVPixFmtDescriptor *desc, *odesc;
+    AVFrame *in, *out;
+} ThreadData;
+
 static av_cold int init(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
     int ret;
+    int i;
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->tmp[i] = NULL;
+        s->graph[i] = NULL;
+        s->alpha_graph[i] = NULL;
+    }
+    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
 
     if (s->size_str && (s->w_expr || s->h_expr)) {
         av_log(ctx, AV_LOG_ERROR,
@@ -158,7 +191,6 @@ static av_cold int init(AVFilterContext *ctx)
         av_opt_set(s, "w", "iw", 0);
     if (!s->h_expr)
         av_opt_set(s, "h", "ih", 0);
-
     return 0;
 }
 
@@ -471,6 +503,51 @@ static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_r
     return AVCOL_RANGE_UNSPECIFIED;
 }
 
+/* returns 0 if image formats are the same and 1 otherwise */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
+{
+    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
+#if ZIMG_API_VERSION >= 0x204
+        (img_fmt0->alpha != img_fmt1->alpha) ||
+#endif
+        (img_fmt0->color_family != img_fmt1->color_family) ||
+        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
+        (img_fmt0->depth != img_fmt1->depth) ||
+        (img_fmt0->field_parity != img_fmt1->field_parity) ||
+        (img_fmt0->height != img_fmt1->height) ||
+        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
+        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
+        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
+        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
+        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
+        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
+        (img_fmt0->width != img_fmt1->width));
+}
+
+/* returns 0 if graph builder parameters are the same and 1 otherwise */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
+{
+    /* Only the parameters that can change within a single ffmpeg zscale invocation are checked;
+    two NaN values (NaN is the default for some params) are treated as equal. */
+    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
+        (parm0->dither_type != parm1->dither_type) ||
+        (parm0->resample_filter != parm1->resample_filter) ||
+        (parm0->resample_filter_uv != parm1->resample_filter_uv);
+
+    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
+        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
+    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
+        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
+    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
+        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
+    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
+        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
+    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
+        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
+
+    return ret;
+}
+
 static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
                         int colorspace, int primaries, int transfer, int range, int location)
 {
@@ -483,46 +560,94 @@ static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFm
     format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
     format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
     format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
-    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
+    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
     format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
     format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
 }
 
-static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
-                       zimg_image_format *src_format, zimg_image_format *dst_format,
-                       void **tmp, size_t *tmp_size)
+static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
+                        ZScaleContext *s, int job_nr)
 {
     int ret;
     size_t size;
+    zimg_image_format src_format;
+    zimg_image_format dst_format;
+    zimg_image_format alpha_src_format;
+    zimg_image_format alpha_dst_format;
+
+    src_format = s->src_format;
+    dst_format = s->dst_format;
+    /* The input slice is specified through the active_region field,
+    unlike the output slice.
+    According to zimg requirements, input and output slices should have even dimensions. */
+    src_format.active_region.width = in->width;
+    src_format.active_region.height = s->slice_h;
+    src_format.active_region.left = 0;
+    src_format.active_region.top = job_nr * src_format.active_region.height;
+    //dst now is the single tile only!!
+    dst_format.width = out->width;
+    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+    //the last slice could differ from the previous ones due to the slices division "tail"
+    if (job_nr == (s->nb_threads - 1)) {
+        src_format.active_region.height = src_format.height - src_format.active_region.top;
+        dst_format.height = out->height - job_nr * dst_format.height;
+    }
 
-    zimg_filter_graph_free(*graph);
-    *graph = zimg_filter_graph_build(src_format, dst_format, params);
-    if (!*graph)
+    if (s->graph[job_nr]) {
+        zimg_filter_graph_free(s->graph[job_nr]);
+    }
+    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
+    if (!s->graph[job_nr])
         return print_zimg_error(NULL);
 
-    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
+    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
     if (ret)
         return print_zimg_error(NULL);
 
-    if (size > *tmp_size) {
-        av_freep(tmp);
-        *tmp = av_malloc(size);
-        if (!*tmp)
-            return AVERROR(ENOMEM);
-
-        *tmp_size = size;
-    }
+    if (s->tmp[job_nr])
+        av_freep(&s->tmp[job_nr]);
+    s->tmp[job_nr] = av_malloc(size);
+    if (!s->tmp[job_nr])
+        return AVERROR(ENOMEM);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        alpha_src_format = s->alpha_src_format;
+        alpha_dst_format = s->alpha_dst_format;
+        /* The input slice is specified through the active_region field, unlike the output slice.
+        According to zimg requirements, input and output slices should have even dimensions. */
+        alpha_src_format.active_region.width = in->width;
+        alpha_src_format.active_region.height = s->slice_h;
+        alpha_src_format.active_region.left = 0;
+        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
+        //dst now is the single tile only!!
+        alpha_dst_format.width = out->width;
+        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+        //the last slice could differ from the previous ones due to the slices division "tail"
+        if (job_nr == (s->nb_threads - 1)) {
+            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
+            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
+        }
 
+        if (s->alpha_graph[job_nr]) {
+            zimg_filter_graph_free(s->alpha_graph[job_nr]);
+        }
+        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
+        if (!s->alpha_graph[job_nr])
+            return print_zimg_error(NULL);
+     }
     return 0;
 }
 
 static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
 {
     AVFrame *aligned = NULL;
-    int ret = 0, plane;
+    int ret = 0, plane, planes;
 
     /* Realign any unaligned input frame. */
-    for (plane = 0; plane < 3; plane++) {
+    planes = av_pix_fmt_count_planes(desc->nb_components);
+    for (plane = 0; plane < planes; plane++) {
         int p = desc->comp[plane].plane;
         if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
             if (!(aligned = av_frame_alloc())) {
@@ -572,20 +697,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
         frame->chroma_location = (int)s->dst_format.chroma_location + 1;
 }
 
+static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
+{
+    ThreadData *td = data;
+    int ret = 0;
+    int p;
+    int out_sampl;
+    int need_gb;
+    ZScaleContext *s = ctx->priv;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    int  dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
+
+    /* create zimg filter graphs for each thread
+     only if not created earlier or there is some change in frame parameters */
+    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
+        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
+        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
+    if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
+        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
+            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
+            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
+
+    if (need_gb){
+        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
+        if (ret < 0)
+            return print_zimg_error(ctx);
+    }
+    out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
+    for (int i = 0; i < 3; i++) {
+        p = td->desc->comp[i].plane;
+
+        src_buf.plane[i].data = td->in->data[p];
+        src_buf.plane[i].stride = td->in->linesize[p];
+        src_buf.plane[i].mask = -1;
+
+        p = td->odesc->comp[i].plane;
+        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
+        dst_buf.plane[i].stride = td->out->linesize[p];
+        dst_buf.plane[i].mask = -1;
+    }
+    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+    if (ret)
+        return  print_zimg_error(ctx);
+
+    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        src_buf.plane[0].data = td->in->data[3];
+        src_buf.plane[0].stride = td->in->linesize[3];
+        src_buf.plane[0].mask = -1;
+
+        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height  * job_nr;
+        dst_buf.plane[0].stride = td->out->linesize[3];
+        dst_buf.plane[0].mask = -1;
+
+        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+        if (ret)
+            return print_zimg_error(ctx);
+    }
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
     AVFrame *out = NULL;
-
-    if ((ret = realign_frame(desc, &in)) < 0)
-        goto fail;
+    ThreadData td;
 
     if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
         ret =  AVERROR(ENOMEM);
@@ -596,35 +778,53 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     out->width  = outlink->w;
     out->height = outlink->h;
 
-    if(   in->width  != link->w
-       || in->height != link->h
-       || in->format != link->format
-       || s->in_colorspace != in->colorspace
-       || s->in_trc  != in->color_trc
-       || s->in_primaries != in->color_primaries
-       || s->in_range != in->color_range
-       || s->out_colorspace != out->colorspace
-       || s->out_trc  != out->color_trc
-       || s->out_primaries != out->color_primaries
-       || s->out_range != out->color_range
-       || s->in_chromal != in->chroma_location
-       || s->out_chromal != out->chroma_location) {
+    //we need to run this filter only if something differs between the input and the output;
+    //otherwise just copy the input frame to the output
+    if ((link->w != outlink->w) ||
+        (link->h != outlink->h) ||
+        (s->src_format.chroma_location != s->dst_format.chroma_location) ||
+        (s->src_format.color_family !=s->dst_format.color_family) ||
+        (s->src_format.color_primaries !=s->dst_format.color_primaries) ||
+        (s->src_format.depth !=s->dst_format.depth) ||
+        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients) ||
+        (s->src_format.field_parity !=s->dst_format.field_parity) ||
+        (s->src_format.pixel_range !=s->dst_format.pixel_range) ||
+        (s->src_format.pixel_type !=s->dst_format.pixel_type) ||
+        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
+    ){
+        if ((ret = realign_frame(desc, &in)) < 0)
+            goto fail;
+
         snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
         av_opt_set(s, "w", buf, 0);
         snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
         av_opt_set(s, "h", buf, 0);
 
+
         link->dst->inputs[0]->format = in->format;
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
-        if ((ret = config_props(outlink)) < 0)
-            goto fail;
+        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
+        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
+        s->in_colorspace = in->colorspace;
+        s->in_trc = in->color_trc;
+        s->in_primaries = in->color_primaries;
+        s->in_range = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc = out->color_trc;
+        s->out_primaries = out->color_primaries;
+        s->out_range = out->color_range;
 
         zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
         zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
         zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
 
+        format_init(&s->src_format, in, desc, s->colorspace_in,
+            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
+        format_init(&s->dst_format, out, odesc, s->colorspace,
+            s->primaries, s->trc, s->range, s->chromal);
+
         s->params.dither_type = s->dither;
         s->params.cpu_type = ZIMG_CPU_AUTO;
         s->params.resample_filter = s->filter;
@@ -634,27 +834,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
         s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
 
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
-
-        update_output_color_information(s, out);
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
-
-        s->in_colorspace  = in->colorspace;
-        s->in_trc         = in->color_trc;
-        s->in_primaries   = in->color_primaries;
-        s->in_range       = in->color_range;
-        s->out_colorspace = out->colorspace;
-        s->out_trc        = out->color_trc;
-        s->out_primaries  = out->color_primaries;
-        s->out_range      = out->color_range;
-
         if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
             zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
             zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
@@ -670,76 +849,54 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
 
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
             s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
         }
-    }
-
-    update_output_color_information(s, out);
-
-    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
-              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
-              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
-              INT_MAX);
-
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
 
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
+        update_output_color_information(s, out);
+        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+                  INT_MAX);
 
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
+        td.in = in;
+        td.out = out;
+        td.desc = desc;
+        td.odesc = odesc;
 
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
+        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
 
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
+        s->src_format_tmp = s->src_format;
+        s->dst_format_tmp = s->dst_format;
+        s->params_tmp = s->params;
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            s->alpha_src_format_tmp = s->alpha_src_format;
+            s->alpha_dst_format_tmp = s->alpha_dst_format;
+            s->alpha_params_tmp = s->alpha_params;
         }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
+
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
+            int x, y;
+            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+                for (y = 0; y < out->height; y++) {
+                    for (x = 0; x < out->width; x++) {
+                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                                av_float2int(1.0f));
+                    }
                 }
+            } else {
+                for (y = 0; y < outlink->h; y++)
+                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
             }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
         }
     }
-
+    else {
+        /*no need for any filtering */
+        ret = av_frame_copy(out, in);
+        if (ret < 0)
+            return ret;
+    }
 fail:
     av_frame_free(&in);
     if (ret) {
@@ -753,11 +910,12 @@ fail:
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
-
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    int i;
+    for (i = 0; i < s->nb_threads; i++) {
+        if (s->tmp[i]) av_freep(&s->tmp[i]);
+        zimg_filter_graph_free(s->graph[i]);
+        zimg_filter_graph_free(s->alpha_graph[i]);
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -941,4 +1099,5 @@ const AVFilter ff_vf_zscale = {
     FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
     FILTER_QUERY_FUNC(query_formats),
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SLICE_THREADS,
 };
-- 
2.31.1.windows.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
  2022-02-10 10:08 Victoria Zhislina
@ 2022-02-18 11:43 ` Paul B Mahol
  0 siblings, 0 replies; 17+ messages in thread
From: Paul B Mahol @ 2022-02-18 11:43 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Thu, Feb 10, 2022 at 01:08:04PM +0300, Victoria Zhislina wrote:
> Add ffmpeg slice-threading support (via frame slicing) and call
> zimg_filter_graph_build, which used to take 30-60% of each frame's processing
> time, only when necessary (i.e. when some parameters have changed).
> Compared with the original version, the performance of video downscaling
> and color conversion increases by >4x on a 64-core Intel Xeon and by
> 3x on an i7-6700K (4 cores with HT).
> 
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> ---
>  libavfilter/vf_zscale.c | 786 ++++++++++++++++++++++++----------------
>  1 file changed, 475 insertions(+), 311 deletions(-)
> 
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..ce4c0b2c76 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (c) 2015 Paul B Mahol
> - *
> + * * 2022 Victoria Zhislina, Intel - performance optimization

Just name, please, without extra stuff, see line above.

> + 
>   * This file is part of FFmpeg.
>   *
>   * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
>  #include "libavutil/imgutils.h"
>  
>  #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>  
>  static const char *const var_names[] = {
>      "in_w",   "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>  
>      int force_original_aspect_ratio;
>  
> -    void *tmp;
> -    size_t tmp_size;
> +    void *tmp[MAX_THREADS]; //separate for each thread;
> +	int nb_threads;

Sorry, but tab characters are generally forbidden in FFmpeg source code.

> +    int slice_h;
>  
>      zimg_image_format src_format, dst_format;
>      zimg_image_format alpha_src_format, alpha_dst_format;
> +    zimg_image_format src_format_tmp, dst_format_tmp;
> +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
>      zimg_graph_builder_params alpha_params, params;
> -    zimg_filter_graph *alpha_graph, *graph;
> +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>  
>      enum AVColorSpace in_colorspace, out_colorspace;
>      enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
>      enum AVChromaLocation in_chromal, out_chromal;
>  } ZScaleContext;
>  
> +
> +typedef struct ThreadData {
> +    const AVPixFmtDescriptor *desc, *odesc;
> +    AVFrame *in, *out;
> +} ThreadData;
> +
> +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> +{
> +    switch (chroma_location) {
> +    case AVCHROMA_LOC_UNSPECIFIED:
> +    case AVCHROMA_LOC_LEFT:
> +        return ZIMG_CHROMA_LEFT;
> +    case AVCHROMA_LOC_CENTER:
> +        return ZIMG_CHROMA_CENTER;
> +    case AVCHROMA_LOC_TOPLEFT:
> +        return ZIMG_CHROMA_TOP_LEFT;
> +    case AVCHROMA_LOC_TOP:
> +        return ZIMG_CHROMA_TOP;
> +    case AVCHROMA_LOC_BOTTOMLEFT:
> +        return ZIMG_CHROMA_BOTTOM_LEFT;
> +    case AVCHROMA_LOC_BOTTOM:
> +        return ZIMG_CHROMA_BOTTOM;
> +    }
> +    return ZIMG_CHROMA_LEFT;
> +}
> +
> +static int convert_matrix(enum AVColorSpace colorspace)
> +{
> +    switch (colorspace) {
> +    case AVCOL_SPC_RGB:
> +        return ZIMG_MATRIX_RGB;
> +    case AVCOL_SPC_BT709:
> +        return ZIMG_MATRIX_709;
> +    case AVCOL_SPC_UNSPECIFIED:
> +        return ZIMG_MATRIX_UNSPECIFIED;
> +    case AVCOL_SPC_FCC:
> +        return ZIMG_MATRIX_FCC;
> +    case AVCOL_SPC_BT470BG:
> +        return ZIMG_MATRIX_470BG;
> +    case AVCOL_SPC_SMPTE170M:
> +        return ZIMG_MATRIX_170M;
> +    case AVCOL_SPC_SMPTE240M:
> +        return ZIMG_MATRIX_240M;
> +    case AVCOL_SPC_YCGCO:
> +        return ZIMG_MATRIX_YCGCO;
> +    case AVCOL_SPC_BT2020_NCL:
> +        return ZIMG_MATRIX_2020_NCL;
> +    case AVCOL_SPC_BT2020_CL:
> +        return ZIMG_MATRIX_2020_CL;
> +    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> +    case AVCOL_SPC_CHROMA_DERIVED_CL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> +    case AVCOL_SPC_ICTCP:
> +        return ZIMG_MATRIX_ICTCP;
> +    }
> +    return ZIMG_MATRIX_UNSPECIFIED;
> +}
> +
> +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> +{
> +    switch (color_trc) {
> +    case AVCOL_TRC_UNSPECIFIED:
> +        return ZIMG_TRANSFER_UNSPECIFIED;
> +    case AVCOL_TRC_BT709:
> +        return ZIMG_TRANSFER_709;
> +    case AVCOL_TRC_GAMMA22:
> +        return ZIMG_TRANSFER_470_M;
> +    case AVCOL_TRC_GAMMA28:
> +        return ZIMG_TRANSFER_470_BG;
> +    case AVCOL_TRC_SMPTE170M:
> +        return ZIMG_TRANSFER_601;
> +    case AVCOL_TRC_SMPTE240M:
> +        return ZIMG_TRANSFER_240M;
> +    case AVCOL_TRC_LINEAR:
> +        return ZIMG_TRANSFER_LINEAR;
> +    case AVCOL_TRC_LOG:
> +        return ZIMG_TRANSFER_LOG_100;
> +    case AVCOL_TRC_LOG_SQRT:
> +        return ZIMG_TRANSFER_LOG_316;
> +    case AVCOL_TRC_IEC61966_2_4:
> +        return ZIMG_TRANSFER_IEC_61966_2_4;
> +    case AVCOL_TRC_BT2020_10:
> +        return ZIMG_TRANSFER_2020_10;
> +    case AVCOL_TRC_BT2020_12:
> +        return ZIMG_TRANSFER_2020_12;
> +    case AVCOL_TRC_SMPTE2084:
> +        return ZIMG_TRANSFER_ST2084;
> +    case AVCOL_TRC_ARIB_STD_B67:
> +        return ZIMG_TRANSFER_ARIB_B67;
> +    case AVCOL_TRC_IEC61966_2_1:
> +        return ZIMG_TRANSFER_IEC_61966_2_1;
> +    }
> +    return ZIMG_TRANSFER_UNSPECIFIED;
> +}
> +
> +static int convert_primaries(enum AVColorPrimaries color_primaries)
> +{
> +    switch (color_primaries) {
> +    case AVCOL_PRI_UNSPECIFIED:
> +        return ZIMG_PRIMARIES_UNSPECIFIED;
> +    case AVCOL_PRI_BT709:
> +        return ZIMG_PRIMARIES_709;
> +    case AVCOL_PRI_BT470M:
> +        return ZIMG_PRIMARIES_470_M;
> +    case AVCOL_PRI_BT470BG:
> +        return ZIMG_PRIMARIES_470_BG;
> +    case AVCOL_PRI_SMPTE170M:
> +        return ZIMG_PRIMARIES_170M;
> +    case AVCOL_PRI_SMPTE240M:
> +        return ZIMG_PRIMARIES_240M;
> +    case AVCOL_PRI_FILM:
> +        return ZIMG_PRIMARIES_FILM;
> +    case AVCOL_PRI_BT2020:
> +        return ZIMG_PRIMARIES_2020;
> +    case AVCOL_PRI_SMPTE428:
> +        return ZIMG_PRIMARIES_ST428;
> +    case AVCOL_PRI_SMPTE431:
> +        return ZIMG_PRIMARIES_ST431_2;
> +    case AVCOL_PRI_SMPTE432:
> +        return ZIMG_PRIMARIES_ST432_1;
> +    case AVCOL_PRI_JEDEC_P22:
> +        return ZIMG_PRIMARIES_EBU3213_E;
> +    }
> +    return ZIMG_PRIMARIES_UNSPECIFIED;
> +}
> +
> +static int convert_range(enum AVColorRange color_range)
> +{
> +    switch (color_range) {
> +    case AVCOL_RANGE_UNSPECIFIED:
> +    case AVCOL_RANGE_MPEG:
> +        return ZIMG_RANGE_LIMITED;
> +    case AVCOL_RANGE_JPEG:
> +        return ZIMG_RANGE_FULL;
> +    }
> +    return ZIMG_RANGE_LIMITED;
> +}
> +
> +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> +{
> +    switch (color_range) {
> +    case ZIMG_RANGE_LIMITED:
> +        return AVCOL_RANGE_MPEG;
> +    case ZIMG_RANGE_FULL:
> +        return AVCOL_RANGE_JPEG;
> +    }
> +    return AVCOL_RANGE_UNSPECIFIED;
> +}
> +
>  static av_cold int init(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
>      int ret;
> +    int i;
> +
> +    for (i = 0; i < MAX_THREADS; i++) {
> +        s->tmp[i] = NULL;
> +        s->graph[i] = NULL;
> +        s->alpha_graph[i] = NULL;
> +    }
> +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>  
>      if (s->size_str && (s->w_expr || s->h_expr)) {
>          av_log(ctx, AV_LOG_ERROR,
> @@ -194,6 +372,153 @@ static int query_formats(AVFilterContext *ctx)
>      return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
>  }
>  
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> +        (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> +        (img_fmt0->color_family != img_fmt1->color_family) ||
> +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> +        (img_fmt0->depth != img_fmt1->depth) ||
> +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> +        (img_fmt0->height != img_fmt1->height) ||
> +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> +        (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> +    /* Only the parameters that can change within a single ffmpeg zscale invocation are checked,
> +    and NaN values, which are the default for some params, are treated properly. */
> +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> +        (parm0->dither_type != parm1->dither_type) ||
> +        (parm0->resample_filter != parm1->resample_filter) ||
> +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> +    return ret;
> +}
> +
> +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> +    int colorspace, int primaries, int transfer, int range, int location)
> +{
> +    format->width = frame->width;
> +    format->height = frame->height;
> +    format->subsample_w = desc->log2_chroma_w;
> +    format->subsample_h = desc->log2_chroma_h;
> +    format->depth = desc->comp[0].depth;
> +    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> +    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> +    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> +    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> +    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> +    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> +}
> +
> +static int print_zimg_error(AVFilterContext *ctx)
> +{
> +    char err_msg[1024];
> +    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
> +
> +    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
> +
> +    return AVERROR_EXTERNAL;
> +}
> +
> +static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
> +    ZScaleContext *s, int job_nr)
> +{
> +    int ret;
> +    size_t size;
> +    zimg_image_format src_format;
> +    zimg_image_format dst_format;
> +    zimg_image_format alpha_src_format;
> +    zimg_image_format alpha_dst_format;
> +
> +    src_format = s->src_format;
> +    dst_format = s->dst_format;
> +    /* The input slice is specified through the active_region field, 
> +    unlike the output slice.
> +    according to zimg requirements input and output slices should have even dimensions */
> +    src_format.active_region.width = in->width;
> +    src_format.active_region.height = s->slice_h;
> +    src_format.active_region.left = 0;
> +    src_format.active_region.top = job_nr * src_format.active_region.height;
> +    //dst now is the single tile only!!
> +    dst_format.width = out->width;
> +    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +    //the last slice could differ from the previous ones due to the slices division "tail"
> +    if (job_nr == (s->nb_threads - 1)) {
> +        src_format.active_region.height = src_format.height - src_format.active_region.top;
> +        dst_format.height = out->height - job_nr * dst_format.height;
> +    }
> +
> +    if (s->graph[job_nr]) {
> +        zimg_filter_graph_free(s->graph[job_nr]);
> +    }
> +    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
> +    if (!s->graph[job_nr])
> +        return print_zimg_error(NULL);
> +
> +    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
> +    if (ret)
> +        return print_zimg_error(NULL);
> +
> +    if (s->tmp[job_nr])
> +        av_freep(&s->tmp[job_nr]);
> +    s->tmp[job_nr] = av_malloc(size);
> +    if (!s->tmp[job_nr])
> +        return AVERROR(ENOMEM);
> +
> +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        alpha_src_format = s->alpha_src_format;
> +        alpha_dst_format = s->alpha_dst_format;
> +        /* The input slice is specified through the active_region field, unlike the output slice.
> +        according to zimg requirements input and output slices should have even dimensions */
> +        alpha_src_format.active_region.width = in->width;
> +        alpha_src_format.active_region.height = s->slice_h;
> +        alpha_src_format.active_region.left = 0;
> +        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
> +        //dst now is the single tile only!!
> +        alpha_dst_format.width = out->width;
> +        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +        //the last slice could differ from the previous ones due to the slices division "tail"
> +        if (job_nr == (s->nb_threads - 1)) {
> +            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
> +            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
> +        }
> +
> +        if (s->alpha_graph[job_nr]) {
> +            zimg_filter_graph_free(s->alpha_graph[job_nr]);
> +        }
> +        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
> +        if (!s->alpha_graph[job_nr])
> +            return print_zimg_error(NULL);
> +     }
> +    return 0;
> +}
> +
>  static int config_props(AVFilterLink *outlink)
>  {
>      AVFilterContext *ctx = outlink->src;
> @@ -317,212 +642,15 @@ fail:
>      return ret;
>  }
>  
> -static int print_zimg_error(AVFilterContext *ctx)
> -{
> -    char err_msg[1024];
> -    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
> -
> -    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
> -
> -    return AVERROR_EXTERNAL;
> -}
> -
> -static int convert_chroma_location(enum AVChromaLocation chroma_location)
> -{
> -    switch (chroma_location) {
> -    case AVCHROMA_LOC_UNSPECIFIED:
> -    case AVCHROMA_LOC_LEFT:
> -        return ZIMG_CHROMA_LEFT;
> -    case AVCHROMA_LOC_CENTER:
> -        return ZIMG_CHROMA_CENTER;
> -    case AVCHROMA_LOC_TOPLEFT:
> -        return ZIMG_CHROMA_TOP_LEFT;
> -    case AVCHROMA_LOC_TOP:
> -        return ZIMG_CHROMA_TOP;
> -    case AVCHROMA_LOC_BOTTOMLEFT:
> -        return ZIMG_CHROMA_BOTTOM_LEFT;
> -    case AVCHROMA_LOC_BOTTOM:
> -        return ZIMG_CHROMA_BOTTOM;
> -    }
> -    return ZIMG_CHROMA_LEFT;
> -}
> -
> -static int convert_matrix(enum AVColorSpace colorspace)
> -{
> -    switch (colorspace) {
> -    case AVCOL_SPC_RGB:
> -        return ZIMG_MATRIX_RGB;
> -    case AVCOL_SPC_BT709:
> -        return ZIMG_MATRIX_709;
> -    case AVCOL_SPC_UNSPECIFIED:
> -        return ZIMG_MATRIX_UNSPECIFIED;
> -    case AVCOL_SPC_FCC:
> -        return ZIMG_MATRIX_FCC;
> -    case AVCOL_SPC_BT470BG:
> -        return ZIMG_MATRIX_470BG;
> -    case AVCOL_SPC_SMPTE170M:
> -        return ZIMG_MATRIX_170M;
> -    case AVCOL_SPC_SMPTE240M:
> -        return ZIMG_MATRIX_240M;
> -    case AVCOL_SPC_YCGCO:
> -        return ZIMG_MATRIX_YCGCO;
> -    case AVCOL_SPC_BT2020_NCL:
> -        return ZIMG_MATRIX_2020_NCL;
> -    case AVCOL_SPC_BT2020_CL:
> -        return ZIMG_MATRIX_2020_CL;
> -    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> -        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> -    case AVCOL_SPC_CHROMA_DERIVED_CL:
> -        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> -    case AVCOL_SPC_ICTCP:
> -        return ZIMG_MATRIX_ICTCP;
> -    }
> -    return ZIMG_MATRIX_UNSPECIFIED;
> -}
> -
> -static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> -{
> -    switch (color_trc) {
> -    case AVCOL_TRC_UNSPECIFIED:
> -        return ZIMG_TRANSFER_UNSPECIFIED;
> -    case AVCOL_TRC_BT709:
> -        return ZIMG_TRANSFER_709;
> -    case AVCOL_TRC_GAMMA22:
> -        return ZIMG_TRANSFER_470_M;
> -    case AVCOL_TRC_GAMMA28:
> -        return ZIMG_TRANSFER_470_BG;
> -    case AVCOL_TRC_SMPTE170M:
> -        return ZIMG_TRANSFER_601;
> -    case AVCOL_TRC_SMPTE240M:
> -        return ZIMG_TRANSFER_240M;
> -    case AVCOL_TRC_LINEAR:
> -        return ZIMG_TRANSFER_LINEAR;
> -    case AVCOL_TRC_LOG:
> -        return ZIMG_TRANSFER_LOG_100;
> -    case AVCOL_TRC_LOG_SQRT:
> -        return ZIMG_TRANSFER_LOG_316;
> -    case AVCOL_TRC_IEC61966_2_4:
> -        return ZIMG_TRANSFER_IEC_61966_2_4;
> -    case AVCOL_TRC_BT2020_10:
> -        return ZIMG_TRANSFER_2020_10;
> -    case AVCOL_TRC_BT2020_12:
> -        return ZIMG_TRANSFER_2020_12;
> -    case AVCOL_TRC_SMPTE2084:
> -        return ZIMG_TRANSFER_ST2084;
> -    case AVCOL_TRC_ARIB_STD_B67:
> -        return ZIMG_TRANSFER_ARIB_B67;
> -    case AVCOL_TRC_IEC61966_2_1:
> -        return ZIMG_TRANSFER_IEC_61966_2_1;
> -    }
> -    return ZIMG_TRANSFER_UNSPECIFIED;
> -}
> -
> -static int convert_primaries(enum AVColorPrimaries color_primaries)
> -{
> -    switch (color_primaries) {
> -    case AVCOL_PRI_UNSPECIFIED:
> -        return ZIMG_PRIMARIES_UNSPECIFIED;
> -    case AVCOL_PRI_BT709:
> -        return ZIMG_PRIMARIES_709;
> -    case AVCOL_PRI_BT470M:
> -        return ZIMG_PRIMARIES_470_M;
> -    case AVCOL_PRI_BT470BG:
> -        return ZIMG_PRIMARIES_470_BG;
> -    case AVCOL_PRI_SMPTE170M:
> -        return ZIMG_PRIMARIES_170M;
> -    case AVCOL_PRI_SMPTE240M:
> -        return ZIMG_PRIMARIES_240M;
> -    case AVCOL_PRI_FILM:
> -        return ZIMG_PRIMARIES_FILM;
> -    case AVCOL_PRI_BT2020:
> -        return ZIMG_PRIMARIES_2020;
> -    case AVCOL_PRI_SMPTE428:
> -        return ZIMG_PRIMARIES_ST428;
> -    case AVCOL_PRI_SMPTE431:
> -        return ZIMG_PRIMARIES_ST431_2;
> -    case AVCOL_PRI_SMPTE432:
> -        return ZIMG_PRIMARIES_ST432_1;
> -    case AVCOL_PRI_JEDEC_P22:
> -        return ZIMG_PRIMARIES_EBU3213_E;
> -    }
> -    return ZIMG_PRIMARIES_UNSPECIFIED;
> -}
> -
> -static int convert_range(enum AVColorRange color_range)
> -{
> -    switch (color_range) {
> -    case AVCOL_RANGE_UNSPECIFIED:
> -    case AVCOL_RANGE_MPEG:
> -        return ZIMG_RANGE_LIMITED;
> -    case AVCOL_RANGE_JPEG:
> -        return ZIMG_RANGE_FULL;
> -    }
> -    return ZIMG_RANGE_LIMITED;
> -}
> -
> -static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> -{
> -    switch (color_range) {
> -    case ZIMG_RANGE_LIMITED:
> -        return AVCOL_RANGE_MPEG;
> -    case ZIMG_RANGE_FULL:
> -        return AVCOL_RANGE_JPEG;
> -    }
> -    return AVCOL_RANGE_UNSPECIFIED;
> -}
> -
> -static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> -                        int colorspace, int primaries, int transfer, int range, int location)
> -{
> -    format->width = frame->width;
> -    format->height = frame->height;
> -    format->subsample_w = desc->log2_chroma_w;
> -    format->subsample_h = desc->log2_chroma_h;
> -    format->depth = desc->comp[0].depth;
> -    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> -    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> -    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> -    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> -    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
> -    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> -    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> -}
> -
> -static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
> -                       zimg_image_format *src_format, zimg_image_format *dst_format,
> -                       void **tmp, size_t *tmp_size)
> -{
> -    int ret;
> -    size_t size;
> -
> -    zimg_filter_graph_free(*graph);
> -    *graph = zimg_filter_graph_build(src_format, dst_format, params);
> -    if (!*graph)
> -        return print_zimg_error(NULL);
> -
> -    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
> -    if (ret)
> -        return print_zimg_error(NULL);
> -
> -    if (size > *tmp_size) {
> -        av_freep(tmp);
> -        *tmp = av_malloc(size);
> -        if (!*tmp)
> -            return AVERROR(ENOMEM);
> -
> -        *tmp_size = size;
> -    }
> -
> -    return 0;
> -}
>  
>  static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
>  {
>      AVFrame *aligned = NULL;
> -    int ret = 0, plane;
> +    int ret = 0, plane, planes;
>  
>      /* Realign any unaligned input frame. */
> -    for (plane = 0; plane < 3; plane++) {
> +    planes = av_pix_fmt_count_planes(desc->nb_components);
> +    for (plane = 0; plane < planes; plane++) {
>          int p = desc->comp[plane].plane;
>          if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
>              if (!(aligned = av_frame_alloc())) {
> @@ -554,6 +682,7 @@ fail:
>      return ret;
>  }
>  
> +

This newline is not needed.

>  static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
>  {
>      if (s->colorspace != -1)
> @@ -572,20 +701,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
>          frame->chroma_location = (int)s->dst_format.chroma_location + 1;
>  }
>  
> +static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
> +{
> +    ThreadData *td = data;
> +    int ret = 0;
> +    int p;
> +    int out_sampl;
> +    int need_gb;
> +    ZScaleContext *s = ctx->priv;
> +    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> +    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
> +    int  dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe; 
> +
> +    /* create zimg filter graphs for each thread
> +     only if not created earlier or there is some change in frame parameters */
> +    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
> +        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
> +        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
> +    if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
> +        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
> +            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
> +            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
> +
> +    if (need_gb){
> +        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
> +        if (ret < 0)
> +            return print_zimg_error(ctx);
> +    }
> +    out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
> +    for (int i = 0; i < 3; i++) {
> +        p = td->desc->comp[i].plane;      
> +
> +        src_buf.plane[i].data = td->in->data[p];
> +        src_buf.plane[i].stride = td->in->linesize[p];
> +        src_buf.plane[i].mask = -1;
> +
> +        p = td->odesc->comp[i].plane;
> +        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
> +        dst_buf.plane[i].stride = td->out->linesize[p];
> +        dst_buf.plane[i].mask = -1;
> +    }
> +    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +    if (ret) 
> +        return  print_zimg_error(ctx);
> +
> +    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        src_buf.plane[0].data = td->in->data[3];
> +        src_buf.plane[0].stride = td->in->linesize[3];
> +        src_buf.plane[0].mask = -1;
> +
> +        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height  * job_nr;
> +        dst_buf.plane[0].stride = td->out->linesize[3];
> +        dst_buf.plane[0].mask = -1;
> +
> +        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +        if (ret)
> +            return print_zimg_error(ctx);
> +    }
> +    return 0;
> +}
> +
>  static int filter_frame(AVFilterLink *link, AVFrame *in)
>  {
> -    ZScaleContext *s = link->dst->priv;
> -    AVFilterLink *outlink = link->dst->outputs[0];
> +    AVFilterContext *ctx = link->dst;
> +    ZScaleContext *s = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
>      const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> -    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> -    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
>      char buf[32];
> -    int ret = 0, plane;
> +    int ret = 0;
>      AVFrame *out = NULL;
> -
> -    if ((ret = realign_frame(desc, &in)) < 0)
> -        goto fail;
> +    ThreadData td;
>  
>      if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
>          ret =  AVERROR(ENOMEM);
> @@ -596,35 +782,60 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      out->width  = outlink->w;
>      out->height = outlink->h;
>  
> -    if(   in->width  != link->w
> -       || in->height != link->h
> -       || in->format != link->format
> -       || s->in_colorspace != in->colorspace
> -       || s->in_trc  != in->color_trc
> -       || s->in_primaries != in->color_primaries
> -       || s->in_range != in->color_range
> -       || s->out_colorspace != out->colorspace
> -       || s->out_trc  != out->color_trc
> -       || s->out_primaries != out->color_primaries
> -       || s->out_range != out->color_range
> -       || s->in_chromal != in->chroma_location
> -       || s->out_chromal != out->chroma_location) {
> +    //we need to use this filter if something is different for an input and output only
> +    //otherwise - just copy the input frame to the output
> +    if ((link->w != outlink->w) ||
> +        (link->h != outlink->h) ||
> +        (s->src_format.chroma_location != s->dst_format.chroma_location)||

Please put a space between ')' and '||' here and everywhere else.

> +        (s->src_format.color_family !=s->dst_format.color_family)||
> +        (s->src_format.color_primaries !=s->dst_format.color_primaries)||
> +        (s->src_format.depth !=s->dst_format.depth)||
> +        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients)||
> +        (s->src_format.field_parity !=s->dst_format.field_parity)||
> +        (s->src_format.pixel_range !=s->dst_format.pixel_range)||
> +        (s->src_format.pixel_type !=s->dst_format.pixel_type)||
> +        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
> +    ){
> +        if ((ret = realign_frame(desc, &in)) < 0)
> +            goto fail;
> +
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
>          av_opt_set(s, "w", buf, 0);
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
>          av_opt_set(s, "h", buf, 0);
>  
> +

This blank line is not needed.

>          link->dst->inputs[0]->format = in->format;
>          link->dst->inputs[0]->w      = in->width;
>          link->dst->inputs[0]->h      = in->height;
>  
> -        if ((ret = config_props(outlink)) < 0)
> -            goto fail;
> +        update_output_color_information(s, out);
> +    
> +        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
> +        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
> +        s->in_colorspace = in->colorspace;
> +        s->in_trc = in->color_trc;
> +        s->in_primaries = in->color_primaries;
> +        s->in_range = in->color_range;
> +        s->out_colorspace = out->colorspace;
> +        s->out_trc = out->color_trc;
> +        s->out_primaries = out->color_primaries;
> +        s->out_range = out->color_range;
> +    
> +        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> +                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> +                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> +                  INT_MAX);
>  
>          zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
>          zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
>          zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
>  
> +        format_init(&s->src_format, in, desc, s->colorspace_in,
> +            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> +        format_init(&s->dst_format, out, odesc, s->colorspace,
> +            s->primaries, s->trc, s->range, s->chromal);
> +
>          s->params.dither_type = s->dither;
>          s->params.cpu_type = ZIMG_CPU_AUTO;
>          s->params.resample_filter = s->filter;
> @@ -634,27 +845,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>          s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
>          s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
>  
> -        format_init(&s->src_format, in, desc, s->colorspace_in,
> -                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> -        format_init(&s->dst_format, out, odesc, s->colorspace,
> -                    s->primaries, s->trc, s->range, s->chromal);
> -
> -        update_output_color_information(s, out);
> -
> -        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
> -                          &s->tmp, &s->tmp_size);
> -        if (ret < 0)
> -            goto fail;
> -
> -        s->in_colorspace  = in->colorspace;
> -        s->in_trc         = in->color_trc;
> -        s->in_primaries   = in->color_primaries;
> -        s->in_range       = in->color_range;
> -        s->out_colorspace = out->colorspace;
> -        s->out_trc        = out->color_trc;
> -        s->out_primaries  = out->color_primaries;
> -        s->out_range      = out->color_range;
> -
>          if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
>              zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
>              zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> @@ -670,76 +860,48 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>              s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
>  
> -            s->alpha_dst_format.width = out->width;
> -            s->alpha_dst_format.height = out->height;
>              s->alpha_dst_format.depth = odesc->comp[0].depth;
>              s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
> -
> -            zimg_filter_graph_free(s->alpha_graph);
> -            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
> -            if (!s->alpha_graph) {
> -                ret = print_zimg_error(link->dst);
> -                goto fail;
> -            }
>          }
> -    }
>  
> -    update_output_color_information(s, out);
> +        td.in = in;
> +        td.out = out;
> +        td.desc = desc;
> +        td.odesc = odesc;
>  
> -    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> -              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> -              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> -              INT_MAX);
> -
> -    for (plane = 0; plane < 3; plane++) {
> -        int p = desc->comp[plane].plane;
> -        src_buf.plane[plane].data   = in->data[p];
> -        src_buf.plane[plane].stride = in->linesize[p];
> -        src_buf.plane[plane].mask   = -1;
> -
> -        p = odesc->comp[plane].plane;
> -        dst_buf.plane[plane].data   = out->data[p];
> -        dst_buf.plane[plane].stride = out->linesize[p];
> -        dst_buf.plane[plane].mask   = -1;
> -    }
> -
> -    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -    if (ret) {
> -        ret = print_zimg_error(link->dst);
> -        goto fail;
> -    }
> +        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
>  
> -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        src_buf.plane[0].data   = in->data[3];
> -        src_buf.plane[0].stride = in->linesize[3];
> -        src_buf.plane[0].mask   = -1;
> -
> -        dst_buf.plane[0].data   = out->data[3];
> -        dst_buf.plane[0].stride = out->linesize[3];
> -        dst_buf.plane[0].mask   = -1;
> -
> -        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -        if (ret) {
> -            ret = print_zimg_error(link->dst);
> -            goto fail;
> +        s->src_format_tmp = s->src_format;
> +        s->dst_format_tmp = s->dst_format;
> +        s->params_tmp = s->params;
> +        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +            s->alpha_src_format_tmp = s->alpha_src_format;
> +            s->alpha_dst_format_tmp = s->alpha_dst_format;
> +            s->alpha_params_tmp = s->alpha_params;
>          }
> -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        int x, y;
> -
> -        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> -            for (y = 0; y < out->height; y++) {
> -                for (x = 0; x < out->width; x++) {
> -                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> -                            av_float2int(1.0f));
> +
> +        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
> +            int x, y;
> +            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> +                for (y = 0; y < out->height; y++) {
> +                    for (x = 0; x < out->width; x++) {
> +                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> +                                av_float2int(1.0f));
> +                    }
>                  }
> +            } else {
> +                for (y = 0; y < outlink->h; y++)
> +                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>              }
> -        } else {
> -            for (y = 0; y < outlink->h; y++)
> -                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>          }
>      }
> -
> +    else {
> +        /*no need for any filtering */
> +        ret = av_frame_copy(out, in);
> +        if (ret < 0)
> +            return ret;
> +    }
>  fail:
>      av_frame_free(&in);
>      if (ret) {
> @@ -753,11 +915,12 @@ fail:
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
> -
> -    zimg_filter_graph_free(s->graph);
> -    zimg_filter_graph_free(s->alpha_graph);
> -    av_freep(&s->tmp);
> -    s->tmp_size = 0;
> +    int i;
> +    for (i = 0; i < s->nb_threads; i++) {
> +        if (s->tmp[i]) av_freep(&s->tmp[i]);

This NULL check is not needed; av_freep() is safe to call on a NULL pointer.

> +        if (s->graph[i]) zimg_filter_graph_free(s->graph[i]);
> +        if (s->alpha_graph[i]) zimg_filter_graph_free(s->alpha_graph[i]);
> +    }
>  }
>  
>  static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
> @@ -941,4 +1104,5 @@ const AVFilter ff_vf_zscale = {
>      FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
>      FILTER_QUERY_FUNC(query_formats),
>      .process_command = process_command,
> +    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,

Timeline support does not work if w/h changes, so just remove this flag from here.

>  };
> -- 
> 2.31.1.windows.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x
@ 2022-02-10 10:08 Victoria Zhislina
  2022-02-18 11:43 ` Paul B Mahol
  0 siblings, 1 reply; 17+ messages in thread
From: Victoria Zhislina @ 2022-02-10 10:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Victoria Zhislina

Add slice threading support to zscale by splitting each frame into
slices, and call zimg_filter_graph_build (which used to take 30-60% of
each frame's processing time) only when necessary, i.e. when some
parameters have changed.
Compared to the original version, video downscale and color conversion
is >4x faster on a 64-core Intel Xeon and 3x faster on an i7-6700K
(4 cores with HT).

Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
---
 libavfilter/vf_zscale.c | 786 ++++++++++++++++++++++++----------------
 1 file changed, 475 insertions(+), 311 deletions(-)

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..ce4c0b2c76 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2015 Paul B Mahol
- *
+ * * 2022 Victoria Zhislina, Intel - performance optimization
+ 
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@
 #include "libavutil/imgutils.h"
 
 #define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
 
 static const char *const var_names[] = {
     "in_w",   "iw",
@@ -113,13 +116,17 @@ typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
+    void *tmp[MAX_THREADS]; //separate for each thread;
+	int nb_threads;
+    int slice_h;
 
     zimg_image_format src_format, dst_format;
     zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_image_format src_format_tmp, dst_format_tmp;
+    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
     zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    zimg_graph_builder_params alpha_params_tmp, params_tmp;
+    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,181 @@ typedef struct ZScaleContext {
     enum AVChromaLocation in_chromal, out_chromal;
 } ZScaleContext;
 
+
+typedef struct ThreadData {
+    const AVPixFmtDescriptor *desc, *odesc;
+    AVFrame *in, *out;
+} ThreadData;
+
+static int convert_chroma_location(enum AVChromaLocation chroma_location)
+{
+    switch (chroma_location) {
+    case AVCHROMA_LOC_UNSPECIFIED:
+    case AVCHROMA_LOC_LEFT:
+        return ZIMG_CHROMA_LEFT;
+    case AVCHROMA_LOC_CENTER:
+        return ZIMG_CHROMA_CENTER;
+    case AVCHROMA_LOC_TOPLEFT:
+        return ZIMG_CHROMA_TOP_LEFT;
+    case AVCHROMA_LOC_TOP:
+        return ZIMG_CHROMA_TOP;
+    case AVCHROMA_LOC_BOTTOMLEFT:
+        return ZIMG_CHROMA_BOTTOM_LEFT;
+    case AVCHROMA_LOC_BOTTOM:
+        return ZIMG_CHROMA_BOTTOM;
+    }
+    return ZIMG_CHROMA_LEFT;
+}
+
+static int convert_matrix(enum AVColorSpace colorspace)
+{
+    switch (colorspace) {
+    case AVCOL_SPC_RGB:
+        return ZIMG_MATRIX_RGB;
+    case AVCOL_SPC_BT709:
+        return ZIMG_MATRIX_709;
+    case AVCOL_SPC_UNSPECIFIED:
+        return ZIMG_MATRIX_UNSPECIFIED;
+    case AVCOL_SPC_FCC:
+        return ZIMG_MATRIX_FCC;
+    case AVCOL_SPC_BT470BG:
+        return ZIMG_MATRIX_470BG;
+    case AVCOL_SPC_SMPTE170M:
+        return ZIMG_MATRIX_170M;
+    case AVCOL_SPC_SMPTE240M:
+        return ZIMG_MATRIX_240M;
+    case AVCOL_SPC_YCGCO:
+        return ZIMG_MATRIX_YCGCO;
+    case AVCOL_SPC_BT2020_NCL:
+        return ZIMG_MATRIX_2020_NCL;
+    case AVCOL_SPC_BT2020_CL:
+        return ZIMG_MATRIX_2020_CL;
+    case AVCOL_SPC_CHROMA_DERIVED_NCL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
+    case AVCOL_SPC_CHROMA_DERIVED_CL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
+    case AVCOL_SPC_ICTCP:
+        return ZIMG_MATRIX_ICTCP;
+    }
+    return ZIMG_MATRIX_UNSPECIFIED;
+}
+
+static int convert_trc(enum AVColorTransferCharacteristic color_trc)
+{
+    switch (color_trc) {
+    case AVCOL_TRC_UNSPECIFIED:
+        return ZIMG_TRANSFER_UNSPECIFIED;
+    case AVCOL_TRC_BT709:
+        return ZIMG_TRANSFER_709;
+    case AVCOL_TRC_GAMMA22:
+        return ZIMG_TRANSFER_470_M;
+    case AVCOL_TRC_GAMMA28:
+        return ZIMG_TRANSFER_470_BG;
+    case AVCOL_TRC_SMPTE170M:
+        return ZIMG_TRANSFER_601;
+    case AVCOL_TRC_SMPTE240M:
+        return ZIMG_TRANSFER_240M;
+    case AVCOL_TRC_LINEAR:
+        return ZIMG_TRANSFER_LINEAR;
+    case AVCOL_TRC_LOG:
+        return ZIMG_TRANSFER_LOG_100;
+    case AVCOL_TRC_LOG_SQRT:
+        return ZIMG_TRANSFER_LOG_316;
+    case AVCOL_TRC_IEC61966_2_4:
+        return ZIMG_TRANSFER_IEC_61966_2_4;
+    case AVCOL_TRC_BT2020_10:
+        return ZIMG_TRANSFER_2020_10;
+    case AVCOL_TRC_BT2020_12:
+        return ZIMG_TRANSFER_2020_12;
+    case AVCOL_TRC_SMPTE2084:
+        return ZIMG_TRANSFER_ST2084;
+    case AVCOL_TRC_ARIB_STD_B67:
+        return ZIMG_TRANSFER_ARIB_B67;
+    case AVCOL_TRC_IEC61966_2_1:
+        return ZIMG_TRANSFER_IEC_61966_2_1;
+    }
+    return ZIMG_TRANSFER_UNSPECIFIED;
+}
+
+static int convert_primaries(enum AVColorPrimaries color_primaries)
+{
+    switch (color_primaries) {
+    case AVCOL_PRI_UNSPECIFIED:
+        return ZIMG_PRIMARIES_UNSPECIFIED;
+    case AVCOL_PRI_BT709:
+        return ZIMG_PRIMARIES_709;
+    case AVCOL_PRI_BT470M:
+        return ZIMG_PRIMARIES_470_M;
+    case AVCOL_PRI_BT470BG:
+        return ZIMG_PRIMARIES_470_BG;
+    case AVCOL_PRI_SMPTE170M:
+        return ZIMG_PRIMARIES_170M;
+    case AVCOL_PRI_SMPTE240M:
+        return ZIMG_PRIMARIES_240M;
+    case AVCOL_PRI_FILM:
+        return ZIMG_PRIMARIES_FILM;
+    case AVCOL_PRI_BT2020:
+        return ZIMG_PRIMARIES_2020;
+    case AVCOL_PRI_SMPTE428:
+        return ZIMG_PRIMARIES_ST428;
+    case AVCOL_PRI_SMPTE431:
+        return ZIMG_PRIMARIES_ST431_2;
+    case AVCOL_PRI_SMPTE432:
+        return ZIMG_PRIMARIES_ST432_1;
+    case AVCOL_PRI_JEDEC_P22:
+        return ZIMG_PRIMARIES_EBU3213_E;
+    }
+    return ZIMG_PRIMARIES_UNSPECIFIED;
+}
+
+static int convert_range(enum AVColorRange color_range)
+{
+    switch (color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_MPEG:
+        return ZIMG_RANGE_LIMITED;
+    case AVCOL_RANGE_JPEG:
+        return ZIMG_RANGE_FULL;
+    }
+    return ZIMG_RANGE_LIMITED;
+}
+
+static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
+{
+    switch (color_range) {
+    case ZIMG_RANGE_LIMITED:
+        return AVCOL_RANGE_MPEG;
+    case ZIMG_RANGE_FULL:
+        return AVCOL_RANGE_JPEG;
+    }
+    return AVCOL_RANGE_UNSPECIFIED;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
     int ret;
+    int i;
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->tmp[i] = NULL;
+        s->graph[i] = NULL;
+        s->alpha_graph[i] = NULL;
+    }
+    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
 
     if (s->size_str && (s->w_expr || s->h_expr)) {
         av_log(ctx, AV_LOG_ERROR,
@@ -194,6 +372,153 @@ static int query_formats(AVFilterContext *ctx)
     return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
 }
 
+/* returns 0 if image formats are the same and 1 otherwise */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
+{
+    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
+#if ZIMG_API_VERSION >= 0x204
+        (img_fmt0->alpha != img_fmt1->alpha) ||
+#endif
+        (img_fmt0->color_family != img_fmt1->color_family) ||
+        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
+        (img_fmt0->depth != img_fmt1->depth) ||
+        (img_fmt0->field_parity != img_fmt1->field_parity) ||
+        (img_fmt0->height != img_fmt1->height) ||
+        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
+        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
+        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
+        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
+        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
+        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
+        (img_fmt0->width != img_fmt1->width));
+}
+
+/* returns 0 if graph builder parameters are the same and 1 otherwise */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
+{
+    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
+    and NaN values that are default for some params are treated properly*/
+    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
+        (parm0->dither_type != parm1->dither_type) ||
+        (parm0->resample_filter != parm1->resample_filter) ||
+        (parm0->resample_filter_uv != parm1->resample_filter_uv);
+
+    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
+        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
+    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
+        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
+    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
+        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
+    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
+        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
+    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
+        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
+
+    return ret;
+}
+
+static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
+    int colorspace, int primaries, int transfer, int range, int location)
+{
+    format->width = frame->width;
+    format->height = frame->height;
+    format->subsample_w = desc->log2_chroma_w;
+    format->subsample_h = desc->log2_chroma_h;
+    format->depth = desc->comp[0].depth;
+    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
+    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
+    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
+    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
+    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
+    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
+}
+
+static int print_zimg_error(AVFilterContext *ctx)
+{
+    char err_msg[1024];
+    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
+
+    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
+
+    return AVERROR_EXTERNAL;
+}
+
+static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
+    ZScaleContext *s, int job_nr)
+{
+    int ret;
+    size_t size;
+    zimg_image_format src_format;
+    zimg_image_format dst_format;
+    zimg_image_format alpha_src_format;
+    zimg_image_format alpha_dst_format;
+
+    src_format = s->src_format;
+    dst_format = s->dst_format;
+    /* The input slice is specified through the active_region field, 
+    unlike the output slice.
+    according to zimg requirements input and output slices should have even dimentions */
+    src_format.active_region.width = in->width;
+    src_format.active_region.height = s->slice_h;
+    src_format.active_region.left = 0;
+    src_format.active_region.top = job_nr * src_format.active_region.height;
+    //dst now is the single tile only!!
+    dst_format.width = out->width;
+    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+    //the last slice could differ from the previous ones due to the slices division "tail"
+    if (job_nr == (s->nb_threads - 1)) {
+        src_format.active_region.height = src_format.height - src_format.active_region.top;
+        dst_format.height = out->height - job_nr * dst_format.height;
+    }
+
+    if (s->graph[job_nr]) {
+        zimg_filter_graph_free(s->graph[job_nr]);
+    }
+    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
+    if (!s->graph[job_nr])
+        return print_zimg_error(NULL);
+
+    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
+    if (ret)
+        return print_zimg_error(NULL);
+
+    if (s->tmp[job_nr])
+        av_freep(&s->tmp[job_nr]);
+    s->tmp[job_nr] = av_malloc(size);
+    if (!s->tmp[job_nr])
+        return AVERROR(ENOMEM);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        alpha_src_format = s->alpha_src_format;
+        alpha_dst_format = s->alpha_dst_format;
+        /* The input slice is specified through the active_region field, unlike the output slice.
+        according to zimg requirements input and output slices should have even dimentions */
+        alpha_src_format.active_region.width = in->width;
+        alpha_src_format.active_region.height = s->slice_h;
+        alpha_src_format.active_region.left = 0;
+        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
+        //dst now is the single tile only!!
+        alpha_dst_format.width = out->width;
+        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+        //the last slice could differ from the previous ones due to the slices division "tail"
+        if (job_nr == (s->nb_threads - 1)) {
+            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
+            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
+        }
+
+        if (s->alpha_graph[job_nr]) {
+            zimg_filter_graph_free(s->alpha_graph[job_nr]);
+        }
+        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
+        if (!s->alpha_graph[job_nr])
+            return print_zimg_error(NULL);
+     }
+    return 0;
+}
+
 static int config_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -317,212 +642,15 @@ fail:
     return ret;
 }
 
-static int print_zimg_error(AVFilterContext *ctx)
-{
-    char err_msg[1024];
-    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
-
-    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
-
-    return AVERROR_EXTERNAL;
-}
-
-static int convert_chroma_location(enum AVChromaLocation chroma_location)
-{
-    switch (chroma_location) {
-    case AVCHROMA_LOC_UNSPECIFIED:
-    case AVCHROMA_LOC_LEFT:
-        return ZIMG_CHROMA_LEFT;
-    case AVCHROMA_LOC_CENTER:
-        return ZIMG_CHROMA_CENTER;
-    case AVCHROMA_LOC_TOPLEFT:
-        return ZIMG_CHROMA_TOP_LEFT;
-    case AVCHROMA_LOC_TOP:
-        return ZIMG_CHROMA_TOP;
-    case AVCHROMA_LOC_BOTTOMLEFT:
-        return ZIMG_CHROMA_BOTTOM_LEFT;
-    case AVCHROMA_LOC_BOTTOM:
-        return ZIMG_CHROMA_BOTTOM;
-    }
-    return ZIMG_CHROMA_LEFT;
-}
-
-static int convert_matrix(enum AVColorSpace colorspace)
-{
-    switch (colorspace) {
-    case AVCOL_SPC_RGB:
-        return ZIMG_MATRIX_RGB;
-    case AVCOL_SPC_BT709:
-        return ZIMG_MATRIX_709;
-    case AVCOL_SPC_UNSPECIFIED:
-        return ZIMG_MATRIX_UNSPECIFIED;
-    case AVCOL_SPC_FCC:
-        return ZIMG_MATRIX_FCC;
-    case AVCOL_SPC_BT470BG:
-        return ZIMG_MATRIX_470BG;
-    case AVCOL_SPC_SMPTE170M:
-        return ZIMG_MATRIX_170M;
-    case AVCOL_SPC_SMPTE240M:
-        return ZIMG_MATRIX_240M;
-    case AVCOL_SPC_YCGCO:
-        return ZIMG_MATRIX_YCGCO;
-    case AVCOL_SPC_BT2020_NCL:
-        return ZIMG_MATRIX_2020_NCL;
-    case AVCOL_SPC_BT2020_CL:
-        return ZIMG_MATRIX_2020_CL;
-    case AVCOL_SPC_CHROMA_DERIVED_NCL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
-    case AVCOL_SPC_CHROMA_DERIVED_CL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
-    case AVCOL_SPC_ICTCP:
-        return ZIMG_MATRIX_ICTCP;
-    }
-    return ZIMG_MATRIX_UNSPECIFIED;
-}
-
-static int convert_trc(enum AVColorTransferCharacteristic color_trc)
-{
-    switch (color_trc) {
-    case AVCOL_TRC_UNSPECIFIED:
-        return ZIMG_TRANSFER_UNSPECIFIED;
-    case AVCOL_TRC_BT709:
-        return ZIMG_TRANSFER_709;
-    case AVCOL_TRC_GAMMA22:
-        return ZIMG_TRANSFER_470_M;
-    case AVCOL_TRC_GAMMA28:
-        return ZIMG_TRANSFER_470_BG;
-    case AVCOL_TRC_SMPTE170M:
-        return ZIMG_TRANSFER_601;
-    case AVCOL_TRC_SMPTE240M:
-        return ZIMG_TRANSFER_240M;
-    case AVCOL_TRC_LINEAR:
-        return ZIMG_TRANSFER_LINEAR;
-    case AVCOL_TRC_LOG:
-        return ZIMG_TRANSFER_LOG_100;
-    case AVCOL_TRC_LOG_SQRT:
-        return ZIMG_TRANSFER_LOG_316;
-    case AVCOL_TRC_IEC61966_2_4:
-        return ZIMG_TRANSFER_IEC_61966_2_4;
-    case AVCOL_TRC_BT2020_10:
-        return ZIMG_TRANSFER_2020_10;
-    case AVCOL_TRC_BT2020_12:
-        return ZIMG_TRANSFER_2020_12;
-    case AVCOL_TRC_SMPTE2084:
-        return ZIMG_TRANSFER_ST2084;
-    case AVCOL_TRC_ARIB_STD_B67:
-        return ZIMG_TRANSFER_ARIB_B67;
-    case AVCOL_TRC_IEC61966_2_1:
-        return ZIMG_TRANSFER_IEC_61966_2_1;
-    }
-    return ZIMG_TRANSFER_UNSPECIFIED;
-}
-
-static int convert_primaries(enum AVColorPrimaries color_primaries)
-{
-    switch (color_primaries) {
-    case AVCOL_PRI_UNSPECIFIED:
-        return ZIMG_PRIMARIES_UNSPECIFIED;
-    case AVCOL_PRI_BT709:
-        return ZIMG_PRIMARIES_709;
-    case AVCOL_PRI_BT470M:
-        return ZIMG_PRIMARIES_470_M;
-    case AVCOL_PRI_BT470BG:
-        return ZIMG_PRIMARIES_470_BG;
-    case AVCOL_PRI_SMPTE170M:
-        return ZIMG_PRIMARIES_170M;
-    case AVCOL_PRI_SMPTE240M:
-        return ZIMG_PRIMARIES_240M;
-    case AVCOL_PRI_FILM:
-        return ZIMG_PRIMARIES_FILM;
-    case AVCOL_PRI_BT2020:
-        return ZIMG_PRIMARIES_2020;
-    case AVCOL_PRI_SMPTE428:
-        return ZIMG_PRIMARIES_ST428;
-    case AVCOL_PRI_SMPTE431:
-        return ZIMG_PRIMARIES_ST431_2;
-    case AVCOL_PRI_SMPTE432:
-        return ZIMG_PRIMARIES_ST432_1;
-    case AVCOL_PRI_JEDEC_P22:
-        return ZIMG_PRIMARIES_EBU3213_E;
-    }
-    return ZIMG_PRIMARIES_UNSPECIFIED;
-}
-
-static int convert_range(enum AVColorRange color_range)
-{
-    switch (color_range) {
-    case AVCOL_RANGE_UNSPECIFIED:
-    case AVCOL_RANGE_MPEG:
-        return ZIMG_RANGE_LIMITED;
-    case AVCOL_RANGE_JPEG:
-        return ZIMG_RANGE_FULL;
-    }
-    return ZIMG_RANGE_LIMITED;
-}
-
-static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
-{
-    switch (color_range) {
-    case ZIMG_RANGE_LIMITED:
-        return AVCOL_RANGE_MPEG;
-    case ZIMG_RANGE_FULL:
-        return AVCOL_RANGE_JPEG;
-    }
-    return AVCOL_RANGE_UNSPECIFIED;
-}
-
-static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
-                        int colorspace, int primaries, int transfer, int range, int location)
-{
-    format->width = frame->width;
-    format->height = frame->height;
-    format->subsample_w = desc->log2_chroma_w;
-    format->subsample_h = desc->log2_chroma_h;
-    format->depth = desc->comp[0].depth;
-    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
-    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
-    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
-    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
-    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
-    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
-    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
-}
-
-static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
-                       zimg_image_format *src_format, zimg_image_format *dst_format,
-                       void **tmp, size_t *tmp_size)
-{
-    int ret;
-    size_t size;
-
-    zimg_filter_graph_free(*graph);
-    *graph = zimg_filter_graph_build(src_format, dst_format, params);
-    if (!*graph)
-        return print_zimg_error(NULL);
-
-    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
-    if (ret)
-        return print_zimg_error(NULL);
-
-    if (size > *tmp_size) {
-        av_freep(tmp);
-        *tmp = av_malloc(size);
-        if (!*tmp)
-            return AVERROR(ENOMEM);
-
-        *tmp_size = size;
-    }
-
-    return 0;
-}
 
 static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
 {
     AVFrame *aligned = NULL;
-    int ret = 0, plane;
+    int ret = 0, plane, planes;
 
     /* Realign any unaligned input frame. */
-    for (plane = 0; plane < 3; plane++) {
+    planes = av_pix_fmt_count_planes(desc->nb_components);
+    for (plane = 0; plane < planes; plane++) {
         int p = desc->comp[plane].plane;
         if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
             if (!(aligned = av_frame_alloc())) {
@@ -554,6 +682,7 @@ fail:
     return ret;
 }
 
+
 static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
 {
     if (s->colorspace != -1)
@@ -572,20 +701,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
         frame->chroma_location = (int)s->dst_format.chroma_location + 1;
 }
 
+/* Slice-threading worker: lets zimg convert output tile job_nr (one of n_jobs) of td->out.
+ * data points to a ThreadData; returns 0 on success or a negative AVERROR code. */
+static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
+{
+    ThreadData *td = data;
+    ZScaleContext *s = ctx->priv;
+    const int has_alpha = (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA) &&
+                          (td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA);
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    /* tiles have an even height; same formula as the tile height in graphs_build() */
+    const int dst_tile_height = (td->out->height / n_jobs) & ~1;
+    const int dst_top = dst_tile_height * job_nr; /* first full-resolution row of this tile */
+    int ret, need_gb;
+
+    /* rebuild this job's zimg graphs only when the formats or parameters have changed */
+    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
+              compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
+              compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
+    if (has_alpha)
+        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
+                  compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
+                  compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
+    if (need_gb) {
+        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
+        if (ret < 0)
+            return ret; /* keep the real error code, e.g. AVERROR(ENOMEM) */
+    }
+
+    for (int i = 0; i < 3; i++) {
+        /* components 1 and 2 have fewer rows when chroma is vertically subsampled */
+        const int vsamp = i ? td->odesc->log2_chroma_h : 0;
+        int p = td->desc->comp[i].plane;
+
+        src_buf.plane[i].data = td->in->data[p];
+        src_buf.plane[i].stride = td->in->linesize[p];
+        src_buf.plane[i].mask = -1;
+
+        p = td->odesc->comp[i].plane;
+        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * (dst_top >> vsamp);
+        dst_buf.plane[i].stride = td->out->linesize[p];
+        dst_buf.plane[i].mask = -1;
+    }
+    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+    if (ret)
+        return print_zimg_error(ctx);
+    if (!has_alpha)
+        return 0;
+
+    /* alpha is converted as a separate single-plane (grey) image by its own graph */
+    src_buf.plane[0].data = td->in->data[3];
+    src_buf.plane[0].stride = td->in->linesize[3];
+    src_buf.plane[0].mask = -1;
+    dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_top;
+    dst_buf.plane[0].stride = td->out->linesize[3];
+    dst_buf.plane[0].mask = -1;
+    ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+    return ret ? print_zimg_error(ctx) : 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
     AVFrame *out = NULL;
-
-    if ((ret = realign_frame(desc, &in)) < 0)
-        goto fail;
+    ThreadData td;
 
     if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
         ret =  AVERROR(ENOMEM);
@@ -596,35 +782,60 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     out->width  = outlink->w;
     out->height = outlink->h;
 
-    if(   in->width  != link->w
-       || in->height != link->h
-       || in->format != link->format
-       || s->in_colorspace != in->colorspace
-       || s->in_trc  != in->color_trc
-       || s->in_primaries != in->color_primaries
-       || s->in_range != in->color_range
-       || s->out_colorspace != out->colorspace
-       || s->out_trc  != out->color_trc
-       || s->out_primaries != out->color_primaries
-       || s->out_range != out->color_range
-       || s->in_chromal != in->chroma_location
-       || s->out_chromal != out->chroma_location) {
+    // The filter only needs to run if something differs between the input and the output;
+    // otherwise just copy the input frame to the output.
+    if ((link->w != outlink->w) ||
+        (link->h != outlink->h) ||
+        (s->src_format.chroma_location != s->dst_format.chroma_location)||
+        (s->src_format.color_family !=s->dst_format.color_family)||
+        (s->src_format.color_primaries !=s->dst_format.color_primaries)||
+        (s->src_format.depth !=s->dst_format.depth)||
+        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients)||
+        (s->src_format.field_parity !=s->dst_format.field_parity)||
+        (s->src_format.pixel_range !=s->dst_format.pixel_range)||
+        (s->src_format.pixel_type !=s->dst_format.pixel_type)||
+        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
+    ){
+        if ((ret = realign_frame(desc, &in)) < 0)
+            goto fail;
+
         snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
         av_opt_set(s, "w", buf, 0);
         snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
         av_opt_set(s, "h", buf, 0);
 
+ 
         link->dst->inputs[0]->format = in->format;
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
-        if ((ret = config_props(outlink)) < 0)
-            goto fail;
+        update_output_color_information(s, out);
+    
+        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
+        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
+        s->in_colorspace = in->colorspace;
+        s->in_trc = in->color_trc;
+        s->in_primaries = in->color_primaries;
+        s->in_range = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc = out->color_trc;
+        s->out_primaries = out->color_primaries;
+        s->out_range = out->color_range;
+    
+        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+                  INT_MAX);
 
         zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
         zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
         zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
 
+        format_init(&s->src_format, in, desc, s->colorspace_in,
+            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
+        format_init(&s->dst_format, out, odesc, s->colorspace,
+            s->primaries, s->trc, s->range, s->chromal);
+
         s->params.dither_type = s->dither;
         s->params.cpu_type = ZIMG_CPU_AUTO;
         s->params.resample_filter = s->filter;
@@ -634,27 +845,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
         s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
 
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
-
-        update_output_color_information(s, out);
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
-
-        s->in_colorspace  = in->colorspace;
-        s->in_trc         = in->color_trc;
-        s->in_primaries   = in->color_primaries;
-        s->in_range       = in->color_range;
-        s->out_colorspace = out->colorspace;
-        s->out_trc        = out->color_trc;
-        s->out_primaries  = out->color_primaries;
-        s->out_range      = out->color_range;
-
         if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
             zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
             zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
@@ -670,76 +860,48 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
 
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
             s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
         }
-    }
 
-    update_output_color_information(s, out);
+        td.in = in;
+        td.out = out;
+        td.desc = desc;
+        td.odesc = odesc;
 
-    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
-              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
-              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
-              INT_MAX);
-
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
-
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
+        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
 
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
-
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
-
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
+        s->src_format_tmp = s->src_format;
+        s->dst_format_tmp = s->dst_format;
+        s->params_tmp = s->params;
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            s->alpha_src_format_tmp = s->alpha_src_format;
+            s->alpha_dst_format_tmp = s->alpha_dst_format;
+            s->alpha_params_tmp = s->alpha_params;
         }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
+
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
+            int x, y;
+            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+                for (y = 0; y < out->height; y++) {
+                    for (x = 0; x < out->width; x++) {
+                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                                av_float2int(1.0f));
+                    }
                 }
+            } else {
+                for (y = 0; y < outlink->h; y++)
+                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
             }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
         }
     }
-
+    else {
+        /*no need for any filtering */
+        ret = av_frame_copy(out, in);
+        if (ret < 0)
+            return ret;
+    }
 fail:
     av_frame_free(&in);
     if (ret) {
@@ -753,11 +915,12 @@ fail:
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
-
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    /* drop each job's scratch buffer and zimg graphs; the free functions accept NULL */
+    for (int job = 0; job < s->nb_threads; job++) {
+        av_freep(&s->tmp[job]);
+        zimg_filter_graph_free(s->graph[job]);
+        zimg_filter_graph_free(s->alpha_graph[job]);
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -941,4 +1104,5 @@ const AVFilter ff_vf_zscale = {
     FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
     FILTER_QUERY_FUNC(query_formats),
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };
-- 
2.31.1.windows.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2022-03-11 16:43 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-18 15:24 [FFmpeg-devel] [PATCH] libavfilter: zscale performance optimization >4x Victoria Zhislina
2022-02-18 15:48 ` James Almer
2022-02-18 16:00   ` Paul B Mahol
  -- strict thread matches above, loose matches on Subject: below --
2022-02-21  8:20 Victoria Zhislina
2022-02-21 11:22 ` Anton Khirnov
2022-02-21 15:22   ` Victoria Zhislina
2022-02-19 13:58 Victoria Zhislina
2022-02-20 18:51 ` Paul B Mahol
2022-02-22  5:25 ` Lynne
2022-02-22  8:15   ` Paul B Mahol
2022-02-22  8:16     ` Paul B Mahol
2022-02-22  9:34       ` Victoria Zhislina
2022-03-10 18:41       ` Victoria Zhislina
2022-03-10 18:47         ` Paul B Mahol
2022-03-11 16:42           ` Victoria Zhislina
2022-02-10 10:08 Victoria Zhislina
2022-02-18 11:43 ` Paul B Mahol

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git