* [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
@ 2025-07-18 9:57 Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 9:57 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
This filter can detect various properties about the image, including
whether or not there are out-of-range values, or whether the input appears
to use straight or premultiplied alpha.
Of course, these can only be heuristics, with "undetermined" as the base
case. While we can definitely prove the existence of full range or
straight alpha colors, we can never infer the opposite.
---
doc/filters.texi | 27 ++++
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_colordetect.c | 252 +++++++++++++++++++++++++++++++++++
libavfilter/vf_colordetect.h | 149 +++++++++++++++++++++
5 files changed, 430 insertions(+)
create mode 100644 libavfilter/vf_colordetect.c
create mode 100644 libavfilter/vf_colordetect.h
diff --git a/doc/filters.texi b/doc/filters.texi
index ed2956fe75..74e9e71559 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -9753,6 +9753,33 @@ colorchannelmixer=.393:.769:.189:0:.349:.686:.168:0:.272:.534:.131
This filter supports the all above options as @ref{commands}.
+@section colordetect
+Analyze the video frames to determine the effective value range and alpha
+mode.
+
+The filter accepts the following options:
+
+@table @option
+@item mode
+Set of properties to detect. Unavailable properties, such as alpha mode for
+an input image without an alpha channel, will be ignored automatically.
+
+Accepts a combination of the following flags:
+
+@table @samp
+@item color_range
+Detect if the source contains luma pixels outside the limited (MPEG) range,
+which indicates that this is a full range YUV source.
+@item alpha_mode
+Detect if the source contains color values above the alpha channel, which
+indicates that the alpha channel is independent (straight), rather than
+premultiplied.
+@item all
+Enable detection of all of the above properties. This is the default.
+@end table
+
+@end table
+
@section colorize
Overlay a solid color on the video stream.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 9e9153f5b0..e19f67a3a7 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -237,6 +237,7 @@ OBJS-$(CONFIG_COLORBALANCE_FILTER) += vf_colorbalance.o
OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER) += vf_colorchannelmixer.o
OBJS-$(CONFIG_COLORCONTRAST_FILTER) += vf_colorcontrast.o
OBJS-$(CONFIG_COLORCORRECT_FILTER) += vf_colorcorrect.o
+OBJS-$(CONFIG_COLORDETECT_FILTER) += vf_colordetect.o
OBJS-$(CONFIG_COLORIZE_FILTER) += vf_colorize.o
OBJS-$(CONFIG_COLORKEY_FILTER) += vf_colorkey.o
OBJS-$(CONFIG_COLORKEY_OPENCL_FILTER) += vf_colorkey_opencl.o opencl.o \
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 409099bf1f..f3c2092b15 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -218,6 +218,7 @@ extern const FFFilter ff_vf_colorbalance;
extern const FFFilter ff_vf_colorchannelmixer;
extern const FFFilter ff_vf_colorcontrast;
extern const FFFilter ff_vf_colorcorrect;
+extern const FFFilter ff_vf_colordetect;
extern const FFFilter ff_vf_colorize;
extern const FFFilter ff_vf_colorkey;
extern const FFFilter ff_vf_colorkey_opencl;
diff --git a/libavfilter/vf_colordetect.c b/libavfilter/vf_colordetect.c
new file mode 100644
index 0000000000..0fb892634f
--- /dev/null
+++ b/libavfilter/vf_colordetect.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Video color space detector, tries to auto-detect YUV range and alpha mode.
+ */
+
+#include <stdbool.h>
+#include <stdatomic.h>
+
+#include "config.h"
+
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "video.h"
+
+#include "vf_colordetect.h"
+
+/* Outcome of alpha mode detection, stored in ColorDetectContext.detected_alpha */
+enum AlphaMode {
+ ALPHA_NONE = -1, /* input pixel format has no alpha channel */
+ ALPHA_UNDETERMINED = 0, /* alpha present, but nothing proven (yet) */
+ ALPHA_STRAIGHT, /* at least one color sample failed the alpha bound test */
+ /* No way to positively identify premultiplied alpha */
+};
+
+/* Bit flags accepted by the "mode" option, selecting which properties to analyze */
+enum ColorDetectMode {
+ COLOR_DETECT_COLOR_RANGE = 1 << 0, /* option constant "color_range" */
+ COLOR_DETECT_ALPHA_MODE = 1 << 1, /* option constant "alpha_mode" */
+};
+
+/* Private state of the colordetect filter */
+typedef struct ColorDetectContext {
+ const AVClass *class;
+ FFColorDetectDSPContext dsp; /* detection routines, selected in config_input() */
+ unsigned mode; /* combination of enum ColorDetectMode flags ("mode" option) */
+
+ const AVPixFmtDescriptor *desc; /* descriptor of the input pixel format */
+ int nb_threads; /* upper bound on the number of slice jobs per frame */
+ int depth; /* bit depth of component 0 */
+ int idx_a; /* plane holding alpha; only written when the format has alpha */
+ int mpeg_min; /* 16 scaled to the bit depth */
+ int mpeg_max; /* 235 scaled to the bit depth */
+
+ atomic_int detected_range; // enum AVColorRange
+ atomic_int detected_alpha; // enum AlphaMode
+} ColorDetectContext;
+
+#define OFFSET(x) offsetof(ColorDetectContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+/*
+ * Filter options. "mode" is a flag set whose default of -1 has every bit set,
+ * the same value as the "all" constant.
+ */
+static const AVOption colordetect_options[] = {
+ { "mode", "Image properties to detect", OFFSET(mode), AV_OPT_TYPE_FLAGS, {.i64 = -1}, 0, UINT_MAX, FLAGS, .unit = "mode" },
+ { "color_range", "Detect (YUV) color range", 0, AV_OPT_TYPE_CONST, {.i64 = COLOR_DETECT_COLOR_RANGE}, 0, 0, FLAGS, .unit = "mode" },
+ { "alpha_mode", "Detect alpha mode", 0, AV_OPT_TYPE_CONST, {.i64 = COLOR_DETECT_ALPHA_MODE }, 0, 0, FLAGS, .unit = "mode" },
+ { "all", "Detect all supported properties", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, FLAGS, .unit = "mode" },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(colordetect);
+
+/*
+ * Format negotiation callback. Builds the list of accepted pixel formats from
+ * descriptor flags: the planar flag is wanted, while paletted, hwaccel,
+ * bitstream, float, Bayer and XYZ formats are rejected. The big-endian flag is
+ * wanted on big-endian builds and rejected otherwise, so only native-endian
+ * formats remain.
+ */
+static int query_format(const AVFilterContext *ctx,
+ AVFilterFormatsConfig **cfg_in,
+ AVFilterFormatsConfig **cfg_out)
+{
+ int want_flags = AV_PIX_FMT_FLAG_PLANAR;
+ int reject_flags = AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_HWACCEL |
+ AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_FLOAT |
+ AV_PIX_FMT_FLAG_BAYER | AV_PIX_FMT_FLAG_XYZ;
+
+ /* The C and SIMD routines read samples in host byte order */
+ if (HAVE_BIGENDIAN) {
+ want_flags |= AV_PIX_FMT_FLAG_BE;
+ } else {
+ reject_flags |= AV_PIX_FMT_FLAG_BE;
+ }
+
+ AVFilterFormats *formats = ff_formats_pixdesc_filter(want_flags, reject_flags);
+ return ff_set_common_formats2(ctx, cfg_in, cfg_out, formats);
+}
+
+/**
+ * Input link configuration callback.
+ *
+ * Caches the pixel format descriptor, bit depth, limited (MPEG) range bounds
+ * and thread count, resets the detection state and selects the DSP routines
+ * matching the bit depth and the color range of the link.
+ *
+ * @return 0 on success, AVERROR(ENOTSUP) if the depth of component 0 is
+ *         outside the 8..16 bit range handled by the DSP routines
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ColorDetectContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const int depth = desc->comp[0].depth;
+
+    /* Reject unsupported depths before deriving the shifted constants below:
+     * (depth - 8) must be a valid shift count whose result fits in an int,
+     * and the DSP routines only handle 8 and 16 bit sample storage. */
+    if (depth < 8 || depth > 16)
+        return AVERROR(ENOTSUP);
+
+    s->desc       = desc;
+    s->depth      = depth;
+    s->mpeg_min   =  16 << (depth - 8);
+    s->mpeg_max   = 235 << (depth - 8);
+    s->nb_threads = ff_filter_get_nb_threads(ctx);
+
+    /* RGB input starts out as full range, so filter_frame() never runs the
+     * range detection for it */
+    if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
+        atomic_init(&s->detected_range, AVCOL_RANGE_JPEG);
+    } else {
+        atomic_init(&s->detected_range, AVCOL_RANGE_UNSPECIFIED);
+    }
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        /* alpha is taken to be the last component of the format */
+        s->idx_a = desc->comp[desc->nb_components - 1].plane;
+        atomic_init(&s->detected_alpha, ALPHA_UNDETERMINED);
+    } else {
+        atomic_init(&s->detected_alpha, ALPHA_NONE);
+    }
+
+    ff_color_detect_dsp_init(&s->dsp, depth, inlink->color_range);
+    return 0;
+}
+
+/*
+ * Slice job for color range detection; arg is the input AVFrame.
+ * Scans this job's share of the rows of plane 0 and stores AVCOL_RANGE_JPEG
+ * in the context if the DSP routine reports a sample outside
+ * [mpeg_min, mpeg_max]. Always returns 0.
+ */
+static int detect_range(AVFilterContext *ctx, void *arg,
+ int jobnr, int nb_jobs)
+{
+ ColorDetectContext *s = ctx->priv;
+ const AVFrame *in = arg;
+ const ptrdiff_t stride = in->linesize[0];
+ /* rows [y_start, y_end) belong to this job */
+ const int y_start = (in->height * jobnr) / nb_jobs;
+ const int y_end = (in->height * (jobnr + 1)) / nb_jobs;
+ const int h_slice = y_end - y_start;
+
+ if (s->dsp.detect_range(in->data[0] + y_start * stride, stride,
+ in->width, h_slice, s->mpeg_min, s->mpeg_max))
+ atomic_store(&s->detected_range, AVCOL_RANGE_JPEG);
+
+ return 0;
+}
+
+/*
+ * Slice job for alpha mode detection; arg is the input AVFrame.
+ * Compares this job's share of rows of the color plane(s) against the alpha
+ * plane and stores ALPHA_STRAIGHT in the context if the DSP routine reports a
+ * hit on any of them. Always returns 0.
+ */
+static int detect_alpha(AVFilterContext *ctx, void *arg,
+ int jobnr, int nb_jobs)
+{
+ ColorDetectContext *s = ctx->priv;
+ const AVFrame *in = arg;
+ const int w = in->width;
+ const int h = in->height;
+ /* rows [y_start, y_end) belong to this job */
+ const int y_start = (h * jobnr) / nb_jobs;
+ const int y_end = (h * (jobnr + 1)) / nb_jobs;
+ const int h_slice = y_end - y_start;
+
+ /* planes 0..2 are checked for RGB formats, only plane 0 otherwise */
+ const int nb_planes = (s->desc->flags & AV_PIX_FMT_FLAG_RGB) ? 3 : 1;
+ const ptrdiff_t alpha_stride = in->linesize[s->idx_a];
+ const uint8_t *alpha = in->data[s->idx_a] + y_start * alpha_stride;
+
+ /* Constants of the limited range test "p * color - k > q * alpha" (the
+ * full range routines ignore them): p is the largest sample value and q
+ * the width of the MPEG range.
+ * NOTE(review): the +128 in k looks like a tolerance term - confirm */
+ const int p = (1 << s->depth) - 1;
+ const int q = s->mpeg_max - s->mpeg_min;
+ const int k = s->mpeg_min * p + 128;
+
+ for (int i = 0; i < nb_planes; i++) {
+ const ptrdiff_t stride = in->linesize[i];
+ if (s->dsp.detect_alpha(in->data[i] + y_start * stride, stride,
+ alpha, alpha_stride, w, h_slice, p, q, k)) {
+ atomic_store(&s->detected_alpha, ALPHA_STRAIGHT);
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Per-frame callback. Runs each enabled detection as slice jobs, but only
+ * while its result is still undecided, then forwards the unmodified frame to
+ * the output.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+ AVFilterContext *ctx = inlink->dst;
+ ColorDetectContext *s = ctx->priv;
+ /* never request more jobs than there are rows */
+ const int nb_threads = FFMIN(inlink->h, s->nb_threads);
+
+ if (s->mode & COLOR_DETECT_COLOR_RANGE && s->detected_range == AVCOL_RANGE_UNSPECIFIED)
+ ff_filter_execute(ctx, detect_range, in, NULL, nb_threads);
+ if (s->mode & COLOR_DETECT_ALPHA_MODE && s->detected_alpha == ALPHA_UNDETERMINED)
+ ff_filter_execute(ctx, detect_alpha, in, NULL, nb_threads);
+
+ return ff_filter_frame(inlink->dst->outputs[0], in);
+}
+
+/*
+ * Log the detection results at AV_LOG_INFO level when the filter is torn
+ * down. Nothing is printed when no detection mode is enabled.
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+ ColorDetectContext *s = ctx->priv;
+ if (!s->mode)
+ return;
+
+ av_log(ctx, AV_LOG_INFO, "Detected color properties:\n");
+ if (s->mode & COLOR_DETECT_COLOR_RANGE) {
+ /* anything other than a proven full range is reported as undetermined */
+ av_log(ctx, AV_LOG_INFO, " Color range: %s\n",
+ s->detected_range == AVCOL_RANGE_JPEG ? "JPEG / full range"
+ : "undetermined");
+ }
+
+ if (s->mode & COLOR_DETECT_ALPHA_MODE) {
+ av_log(ctx, AV_LOG_INFO, " Alpha mode: %s\n",
+ s->detected_alpha == ALPHA_NONE ? "none" :
+ s->detected_alpha == ALPHA_STRAIGHT ? "straight / independent"
+ : "undetermined");
+ }
+}
+
+/*
+ * Fill in the C implementations for every function pointer of dsp that is
+ * still NULL; pointers that are already set are left alone, so dsp must be
+ * zero-initialized (or hold valid pointers) on entry.
+ * depth > 8 selects the 16-bit sample variants, and a color_range of
+ * AVCOL_RANGE_JPEG selects the full range alpha test (limited otherwise).
+ */
+av_cold void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
+ enum AVColorRange color_range)
+{
+ if (!dsp->detect_range)
+ dsp->detect_range = depth > 8 ? ff_detect_range16_c : ff_detect_range_c;
+ if (!dsp->detect_alpha) {
+ if (color_range == AVCOL_RANGE_JPEG) {
+ dsp->detect_alpha = depth > 8 ? ff_detect_alpha16_full_c : ff_detect_alpha_full_c;
+ } else {
+ dsp->detect_alpha = depth > 8 ? ff_detect_alpha16_limited_c : ff_detect_alpha_limited_c;
+ }
+ }
+}
+
+/* Single video input: configured by config_input(), fed through filter_frame() */
+static const AVFilterPad colordetect_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = config_input,
+ .filter_frame = filter_frame,
+ },
+};
+
+/* Filter definition: slice-threaded and flagged as metadata-only */
+const FFFilter ff_vf_colordetect = {
+ .p.name = "colordetect",
+ .p.description = NULL_IF_CONFIG_SMALL("Detect video color properties."),
+ .p.priv_class = &colordetect_class,
+ .p.flags = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_METADATA_ONLY,
+ .priv_size = sizeof(ColorDetectContext),
+ FILTER_INPUTS(colordetect_inputs),
+ FILTER_OUTPUTS(ff_video_default_filterpad),
+ FILTER_QUERY_FUNC2(query_format),
+ .uninit = uninit,
+};
diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
new file mode 100644
index 0000000000..8998ed83d4
--- /dev/null
+++ b/libavfilter/vf_colordetect.h
@@ -0,0 +1,149 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VF_COLORDETECT_H
+#define AVFILTER_VF_COLORDETECT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <libavutil/macros.h>
+#include <libavutil/pixfmt.h>
+
+/*
+ * Detection routines, filled in by ff_color_detect_dsp_init().
+ * In the C implementations strides are byte offsets between rows, while width
+ * and height count samples and rows.
+ */
+typedef struct FFColorDetectDSPContext {
+ /* Returns 1 if an out-of-range value was detected, 0 otherwise */
+ int (*detect_range)(const uint8_t *data, ptrdiff_t stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int mpeg_min, int mpeg_max);
+
+ /* Returns 1 if the color value exceeds the alpha value, 0 otherwise */
+ /* p, q and k are only read by the limited range implementations */
+ int (*detect_alpha)(const uint8_t *color, ptrdiff_t color_stride,
+ const uint8_t *alpha, ptrdiff_t alpha_stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int p, int q, int k);
+} FFColorDetectDSPContext;
+
+void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
+ enum AVColorRange color_range);
+
+/*
+ * C implementation of detect_range for 8-bit samples.
+ * Scans height rows of width samples each, rows being stride bytes apart.
+ * Returns 1 at the first sample outside [mpeg_min, mpeg_max], 0 if there is
+ * none.
+ */
+static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int mpeg_min, int mpeg_max)
+{
+ while (height--) {
+ for (int x = 0; x < width; x++) {
+ const uint8_t val = data[x];
+ if (val < mpeg_min || val > mpeg_max)
+ return 1;
+ }
+ data += stride;
+ }
+
+ return 0;
+}
+
+/*
+ * C implementation of detect_range for 16-bit samples.
+ * Same contract as ff_detect_range_c(): width counts 16-bit samples, while
+ * stride remains a byte offset between rows.
+ */
+static inline int ff_detect_range16_c(const uint8_t *data, ptrdiff_t stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int mpeg_min, int mpeg_max)
+{
+ while (height--) {
+ /* reinterpret the current row as 16-bit samples */
+ const uint16_t *data16 = (const uint16_t *) data;
+ for (int x = 0; x < width; x++) {
+ const uint16_t val = data16[x];
+ if (val < mpeg_min || val > mpeg_max)
+ return 1;
+ }
+ data += stride;
+ }
+
+ return 0;
+}
+
+/*
+ * C implementation of detect_alpha for 8-bit full range samples.
+ * Returns 1 at the first position where the color sample is greater than the
+ * alpha sample, 0 if there is none. p, q and k are unused.
+ */
+static inline int
+ff_detect_alpha_full_c(const uint8_t *color, ptrdiff_t color_stride,
+ const uint8_t *alpha, ptrdiff_t alpha_stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int p, int q, int k)
+{
+ while (height--) {
+ for (int x = 0; x < width; x++) {
+ if (color[x] > alpha[x])
+ return 1;
+ }
+ color += color_stride;
+ alpha += alpha_stride;
+ }
+ return 0;
+}
+
+/*
+ * C implementation of detect_alpha for 8-bit limited range samples.
+ * Returns 1 at the first position where p * color - k > q * alpha holds,
+ * 0 if there is none. The comparison is evaluated in int arithmetic.
+ */
+static inline int
+ff_detect_alpha_limited_c(const uint8_t *color, ptrdiff_t color_stride,
+ const uint8_t *alpha, ptrdiff_t alpha_stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int p, int q, int k)
+{
+ while (height--) {
+ for (int x = 0; x < width; x++) {
+ if (p * color[x] - k > q * alpha[x])
+ return 1;
+ }
+ color += color_stride;
+ alpha += alpha_stride;
+ }
+ return 0;
+}
+
+/*
+ * C implementation of detect_alpha for 16-bit full range samples.
+ * Returns 1 at the first position where the color sample is greater than the
+ * alpha sample, 0 if there is none. width counts 16-bit samples, the strides
+ * are byte offsets; p, q and k are unused.
+ */
+static inline int
+ff_detect_alpha16_full_c(const uint8_t *color, ptrdiff_t color_stride,
+ const uint8_t *alpha, ptrdiff_t alpha_stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int p, int q, int k)
+{
+ while (height--) {
+ /* reinterpret the current rows as 16-bit samples */
+ const uint16_t *color16 = (const uint16_t *) color;
+ const uint16_t *alpha16 = (const uint16_t *) alpha;
+ for (int x = 0; x < width; x++) {
+ if (color16[x] > alpha16[x])
+ return 1;
+ }
+ color += color_stride;
+ alpha += alpha_stride;
+ }
+ return 0;
+}
+
+/*
+ * C implementation of detect_alpha for 16-bit limited range samples.
+ * Returns 1 at the first position where p * color - k > q * alpha holds,
+ * 0 if there is none. width counts 16-bit samples, the strides are byte
+ * offsets.
+ */
+static inline int
+ff_detect_alpha16_limited_c(const uint8_t *color, ptrdiff_t color_stride,
+ const uint8_t *alpha, ptrdiff_t alpha_stride,
+ ptrdiff_t width, ptrdiff_t height,
+ int p, int q, int k)
+{
+ while (height--) {
+ /* reinterpret the current rows as 16-bit samples */
+ const uint16_t *color16 = (const uint16_t *) color;
+ const uint16_t *alpha16 = (const uint16_t *) alpha;
+ for (int x = 0; x < width; x++) {
+ /* the products are formed in 64 bits so they cannot overflow int */
+ if ((int64_t) p * color16[x] - k > (int64_t) q * alpha16[x])
+ return 1;
+ }
+ color += color_stride;
+ alpha += alpha_stride;
+ }
+ return 0;
+}
+
+#endif /* AVFILTER_VF_COLORDETECT_H */
--
2.50.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
@ 2025-07-18 9:57 ` Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Niklas Haas
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 9:57 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
---
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 +
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vf_colordetect.c | 139 ++++++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
5 files changed, 145 insertions(+)
create mode 100644 tests/checkasm/vf_colordetect.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index c6d5b0ba1f..d65f48f97b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -57,6 +57,7 @@ AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
AVFILTEROBJS-$(CONFIG_BLACKDETECT_FILTER) += vf_blackdetect.o
AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
+AVFILTEROBJS-$(CONFIG_COLORDETECT_FILTER)+= vf_colordetect.o
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 2532405f29..968961c03c 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -275,6 +275,9 @@ static const struct {
#if CONFIG_BWDIF_FILTER
{ "vf_bwdif", checkasm_check_vf_bwdif },
#endif
+ #if CONFIG_COLORDETECT_FILTER
+ { "vf_colordetect", checkasm_check_colordetect },
+ #endif
#if CONFIG_COLORSPACE_FILTER
{ "vf_colorspace", checkasm_check_colorspace },
#endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index d85bbaf7fa..eb458a1732 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -90,6 +90,7 @@ void checkasm_check_blackdetect(void);
void checkasm_check_blend(void);
void checkasm_check_blockdsp(void);
void checkasm_check_bswapdsp(void);
+void checkasm_check_colordetect(void);
void checkasm_check_colorspace(void);
void checkasm_check_diracdsp(void);
void checkasm_check_exrdsp(void);
diff --git a/tests/checkasm/vf_colordetect.c b/tests/checkasm/vf_colordetect.c
new file mode 100644
index 0000000000..96166a48c5
--- /dev/null
+++ b/tests/checkasm/vf_colordetect.c
@@ -0,0 +1,139 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+
+#include "libavfilter/vf_colordetect.h"
+#include "libavutil/mem_internal.h"
+
+#define WIDTH 256
+#define HEIGHT 16
+#define STRIDE (WIDTH + 32)
+
+/*
+ * Compare the detect_range routine selected for the given bit depth against
+ * the reference. The input is in-range everywhere except for one zero sample
+ * placed at a random position in the lower half of the rows, and the routine
+ * is called with every height from 1 to HEIGHT.
+ */
+static void check_range_detect(int depth)
+{
+ const int mpeg_min = 16 << (depth - 8);
+ const int mpeg_max = 235 << (depth - 8);
+
+ FFColorDetectDSPContext dsp = {0};
+ ff_color_detect_dsp_init(&dsp, depth, AVCOL_RANGE_UNSPECIFIED);
+
+ declare_func(int, const uint8_t *, ptrdiff_t, ptrdiff_t, ptrdiff_t, int, int);
+
+ /* Initialize to 128, which should always return 0 */
+ LOCAL_ALIGNED_32(uint8_t, in, [HEIGHT * STRIDE]);
+ memset(in, 0x80, HEIGHT * STRIDE);
+
+ /* Place an out-of-range value in a random position near the center */
+ const int h2 = HEIGHT >> 1;
+ int idx0 = ((rnd() % h2) + h2) * STRIDE + (rnd() % WIDTH);
+ if (depth > 8) {
+ /* align the byte index to a 16-bit sample and clear both of its bytes */
+ idx0 &= ~1;
+ in[idx0] = in[idx0 + 1] = 0;
+ } else {
+ in[idx0] = 0;
+ }
+
+ /* WIDTH is the row size in bytes; halve it to get 16-bit samples */
+ int w = WIDTH;
+ if (depth > 8)
+ w /= 2;
+
+ if (check_func(dsp.detect_range, "detect_range_%d", depth)) {
+ /* Test increasing height, to ensure we hit the placed 0 eventually */
+ for (int h = 1; h <= HEIGHT; h++) {
+ int res_ref = call_ref(in, STRIDE, w, h, mpeg_min, mpeg_max);
+ int res_new = call_new(in, STRIDE, w, h, mpeg_min, mpeg_max);
+ if (res_ref != res_new)
+ fail();
+ }
+
+ /* Test performance of base case without any out-of-range values */
+ memset(in, 0x80, HEIGHT * STRIDE);
+ bench_new(in, STRIDE, w, HEIGHT, mpeg_min, mpeg_max);
+ }
+}
+
+/*
+ * Compare the detect_alpha routine selected for the given bit depth and color
+ * range against the reference. Alpha is fully set everywhere except for one
+ * zero sample placed at a random position in the lower half of the rows, and
+ * the routine is called with every height from 1 to HEIGHT.
+ */
+static void check_alpha_detect(int depth, enum AVColorRange range)
+{
+ /* same constants as the filter derives for the limited range test */
+ const int mpeg_min = 16 << (depth - 8);
+ const int mpeg_max = 235 << (depth - 8);
+ const int p = (1 << depth) - 1;
+ const int q = mpeg_max - mpeg_min;
+ const int k = mpeg_min * p + 128;
+
+ FFColorDetectDSPContext dsp = {0};
+ ff_color_detect_dsp_init(&dsp, depth, range);
+
+ declare_func(int, const uint8_t *, ptrdiff_t, const uint8_t *, ptrdiff_t,
+ ptrdiff_t, ptrdiff_t, int p, int q, int k);
+
+ LOCAL_ALIGNED_32(uint8_t, luma, [HEIGHT * STRIDE]);
+ LOCAL_ALIGNED_32(uint8_t, alpha, [HEIGHT * STRIDE]);
+ memset(luma, 0x80, HEIGHT * STRIDE);
+ memset(alpha, 0xFF, HEIGHT * STRIDE);
+
+ /* Try and force overflow */
+ if (depth > 8 && range == AVCOL_RANGE_MPEG) {
+ ((uint16_t *) luma)[0] = 235 << (depth - 8);
+ ((uint16_t *) luma)[1] = 16 << (depth - 8);
+ } else {
+ luma[0] = 235;
+ luma[1] = 16;
+ }
+
+ /* Place an out-of-range value in a random position near the center */
+ const int h2 = HEIGHT >> 1;
+ int idx0 = ((rnd() % h2) + h2) * STRIDE + (rnd() % WIDTH);
+ if (depth > 8) {
+ /* align the byte index to a 16-bit sample and clear both of its bytes */
+ idx0 &= ~1;
+ alpha[idx0] = alpha[idx0 + 1] = 0;
+ } else {
+ alpha[idx0] = 0;
+ }
+
+ /* WIDTH is the row size in bytes; halve it to get 16-bit samples */
+ int w = WIDTH;
+ if (depth > 8)
+ w /= 2;
+
+ if (check_func(dsp.detect_alpha, "detect_alpha_%d_%s", depth, range == AVCOL_RANGE_JPEG ? "full" : "limited")) {
+ /* Test increasing height, to ensure we hit the placed 0 eventually */
+ for (int h = 1; h <= HEIGHT; h++) {
+ int res_ref = call_ref(luma, STRIDE, alpha, STRIDE, w, h, p, q, k);
+ int res_new = call_new(luma, STRIDE, alpha, STRIDE, w, h, p, q, k);
+ if (res_ref != res_new)
+ fail();
+ }
+
+ /* Test performance of base case without any out-of-range values */
+ memset(alpha, 0xFF, HEIGHT * STRIDE);
+ bench_new(luma, STRIDE, alpha, STRIDE, w, HEIGHT, p, q, k);
+ }
+}
+
+/*
+ * checkasm entry point: exercises range detection and both alpha detection
+ * variants (full and limited range) at 8 and 16 bits per sample.
+ */
+void checkasm_check_colordetect(void)
+{
+ for (int depth = 8; depth <= 16; depth += 8) {
+ check_range_detect(depth);
+ report("detect_range_%d", depth);
+
+ check_alpha_detect(depth, AVCOL_RANGE_JPEG);
+ check_alpha_detect(depth, AVCOL_RANGE_MPEG);
+ report("detect_alpha_%d", depth);
+ }
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ae402cad4..366afa0373 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -59,6 +59,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-vf_blackdetect \
fate-checkasm-vf_blend \
fate-checkasm-vf_bwdif \
+ fate-checkasm-vf_colordetect \
fate-checkasm-vf_colorspace \
fate-checkasm-vf_eq \
fate-checkasm-vf_gblur \
--
2.50.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas
@ 2025-07-18 9:57 ` Niklas Haas
2025-07-18 12:18 ` [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 12:38 ` Kacper Michajlow
3 siblings, 0 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 9:57 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
alphadetect8_full_c: 5658.2 ( 1.00x)
alphadetect8_full_avx2: 215.1 (26.31x)
alphadetect8_full_avx512: 133.5 (42.40x)
alphadetect8_limited_c: 7391.5 ( 1.00x)
alphadetect8_limited_avx2: 649.3 (11.38x)
alphadetect8_limited_avx512: 330.5 (22.36x)
alphadetect16_full_c: 3027.4 ( 1.00x)
alphadetect16_full_avx2: 209.4 (14.46x)
alphadetect16_full_avx512: 141.4 (21.41x)
alphadetect16_limited_c: 3880.9 ( 1.00x)
alphadetect16_limited_avx2: 734.9 ( 5.28x)
alphadetect16_limited_avx512: 349.2 (11.11x)
rangedetect8_c: 5854.2 ( 1.00x)
rangedetect8_avx2: 138.9 (42.15x)
rangedetect8_avx512: 106.2 (55.12x)
rangedetect16_c: 4122.0 ( 1.00x)
rangedetect16_avx2: 138.6 (29.74x)
rangedetect16_avx512: 104.1 (39.60x)
---
libavfilter/vf_colordetect.c | 4 +
libavfilter/vf_colordetect.h | 3 +
libavfilter/x86/Makefile | 2 +
libavfilter/x86/vf_colordetect.asm | 150 ++++++++++++++++++++++++++
libavfilter/x86/vf_colordetect_init.c | 105 ++++++++++++++++++
5 files changed, 264 insertions(+)
create mode 100644 libavfilter/x86/vf_colordetect.asm
create mode 100644 libavfilter/x86/vf_colordetect_init.c
diff --git a/libavfilter/vf_colordetect.c b/libavfilter/vf_colordetect.c
index 0fb892634f..642bec62c6 100644
--- a/libavfilter/vf_colordetect.c
+++ b/libavfilter/vf_colordetect.c
@@ -219,6 +219,10 @@ static av_cold void uninit(AVFilterContext *ctx)
av_cold void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
enum AVColorRange color_range)
{
+#if ARCH_X86
+ ff_color_detect_dsp_init_x86(dsp, depth, color_range);
+#endif
+
if (!dsp->detect_range)
dsp->detect_range = depth > 8 ? ff_detect_range16_c : ff_detect_range_c;
if (!dsp->detect_alpha) {
diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
index 8998ed83d4..78d296433d 100644
--- a/libavfilter/vf_colordetect.h
+++ b/libavfilter/vf_colordetect.h
@@ -41,6 +41,9 @@ typedef struct FFColorDetectDSPContext {
void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
enum AVColorRange color_range);
+void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int depth,
+ enum AVColorRange color_range);
+
static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
ptrdiff_t width, ptrdiff_t height,
int mpeg_min, int mpeg_max)
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 86f7119a7b..0e531a9b41 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -6,6 +6,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o
OBJS-$(CONFIG_BLACKDETECT_FILTER) += x86/vf_blackdetect_init.o
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
+OBJS-$(CONFIG_COLORDETECT_FILTER) += x86/vf_colordetect_init.o
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o
@@ -53,6 +54,7 @@ X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o
X86ASM-OBJS-$(CONFIG_BLACKDETECT_FILTER) += x86/vf_blackdetect.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
+X86ASM-OBJS-$(CONFIG_COLORDETECT_FILTER) += x86/vf_colordetect.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o
diff --git a/libavfilter/x86/vf_colordetect.asm b/libavfilter/x86/vf_colordetect.asm
new file mode 100644
index 0000000000..6482df95cc
--- /dev/null
+++ b/libavfilter/x86/vf_colordetect.asm
@@ -0,0 +1,150 @@
+;*****************************************************************************
+;* x86-optimized functions for colordetect filter
+;*
+;* Copyright (C) 2025 Niklas Haas
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; detect_range_fn <suffix>
+;
+; Emits detect_range<suffix>(data, stride, width, height, mpeg_min, mpeg_max)
+; for the current instruction set; suffix is b for 8-bit and w for 16-bit
+; samples. width is a byte count and every row is consumed in whole vectors of
+; mmsize bytes, at least one per row. Returns 1 in eax if any sample lies
+; outside [mpeg_min, mpeg_max], 0 otherwise.
+;
+; Only the first four arguments are loaded into registers by cglobal, so that
+; mpeg_minm/mpeg_maxm stay memory operands on ABIs that pass them on the stack:
+; broadcasting from a general purpose register requires AVX-512 and must not
+; end up in the AVX2 version.
+%macro detect_range_fn 1 ; suffix
+cglobal detect_range%1, 4, 7, 5, data, stride, width, height, mpeg_min, mpeg_max, x
+%if UNIX64 && notcpuflag(avx512)
+    ; arguments arrive in GPRs here; go through an xmm register for AVX2
+    movd xm0, mpeg_mind
+    movd xm1, mpeg_maxd
+    vpbroadcast%1 m0, xm0
+    vpbroadcast%1 m1, xm1
+%else
+    ; register source (AVX-512 on UNIX64) or stack memory source (elsewhere)
+    vpbroadcast%1 m0, mpeg_minm
+    vpbroadcast%1 m1, mpeg_maxm
+%endif
+    ; index rows with a negative offset counting up towards zero
+    add dataq, widthq
+    neg widthq
+.lineloop:
+    ; m2/m3 = running min/max of the row, seeded with the allowed bounds
+    mova m2, m0
+    mova m3, m1
+    mov xq, widthq
+    .loop:
+        movu m4, [dataq + xq]
+        pminu%1 m2, m4
+        pmaxu%1 m3, m4
+        add xq, mmsize
+        jl .loop
+
+    ; test if the data is out of range
+    ; (the running min/max differ from the bounds iff a sample exceeded them)
+    pxor m2, m0
+%if cpuflag(avx512)
+    vpternlogq m2, m3, m1, 0xF6 ; m2 |= m3 ^ m1
+    vptestmq k1, m2, m2
+    kortestb k1, k1
+%else
+    pxor m3, m1
+    por m2, m3
+    ptest m2, m2
+%endif
+    jnz .end
+    add dataq, strideq
+    dec heightq
+    jg .lineloop
+.end:
+    ; ZF is clear when coming from the jnz above (hit) and set when the row
+    ; counter reached zero (no hit)
+    setnz al
+    movzx rax, al
+    RET
+%endmacro
+
+; detect_alpha_fn <suffix>, <hsuffix>, <range>
+;
+; Emits detect_alpha<suffix>_<range>(color, color_stride, alpha, alpha_stride,
+; width, height, p, q, k) for the current instruction set. suffix is the sample
+; size (b or w), hsuffix the next wider element size (w or d) used for the
+; limited range arithmetic, and range is "full" or "limited". width is a byte
+; count. Returns 1 in rax if the test fails for any sample, 0 otherwise.
+;
+; Both variants rely on max(a, b) ^ b being nonzero exactly when a > b; the
+; differences are OR-ed into m0 and tested once per row.
+%macro detect_alpha_fn 3 ; suffix, hsuffix, range
+cglobal detect_alpha%1_%3, 6, 7, 6, color, color_stride, alpha, alpha_stride, width, height, x
+ pxor m0, m0
+ ; index rows with a negative offset counting up towards zero
+ add colorq, widthq
+ add alphaq, widthq
+ neg widthq
+%ifidn %3, limited
+ ; p, q and k are read from their stack argument slots
+ vpbroadcast%2 m3, r6m ; p
+ vpbroadcast%2 m4, r7m ; q
+ vpbroadcast%2 m5, r8m ; k
+%endif
+.lineloop:
+ mov xq, widthq
+ .loop:
+ %ifidn %3, full
+ ; m1 = max(color, alpha), m2 = alpha
+ movu m1, [colorq + xq]
+ movu m2, [alphaq + xq]
+ pmaxu%1 m1, m2
+ %else
+ ; widen the samples, then m1 = color * p and m2 = alpha * q
+ pmovzx%1%2 m1, [colorq + xq]
+ pmovzx%1%2 m2, [alphaq + xq]
+ pmull%2 m1, m3
+ pmull%2 m2, m4
+ ; m1 = max(m1 - k, 0): saturating subtract for words, clamp then
+ ; subtract for dwords
+ %ifidn %1, b
+ psubusw m1, m5
+ %else
+ pmaxud m1, m5
+ psubd m1, m5
+ %endif
+ pmaxu%2 m1, m2
+ %endif
+ %if cpuflag(avx512)
+ vpternlogq m0, m1, m2, 0xF6 ; m0 |= m1 ^ m2
+ %else
+ pxor m1, m2
+ por m0, m1
+ %endif
+ ; the widening loads of the limited variant consume half a vector of
+ ; source bytes per iteration
+ %ifidn %3, full
+ add xq, mmsize
+ %else
+ add xq, mmsize >> 1
+ %endif
+ jl .loop
+
+ ; any bit set in m0 means some sample of this row failed the test
+%if cpuflag(avx512)
+ vptestmq k1, m0, m0
+ kortestb k1, k1
+%else
+ ptest m0, m0
+%endif
+ jnz .found
+
+ add colorq, color_strideq
+ add alphaq, alpha_strideq
+ dec heightq
+ jg .lineloop
+ xor rax, rax
+ RET
+
+.found:
+ mov rax, 1
+ RET
+%endmacro
+
+INIT_YMM avx2
+detect_range_fn b
+detect_range_fn w
+detect_alpha_fn b, w, full
+detect_alpha_fn w, d, full
+detect_alpha_fn b, w, limited
+detect_alpha_fn w, d, limited
+
+INIT_ZMM avx512
+detect_range_fn b
+detect_range_fn w
+detect_alpha_fn b, w, full
+detect_alpha_fn w, d, full
+detect_alpha_fn b, w, limited
+detect_alpha_fn w, d, limited
diff --git a/libavfilter/x86/vf_colordetect_init.c b/libavfilter/x86/vf_colordetect_init.c
new file mode 100644
index 0000000000..62a7e87388
--- /dev/null
+++ b/libavfilter/x86/vf_colordetect_init.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/vf_colordetect.h"
+
+/*
+ * Define FUNC_NAME, a detect_range wrapper around the SIMD kernel
+ * ASM_FUNC_NAME. The kernel scans the leading part of every row whose byte
+ * length is a multiple of MMSIZE; C_FUNC_NAME scans the remaining pixels.
+ * width is in pixels, SHIFT is log2 of the sample size in bytes.
+ * Returns nonzero if either part reports an out-of-range sample.
+ */
+#define DETECT_RANGE_FUNC(FUNC_NAME, ASM_FUNC_NAME, C_FUNC_NAME, SHIFT, MMSIZE) \
+int ASM_FUNC_NAME(const uint8_t *src, ptrdiff_t stride,                         \
+                  ptrdiff_t width, ptrdiff_t height, int min, int max);         \
+                                                                                \
+static int FUNC_NAME(const uint8_t *src, ptrdiff_t stride,                      \
+                     ptrdiff_t width, ptrdiff_t height, int min, int max)       \
+{                                                                               \
+    /* bytes per row that form whole MMSIZE-byte vectors */                     \
+    ptrdiff_t bytes = (width << SHIFT) & ~(MMSIZE - 1);                         \
+                                                                                \
+    /* The kernel's loops appear to be bottom-tested, so a zero-byte row */     \
+    /* would still load one vector reaching past the row; skip it then.  */     \
+    if (bytes) {                                                                \
+        int ret = ASM_FUNC_NAME(src, stride, bytes, height, min, max);          \
+        if (ret)                                                                \
+            return ret;                                                         \
+    }                                                                           \
+                                                                                \
+    /* scalar code handles the tail that does not fill a vector */              \
+    return C_FUNC_NAME(src + bytes, stride, width - (bytes >> SHIFT),           \
+                       height, min, max);                                       \
+}
+
+/*
+ * Define FUNC_NAME, a detect_alpha wrapper around the SIMD kernel
+ * ASM_FUNC_NAME. The kernel scans the leading part of every row whose byte
+ * length is a multiple of MMSIZE; C_FUNC_NAME scans the remaining pixels.
+ * width is in pixels, SHIFT is log2 of the sample size in bytes; p, q and k
+ * are forwarded unchanged to both implementations.
+ * Returns nonzero if either part reports a color value exceeding alpha.
+ */
+#define DETECT_ALPHA_FUNC(FUNC_NAME, ASM_FUNC_NAME, C_FUNC_NAME, SHIFT, MMSIZE) \
+int ASM_FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride,                 \
+                  const uint8_t *alpha, ptrdiff_t alpha_stride,                 \
+                  ptrdiff_t width, ptrdiff_t height, int p, int q, int k);      \
+                                                                                \
+static int FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride,              \
+                     const uint8_t *alpha, ptrdiff_t alpha_stride,              \
+                     ptrdiff_t width, ptrdiff_t height, int p, int q, int k)    \
+{                                                                               \
+    /* bytes per row that form whole MMSIZE-byte vectors */                     \
+    ptrdiff_t bytes = (width << SHIFT) & ~(MMSIZE - 1);                         \
+                                                                                \
+    /* The kernel's loops are bottom-tested: with a zero-byte row it would */   \
+    /* still load one vector reaching past the row, so skip it then.       */   \
+    if (bytes) {                                                                \
+        int ret = ASM_FUNC_NAME(color, color_stride, alpha, alpha_stride,       \
+                                bytes, height, p, q, k);                        \
+        if (ret)                                                                \
+            return ret;                                                         \
+    }                                                                           \
+                                                                                \
+    /* scalar code handles the tail that does not fill a vector */              \
+    return C_FUNC_NAME(color + bytes, color_stride, alpha + bytes, alpha_stride,\
+                       width - (bytes >> SHIFT), height, p, q, k);              \
+}
+
+/* Instantiate one wrapper per (kernel, sample size) pair: SHIFT 0 pairs the
+ * "b" kernels with the 8-bit C code, SHIFT 1 the "w" kernels with the 16-bit
+ * C code; the last argument is the vector size in bytes. */
+#if HAVE_X86ASM
+#if HAVE_AVX512_EXTERNAL
+DETECT_RANGE_FUNC(detect_range_avx512, ff_detect_rangeb_avx512, ff_detect_range_c, 0, 64)
+DETECT_RANGE_FUNC(detect_range16_avx512, ff_detect_rangew_avx512, ff_detect_range16_c, 1, 64)
+DETECT_ALPHA_FUNC(detect_alpha_full_avx512, ff_detect_alphab_full_avx512, ff_detect_alpha_full_c, 0, 64)
+DETECT_ALPHA_FUNC(detect_alpha16_full_avx512, ff_detect_alphaw_full_avx512, ff_detect_alpha16_full_c, 1, 64)
+DETECT_ALPHA_FUNC(detect_alpha_limited_avx512, ff_detect_alphab_limited_avx512, ff_detect_alpha_limited_c, 0, 64)
+DETECT_ALPHA_FUNC(detect_alpha16_limited_avx512, ff_detect_alphaw_limited_avx512, ff_detect_alpha16_limited_c, 1, 64)
+#endif /* HAVE_AVX512_EXTERNAL */
+#if HAVE_AVX2_EXTERNAL
+DETECT_RANGE_FUNC(detect_range_avx2, ff_detect_rangeb_avx2, ff_detect_range_c, 0, 32)
+DETECT_RANGE_FUNC(detect_range16_avx2, ff_detect_rangew_avx2, ff_detect_range16_c, 1, 32)
+DETECT_ALPHA_FUNC(detect_alpha_full_avx2, ff_detect_alphab_full_avx2, ff_detect_alpha_full_c, 0, 32)
+DETECT_ALPHA_FUNC(detect_alpha16_full_avx2, ff_detect_alphaw_full_avx2, ff_detect_alpha16_full_c, 1, 32)
+DETECT_ALPHA_FUNC(detect_alpha_limited_avx2, ff_detect_alphab_limited_avx2, ff_detect_alpha_limited_c, 0, 32)
+DETECT_ALPHA_FUNC(detect_alpha16_limited_avx2, ff_detect_alphaw_limited_avx2, ff_detect_alpha16_limited_c, 1, 32)
+#endif /* HAVE_AVX2_EXTERNAL */
+#endif /* HAVE_X86ASM */
+
+/*
+ * Install the x86 SIMD wrappers into dsp for the CPU features found at runtime.
+ * depth > 8 selects the 16-bit variants; AVCOL_RANGE_JPEG selects the "full"
+ * alpha kernels and any other color_range the "limited" ones.
+ * dsp is left untouched when neither AVX2 nor AVX-512 is usable.
+ */
+av_cold void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int depth,
+ enum AVColorRange color_range)
+{
+#if HAVE_X86ASM
+ int cpu_flags = av_get_cpu_flags();
+#if HAVE_AVX2_EXTERNAL
+ if (EXTERNAL_AVX2(cpu_flags)) {
+ dsp->detect_range = depth > 8 ? detect_range16_avx2 : detect_range_avx2;
+ if (color_range == AVCOL_RANGE_JPEG) {
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx2 : detect_alpha_full_avx2;
+ } else {
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx2 : detect_alpha_limited_avx2;
+ }
+ }
+#endif /* HAVE_AVX2_EXTERNAL */
+#if HAVE_AVX512_EXTERNAL
+ /* Tested after AVX2 so that these assignments replace the AVX2 ones. */
+ if (EXTERNAL_AVX512(cpu_flags)) {
+ dsp->detect_range = depth > 8 ? detect_range16_avx512 : detect_range_avx512;
+ if (color_range == AVCOL_RANGE_JPEG) {
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx512 : detect_alpha_full_avx512;
+ } else {
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx512 : detect_alpha_limited_avx512;
+ }
+ }
+#endif /* HAVE_AVX512_EXTERNAL */
+#endif /* HAVE_X86ASM */
+}
--
2.50.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Niklas Haas
@ 2025-07-18 12:18 ` Niklas Haas
2025-07-18 12:38 ` Kacper Michajlow
3 siblings, 0 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 12:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
On Fri, 18 Jul 2025 11:57:14 +0200 Niklas Haas <ffmpeg@haasn.xyz> wrote:
> From: Niklas Haas <git@haasn.dev>
>
> This filter can detect various properties about the image, including
> whether or not there are out-of-range values, or whether the input appears
> to use straight or premultiplied alpha.
>
> Of course, these can only be heuristics, with "undetermined" as the base
> case. While we can definitely prove the existence of full range or
> straight alpha colors, we can never infer the opposite.
> ---
> [...]
> +static int detect_alpha(AVFilterContext *ctx, void *arg,
> + int jobnr, int nb_jobs)
> +{
> + ColorDetectContext *s = ctx->priv;
> + const AVFrame *in = arg;
> + const int w = in->width;
> + const int h = in->height;
> + const int y_start = (h * jobnr) / nb_jobs;
> + const int y_end = (h * (jobnr + 1)) / nb_jobs;
> + const int h_slice = y_end - y_start;
> +
> + const int nb_planes = (s->desc->flags & AV_PIX_FMT_FLAG_RGB) ? 3 : 1;
> + const ptrdiff_t alpha_stride = in->linesize[s->idx_a];
> + const uint8_t *alpha = in->data[s->idx_a] + y_start * alpha_stride;
> +
> + const int p = (1 << s->depth) - 1;
> + const int q = s->mpeg_max - s->mpeg_min;
> + const int k = s->mpeg_min * p + 128;
I replaced this by a slightly tighter bound, and also added some explanation:
/**
* To check if a value is out of range, we need to compare the color value
* against the maximum possible color for a given alpha value:
* x > (((mpeg_max - mpeg_min) * (a + ((a >> 1) & 1)) + (1 << (depth - 1))) >> depth) + mpeg_min
* (cf. premultiply16offset in vf_premultiply.c)
*
* This rearranges to:
* ((x - mpeg_min) << depth) - (1 << (depth - 1)) > (mpeg_max - mpeg_min) * (a + ((a >> 1) & 1))
*
* And since a + 1 >= a + ((a >> 1) & 1) we can relax the RHS slightly, giving us:
* (1 << depth) * x - (mpeg_min << depth) - (1 << (depth - 1)) - (mpeg_max - mpeg_min) > (mpeg_max - mpeg_min) * a
* = P * x - K > Q * a in the below formula
*/
const int p = 1 << s->depth;
const int q = s->mpeg_max - s->mpeg_min;
const int k = p * s->mpeg_min + q + (1 << (s->depth - 1));
I won't bother sending a v4 just for this diff.
> [...]
> diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
> new file mode 100644
> index 0000000000..8998ed83d4
> --- /dev/null
> +++ b/libavfilter/vf_colordetect.h
> @@ -0,0 +1,149 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_VF_COLORDETECT_H
> +#define AVFILTER_VF_COLORDETECT_H
Fixed locally to unbreak fate-source.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
` (2 preceding siblings ...)
2025-07-18 12:18 ` [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
@ 2025-07-18 12:38 ` Kacper Michajlow
2025-07-18 12:46 ` Niklas Haas
3 siblings, 1 reply; 7+ messages in thread
From: Kacper Michajlow @ 2025-07-18 12:38 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 18 Jul 2025 at 11:57, Niklas Haas <ffmpeg@haasn.xyz> wrote:
>
> From: Niklas Haas <git@haasn.dev>
>
> This filter can detect various properties about the image, including
> whether or not there are out-of-range values, or whether the input appears
> to use straight or premultiplied alpha.
>
> Of course, these can only be heuristics, with "undetermined" as the base
> case. While we can definitely prove the existence of full range or
> straight alpha colors, we can never infer the opposite.
> ---
> doc/filters.texi | 27 ++++
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/vf_colordetect.c | 252 +++++++++++++++++++++++++++++++++++
> libavfilter/vf_colordetect.h | 149 +++++++++++++++++++++
> 5 files changed, 430 insertions(+)
> create mode 100644 libavfilter/vf_colordetect.c
> create mode 100644 libavfilter/vf_colordetect.h
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index ed2956fe75..74e9e71559 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -9753,6 +9753,33 @@ colorchannelmixer=.393:.769:.189:0:.349:.686:.168:0:.272:.534:.131
>
> This filter supports the all above options as @ref{commands}.
>
> +@section colordetect
> +Analyze the video frames to determine the effective value range and alpha
> +mode.
> +
> +The filter accepts the following options:
> +
> +@table @option
> +@item mode
> +Set of properties to detect. Unavailable properties, such as alpha mode for
> +an input image without an alpha channel, will be ignored automatically.
> +
> +Accepts a combination of the following flags:
> +
> +@table @samp
> +@item color_range
> +Detect if the source contains luma pixels outside the limited (MPEG) range,
> +which indicates that this is a full range YUV source.
> +@item alpha_mode
> +Detect if the source contains color values above the alpha channel, which
> +indicates that the alpha channel is independent (straight), rather than
> +premultiplied.
> +@item all
> +Enable detection of all of the above properties. This is the default.
> +@end table
> +
> +@end table
> +
> @section colorize
> Overlay a solid color on the video stream.
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 9e9153f5b0..e19f67a3a7 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -237,6 +237,7 @@ OBJS-$(CONFIG_COLORBALANCE_FILTER) += vf_colorbalance.o
> OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER) += vf_colorchannelmixer.o
> OBJS-$(CONFIG_COLORCONTRAST_FILTER) += vf_colorcontrast.o
> OBJS-$(CONFIG_COLORCORRECT_FILTER) += vf_colorcorrect.o
> +OBJS-$(CONFIG_COLORDETECT_FILTER) += vf_colordetect.o
> OBJS-$(CONFIG_COLORIZE_FILTER) += vf_colorize.o
> OBJS-$(CONFIG_COLORKEY_FILTER) += vf_colorkey.o
> OBJS-$(CONFIG_COLORKEY_OPENCL_FILTER) += vf_colorkey_opencl.o opencl.o \
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 409099bf1f..f3c2092b15 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -218,6 +218,7 @@ extern const FFFilter ff_vf_colorbalance;
> extern const FFFilter ff_vf_colorchannelmixer;
> extern const FFFilter ff_vf_colorcontrast;
> extern const FFFilter ff_vf_colorcorrect;
> +extern const FFFilter ff_vf_colordetect;
> extern const FFFilter ff_vf_colorize;
> extern const FFFilter ff_vf_colorkey;
> extern const FFFilter ff_vf_colorkey_opencl;
> diff --git a/libavfilter/vf_colordetect.c b/libavfilter/vf_colordetect.c
> new file mode 100644
> index 0000000000..0fb892634f
> --- /dev/null
> +++ b/libavfilter/vf_colordetect.c
> @@ -0,0 +1,252 @@
> +/*
> + * Copyright (c) 2025 Niklas Haas
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Video color space detector, tries to auto-detect YUV range and alpha mode.
> + */
> +
> +#include <stdbool.h>
> +#include <stdatomic.h>
> +
> +#include "config.h"
> +
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "formats.h"
> +#include "video.h"
> +
> +#include "vf_colordetect.h"
> +
> +enum AlphaMode {
> + ALPHA_NONE = -1,
> + ALPHA_UNDETERMINED = 0,
> + ALPHA_STRAIGHT,
> + /* No way to positively identify premultiplied alpha */
> +};
> +
> +enum ColorDetectMode {
> + COLOR_DETECT_COLOR_RANGE = 1 << 0,
> + COLOR_DETECT_ALPHA_MODE = 1 << 1,
> +};
> +
> +typedef struct ColorDetectContext {
> + const AVClass *class;
> + FFColorDetectDSPContext dsp;
> + unsigned mode;
> +
> + const AVPixFmtDescriptor *desc;
> + int nb_threads;
> + int depth;
> + int idx_a;
> + int mpeg_min;
> + int mpeg_max;
> +
> + atomic_int detected_range; // enum AVColorRange
> + atomic_int detected_alpha; // enum AlphaMode
> +} ColorDetectContext;
> +
> +#define OFFSET(x) offsetof(ColorDetectContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption colordetect_options[] = {
> + { "mode", "Image properties to detect", OFFSET(mode), AV_OPT_TYPE_FLAGS, {.i64 = -1}, 0, UINT_MAX, FLAGS, .unit = "mode" },
> + { "color_range", "Detect (YUV) color range", 0, AV_OPT_TYPE_CONST, {.i64 = COLOR_DETECT_COLOR_RANGE}, 0, 0, FLAGS, .unit = "mode" },
> + { "alpha_mode", "Detect alpha mode", 0, AV_OPT_TYPE_CONST, {.i64 = COLOR_DETECT_ALPHA_MODE }, 0, 0, FLAGS, .unit = "mode" },
> + { "all", "Detect all supported properties", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, FLAGS, .unit = "mode" },
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(colordetect);
> +
> +static int query_format(const AVFilterContext *ctx,
> + AVFilterFormatsConfig **cfg_in,
> + AVFilterFormatsConfig **cfg_out)
> +{
> + int want_flags = AV_PIX_FMT_FLAG_PLANAR;
> + int reject_flags = AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_HWACCEL |
> + AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_FLOAT |
> + AV_PIX_FMT_FLAG_BAYER | AV_PIX_FMT_FLAG_XYZ;
> +
> + if (HAVE_BIGENDIAN) {
> + want_flags |= AV_PIX_FMT_FLAG_BE;
> + } else {
> + reject_flags |= AV_PIX_FMT_FLAG_BE;
> + }
> +
> + AVFilterFormats *formats = ff_formats_pixdesc_filter(want_flags, reject_flags);
> + return ff_set_common_formats2(ctx, cfg_in, cfg_out, formats);
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> + AVFilterContext *ctx = inlink->dst;
> + ColorDetectContext *s = ctx->priv;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
> + const int depth = desc->comp[0].depth;
> + const int mpeg_min = 16 << (depth - 8);
> + const int mpeg_max = 235 << (depth - 8);
> + if (depth > 16) /* not currently possible; prevent future bugs */
> + return AVERROR(ENOTSUP);
> +
> + s->desc = desc;
> + s->depth = depth;
> + s->mpeg_min = mpeg_min;
> + s->mpeg_max = mpeg_max;
> + s->nb_threads = ff_filter_get_nb_threads(ctx);
> +
> + if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
> + atomic_init(&s->detected_range, AVCOL_RANGE_JPEG);
> + } else {
> + atomic_init(&s->detected_range, AVCOL_RANGE_UNSPECIFIED);
> + }
> +
> + if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> + s->idx_a = desc->comp[desc->nb_components - 1].plane;
> + atomic_init(&s->detected_alpha, ALPHA_UNDETERMINED);
> + } else {
> + atomic_init(&s->detected_alpha, ALPHA_NONE);
> + }
> +
> + ff_color_detect_dsp_init(&s->dsp, depth, inlink->color_range);
> + return 0;
> +}
> +
> +static int detect_range(AVFilterContext *ctx, void *arg,
> + int jobnr, int nb_jobs)
> +{
> + ColorDetectContext *s = ctx->priv;
> + const AVFrame *in = arg;
> + const ptrdiff_t stride = in->linesize[0];
> + const int y_start = (in->height * jobnr) / nb_jobs;
> + const int y_end = (in->height * (jobnr + 1)) / nb_jobs;
> + const int h_slice = y_end - y_start;
> +
> + if (s->dsp.detect_range(in->data[0] + y_start * stride, stride,
> + in->width, h_slice, s->mpeg_min, s->mpeg_max))
> + atomic_store(&s->detected_range, AVCOL_RANGE_JPEG);
> +
> + return 0;
> +}
> +
> +static int detect_alpha(AVFilterContext *ctx, void *arg,
> + int jobnr, int nb_jobs)
> +{
> + ColorDetectContext *s = ctx->priv;
> + const AVFrame *in = arg;
> + const int w = in->width;
> + const int h = in->height;
> + const int y_start = (h * jobnr) / nb_jobs;
> + const int y_end = (h * (jobnr + 1)) / nb_jobs;
> + const int h_slice = y_end - y_start;
> +
> + const int nb_planes = (s->desc->flags & AV_PIX_FMT_FLAG_RGB) ? 3 : 1;
> + const ptrdiff_t alpha_stride = in->linesize[s->idx_a];
> + const uint8_t *alpha = in->data[s->idx_a] + y_start * alpha_stride;
> +
> + const int p = (1 << s->depth) - 1;
> + const int q = s->mpeg_max - s->mpeg_min;
> + const int k = s->mpeg_min * p + 128;
> +
> + for (int i = 0; i < nb_planes; i++) {
> + const ptrdiff_t stride = in->linesize[i];
> + if (s->dsp.detect_alpha(in->data[i] + y_start * stride, stride,
> + alpha, alpha_stride, w, h_slice, p, q, k)) {
> + atomic_store(&s->detected_alpha, ALPHA_STRAIGHT);
> + return 0;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> +{
> + AVFilterContext *ctx = inlink->dst;
> + ColorDetectContext *s = ctx->priv;
> + const int nb_threads = FFMIN(inlink->h, s->nb_threads);
> +
> + if (s->mode & COLOR_DETECT_COLOR_RANGE && s->detected_range == AVCOL_RANGE_UNSPECIFIED)
> + ff_filter_execute(ctx, detect_range, in, NULL, nb_threads);
> + if (s->mode & COLOR_DETECT_ALPHA_MODE && s->detected_alpha == ALPHA_UNDETERMINED)
> + ff_filter_execute(ctx, detect_alpha, in, NULL, nb_threads);
> +
> + return ff_filter_frame(inlink->dst->outputs[0], in);
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + ColorDetectContext *s = ctx->priv;
> + if (!s->mode)
> + return;
> +
> + av_log(ctx, AV_LOG_INFO, "Detected color properties:\n");
> + if (s->mode & COLOR_DETECT_COLOR_RANGE) {
> + av_log(ctx, AV_LOG_INFO, " Color range: %s\n",
> + s->detected_range == AVCOL_RANGE_JPEG ? "JPEG / full range"
> + : "undetermined");
> + }
> +
> + if (s->mode & COLOR_DETECT_ALPHA_MODE) {
> + av_log(ctx, AV_LOG_INFO, " Alpha mode: %s\n",
> + s->detected_alpha == ALPHA_NONE ? "none" :
> + s->detected_alpha == ALPHA_STRAIGHT ? "straight / independent"
> + : "undetermined");
> + }
> +}
> +
> +av_cold void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
> + enum AVColorRange color_range)
> +{
> + if (!dsp->detect_range)
> + dsp->detect_range = depth > 8 ? ff_detect_range16_c : ff_detect_range_c;
> + if (!dsp->detect_alpha) {
> + if (color_range == AVCOL_RANGE_JPEG) {
> + dsp->detect_alpha = depth > 8 ? ff_detect_alpha16_full_c : ff_detect_alpha_full_c;
> + } else {
> + dsp->detect_alpha = depth > 8 ? ff_detect_alpha16_limited_c : ff_detect_alpha_limited_c;
> + }
> + }
> +}
> +
> +static const AVFilterPad colordetect_inputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = config_input,
> + .filter_frame = filter_frame,
> + },
> +};
> +
> +const FFFilter ff_vf_colordetect = {
> + .p.name = "colordetect",
> + .p.description = NULL_IF_CONFIG_SMALL("Detect video color properties."),
> + .p.priv_class = &colordetect_class,
> + .p.flags = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_METADATA_ONLY,
> + .priv_size = sizeof(ColorDetectContext),
> + FILTER_INPUTS(colordetect_inputs),
> + FILTER_OUTPUTS(ff_video_default_filterpad),
> + FILTER_QUERY_FUNC2(query_format),
> + .uninit = uninit,
> +};
> diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
> new file mode 100644
> index 0000000000..8998ed83d4
> --- /dev/null
> +++ b/libavfilter/vf_colordetect.h
> @@ -0,0 +1,149 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_VF_COLORDETECT_H
> +#define AVFILTER_VF_COLORDETECT_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +#include <libavutil/macros.h>
> +#include <libavutil/pixfmt.h>
> +
> +typedef struct FFColorDetectDSPContext {
> + /* Returns 1 if an out-of-range value was detected, 0 otherwise */
> + int (*detect_range)(const uint8_t *data, ptrdiff_t stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int mpeg_min, int mpeg_max);
> +
> + /* Returns 1 if the color value exceeds the alpha value, 0 otherwise */
> + int (*detect_alpha)(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k);
> +} FFColorDetectDSPContext;
> +
> +void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
> + enum AVColorRange color_range);
> +
> +static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int mpeg_min, int mpeg_max)
> +{
> + while (height--) {
> + for (int x = 0; x < width; x++) {
> + const uint8_t val = data[x];
> + if (val < mpeg_min || val > mpeg_max)
> + return 1;
> + }
> + data += stride;
> + }
> +
> + return 0;
> +}
You could process width as a whole to allow better vectorization.
Assuming you don't process 10000x1 images, it will be faster on average.
Before (clang --march=znver4):
detect_range_8_c: 5264.6 ( 1.00x)
detect_range_8_avx2: 124.5 (42.30x)
detect_range_8_avx512: 121.6 (43.31x)
After (clang --march=znver4):
detect_range_8_c: 211.5 ( 1.00x)
detect_range_8_avx2: 136.4 ( 1.55x)
detect_range_8_avx512: 95.4 ( 2.22x)
static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
ptrdiff_t width, ptrdiff_t height,
- int mpeg_min, int mpeg_max)
+ uint8_t mpeg_min, uint8_t mpeg_max)
{
while (height--) {
+ bool out_of_range = false;
for (int x = 0; x < width; x++) {
const uint8_t val = data[x];
- if (val < mpeg_min || val > mpeg_max)
- return 1;
+ out_of_range |= val < mpeg_min || val > mpeg_max;
}
+ if (out_of_range)
+ return 1;
data += stride;
}
- Kacper
> +
> +static inline int ff_detect_range16_c(const uint8_t *data, ptrdiff_t stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int mpeg_min, int mpeg_max)
> +{
> + while (height--) {
> + const uint16_t *data16 = (const uint16_t *) data;
> + for (int x = 0; x < width; x++) {
> + const uint16_t val = data16[x];
> + if (val < mpeg_min || val > mpeg_max)
> + return 1;
> + }
> + data += stride;
> + }
> +
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha_full_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + for (int x = 0; x < width; x++) {
> + if (color[x] > alpha[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha_limited_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + for (int x = 0; x < width; x++) {
> + if (p * color[x] - k > q * alpha[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha16_full_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + const uint16_t *color16 = (const uint16_t *) color;
> + const uint16_t *alpha16 = (const uint16_t *) alpha;
> + for (int x = 0; x < width; x++) {
> + if (color16[x] > alpha16[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha16_limited_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + const uint16_t *color16 = (const uint16_t *) color;
> + const uint16_t *alpha16 = (const uint16_t *) alpha;
> + for (int x = 0; x < width; x++) {
> + if ((int64_t) p * color16[x] - k > (int64_t) q * alpha16[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +#endif /* AVFILTER_VF_COLORDETECT_H */
> --
> 2.50.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 12:38 ` Kacper Michajlow
@ 2025-07-18 12:46 ` Niklas Haas
2025-07-18 14:51 ` Kacper Michajlow
0 siblings, 1 reply; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 12:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 18 Jul 2025 14:38:04 +0200 Kacper Michajlow <kasper93@gmail.com> wrote:
> > +static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
> > + ptrdiff_t width, ptrdiff_t height,
> > + int mpeg_min, int mpeg_max)
> > +{
> > + while (height--) {
> > + for (int x = 0; x < width; x++) {
> > + const uint8_t val = data[x];
> > + if (val < mpeg_min || val > mpeg_max)
> > + return 1;
> > + }
> > + data += stride;
> > + }
> > +
> > + return 0;
> > +}
>
> You could process width as a whole to allow better vectorization.
> Assuming you don't process 10000x1 images, it will be faster on average.
That's what I had in v1 of my patch, but it is significantly (50%) slower
on GCC, which prefers the version I have written above.
There is the not insignificant point that this C routine is also being used
to handle remaining elements that don't fit into a multiple of the SIMD
kernel, for which the scalar code is actually preferred.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 12:46 ` Niklas Haas
@ 2025-07-18 14:51 ` Kacper Michajlow
0 siblings, 0 replies; 7+ messages in thread
From: Kacper Michajlow @ 2025-07-18 14:51 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 18 Jul 2025 at 14:47, Niklas Haas <ffmpeg@haasn.xyz> wrote:
>
> On Fri, 18 Jul 2025 14:38:04 +0200 Kacper Michajlow <kasper93@gmail.com> wrote:
> > > +static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
> > > + ptrdiff_t width, ptrdiff_t height,
> > > + int mpeg_min, int mpeg_max)
> > > +{
> > > + while (height--) {
> > > + for (int x = 0; x < width; x++) {
> > > + const uint8_t val = data[x];
> > > + if (val < mpeg_min || val > mpeg_max)
> > > + return 1;
> > > + }
> > > + data += stride;
> > > + }
> > > +
> > > + return 0;
> > > +}
> >
> > You could process width as a whole to allow better vectorization.
> > Assuming you don't process 10000x1 images, it will be faster on average.
>
> That's what I had in v1 of my patch, but it is significantly (50%) slower
> on GCC, which prefers the version I have written above.
>
> There is the not insignificant point that this C routine is also being used
> to handle remaining elements that don't fit into a multiple of the SIMD
> kernel, for which the scalar code is actually preferred.
Interesting. It's my fault, I didn't check. GCC really doesn't like
bool/int there.
If that function is important you could try:
{
+ uint8_t min = mpeg_min, max = mpeg_max;
while (height--) {
+ uint8_t out_of_range = 0;
for (int x = 0; x < width; x++) {
const uint8_t val = data[x];
- if (val < mpeg_min || val > mpeg_max)
- return 1;
+ out_of_range |= val < min || val > max;
}
+ if (out_of_range)
+ return 1;
data += stride;
}
Side note: if you change the function prototype to take
`uint8_t mpeg_min, uint8_t mpeg_max` directly,
clang goes down to 267.6 ( 1.00x). Unless it's because of UB, lol.
So, the gcc scalar version is a bit slower in this case, but I think there is
value in it, because it scales very nicely with -ftree-vectorize, even with
an sse2 target.
Hopefully we will enable -ftree-vectorize by default soon; there is a pending
patch for that.
before (gcc -fno-tree-vectorize)
detect_range_8_c: 5537.4 ( 1.00x)
detect_range_8_avx2: 149.7 (36.98x)
detect_range_8_avx512: 111.2 (49.80x)
after (gcc -fno-tree-vectorize)
detect_range_8_c: 7709.0 ( 1.00x)
detect_range_8_avx2: 137.6 (56.02x)
detect_range_8_avx512: 104.2 (73.97x)
after (gcc -ftree-vectorize --march=generic)
detect_range_8_c: 657.0 ( 1.00x)
detect_range_8_avx2: 161.7 ( 4.06x)
detect_range_8_avx512: 116.5 ( 5.64x)
after (gcc -ftree-vectorize --march=znver4)
detect_range_8_c: 285.6 ( 1.00x)
detect_range_8_avx2: 256.0 ( 1.12x)
detect_range_8_avx512: 107.6 ( 2.65x)
after (clang --march=generic)
detect_range_8_c: 1769.0 ( 1.00x)
detect_range_8_avx2: 231.8 ( 7.63x)
detect_range_8_avx512: 96.6 (18.32x)
after (clang --march=znver4)
detect_range_8_c: 952.9 ( 1.00x)
detect_range_8_avx2: 137.7 ( 6.92x)
detect_range_8_avx512: 95.9 ( 9.94x)
- Kacper
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-07-18 14:52 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Niklas Haas
2025-07-18 12:18 ` [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 12:38 ` Kacper Michajlow
2025-07-18 12:46 ` Niklas Haas
2025-07-18 14:51 ` Kacper Michajlow
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git