* [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
@ 2025-07-18 9:57 ` Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Niklas Haas
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 9:57 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
---
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 +
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vf_colordetect.c | 139 ++++++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
5 files changed, 145 insertions(+)
create mode 100644 tests/checkasm/vf_colordetect.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index c6d5b0ba1f..d65f48f97b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -57,6 +57,7 @@ AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
AVFILTEROBJS-$(CONFIG_BLACKDETECT_FILTER) += vf_blackdetect.o
AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
+AVFILTEROBJS-$(CONFIG_COLORDETECT_FILTER)+= vf_colordetect.o
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 2532405f29..968961c03c 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -275,6 +275,9 @@ static const struct {
#if CONFIG_BWDIF_FILTER
{ "vf_bwdif", checkasm_check_vf_bwdif },
#endif
+ #if CONFIG_COLORDETECT_FILTER
+ { "vf_colordetect", checkasm_check_colordetect },
+ #endif
#if CONFIG_COLORSPACE_FILTER
{ "vf_colorspace", checkasm_check_colorspace },
#endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index d85bbaf7fa..eb458a1732 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -90,6 +90,7 @@ void checkasm_check_blackdetect(void);
void checkasm_check_blend(void);
void checkasm_check_blockdsp(void);
void checkasm_check_bswapdsp(void);
+void checkasm_check_colordetect(void);
void checkasm_check_colorspace(void);
void checkasm_check_diracdsp(void);
void checkasm_check_exrdsp(void);
diff --git a/tests/checkasm/vf_colordetect.c b/tests/checkasm/vf_colordetect.c
new file mode 100644
index 0000000000..96166a48c5
--- /dev/null
+++ b/tests/checkasm/vf_colordetect.c
@@ -0,0 +1,139 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+
+#include "libavfilter/vf_colordetect.h"
+#include "libavutil/mem_internal.h"
+
+#define WIDTH 256
+#define HEIGHT 16
+#define STRIDE (WIDTH + 32)
+
+static void check_range_detect(int depth) /* compares dsp.detect_range against the reference for one bit depth */
+{
+ const int mpeg_min = 16 << (depth - 8); /* 16 scaled up from 8 bits to depth */
+ const int mpeg_max = 235 << (depth - 8); /* 235 scaled up from 8 bits to depth */
+
+ FFColorDetectDSPContext dsp = {0};
+ ff_color_detect_dsp_init(&dsp, depth, AVCOL_RANGE_UNSPECIFIED);
+
+ declare_func(int, const uint8_t *, ptrdiff_t, ptrdiff_t, ptrdiff_t, int, int);
+
+ /* Fill every byte with 0x80 (0x8080 per 16-bit sample), which should always return 0 */
+ LOCAL_ALIGNED_32(uint8_t, in, [HEIGHT * STRIDE]);
+ memset(in, 0x80, HEIGHT * STRIDE);
+
+ /* Place an out-of-range value (0) at a random column of a random row in the bottom half */
+ const int h2 = HEIGHT >> 1;
+ int idx0 = ((rnd() % h2) + h2) * STRIDE + (rnd() % WIDTH);
+ if (depth > 8) {
+ idx0 &= ~1; /* round down to an even byte offset so both bytes of one sample are cleared */
+ in[idx0] = in[idx0 + 1] = 0;
+ } else {
+ in[idx0] = 0;
+ }
+
+ int w = WIDTH; /* width passed to the function: WIDTH at 8 bits, halved below for deeper formats */
+ if (depth > 8)
+ w /= 2;
+
+ if (check_func(dsp.detect_range, "detect_range_%d", depth)) {
+ /* Test increasing height, to ensure we hit the placed 0 eventually */
+ for (int h = 1; h <= HEIGHT; h++) {
+ int res_ref = call_ref(in, STRIDE, w, h, mpeg_min, mpeg_max);
+ int res_new = call_new(in, STRIDE, w, h, mpeg_min, mpeg_max);
+ if (res_ref != res_new) /* only agreement with the reference is checked, not the value itself */
+ fail();
+ }
+
+ /* Test performance of base case without any out-of-range values */
+ memset(in, 0x80, HEIGHT * STRIDE);
+ bench_new(in, STRIDE, w, HEIGHT, mpeg_min, mpeg_max);
+ }
+}
+
+static void check_alpha_detect(int depth, enum AVColorRange range) /* compares dsp.detect_alpha against the reference for one depth/range pair */
+{
+ const int mpeg_min = 16 << (depth - 8);
+ const int mpeg_max = 235 << (depth - 8);
+ const int p = (1 << depth) - 1; /* p, q and k are passed through unchanged to the function under test */
+ const int q = mpeg_max - mpeg_min;
+ const int k = mpeg_min * p + 128;
+
+ FFColorDetectDSPContext dsp = {0};
+ ff_color_detect_dsp_init(&dsp, depth, range);
+
+ declare_func(int, const uint8_t *, ptrdiff_t, const uint8_t *, ptrdiff_t,
+ ptrdiff_t, ptrdiff_t, int p, int q, int k);
+
+ LOCAL_ALIGNED_32(uint8_t, luma, [HEIGHT * STRIDE]);
+ LOCAL_ALIGNED_32(uint8_t, alpha, [HEIGHT * STRIDE]);
+ memset(luma, 0x80, HEIGHT * STRIDE);
+ memset(alpha, 0xFF, HEIGHT * STRIDE);
+
+ /* Try and force overflow by storing 235 and 16 (scaled to depth where applicable) in the first two samples */
+ if (depth > 8 && range == AVCOL_RANGE_MPEG) {
+ ((uint16_t *) luma)[0] = 235 << (depth - 8);
+ ((uint16_t *) luma)[1] = 16 << (depth - 8);
+ } else { /* NOTE(review): also taken for depth > 8 with a non-MPEG range, where these are single-byte writes - confirm intended */
+ luma[0] = 235;
+ luma[1] = 16;
+ }
+
+ /* Zero one alpha sample at a random column of a random row in the bottom half */
+ const int h2 = HEIGHT >> 1;
+ int idx0 = ((rnd() % h2) + h2) * STRIDE + (rnd() % WIDTH);
+ if (depth > 8) {
+ idx0 &= ~1; /* round down to an even byte offset so both bytes of one sample are cleared */
+ alpha[idx0] = alpha[idx0 + 1] = 0;
+ } else {
+ alpha[idx0] = 0;
+ }
+
+ int w = WIDTH; /* width passed to the function: WIDTH at 8 bits, halved below for deeper formats */
+ if (depth > 8)
+ w /= 2;
+
+ if (check_func(dsp.detect_alpha, "detect_alpha_%d_%s", depth, range == AVCOL_RANGE_JPEG ? "full" : "limited")) {
+ /* Test increasing height, to ensure we hit the placed 0 eventually */
+ for (int h = 1; h <= HEIGHT; h++) {
+ int res_ref = call_ref(luma, STRIDE, alpha, STRIDE, w, h, p, q, k);
+ int res_new = call_new(luma, STRIDE, alpha, STRIDE, w, h, p, q, k);
+ if (res_ref != res_new) /* only agreement with the reference is checked, not the value itself */
+ fail();
+ }
+
+ /* Test performance of base case with alpha reset to all 0xFF bytes */
+ memset(alpha, 0xFF, HEIGHT * STRIDE);
+ bench_new(luma, STRIDE, alpha, STRIDE, w, HEIGHT, p, q, k);
+ }
+}
+
+void checkasm_check_colordetect(void) /* checkasm entry point: runs the range and alpha checks at depths 8 and 16 */
+{
+ for (int depth = 8; depth <= 16; depth += 8) {
+ check_range_detect(depth);
+ report("detect_range_%d", depth);
+
+ check_alpha_detect(depth, AVCOL_RANGE_JPEG); /* checked under the "full" function name */
+ check_alpha_detect(depth, AVCOL_RANGE_MPEG); /* checked under the "limited" function name */
+ report("detect_alpha_%d", depth); /* one report line covers both ranges */
+ }
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ae402cad4..366afa0373 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -59,6 +59,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-vf_blackdetect \
fate-checkasm-vf_blend \
fate-checkasm-vf_bwdif \
+ fate-checkasm-vf_colordetect \
fate-checkasm-vf_colorspace \
fate-checkasm-vf_eq \
fate-checkasm-vf_gblur \
--
2.50.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas
@ 2025-07-18 9:57 ` Niklas Haas
2025-07-18 12:18 ` [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 12:38 ` Kacper Michajlow
3 siblings, 0 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 9:57 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
alphadetect8_full_c: 5658.2 ( 1.00x)
alphadetect8_full_avx2: 215.1 (26.31x)
alphadetect8_full_avx512: 133.5 (42.40x)
alphadetect8_limited_c: 7391.5 ( 1.00x)
alphadetect8_limited_avx2: 649.3 (11.38x)
alphadetect8_limited_avx512: 330.5 (22.36x)
alphadetect16_full_c: 3027.4 ( 1.00x)
alphadetect16_full_avx2: 209.4 (14.46x)
alphadetect16_full_avx512: 141.4 (21.41x)
alphadetect16_limited_c: 3880.9 ( 1.00x)
alphadetect16_limited_avx2: 734.9 ( 5.28x)
alphadetect16_limited_avx512: 349.2 (11.11x)
rangedetect8_c: 5854.2 ( 1.00x)
rangedetect8_avx2: 138.9 (42.15x)
rangedetect8_avx512: 106.2 (55.12x)
rangedetect16_c: 4122.0 ( 1.00x)
rangedetect16_avx2: 138.6 (29.74x)
rangedetect16_avx512: 104.1 (39.60x)
---
libavfilter/vf_colordetect.c | 4 +
libavfilter/vf_colordetect.h | 3 +
libavfilter/x86/Makefile | 2 +
libavfilter/x86/vf_colordetect.asm | 150 ++++++++++++++++++++++++++
libavfilter/x86/vf_colordetect_init.c | 105 ++++++++++++++++++
5 files changed, 264 insertions(+)
create mode 100644 libavfilter/x86/vf_colordetect.asm
create mode 100644 libavfilter/x86/vf_colordetect_init.c
diff --git a/libavfilter/vf_colordetect.c b/libavfilter/vf_colordetect.c
index 0fb892634f..642bec62c6 100644
--- a/libavfilter/vf_colordetect.c
+++ b/libavfilter/vf_colordetect.c
@@ -219,6 +219,10 @@ static av_cold void uninit(AVFilterContext *ctx)
av_cold void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
enum AVColorRange color_range)
{
+#if ARCH_X86
+ ff_color_detect_dsp_init_x86(dsp, depth, color_range);
+#endif
+
if (!dsp->detect_range)
dsp->detect_range = depth > 8 ? ff_detect_range16_c : ff_detect_range_c;
if (!dsp->detect_alpha) {
diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
index 8998ed83d4..78d296433d 100644
--- a/libavfilter/vf_colordetect.h
+++ b/libavfilter/vf_colordetect.h
@@ -41,6 +41,9 @@ typedef struct FFColorDetectDSPContext {
void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
enum AVColorRange color_range);
+void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int depth,
+ enum AVColorRange color_range);
+
static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
ptrdiff_t width, ptrdiff_t height,
int mpeg_min, int mpeg_max)
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 86f7119a7b..0e531a9b41 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -6,6 +6,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o
OBJS-$(CONFIG_BLACKDETECT_FILTER) += x86/vf_blackdetect_init.o
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
+OBJS-$(CONFIG_COLORDETECT_FILTER) += x86/vf_colordetect_init.o
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o
@@ -53,6 +54,7 @@ X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o
X86ASM-OBJS-$(CONFIG_BLACKDETECT_FILTER) += x86/vf_blackdetect.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
+X86ASM-OBJS-$(CONFIG_COLORDETECT_FILTER) += x86/vf_colordetect.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o
diff --git a/libavfilter/x86/vf_colordetect.asm b/libavfilter/x86/vf_colordetect.asm
new file mode 100644
index 0000000000..6482df95cc
--- /dev/null
+++ b/libavfilter/x86/vf_colordetect.asm
@@ -0,0 +1,150 @@
+;*****************************************************************************
+;* x86-optimized functions for colordetect filter
+;*
+;* Copyright (C) 2025 Niklas Haas
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+%macro detect_range_fn 1 ; suffix: sample size, b (bytes) or w (words)
+cglobal detect_range%1, 6, 7, 5, data, stride, width, height, mpeg_min, mpeg_max, x
+%if UNIX64 && notcpuflag(avx512)
+ movd xm0, mpeg_mind
+ movd xm1, mpeg_maxd
+ vpbroadcast%1 m0, xm0 ; m0 = mpeg_min in every lane
+ vpbroadcast%1 m1, xm1 ; m1 = mpeg_max in every lane
+%else
+ vpbroadcast%1 m0, mpeg_mind
+ vpbroadcast%1 m1, mpeg_maxd
+%endif
+ add dataq, widthq ; point at the end of the row ...
+ neg widthq ; ... and index it with a negative offset that counts up to 0
+.lineloop:
+ mova m2, m0 ; running minimum, seeded with mpeg_min
+ mova m3, m1 ; running maximum, seeded with mpeg_max
+ mov xq, widthq
+ .loop:
+ movu m4, [dataq + xq]
+ pminu%1 m2, m4
+ pmaxu%1 m3, m4
+ add xq, mmsize
+ jl .loop ; exit test comes last, so one vector is read even when width is 0
+
+ ; test if the data is out of range: m2 != m0 or m3 != m1 means a sample fell outside the bounds
+ pxor m2, m0
+%if cpuflag(avx512)
+ vpternlogq m2, m3, m1, 0xF6 ; m2 |= m3 ^ m1
+ vptestmq k1, m2, m2
+ kortestb k1, k1
+%else
+ pxor m3, m1
+ por m2, m3
+ ptest m2, m2
+%endif
+ jnz .end ; out-of-range sample found; ZF is clear on this path
+ add dataq, strideq
+ dec heightq
+ jg .lineloop
+.end:
+ setnz al ; ZF clear when arriving via jnz; ZF set when dec brought height to exactly 0
+ movzx rax, al
+ RET
+%endmacro
+
+%macro detect_alpha_fn 3 ; suffix (input sample size), hsuffix (widened size used for the limited-range math), range (full or limited)
+cglobal detect_alpha%1_%3, 6, 7, 6, color, color_stride, alpha, alpha_stride, width, height, x
+ pxor m0, m0 ; m0 ORs together every mismatch seen so far; it is never reset between rows
+ add colorq, widthq
+ add alphaq, widthq
+ neg widthq ; both planes are indexed with a negative offset that counts up to 0
+%ifidn %3, limited
+ vpbroadcast%2 m3, r6m ; p
+ vpbroadcast%2 m4, r7m ; q
+ vpbroadcast%2 m5, r8m ; k
+%endif
+.lineloop:
+ mov xq, widthq
+ .loop:
+ %ifidn %3, full
+ movu m1, [colorq + xq]
+ movu m2, [alphaq + xq]
+ pmaxu%1 m1, m2 ; m1 = max(color, alpha); differs from m2 only where color > alpha
+ %else
+ pmovzx%1%2 m1, [colorq + xq]
+ pmovzx%1%2 m2, [alphaq + xq]
+ pmull%2 m1, m3 ; m1 = color * p
+ pmull%2 m2, m4 ; m2 = alpha * q
+ %ifidn %1, b
+ psubusw m1, m5 ; m1 = max(color * p - k, 0) via saturating subtract
+ %else
+ pmaxud m1, m5
+ psubd m1, m5 ; m1 = max(color * p, k) - k, the same clamp built from max + subtract
+ %endif
+ pmaxu%2 m1, m2 ; differs from m2 only where the clamped color * p - k exceeds alpha * q
+ %endif
+ %if cpuflag(avx512)
+ vpternlogq m0, m1, m2, 0xF6 ; m0 |= m1 ^ m2
+ %else
+ pxor m1, m2
+ por m0, m1
+ %endif
+ %ifidn %3, full
+ add xq, mmsize
+ %else
+ add xq, mmsize >> 1 ; the widening loads consumed only half a register's worth of input
+ %endif
+ jl .loop ; exit test comes last, so one vector is read even when width is 0
+
+%if cpuflag(avx512)
+ vptestmq k1, m0, m0
+ kortestb k1, k1
+%else
+ ptest m0, m0
+%endif
+ jnz .found
+
+ add colorq, color_strideq
+ add alphaq, alpha_strideq
+ dec heightq
+ jg .lineloop
+ xor rax, rax ; every row scanned without a mismatch: return 0
+ RET
+
+.found:
+ mov rax, 1 ; a color sample exceeded its alpha bound: return 1
+ RET
+%endmacro
+
+INIT_YMM avx2
+detect_range_fn b
+detect_range_fn w
+detect_alpha_fn b, w, full
+detect_alpha_fn w, d, full
+detect_alpha_fn b, w, limited
+detect_alpha_fn w, d, limited
+
+INIT_ZMM avx512
+detect_range_fn b
+detect_range_fn w
+detect_alpha_fn b, w, full
+detect_alpha_fn w, d, full
+detect_alpha_fn b, w, limited
+detect_alpha_fn w, d, limited
diff --git a/libavfilter/x86/vf_colordetect_init.c b/libavfilter/x86/vf_colordetect_init.c
new file mode 100644
index 0000000000..62a7e87388
--- /dev/null
+++ b/libavfilter/x86/vf_colordetect_init.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/vf_colordetect.h"
+
+#define DETECT_RANGE_FUNC(FUNC_NAME, ASM_FUNC_NAME, C_FUNC_NAME, SHIFT, MMSIZE) \
+int ASM_FUNC_NAME(const uint8_t *src, ptrdiff_t stride, \
+ ptrdiff_t width, ptrdiff_t height, int min, int max); \
+/* FUNC_NAME: asm scans the first (width << SHIFT) bytes rounded down to MMSIZE, C scans the tail */ \
+static int FUNC_NAME(const uint8_t *src, ptrdiff_t stride, \
+ ptrdiff_t width, ptrdiff_t height, int min, int max) \
+{ \
+ ptrdiff_t bytes = (width << SHIFT) & ~(MMSIZE - 1); \
+ /* The asm loops test their exit only after one pass, so never enter them with nothing to scan */ \
+ int ret = (bytes && height > 0) ? ASM_FUNC_NAME(src, stride, bytes, height, min, max) : 0; \
+ if (ret) \
+ return ret; \
+ return C_FUNC_NAME(src + bytes, stride, width - (bytes >> SHIFT), \
+ height, min, max); \
+}
+
+#define DETECT_ALPHA_FUNC(FUNC_NAME, ASM_FUNC_NAME, C_FUNC_NAME, SHIFT, MMSIZE) \
+int ASM_FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride, \
+ const uint8_t *alpha, ptrdiff_t alpha_stride, \
+ ptrdiff_t width, ptrdiff_t height, int p, int q, int k); \
+/* FUNC_NAME: asm scans the first (width << SHIFT) bytes rounded down to MMSIZE, C scans the tail */ \
+static int FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride, \
+ const uint8_t *alpha, ptrdiff_t alpha_stride, \
+ ptrdiff_t width, ptrdiff_t height, int p, int q, int k) \
+{ \
+ ptrdiff_t bytes = (width << SHIFT) & ~(MMSIZE - 1); \
+ /* The asm loops test their exit only after one pass, so never enter them with nothing to scan */ \
+ int ret = (bytes && height > 0) ? ASM_FUNC_NAME(color, color_stride, alpha, \
+ alpha_stride, bytes, height, p, q, k) : 0; \
+ if (ret) \
+ return ret; \
+ return C_FUNC_NAME(color + bytes, color_stride, alpha + bytes, alpha_stride,\
+ width - (bytes >> SHIFT), height, p, q, k); \
+}
+
+#if HAVE_X86ASM
+#if HAVE_AVX512_EXTERNAL
+DETECT_RANGE_FUNC(detect_range_avx512, ff_detect_rangeb_avx512, ff_detect_range_c, 0, 64)
+DETECT_RANGE_FUNC(detect_range16_avx512, ff_detect_rangew_avx512, ff_detect_range16_c, 1, 64)
+DETECT_ALPHA_FUNC(detect_alpha_full_avx512, ff_detect_alphab_full_avx512, ff_detect_alpha_full_c, 0, 64)
+DETECT_ALPHA_FUNC(detect_alpha16_full_avx512, ff_detect_alphaw_full_avx512, ff_detect_alpha16_full_c, 1, 64)
+DETECT_ALPHA_FUNC(detect_alpha_limited_avx512, ff_detect_alphab_limited_avx512, ff_detect_alpha_limited_c, 0, 64)
+DETECT_ALPHA_FUNC(detect_alpha16_limited_avx512, ff_detect_alphaw_limited_avx512, ff_detect_alpha16_limited_c, 1, 64)
+#endif
+#if HAVE_AVX2_EXTERNAL
+DETECT_RANGE_FUNC(detect_range_avx2, ff_detect_rangeb_avx2, ff_detect_range_c, 0, 32)
+DETECT_RANGE_FUNC(detect_range16_avx2, ff_detect_rangew_avx2, ff_detect_range16_c, 1, 32)
+DETECT_ALPHA_FUNC(detect_alpha_full_avx2, ff_detect_alphab_full_avx2, ff_detect_alpha_full_c, 0, 32)
+DETECT_ALPHA_FUNC(detect_alpha16_full_avx2, ff_detect_alphaw_full_avx2, ff_detect_alpha16_full_c, 1, 32)
+DETECT_ALPHA_FUNC(detect_alpha_limited_avx2, ff_detect_alphab_limited_avx2, ff_detect_alpha_limited_c, 0, 32)
+DETECT_ALPHA_FUNC(detect_alpha16_limited_avx2, ff_detect_alphaw_limited_avx2, ff_detect_alpha16_limited_c, 1, 32)
+#endif
+#endif
+
+av_cold void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int depth,
+ enum AVColorRange color_range) /* installs the x86 SIMD wrappers into dsp when the CPU flags allow it */
+{
+#if HAVE_X86ASM
+ int cpu_flags = av_get_cpu_flags();
+#if HAVE_AVX2_EXTERNAL
+ if (EXTERNAL_AVX2(cpu_flags)) {
+ dsp->detect_range = depth > 8 ? detect_range16_avx2 : detect_range_avx2;
+ if (color_range == AVCOL_RANGE_JPEG) {
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx2 : detect_alpha_full_avx2;
+ } else { /* every range other than JPEG gets the limited-range variant */
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx2 : detect_alpha_limited_avx2;
+ }
+ }
+#endif
+#if HAVE_AVX512_EXTERNAL
+ if (EXTERNAL_AVX512(cpu_flags)) { /* checked last, so it overrides the AVX2 pointers set above */
+ dsp->detect_range = depth > 8 ? detect_range16_avx512 : detect_range_avx512;
+ if (color_range == AVCOL_RANGE_JPEG) {
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx512 : detect_alpha_full_avx512;
+ } else { /* every range other than JPEG gets the limited-range variant */
+ dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx512 : detect_alpha_limited_avx512;
+ }
+ }
+#endif
+#endif
+}
--
2.50.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas
2025-07-18 9:57 ` [FFmpeg-devel] [PATCH v3 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Niklas Haas
@ 2025-07-18 12:18 ` Niklas Haas
2025-07-18 12:38 ` Kacper Michajlow
3 siblings, 0 replies; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 12:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
On Fri, 18 Jul 2025 11:57:14 +0200 Niklas Haas <ffmpeg@haasn.xyz> wrote:
> From: Niklas Haas <git@haasn.dev>
>
> This filter can detect various properties about the image, including
> whether or not there are out-of-range values, or whether the input appears
> to use straight or premultiplied alpha.
>
> Of course, these can only be heuristics, with "undetermined" as the base
> case. While we can definitely prove the existence of full range or
> straight alpha colors, we can never infer the opposite.
> ---
> [...]
> +static int detect_alpha(AVFilterContext *ctx, void *arg,
> + int jobnr, int nb_jobs)
> +{
> + ColorDetectContext *s = ctx->priv;
> + const AVFrame *in = arg;
> + const int w = in->width;
> + const int h = in->height;
> + const int y_start = (h * jobnr) / nb_jobs;
> + const int y_end = (h * (jobnr + 1)) / nb_jobs;
> + const int h_slice = y_end - y_start;
> +
> + const int nb_planes = (s->desc->flags & AV_PIX_FMT_FLAG_RGB) ? 3 : 1;
> + const ptrdiff_t alpha_stride = in->linesize[s->idx_a];
> + const uint8_t *alpha = in->data[s->idx_a] + y_start * alpha_stride;
> +
> + const int p = (1 << s->depth) - 1;
> + const int q = s->mpeg_max - s->mpeg_min;
> + const int k = s->mpeg_min * p + 128;
I replaced this by a slightly tighter bound, and also added some explanation:
/**
* To check if a value is out of range, we need to compare the color value
* against the maximum possible color for a given alpha value:
* x > (((mpeg_max - mpeg_min) * (a + ((a >> 1) & 1)) + (1 << (depth - 1))) >> depth) + mpeg_min
* (cf. premultiply16offset in vf_premultiply.c)
*
* This rearranges to:
* ((x - mpeg_min) << depth) - (1 << (depth - 1)) > (mpeg_max - mpeg_min) * (a + ((a >> 1) & 1))
*
* And since a + 1 >= a + ((a >> 1) & 1) we can relax the RHS slightly, giving us:
* (1 << depth) * x - (mpeg_min << depth) - (1 << (depth - 1)) - (mpeg_max - mpeg_min) > (mpeg_max - mpeg_min) * a
* = P * x - K > Q * a in the below formula
*/
const int p = 1 << s->depth;
const int q = s->mpeg_max - s->mpeg_min;
const int k = p * s->mpeg_min + q + (1 << (s->depth - 1));
I won't bother sending a v4 just for this diff.
> [...]
> diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
> new file mode 100644
> index 0000000000..8998ed83d4
> --- /dev/null
> +++ b/libavfilter/vf_colordetect.h
> @@ -0,0 +1,149 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_VF_COLORDETECT_H
> +#define AVFILTER_VF_COLORDETECT_H
Fixed locally to unbreak fate-source.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 9:57 [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
` (2 preceding siblings ...)
2025-07-18 12:18 ` [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas
@ 2025-07-18 12:38 ` Kacper Michajlow
2025-07-18 12:46 ` Niklas Haas
3 siblings, 1 reply; 7+ messages in thread
From: Kacper Michajlow @ 2025-07-18 12:38 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 18 Jul 2025 at 11:57, Niklas Haas <ffmpeg@haasn.xyz> wrote:
>
> From: Niklas Haas <git@haasn.dev>
>
> This filter can detect various properties about the image, including
> whether or not there are out-of-range values, or whether the input appears
> to use straight or premultiplied alpha.
>
> Of course, these can only be heuristics, with "undetermined" as the base
> case. While we can definitely prove the existence of full range or
> straight alpha colors, we can never infer the opposite.
> ---
> doc/filters.texi | 27 ++++
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/vf_colordetect.c | 252 +++++++++++++++++++++++++++++++++++
> libavfilter/vf_colordetect.h | 149 +++++++++++++++++++++
> 5 files changed, 430 insertions(+)
> create mode 100644 libavfilter/vf_colordetect.c
> create mode 100644 libavfilter/vf_colordetect.h
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index ed2956fe75..74e9e71559 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -9753,6 +9753,33 @@ colorchannelmixer=.393:.769:.189:0:.349:.686:.168:0:.272:.534:.131
>
> This filter supports the all above options as @ref{commands}.
>
> +@section colordetect
> +Analyze the video frames to determine the effective value range and alpha
> +mode.
> +
> +The filter accepts the following options:
> +
> +@table @option
> +@item mode
> +Set of properties to detect. Unavailable properties, such as alpha mode for
> +an input image without an alpha channel, will be ignored automatically.
> +
> +Accepts a combination of the following flags:
> +
> +@table @samp
> +@item color_range
> +Detect if the source contains luma pixels outside the limited (MPEG) range,
> +which indicates that this is a full range YUV source.
> +@item alpha_mode
> +Detect if the source contains color values above the alpha channel, which
> +indicates that the alpha channel is independent (straight), rather than
> +premultiplied.
> +@item all
> +Enable detection of all of the above properties. This is the default.
> +@end table
> +
> +@end table
> +
> @section colorize
> Overlay a solid color on the video stream.
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 9e9153f5b0..e19f67a3a7 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -237,6 +237,7 @@ OBJS-$(CONFIG_COLORBALANCE_FILTER) += vf_colorbalance.o
> OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER) += vf_colorchannelmixer.o
> OBJS-$(CONFIG_COLORCONTRAST_FILTER) += vf_colorcontrast.o
> OBJS-$(CONFIG_COLORCORRECT_FILTER) += vf_colorcorrect.o
> +OBJS-$(CONFIG_COLORDETECT_FILTER) += vf_colordetect.o
> OBJS-$(CONFIG_COLORIZE_FILTER) += vf_colorize.o
> OBJS-$(CONFIG_COLORKEY_FILTER) += vf_colorkey.o
> OBJS-$(CONFIG_COLORKEY_OPENCL_FILTER) += vf_colorkey_opencl.o opencl.o \
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 409099bf1f..f3c2092b15 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -218,6 +218,7 @@ extern const FFFilter ff_vf_colorbalance;
> extern const FFFilter ff_vf_colorchannelmixer;
> extern const FFFilter ff_vf_colorcontrast;
> extern const FFFilter ff_vf_colorcorrect;
> +extern const FFFilter ff_vf_colordetect;
> extern const FFFilter ff_vf_colorize;
> extern const FFFilter ff_vf_colorkey;
> extern const FFFilter ff_vf_colorkey_opencl;
> diff --git a/libavfilter/vf_colordetect.c b/libavfilter/vf_colordetect.c
> new file mode 100644
> index 0000000000..0fb892634f
> --- /dev/null
> +++ b/libavfilter/vf_colordetect.c
> @@ -0,0 +1,252 @@
> +/*
> + * Copyright (c) 2025 Niklas Haas
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Video color space detector, tries to auto-detect YUV range and alpha mode.
> + */
> +
> +#include <stdbool.h>
> +#include <stdatomic.h>
> +
> +#include "config.h"
> +
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "formats.h"
> +#include "video.h"
> +
> +#include "vf_colordetect.h"
> +
> +enum AlphaMode {
> + ALPHA_NONE = -1,
> + ALPHA_UNDETERMINED = 0,
> + ALPHA_STRAIGHT,
> + /* No way to positively identify premultiplied alpha */
> +};
> +
> +enum ColorDetectMode {
> + COLOR_DETECT_COLOR_RANGE = 1 << 0,
> + COLOR_DETECT_ALPHA_MODE = 1 << 1,
> +};
> +
> +typedef struct ColorDetectContext {
> + const AVClass *class;
> + FFColorDetectDSPContext dsp;
> + unsigned mode;
> +
> + const AVPixFmtDescriptor *desc;
> + int nb_threads;
> + int depth;
> + int idx_a;
> + int mpeg_min;
> + int mpeg_max;
> +
> + atomic_int detected_range; // enum AVColorRange
> + atomic_int detected_alpha; // enum AlphaMode
> +} ColorDetectContext;
> +
> +#define OFFSET(x) offsetof(ColorDetectContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +
> +static const AVOption colordetect_options[] = {
> + { "mode", "Image properties to detect", OFFSET(mode), AV_OPT_TYPE_FLAGS, {.i64 = -1}, 0, UINT_MAX, FLAGS, .unit = "mode" },
> + { "color_range", "Detect (YUV) color range", 0, AV_OPT_TYPE_CONST, {.i64 = COLOR_DETECT_COLOR_RANGE}, 0, 0, FLAGS, .unit = "mode" },
> + { "alpha_mode", "Detect alpha mode", 0, AV_OPT_TYPE_CONST, {.i64 = COLOR_DETECT_ALPHA_MODE }, 0, 0, FLAGS, .unit = "mode" },
> + { "all", "Detect all supported properties", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, FLAGS, .unit = "mode" },
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(colordetect);
> +
> +static int query_format(const AVFilterContext *ctx,
> + AVFilterFormatsConfig **cfg_in,
> + AVFilterFormatsConfig **cfg_out)
> +{
> + int want_flags = AV_PIX_FMT_FLAG_PLANAR;
> + int reject_flags = AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_HWACCEL |
> + AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_FLOAT |
> + AV_PIX_FMT_FLAG_BAYER | AV_PIX_FMT_FLAG_XYZ;
> +
> + if (HAVE_BIGENDIAN) {
> + want_flags |= AV_PIX_FMT_FLAG_BE;
> + } else {
> + reject_flags |= AV_PIX_FMT_FLAG_BE;
> + }
> +
> + AVFilterFormats *formats = ff_formats_pixdesc_filter(want_flags, reject_flags);
> + return ff_set_common_formats2(ctx, cfg_in, cfg_out, formats);
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> + AVFilterContext *ctx = inlink->dst;
> + ColorDetectContext *s = ctx->priv;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
> + const int depth = desc->comp[0].depth;
> + const int mpeg_min = 16 << (depth - 8);
> + const int mpeg_max = 235 << (depth - 8);
> + if (depth > 16) /* not currently possible; prevent future bugs */
> + return AVERROR(ENOTSUP);
> +
> + s->desc = desc;
> + s->depth = depth;
> + s->mpeg_min = mpeg_min;
> + s->mpeg_max = mpeg_max;
> + s->nb_threads = ff_filter_get_nb_threads(ctx);
> +
> + if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
> + atomic_init(&s->detected_range, AVCOL_RANGE_JPEG);
> + } else {
> + atomic_init(&s->detected_range, AVCOL_RANGE_UNSPECIFIED);
> + }
> +
> + if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> + s->idx_a = desc->comp[desc->nb_components - 1].plane;
> + atomic_init(&s->detected_alpha, ALPHA_UNDETERMINED);
> + } else {
> + atomic_init(&s->detected_alpha, ALPHA_NONE);
> + }
> +
> + ff_color_detect_dsp_init(&s->dsp, depth, inlink->color_range);
> + return 0;
> +}
> +
> +static int detect_range(AVFilterContext *ctx, void *arg,
> + int jobnr, int nb_jobs)
> +{
> + ColorDetectContext *s = ctx->priv;
> + const AVFrame *in = arg;
> + const ptrdiff_t stride = in->linesize[0];
> + const int y_start = (in->height * jobnr) / nb_jobs;
> + const int y_end = (in->height * (jobnr + 1)) / nb_jobs;
> + const int h_slice = y_end - y_start;
> +
> + if (s->dsp.detect_range(in->data[0] + y_start * stride, stride,
> + in->width, h_slice, s->mpeg_min, s->mpeg_max))
> + atomic_store(&s->detected_range, AVCOL_RANGE_JPEG);
> +
> + return 0;
> +}
> +
> +static int detect_alpha(AVFilterContext *ctx, void *arg,
> + int jobnr, int nb_jobs)
> +{
> + ColorDetectContext *s = ctx->priv;
> + const AVFrame *in = arg;
> + const int w = in->width;
> + const int h = in->height;
> + const int y_start = (h * jobnr) / nb_jobs;
> + const int y_end = (h * (jobnr + 1)) / nb_jobs;
> + const int h_slice = y_end - y_start;
> +
> + const int nb_planes = (s->desc->flags & AV_PIX_FMT_FLAG_RGB) ? 3 : 1;
> + const ptrdiff_t alpha_stride = in->linesize[s->idx_a];
> + const uint8_t *alpha = in->data[s->idx_a] + y_start * alpha_stride;
> +
> + const int p = (1 << s->depth) - 1;
> + const int q = s->mpeg_max - s->mpeg_min;
> + const int k = s->mpeg_min * p + 128;
> +
> + for (int i = 0; i < nb_planes; i++) {
> + const ptrdiff_t stride = in->linesize[i];
> + if (s->dsp.detect_alpha(in->data[i] + y_start * stride, stride,
> + alpha, alpha_stride, w, h_slice, p, q, k)) {
> + atomic_store(&s->detected_alpha, ALPHA_STRAIGHT);
> + return 0;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> +{
> + AVFilterContext *ctx = inlink->dst;
> + ColorDetectContext *s = ctx->priv;
> + const int nb_threads = FFMIN(inlink->h, s->nb_threads);
> +
> + if (s->mode & COLOR_DETECT_COLOR_RANGE && s->detected_range == AVCOL_RANGE_UNSPECIFIED)
> + ff_filter_execute(ctx, detect_range, in, NULL, nb_threads);
> + if (s->mode & COLOR_DETECT_ALPHA_MODE && s->detected_alpha == ALPHA_UNDETERMINED)
> + ff_filter_execute(ctx, detect_alpha, in, NULL, nb_threads);
> +
> + return ff_filter_frame(inlink->dst->outputs[0], in);
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> + ColorDetectContext *s = ctx->priv;
> + if (!s->mode)
> + return;
> +
> + av_log(ctx, AV_LOG_INFO, "Detected color properties:\n");
> + if (s->mode & COLOR_DETECT_COLOR_RANGE) {
> + av_log(ctx, AV_LOG_INFO, " Color range: %s\n",
> + s->detected_range == AVCOL_RANGE_JPEG ? "JPEG / full range"
> + : "undetermined");
> + }
> +
> + if (s->mode & COLOR_DETECT_ALPHA_MODE) {
> + av_log(ctx, AV_LOG_INFO, " Alpha mode: %s\n",
> + s->detected_alpha == ALPHA_NONE ? "none" :
> + s->detected_alpha == ALPHA_STRAIGHT ? "straight / independent"
> + : "undetermined");
> + }
> +}
> +
> +av_cold void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
> + enum AVColorRange color_range)
> +{
> + if (!dsp->detect_range)
> + dsp->detect_range = depth > 8 ? ff_detect_range16_c : ff_detect_range_c;
> + if (!dsp->detect_alpha) {
> + if (color_range == AVCOL_RANGE_JPEG) {
> + dsp->detect_alpha = depth > 8 ? ff_detect_alpha16_full_c : ff_detect_alpha_full_c;
> + } else {
> + dsp->detect_alpha = depth > 8 ? ff_detect_alpha16_limited_c : ff_detect_alpha_limited_c;
> + }
> + }
> +}
> +
> +static const AVFilterPad colordetect_inputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = config_input,
> + .filter_frame = filter_frame,
> + },
> +};
> +
> +const FFFilter ff_vf_colordetect = {
> + .p.name = "colordetect",
> + .p.description = NULL_IF_CONFIG_SMALL("Detect video color properties."),
> + .p.priv_class = &colordetect_class,
> + .p.flags = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_METADATA_ONLY,
> + .priv_size = sizeof(ColorDetectContext),
> + FILTER_INPUTS(colordetect_inputs),
> + FILTER_OUTPUTS(ff_video_default_filterpad),
> + FILTER_QUERY_FUNC2(query_format),
> + .uninit = uninit,
> +};
> diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h
> new file mode 100644
> index 0000000000..8998ed83d4
> --- /dev/null
> +++ b/libavfilter/vf_colordetect.h
> @@ -0,0 +1,149 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_VF_COLORDETECT_H
> +#define AVFILTER_VF_COLORDETECT_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +#include <libavutil/macros.h>
> +#include <libavutil/pixfmt.h>
> +
> +typedef struct FFColorDetectDSPContext {
> + /* Returns 1 if an out-of-range value was detected, 0 otherwise */
> + int (*detect_range)(const uint8_t *data, ptrdiff_t stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int mpeg_min, int mpeg_max);
> +
> + /* Returns 1 if the color value exceeds the alpha value, 0 otherwise */
> + int (*detect_alpha)(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k);
> +} FFColorDetectDSPContext;
> +
> +void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth,
> + enum AVColorRange color_range);
> +
> +static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int mpeg_min, int mpeg_max)
> +{
> + while (height--) {
> + for (int x = 0; x < width; x++) {
> + const uint8_t val = data[x];
> + if (val < mpeg_min || val > mpeg_max)
> + return 1;
> + }
> + data += stride;
> + }
> +
> + return 0;
> +}
You could process width as a whole to allow better vectorization.
Assuming you don't process 10000x1 images, it will be faster on average.
Before (clang --march=znver4):
detect_range_8_c: 5264.6 ( 1.00x)
detect_range_8_avx2: 124.5 (42.30x)
detect_range_8_avx512: 121.6 (43.31x)
After (clang --march=znver4):
detect_range_8_c: 211.5 ( 1.00x)
detect_range_8_avx2: 136.4 ( 1.55x)
detect_range_8_avx512: 95.4 ( 2.22x)
static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
ptrdiff_t width, ptrdiff_t height,
- int mpeg_min, int mpeg_max)
+ uint8_t mpeg_min, uint8_t mpeg_max)
{
while (height--) {
+ bool out_of_range = false;
for (int x = 0; x < width; x++) {
const uint8_t val = data[x];
- if (val < mpeg_min || val > mpeg_max)
- return 1;
+ out_of_range |= val < mpeg_min || val > mpeg_max;
}
+ if (out_of_range)
+ return 1;
data += stride;
}
- Kacper
> +
> +static inline int ff_detect_range16_c(const uint8_t *data, ptrdiff_t stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int mpeg_min, int mpeg_max)
> +{
> + while (height--) {
> + const uint16_t *data16 = (const uint16_t *) data;
> + for (int x = 0; x < width; x++) {
> + const uint16_t val = data16[x];
> + if (val < mpeg_min || val > mpeg_max)
> + return 1;
> + }
> + data += stride;
> + }
> +
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha_full_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + for (int x = 0; x < width; x++) {
> + if (color[x] > alpha[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha_limited_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + for (int x = 0; x < width; x++) {
> + if (p * color[x] - k > q * alpha[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha16_full_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + const uint16_t *color16 = (const uint16_t *) color;
> + const uint16_t *alpha16 = (const uint16_t *) alpha;
> + for (int x = 0; x < width; x++) {
> + if (color16[x] > alpha16[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +static inline int
> +ff_detect_alpha16_limited_c(const uint8_t *color, ptrdiff_t color_stride,
> + const uint8_t *alpha, ptrdiff_t alpha_stride,
> + ptrdiff_t width, ptrdiff_t height,
> + int p, int q, int k)
> +{
> + while (height--) {
> + const uint16_t *color16 = (const uint16_t *) color;
> + const uint16_t *alpha16 = (const uint16_t *) alpha;
> + for (int x = 0; x < width; x++) {
> + if ((int64_t) p * color16[x] - k > (int64_t) q * alpha16[x])
> + return 1;
> + }
> + color += color_stride;
> + alpha += alpha_stride;
> + }
> + return 0;
> +}
> +
> +#endif /* AVFILTER_VF_COLORDETECT_H */
> --
> 2.50.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 12:38 ` Kacper Michajlow
@ 2025-07-18 12:46 ` Niklas Haas
2025-07-18 14:51 ` Kacper Michajlow
0 siblings, 1 reply; 7+ messages in thread
From: Niklas Haas @ 2025-07-18 12:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 18 Jul 2025 14:38:04 +0200 Kacper Michajlow <kasper93@gmail.com> wrote:
> > +static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
> > + ptrdiff_t width, ptrdiff_t height,
> > + int mpeg_min, int mpeg_max)
> > +{
> > + while (height--) {
> > + for (int x = 0; x < width; x++) {
> > + const uint8_t val = data[x];
> > + if (val < mpeg_min || val > mpeg_max)
> > + return 1;
> > + }
> > + data += stride;
> > + }
> > +
> > + return 0;
> > +}
>
> You could process width as a whole to allow better vectorization.
> Assuming you don't process 10000x1 images, it will be faster on average.
That's what I had in v1 of my patch, but it is significantly (50%) slower
on GCC, which prefers the version I have written above.
There is the not insignificant point that this C routine is also being used
to handle remaining elements that don't fit into a multiple of the SIMD
kernel, for which the scalar code is actually preferred.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] avfilter/vf_colordetect: add new color range detection filter
2025-07-18 12:46 ` Niklas Haas
@ 2025-07-18 14:51 ` Kacper Michajlow
0 siblings, 0 replies; 7+ messages in thread
From: Kacper Michajlow @ 2025-07-18 14:51 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 18 Jul 2025 at 14:47, Niklas Haas <ffmpeg@haasn.xyz> wrote:
>
> On Fri, 18 Jul 2025 14:38:04 +0200 Kacper Michajlow <kasper93@gmail.com> wrote:
> > > +static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride,
> > > + ptrdiff_t width, ptrdiff_t height,
> > > + int mpeg_min, int mpeg_max)
> > > +{
> > > + while (height--) {
> > > + for (int x = 0; x < width; x++) {
> > > + const uint8_t val = data[x];
> > > + if (val < mpeg_min || val > mpeg_max)
> > > + return 1;
> > > + }
> > > + data += stride;
> > > + }
> > > +
> > > + return 0;
> > > +}
> >
> > You could process width as a whole to allow better vectorization.
> > Assuming you don't process 10000x1 images, it will be faster on average.
>
> That's what I had in v1 of my patch, but it is significantly (50%) slower
> on GCC, which prefers the version I have written above.
>
> There is the not insignificant point that this C routine is also being used
> to handle remaining elements that don't fit into a multiple of the SIMD
> kernel, for which the scalar code is actually preferred.
Interesting. It's my fault, I didn't check. GCC really doesn't like
bool/int there.
If that function is important you could try:
{
+ uint8_t min = mpeg_min, max = mpeg_max;
while (height--) {
+ uint8_t out_of_range = 0;
for (int x = 0; x < width; x++) {
const uint8_t val = data[x];
- if (val < mpeg_min || val > mpeg_max)
- return 1;
+ out_of_range |= val < min || val > max;
}
+ if (out_of_range)
+ return 1;
data += stride;
}
Side note: if you change the function prototype to take
`uint8_t mpeg_min, uint8_t mpeg_max` directly, clang goes down to
267.6 ( 1.00x). Unless it's because of UB, lol.
So the GCC scalar version is a bit slower in this case, but I think there is
value in it, because it scales very nicely with -ftree-vectorize, even with
an SSE2 target. Hopefully we will enable -ftree-vectorize by default soon;
there is a pending patch for that.
before (gcc -fno-tree-vectorize)
detect_range_8_c: 5537.4 ( 1.00x)
detect_range_8_avx2: 149.7 (36.98x)
detect_range_8_avx512: 111.2 (49.80x)
after (gcc -fno-tree-vectorize)
detect_range_8_c: 7709.0 ( 1.00x)
detect_range_8_avx2: 137.6 (56.02x)
detect_range_8_avx512: 104.2 (73.97x)
after (gcc -ftree-vectorize --march=generic)
detect_range_8_c: 657.0 ( 1.00x)
detect_range_8_avx2: 161.7 ( 4.06x)
detect_range_8_avx512: 116.5 ( 5.64x)
after (gcc -ftree-vectorize --march=znver4)
detect_range_8_c: 285.6 ( 1.00x)
detect_range_8_avx2: 256.0 ( 1.12x)
detect_range_8_avx512: 107.6 ( 2.65x)
after (clang --march=generic)
detect_range_8_c: 1769.0 ( 1.00x)
detect_range_8_avx2: 231.8 ( 7.63x)
detect_range_8_avx512: 96.6 (18.32x)
after (clang --march=znver4)
detect_range_8_c: 952.9 ( 1.00x)
detect_range_8_avx2: 137.7 ( 6.92x)
detect_range_8_avx512: 95.9 ( 9.94x)
- Kacper
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread