* [FFmpeg-devel] [PATCH] Add AVX2 and AVX512 versions of vf_idet (PR #20528)
@ 2025-09-15 16:13 Niklas Haas via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: Niklas Haas via ffmpeg-devel @ 2025-09-15 16:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
PR #20528 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20528
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20528.patch
Plus checkasm tests for both
From a406d6e309380e0413aa1b0c6ea98cfbe5912538 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:56:51 +0200
Subject: [PATCH 1/6] avfilter/vf_idet: expose DSP init function internally
For checkasm.
---
libavfilter/vf_idet.c | 22 +++++++++++-----------
libavfilter/vf_idet.h | 2 ++
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index 445ecc203a..141911e636 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -274,12 +274,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
if (!idet->csp)
idet->csp = av_pix_fmt_desc_get(link->format);
- if (idet->csp->comp[0].depth > 8){
- idet->filter_line = (ff_idet_filter_func)ff_idet_filter_line_c_16bit;
-#if ARCH_X86
- ff_idet_init_x86(idet, 1);
-#endif
- }
+ if (idet->csp->comp[0].depth > 8)
+ ff_idet_dsp_init(idet, 1);
if (idet->analyze_interlaced_flag) {
if (idet->cur->flags & AV_FRAME_FLAG_INTERLACED) {
@@ -395,6 +391,14 @@ static const enum AVPixelFormat pix_fmts[] = {
AV_PIX_FMT_NONE
};
+void ff_idet_dsp_init(IDETContext *idet, int for_16b) /* nonzero for_16b selects the 16-bit line filter */
+{
+ idet->filter_line = for_16b ? (ff_idet_filter_func)ff_idet_filter_line_c_16bit : ff_idet_filter_line_c;
+#if ARCH_X86
+ ff_idet_init_x86(idet, for_16b); /* may replace the C function chosen above with an x86 SIMD version */
+#endif
+}
+
static av_cold int init(AVFilterContext *ctx)
{
IDETContext *idet = ctx->priv;
@@ -408,11 +412,7 @@ static av_cold int init(AVFilterContext *ctx)
else
idet->decay_coefficient = PRECISION;
- idet->filter_line = ff_idet_filter_line_c;
-
-#if ARCH_X86
- ff_idet_init_x86(idet, 0);
-#endif
+ ff_idet_dsp_init(idet, 0);
return 0;
}
diff --git a/libavfilter/vf_idet.h b/libavfilter/vf_idet.h
index afd8947055..9351c70cf9 100644
--- a/libavfilter/vf_idet.h
+++ b/libavfilter/vf_idet.h
@@ -71,6 +71,8 @@ typedef struct IDETContext {
int eof;
} IDETContext;
+void ff_idet_dsp_init(IDETContext *idet, int for_16b);
+
void ff_idet_init_x86(IDETContext *idet, int for_16b);
/* main fall-back for left-over */
--
2.49.1
From 755034e92f34860541006e7318a28d8f63bb7c91 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 18:02:16 +0200
Subject: [PATCH 2/6] avfilter/vf_idet: correctly reset pixdesc on format
change
---
libavfilter/vf_idet.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index 141911e636..c0c20e9b15 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -259,6 +259,7 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
av_frame_free(&idet->cur );
av_frame_free(&idet->next);
+ idet->csp = NULL;
}
idet->prev = idet->cur;
--
2.49.1
From 8972883a05a8c540163e55c4aa0f75ecd6b1c801 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 18:02:56 +0200
Subject: [PATCH 3/6] avfilter/vf_idet: correctly update dsp function on format
change
Currently, this never updates on change from high bit depth to low bit depth.
---
libavfilter/vf_idet.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index c0c20e9b15..ec2ef83129 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -273,10 +273,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
if (!idet->prev)
return 0;
- if (!idet->csp)
+ if (!idet->csp) {
idet->csp = av_pix_fmt_desc_get(link->format);
- if (idet->csp->comp[0].depth > 8)
- ff_idet_dsp_init(idet, 1);
+ ff_idet_dsp_init(idet, idet->csp->comp[0].depth > 8);
+ }
if (idet->analyze_interlaced_flag) {
if (idet->cur->flags & AV_FRAME_FLAG_INTERLACED) {
--
2.49.1
From c8119f9980cb5fdc78192a76f494ba4db0742214 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 18:10:43 +0200
Subject: [PATCH 4/6] tests/checkasm: add vf_idet checkasm
---
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 ++
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vf_idet.c | 65 +++++++++++++++++++++++++++++++++++++++
4 files changed, 70 insertions(+)
create mode 100644 tests/checkasm/vf_idet.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 9f1dd57fa6..0a54adc96a 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -62,6 +62,7 @@ AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o
+AVFILTEROBJS-$(CONFIG_IDET_FILTER) += vf_idet.o
AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o
AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o
AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 5312d02909..ad4d9b53b6 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -290,6 +290,9 @@ static const struct {
#if CONFIG_HFLIP_FILTER
{ "vf_hflip", checkasm_check_vf_hflip },
#endif
+ #if CONFIG_IDET_FILTER
+ { "vf_idet", checkasm_check_idet },
+ #endif
#if CONFIG_NLMEANS_FILTER
{ "vf_nlmeans", checkasm_check_nlmeans },
#endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index bb6b413aba..1684c427d6 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -112,6 +112,7 @@ void checkasm_check_hevc_pel(void);
void checkasm_check_hevc_sao(void);
void checkasm_check_huffyuvdsp(void);
void checkasm_check_idctdsp(void);
+void checkasm_check_idet(void);
void checkasm_check_jpeg2000dsp(void);
void checkasm_check_llauddsp(void);
void checkasm_check_lls(void);
diff --git a/tests/checkasm/vf_idet.c b/tests/checkasm/vf_idet.c
new file mode 100644
index 0000000000..5fffcd14ee
--- /dev/null
+++ b/tests/checkasm/vf_idet.c
@@ -0,0 +1,65 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "checkasm.h"
+
+#include "libavfilter/vf_idet.h"
+#include "libavutil/mem_internal.h"
+
+#define WIDTH 512
+
+static void check_idet(int depth)
+{
+    IDETContext idet;
+    /* 2 bytes per sample: the >8-bit kernels index the lines as 16-bit words */
+    LOCAL_ALIGNED_32(uint8_t, in0, [2 * WIDTH]);
+    LOCAL_ALIGNED_32(uint8_t, in1, [2 * WIDTH]);
+    LOCAL_ALIGNED_32(uint8_t, in2, [2 * WIDTH]);
+
+    declare_func(int, const uint8_t *a, const uint8_t *b,
+                 const uint8_t *c, int w);
+
+    ff_idet_dsp_init(&idet, depth > 8);
+
+    for (int x = 0; x < 2 * WIDTH; x++) {
+        in0[x] = rnd() & 0xFF;
+        in1[x] = rnd() & 0xFF;
+        in2[x] = rnd() & 0xFF;
+    }
+
+    if (check_func(idet.filter_line, "idet%d", depth)) {
+        /* WIDTH - 8 is not a multiple of 16: exercises tail handling as well */
+        int res_ref = call_ref(in0, in1, in2, WIDTH - 8);
+        int res_new = call_new(in0, in1, in2, WIDTH - 8);
+        if (res_ref != res_new) {
+            fprintf(stderr, "idet%d: result mismatch: %d != %d\n",
+                    depth, res_ref, res_new);
+            fail();
+        }
+        bench_new(in0, in1, in2, WIDTH);
+    }
+}
+
+void checkasm_check_idet(void)
+{
+ check_idet(8); /* filter_line as initialised for depth <= 8 */
+ report("idet8");
+
+ check_idet(16); /* filter_line as initialised for depth > 8 */
+ report("idet16");
+}
--
2.49.1
From 842bbc54a489d4a2652ab57ccfd4bb351d019d8c Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:20:17 +0200
Subject: [PATCH 5/6] avfilter/x86/vf_idet.asm: generalize 8-bit macro
This is mostly compatible with AVX as well, so turn it into a macro.
---
libavfilter/x86/vf_idet.asm | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm
index 7bc8e7d2c4..63d9f4533d 100644
--- a/libavfilter/x86/vf_idet.asm
+++ b/libavfilter/x86/vf_idet.asm
@@ -78,7 +78,7 @@ IDET_FILTER_LINE_16BIT 8
;******************************************************************************
; SSE2 8-bit implementation that does 16-bytes at a time:
-INIT_XMM sse2
+%macro IDET_FILTER_LINE 0
cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
xor indexq, indexq
pxor m0, m0
@@ -92,7 +92,7 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
psubusb m5, m2, m3 ; ba
movu m3, [cq + indexq*1] ; C
- add indexq, 0x10
+ add indexq, mmsize
psubusb m4, m2 ; ab
CMP indexd, widthd
@@ -110,3 +110,7 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
paddq m0, m1
movd eax, m0
RET
+%endmacro
+
+INIT_XMM sse2
+IDET_FILTER_LINE
--
2.49.1
From 2b408d44c46a2c908f9d074218dace493c630a53 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:47:39 +0200
Subject: [PATCH 6/6] avfilter/x86/vf_idet.asm: add AVX2 and AVX512
implementations
The only thing that changes slightly is the horizontal sum at the end.
---
libavfilter/x86/vf_idet.asm | 36 ++++++++++++++++++++++++++++------
libavfilter/x86/vf_idet_init.c | 12 ++++++++++++
2 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm
index 63d9f4533d..12d65000ab 100644
--- a/libavfilter/x86/vf_idet.asm
+++ b/libavfilter/x86/vf_idet.asm
@@ -39,7 +39,7 @@ SECTION .text
paddd %1, %2
%endmacro
-%macro IDET_FILTER_LINE_16BIT 1 ; %1=increment (4 or 8 words)
+%macro IDET_FILTER_LINE_16BIT 0
cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index
xor indexq, indexq
%define m_zero m1
@@ -54,7 +54,7 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index
psubusw m5, m2, m3 ; ba
movu m4, [cq + indexq * 2] ; C
- add indexq, %1
+ add indexq, mmsize >> 1
psubusw m3, m2 ; ab
CMP indexd, widthd
@@ -67,13 +67,23 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index
paddd m_sum, m5
jl .loop_16bit
+%if mmsize > 32
+ vextracti64x4 ym1, m0, 1
+ paddq ym0, ym1
+%endif
HADDD m_sum, m2
movd eax, m_sum
RET
%endmacro
INIT_XMM sse2
-IDET_FILTER_LINE_16BIT 8
+IDET_FILTER_LINE_16BIT
+
+INIT_YMM avx2 ; mmsize >> 1 = 16 words per iteration, as in FUNC_MAIN_DECL_16bit(avx2, 16)
+IDET_FILTER_LINE_16BIT
+
+INIT_ZMM avx512icl ; mmsize >> 1 = 32 words per iteration, as in FUNC_MAIN_DECL_16bit(avx512icl, 32)
+IDET_FILTER_LINE_16BIT
;******************************************************************************
; SSE2 8-bit implementation that does 16-bytes at a time:
@@ -106,11 +116,25 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
jl .sse2_loop
paddq m0, m1
- movhlps m1, m0
- paddq m0, m1
- movd eax, m0
+%if mmsize > 32
+ vextracti64x4 ym1, m0, 1
+ paddq ym0, ym1
+%endif
+%if mmsize > 16
+ vextracti128 xm1, ym0, 1
+ paddq xm0, xm1
+%endif
+ movhlps xm1, xm0
+ paddq xm0, xm1
+ movd eax, xm0
RET
%endmacro
INIT_XMM sse2
IDET_FILTER_LINE
+
+INIT_YMM avx2
+IDET_FILTER_LINE
+
+INIT_ZMM avx512icl
+IDET_FILTER_LINE
diff --git a/libavfilter/x86/vf_idet_init.c b/libavfilter/x86/vf_idet_init.c
index acb4e2a778..5836fd3ad4 100644
--- a/libavfilter/x86/vf_idet_init.c
+++ b/libavfilter/x86/vf_idet_init.c
@@ -59,6 +59,12 @@ static int idet_filter_line_16bit_##KIND(const uint16_t *a, const uint16_t *b, \
FUNC_MAIN_DECL(sse2, 16)
FUNC_MAIN_DECL_16bit(sse2, 8)
+FUNC_MAIN_DECL(avx2, 32)
+FUNC_MAIN_DECL_16bit(avx2, 16)
+
+FUNC_MAIN_DECL(avx512icl, 64)
+FUNC_MAIN_DECL_16bit(avx512icl, 32)
+
#endif
av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
{
@@ -68,5 +74,11 @@ av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
if (EXTERNAL_SSE2(cpu_flags)) {
idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_sse2 : idet_filter_line_sse2;
}
+ if (EXTERNAL_AVX2(cpu_flags)) {
+ idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_avx2 : idet_filter_line_avx2;
+ }
+ if (EXTERNAL_AVX512ICL(cpu_flags)) {
+ idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_avx512icl : idet_filter_line_avx512icl;
+ }
#endif // HAVE_X86ASM
}
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-09-15 16:13 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-15 16:13 [FFmpeg-devel] [PATCH] Add AVX2 and AVX512 versions of vf_idet (PR #20528) Niklas Haas via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git