[FFmpeg-devel] [PATCH] Add AVX2 and AVX512 versions of vf_idet (PR #20528)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

From: Niklas Haas via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Niklas Haas <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] Add AVX2 and AVX512 versions of vf_idet (PR #20528)
Date: Mon, 15 Sep 2025 16:13:29 -0000
Message-ID: <175795280994.25.10221052007894125506@463a07221176> (raw)

PR #20528 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20528
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20528.patch

Plus checkasm tests for both


>From a406d6e309380e0413aa1b0c6ea98cfbe5912538 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:56:51 +0200
Subject: [PATCH 1/6] avfilter/vf_idet: expose DSP init function internally

For checkasm.
---
 libavfilter/vf_idet.c | 22 +++++++++++-----------
 libavfilter/vf_idet.h |  2 ++
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index 445ecc203a..141911e636 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -274,12 +274,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
 
     if (!idet->csp)
         idet->csp = av_pix_fmt_desc_get(link->format);
-    if (idet->csp->comp[0].depth > 8){
-        idet->filter_line = (ff_idet_filter_func)ff_idet_filter_line_c_16bit;
-#if ARCH_X86
-        ff_idet_init_x86(idet, 1);
-#endif
-    }
+    if (idet->csp->comp[0].depth > 8)
+        ff_idet_dsp_init(idet, 1);
 
     if (idet->analyze_interlaced_flag) {
         if (idet->cur->flags & AV_FRAME_FLAG_INTERLACED) {
@@ -395,6 +391,14 @@ static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_NONE
 };
 
+void ff_idet_dsp_init(IDETContext *idet, int for_16b)
+{   /* Select the C line filter; the 16-bit variant when for_16b is non-zero. */
+    idet->filter_line = for_16b ? (ff_idet_filter_func)ff_idet_filter_line_c_16bit : ff_idet_filter_line_c;
+#if ARCH_X86
+    ff_idet_init_x86(idet, for_16b); /* may replace filter_line depending on CPU flags */
+#endif
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     IDETContext *idet = ctx->priv;
@@ -408,11 +412,7 @@ static av_cold int init(AVFilterContext *ctx)
     else
         idet->decay_coefficient = PRECISION;
 
-    idet->filter_line = ff_idet_filter_line_c;
-
-#if ARCH_X86
-    ff_idet_init_x86(idet, 0);
-#endif
+    ff_idet_dsp_init(idet, 0);
 
     return 0;
 }
diff --git a/libavfilter/vf_idet.h b/libavfilter/vf_idet.h
index afd8947055..9351c70cf9 100644
--- a/libavfilter/vf_idet.h
+++ b/libavfilter/vf_idet.h
@@ -71,6 +71,8 @@ typedef struct IDETContext {
     int eof;
 } IDETContext;
 
+void ff_idet_dsp_init(IDETContext *idet, int for_16b);
+
 void ff_idet_init_x86(IDETContext *idet, int for_16b);
 
 /* main fall-back for left-over */
-- 
2.49.1


>From 755034e92f34860541006e7318a28d8f63bb7c91 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 18:02:16 +0200
Subject: [PATCH 2/6] avfilter/vf_idet: correctly reset pixdesc on format
 change

---
 libavfilter/vf_idet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index 141911e636..c0c20e9b15 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -259,6 +259,7 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
 
         av_frame_free(&idet->cur );
         av_frame_free(&idet->next);
+        idet->csp = NULL;
     }
 
     idet->prev = idet->cur;
-- 
2.49.1


>From 8972883a05a8c540163e55c4aa0f75ecd6b1c801 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 18:02:56 +0200
Subject: [PATCH 3/6] avfilter/vf_idet: correctly update dsp function on format
 change

Currently, this never updates on change from high bit depth to low bit depth.
---
 libavfilter/vf_idet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index c0c20e9b15..ec2ef83129 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -273,10 +273,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
     if (!idet->prev)
         return 0;
 
-    if (!idet->csp)
+    if (!idet->csp) {
         idet->csp = av_pix_fmt_desc_get(link->format);
-    if (idet->csp->comp[0].depth > 8)
-        ff_idet_dsp_init(idet, 1);
+        ff_idet_dsp_init(idet, idet->csp->comp[0].depth > 8);
+    }
 
     if (idet->analyze_interlaced_flag) {
         if (idet->cur->flags & AV_FRAME_FLAG_INTERLACED) {
-- 
2.49.1


>From c8119f9980cb5fdc78192a76f494ba4db0742214 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 18:10:43 +0200
Subject: [PATCH 4/6] tests/checkasm: add vf_idet checkasm

---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_idet.c  | 65 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)
 create mode 100644 tests/checkasm/vf_idet.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 9f1dd57fa6..0a54adc96a 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -62,6 +62,7 @@ AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
 AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
 AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)      += vf_hflip.o
+AVFILTEROBJS-$(CONFIG_IDET_FILTER)       += vf_idet.o
 AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o
 AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)    += vf_nlmeans.o
 AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 5312d02909..ad4d9b53b6 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -290,6 +290,9 @@ static const struct {
     #if CONFIG_HFLIP_FILTER
         { "vf_hflip", checkasm_check_vf_hflip },
     #endif
+    #if CONFIG_IDET_FILTER
+        { "vf_idet", checkasm_check_idet },
+    #endif
     #if CONFIG_NLMEANS_FILTER
         { "vf_nlmeans", checkasm_check_nlmeans },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index bb6b413aba..1684c427d6 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -112,6 +112,7 @@ void checkasm_check_hevc_pel(void);
 void checkasm_check_hevc_sao(void);
 void checkasm_check_huffyuvdsp(void);
 void checkasm_check_idctdsp(void);
+void checkasm_check_idet(void);
 void checkasm_check_jpeg2000dsp(void);
 void checkasm_check_llauddsp(void);
 void checkasm_check_lls(void);
diff --git a/tests/checkasm/vf_idet.c b/tests/checkasm/vf_idet.c
new file mode 100644
index 0000000000..5fffcd14ee
--- /dev/null
+++ b/tests/checkasm/vf_idet.c
@@ -0,0 +1,65 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "checkasm.h"
+
+#include "libavfilter/vf_idet.h"
+#include "libavutil/mem_internal.h"
+
+#define WIDTH 512
+
+static void check_idet(int depth)
+{   /* Compare call_ref() and call_new() results of idet.filter_line for one depth. */
+    IDETContext idet;
+    /* w counts samples; the 16-bit kernels read 2 bytes per sample, so size for that. */
+    LOCAL_ALIGNED_32(uint8_t, in0, [2 * WIDTH]);
+    LOCAL_ALIGNED_32(uint8_t, in1, [2 * WIDTH]);
+    LOCAL_ALIGNED_32(uint8_t, in2, [2 * WIDTH]);
+
+    declare_func(int, const uint8_t *a, const uint8_t *b,
+                 const uint8_t *c, int w);
+
+    ff_idet_dsp_init(&idet, depth > 8);
+
+    for (int x = 0; x < 2 * WIDTH; x++) {
+        in0[x] = rnd() & 0xFF;
+        in1[x] = rnd() & 0xFF;
+        in2[x] = rnd() & 0xFF;
+    }
+
+    if (check_func(idet.filter_line, "idet%d", depth)) {
+        /* Not a multiple of the wider SIMD spans, to exercise tail handling */
+        int res_ref = call_ref(in0, in1, in2, WIDTH - 8);
+        int res_new = call_new(in0, in1, in2, WIDTH - 8);
+        if (res_ref != res_new) {
+            fprintf(stderr, "idet%d: result mismatch: %d != %d\n",
+                    depth, res_ref, res_new);
+            fail();
+        }
+        bench_new(in0, in1, in2, WIDTH);
+    }
+}
+
+void checkasm_check_idet(void)
+{   /* Entry point registered under "vf_idet" in the checkasm test table. */
+    check_idet(8);   /* depth <= 8: DSP initialised with for_16b == 0 */
+    report("idet8");
+
+    check_idet(16);  /* depth > 8: DSP initialised with for_16b == 1 */
+    report("idet16");
+}
-- 
2.49.1


>From 842bbc54a489d4a2652ab57ccfd4bb351d019d8c Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:20:17 +0200
Subject: [PATCH 5/6] avfilter/x86/vf_idet.asm: generalize 8-bit macro

This is mostly compatible with AVX as well, so turn it into a macro.
---
 libavfilter/x86/vf_idet.asm | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm
index 7bc8e7d2c4..63d9f4533d 100644
--- a/libavfilter/x86/vf_idet.asm
+++ b/libavfilter/x86/vf_idet.asm
@@ -78,7 +78,7 @@ IDET_FILTER_LINE_16BIT 8
 ;******************************************************************************
 ; SSE2 8-bit implementation that does 16-bytes at a time:
 
-INIT_XMM sse2
+%macro IDET_FILTER_LINE 0
 cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
     xor       indexq, indexq
     pxor      m0, m0
@@ -92,7 +92,7 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
     psubusb   m5, m2, m3           ; ba
 
     movu      m3, [cq + indexq*1]  ; C
-    add       indexq, 0x10
+    add       indexq, mmsize
     psubusb   m4, m2               ; ab
     CMP       indexd, widthd
 
@@ -110,3 +110,7 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
     paddq     m0, m1
     movd      eax, m0
     RET
+%endmacro
+
+INIT_XMM sse2
+IDET_FILTER_LINE
-- 
2.49.1


>From 2b408d44c46a2c908f9d074218dace493c630a53 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:47:39 +0200
Subject: [PATCH 6/6] avfilter/x86/vf_idet.asm: add AVX2 and AVX512
 implementations

The only thing that changes slightly is the horizontal sum at the end.
---
 libavfilter/x86/vf_idet.asm    | 36 ++++++++++++++++++++++++++++------
 libavfilter/x86/vf_idet_init.c | 12 ++++++++++++
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm
index 63d9f4533d..12d65000ab 100644
--- a/libavfilter/x86/vf_idet.asm
+++ b/libavfilter/x86/vf_idet.asm
@@ -39,7 +39,7 @@ SECTION .text
   paddd     %1, %2
 %endmacro
 
-%macro IDET_FILTER_LINE_16BIT 1   ; %1=increment (4 or 8 words)
+%macro IDET_FILTER_LINE_16BIT 0
 cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index
     xor       indexq, indexq
 %define m_zero m1
@@ -54,7 +54,7 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index
     psubusw   m5, m2, m3             ; ba
 
     movu      m4, [cq + indexq * 2]  ; C
-    add       indexq, %1
+    add       indexq, mmsize >> 1
     psubusw   m3, m2                 ; ab
     CMP       indexd, widthd
 
@@ -67,13 +67,23 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index
     paddd          m_sum, m5
     jl        .loop_16bit
 
+%if mmsize > 32
+    vextracti64x4 ym1, m0, 1
+    paddq     ym0, ym1
+%endif
     HADDD     m_sum, m2
     movd      eax, m_sum
     RET
 %endmacro
 
 INIT_XMM sse2
-IDET_FILTER_LINE_16BIT 8
+IDET_FILTER_LINE_16BIT
+
+INIT_XMM avx2 ; NOTE(review): the 8-bit variant uses INIT_YMM and the C wrapper assumes a 16-word span - confirm XMM is intended
+IDET_FILTER_LINE_16BIT
+
+INIT_XMM avx512icl ; NOTE(review): with XMM the "%if mmsize > 32" reduction in the macro is never assembled - INIT_ZMM intended?
+IDET_FILTER_LINE_16BIT
 
 ;******************************************************************************
 ; SSE2 8-bit implementation that does 16-bytes at a time:
@@ -106,11 +116,25 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
     jl       .sse2_loop
 
     paddq     m0, m1
-    movhlps   m1, m0
-    paddq     m0, m1
-    movd      eax, m0
+%if mmsize > 32
+    vextracti64x4 ym1, m0, 1
+    paddq     ym0, ym1
+%endif
+%if mmsize > 16
+    vextracti128 xm1, ym0, 1
+    paddq     xm0, xm1
+%endif
+    movhlps   xm1, xm0
+    paddq     xm0, xm1
+    movd      eax, xm0
     RET
 %endmacro
 
 INIT_XMM sse2
 IDET_FILTER_LINE
+
+INIT_YMM avx2
+IDET_FILTER_LINE
+
+INIT_ZMM avx512icl
+IDET_FILTER_LINE
diff --git a/libavfilter/x86/vf_idet_init.c b/libavfilter/x86/vf_idet_init.c
index acb4e2a778..5836fd3ad4 100644
--- a/libavfilter/x86/vf_idet_init.c
+++ b/libavfilter/x86/vf_idet_init.c
@@ -59,6 +59,12 @@ static int idet_filter_line_16bit_##KIND(const uint16_t *a, const uint16_t *b, \
 FUNC_MAIN_DECL(sse2, 16)
 FUNC_MAIN_DECL_16bit(sse2, 8)
 
+FUNC_MAIN_DECL(avx2, 32)
+FUNC_MAIN_DECL_16bit(avx2, 16)
+
+FUNC_MAIN_DECL(avx512icl, 64)
+FUNC_MAIN_DECL_16bit(avx512icl, 32)
+
 #endif
 av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
 {
@@ -68,5 +74,11 @@ av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
     if (EXTERNAL_SSE2(cpu_flags)) {
         idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_sse2 : idet_filter_line_sse2;
     }
+    if (EXTERNAL_AVX2(cpu_flags)) {
+        idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_avx2 : idet_filter_line_avx2;
+    }
+    if (EXTERNAL_AVX512ICL(cpu_flags)) {
+        idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_avx512icl : idet_filter_line_avx512icl;
+    }
 #endif // HAVE_X86ASM
 }
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2025-09-15 16:13 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175795280994.25.10221052007894125506@463a07221176 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git