From: Niklas Haas via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: Niklas Haas <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] Add AVX2 and AVX512 versions of vf_idet (PR #20528) Date: Mon, 15 Sep 2025 16:13:29 -0000 Message-ID: <175795280994.25.10221052007894125506@463a07221176> (raw) PR #20528 opened by Niklas Haas (haasn) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20528 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20528.patch Plus checkasm tests for both >From a406d6e309380e0413aa1b0c6ea98cfbe5912538 Mon Sep 17 00:00:00 2001 From: Niklas Haas <git@haasn.dev> Date: Mon, 15 Sep 2025 17:56:51 +0200 Subject: [PATCH 1/6] avfilter/vf_idet: expose DSP init function internally For checkasm. --- libavfilter/vf_idet.c | 22 +++++++++++----------- libavfilter/vf_idet.h | 2 ++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c index 445ecc203a..141911e636 100644 --- a/libavfilter/vf_idet.c +++ b/libavfilter/vf_idet.c @@ -274,12 +274,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref) if (!idet->csp) idet->csp = av_pix_fmt_desc_get(link->format); - if (idet->csp->comp[0].depth > 8){ - idet->filter_line = (ff_idet_filter_func)ff_idet_filter_line_c_16bit; -#if ARCH_X86 - ff_idet_init_x86(idet, 1); -#endif - } + if (idet->csp->comp[0].depth > 8) + ff_idet_dsp_init(idet, 1); if (idet->analyze_interlaced_flag) { if (idet->cur->flags & AV_FRAME_FLAG_INTERLACED) { @@ -395,6 +391,14 @@ static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_NONE }; +void ff_idet_dsp_init(IDETContext *idet, int for_16b) +{ + idet->filter_line = for_16b ? 
(ff_idet_filter_func)ff_idet_filter_line_c_16bit : ff_idet_filter_line_c; +#if ARCH_X86 + ff_idet_init_x86(idet, for_16b); +#endif +} + static av_cold int init(AVFilterContext *ctx) { IDETContext *idet = ctx->priv; @@ -408,11 +412,7 @@ static av_cold int init(AVFilterContext *ctx) else idet->decay_coefficient = PRECISION; - idet->filter_line = ff_idet_filter_line_c; - -#if ARCH_X86 - ff_idet_init_x86(idet, 0); -#endif + ff_idet_dsp_init(idet, 0); return 0; } diff --git a/libavfilter/vf_idet.h b/libavfilter/vf_idet.h index afd8947055..9351c70cf9 100644 --- a/libavfilter/vf_idet.h +++ b/libavfilter/vf_idet.h @@ -71,6 +71,8 @@ typedef struct IDETContext { int eof; } IDETContext; +void ff_idet_dsp_init(IDETContext *idet, int for_16b); + void ff_idet_init_x86(IDETContext *idet, int for_16b); /* main fall-back for left-over */ -- 2.49.1 >From 755034e92f34860541006e7318a28d8f63bb7c91 Mon Sep 17 00:00:00 2001 From: Niklas Haas <git@haasn.dev> Date: Mon, 15 Sep 2025 18:02:16 +0200 Subject: [PATCH 2/6] avfilter/vf_idet: correctly reset pixdesc on format change --- libavfilter/vf_idet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c index 141911e636..c0c20e9b15 100644 --- a/libavfilter/vf_idet.c +++ b/libavfilter/vf_idet.c @@ -259,6 +259,7 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref) av_frame_free(&idet->cur ); av_frame_free(&idet->next); + idet->csp = NULL; } idet->prev = idet->cur; -- 2.49.1 >From 8972883a05a8c540163e55c4aa0f75ecd6b1c801 Mon Sep 17 00:00:00 2001 From: Niklas Haas <git@haasn.dev> Date: Mon, 15 Sep 2025 18:02:56 +0200 Subject: [PATCH 3/6] avfilter/vf_idet: correctly update dsp function on format change Currently, this never updates on change from high bit depth to low bit depth. 
--- libavfilter/vf_idet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c index c0c20e9b15..ec2ef83129 100644 --- a/libavfilter/vf_idet.c +++ b/libavfilter/vf_idet.c @@ -273,10 +273,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref) if (!idet->prev) return 0; - if (!idet->csp) + if (!idet->csp) { idet->csp = av_pix_fmt_desc_get(link->format); - if (idet->csp->comp[0].depth > 8) - ff_idet_dsp_init(idet, 1); + ff_idet_dsp_init(idet, idet->csp->comp[0].depth > 8); + } if (idet->analyze_interlaced_flag) { if (idet->cur->flags & AV_FRAME_FLAG_INTERLACED) { -- 2.49.1 >From c8119f9980cb5fdc78192a76f494ba4db0742214 Mon Sep 17 00:00:00 2001 From: Niklas Haas <git@haasn.dev> Date: Mon, 15 Sep 2025 18:10:43 +0200 Subject: [PATCH 4/6] tests/checkasm: add vf_idet checkasm --- tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 3 ++ tests/checkasm/checkasm.h | 1 + tests/checkasm/vf_idet.c | 65 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 tests/checkasm/vf_idet.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 9f1dd57fa6..0a54adc96a 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -62,6 +62,7 @@ AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o AVFILTEROBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o +AVFILTEROBJS-$(CONFIG_IDET_FILTER) += vf_idet.o AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 5312d02909..ad4d9b53b6 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -290,6 +290,9 @@ static const struct { #if CONFIG_HFLIP_FILTER { "vf_hflip", checkasm_check_vf_hflip }, #endif + #if 
CONFIG_IDET_FILTER + { "vf_idet", checkasm_check_idet }, + #endif #if CONFIG_NLMEANS_FILTER { "vf_nlmeans", checkasm_check_nlmeans }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index bb6b413aba..1684c427d6 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -112,6 +112,7 @@ void checkasm_check_hevc_pel(void); void checkasm_check_hevc_sao(void); void checkasm_check_huffyuvdsp(void); void checkasm_check_idctdsp(void); +void checkasm_check_idet(void); void checkasm_check_jpeg2000dsp(void); void checkasm_check_llauddsp(void); void checkasm_check_lls(void); diff --git a/tests/checkasm/vf_idet.c b/tests/checkasm/vf_idet.c new file mode 100644 index 0000000000..5fffcd14ee --- /dev/null +++ b/tests/checkasm/vf_idet.c @@ -0,0 +1,65 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */
+
+#include "checkasm.h"
+
+#include "libavfilter/vf_idet.h"
+#include "libavutil/mem_internal.h"
+
+#define WIDTH 512 /* samples per line; buffers hold WIDTH * 2 bytes for 16-bit */
+/* Compare call_ref() and call_new() results of idet.filter_line for one depth. */
+static void check_idet(int depth)
+{
+    IDETContext idet;
+
+    LOCAL_ALIGNED_32(uint8_t, in0, [WIDTH * 2]);
+    LOCAL_ALIGNED_32(uint8_t, in1, [WIDTH * 2]);
+    LOCAL_ALIGNED_32(uint8_t, in2, [WIDTH * 2]);
+
+    declare_func(int, const uint8_t *a, const uint8_t *b,
+                 const uint8_t *c, int w);
+
+    ff_idet_dsp_init(&idet, depth > 8);
+
+    for (int x = 0; x < WIDTH * 2; x++) {
+        in0[x] = rnd() & 0xFF;
+        in1[x] = rnd() & 0xFF;
+        in2[x] = rnd() & 0xFF;
+    }
+
+    if (check_func(idet.filter_line, "idet%d", depth)) {
+        /* Ensure odd tail is handled correctly */
+        int res_ref = call_ref(in0, in1, in2, WIDTH - 8);
+        int res_new = call_new(in0, in1, in2, WIDTH - 8);
+        if (res_ref != res_new) {
+            fprintf(stderr, "idet%d: result mismatch: %d != %d\n",
+                    depth, res_ref, res_new);
+            fail();
+        }
+        bench_new(in0, in1, in2, WIDTH);
+    }
+}
+
+void checkasm_check_idet(void)
+{
+    check_idet(8);
+    report("idet8");
+
+    check_idet(16);
+    report("idet16");
+}
-- 
2.49.1

>From 842bbc54a489d4a2652ab57ccfd4bb351d019d8c Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 15 Sep 2025 17:20:17 +0200
Subject: [PATCH 5/6] avfilter/x86/vf_idet.asm: generalize 8-bit macro

This is mostly compatible with AVX as well, so turn it into a macro.
--- libavfilter/x86/vf_idet.asm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm index 7bc8e7d2c4..63d9f4533d 100644 --- a/libavfilter/x86/vf_idet.asm +++ b/libavfilter/x86/vf_idet.asm @@ -78,7 +78,7 @@ IDET_FILTER_LINE_16BIT 8 ;****************************************************************************** ; SSE2 8-bit implementation that does 16-bytes at a time: -INIT_XMM sse2 +%macro IDET_FILTER_LINE 0 cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total xor indexq, indexq pxor m0, m0 @@ -92,7 +92,7 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total psubusb m5, m2, m3 ; ba movu m3, [cq + indexq*1] ; C - add indexq, 0x10 + add indexq, mmsize psubusb m4, m2 ; ab CMP indexd, widthd @@ -110,3 +110,7 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total paddq m0, m1 movd eax, m0 RET +%endmacro + +INIT_XMM sse2 +IDET_FILTER_LINE -- 2.49.1 >From 2b408d44c46a2c908f9d074218dace493c630a53 Mon Sep 17 00:00:00 2001 From: Niklas Haas <git@haasn.dev> Date: Mon, 15 Sep 2025 17:47:39 +0200 Subject: [PATCH 6/6] avfilter/x86/vf_idet.asm: add AVX2 and AVX512 implementations The only thing that changes slightly is the horizontal sum at the end. 
--- libavfilter/x86/vf_idet.asm | 36 ++++++++++++++++++++++++++++------ libavfilter/x86/vf_idet_init.c | 12 ++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm index 63d9f4533d..12d65000ab 100644 --- a/libavfilter/x86/vf_idet.asm +++ b/libavfilter/x86/vf_idet.asm @@ -39,7 +39,7 @@ SECTION .text paddd %1, %2 %endmacro -%macro IDET_FILTER_LINE_16BIT 1 ; %1=increment (4 or 8 words) +%macro IDET_FILTER_LINE_16BIT 0 cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index xor indexq, indexq %define m_zero m1 @@ -54,7 +54,7 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index psubusw m5, m2, m3 ; ba movu m4, [cq + indexq * 2] ; C - add indexq, %1 + add indexq, mmsize >> 1 psubusw m3, m2 ; ab CMP indexd, widthd @@ -67,13 +67,23 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index paddd m_sum, m5 jl .loop_16bit +%if mmsize > 32 + vextracti64x4 ym1, m0, 1 + paddq ym0, ym1 +%endif HADDD m_sum, m2 movd eax, m_sum RET %endmacro INIT_XMM sse2 -IDET_FILTER_LINE_16BIT 8 +IDET_FILTER_LINE_16BIT + +INIT_XMM avx2 +IDET_FILTER_LINE_16BIT + +INIT_XMM avx512icl +IDET_FILTER_LINE_16BIT ;****************************************************************************** ; SSE2 8-bit implementation that does 16-bytes at a time: @@ -106,11 +116,25 @@ cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total jl .sse2_loop paddq m0, m1 - movhlps m1, m0 - paddq m0, m1 - movd eax, m0 +%if mmsize > 32 + vextracti64x4 ym1, m0, 1 + paddq ym0, ym1 +%endif +%if mmsize > 16 + vextracti128 xm1, ym0, 1 + paddq xm0, xm1 +%endif + movhlps xm1, xm0 + paddq xm0, xm1 + movd eax, xm0 RET %endmacro INIT_XMM sse2 IDET_FILTER_LINE + +INIT_YMM avx2 +IDET_FILTER_LINE + +INIT_ZMM avx512icl +IDET_FILTER_LINE diff --git a/libavfilter/x86/vf_idet_init.c b/libavfilter/x86/vf_idet_init.c index acb4e2a778..5836fd3ad4 100644 --- a/libavfilter/x86/vf_idet_init.c +++ b/libavfilter/x86/vf_idet_init.c @@ -59,6 
+59,12 @@ static int idet_filter_line_16bit_##KIND(const uint16_t *a, const uint16_t *b, \ FUNC_MAIN_DECL(sse2, 16) FUNC_MAIN_DECL_16bit(sse2, 8) +FUNC_MAIN_DECL(avx2, 32) +FUNC_MAIN_DECL_16bit(avx2, 16) + +FUNC_MAIN_DECL(avx512icl, 64) +FUNC_MAIN_DECL_16bit(avx512icl, 32) + #endif av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b) { @@ -68,5 +74,11 @@ av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b) if (EXTERNAL_SSE2(cpu_flags)) { idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_sse2 : idet_filter_line_sse2; } + if (EXTERNAL_AVX2(cpu_flags)) { + idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_avx2 : idet_filter_line_avx2; + } + if (EXTERNAL_AVX512ICL(cpu_flags)) { + idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_avx512icl : idet_filter_line_avx512icl; + } #endif // HAVE_X86ASM } -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-09-15 16:13 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=175795280994.25.10221052007894125506@463a07221176 \ --to=ffmpeg-devel@ffmpeg.org \ --cc=code@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git