Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/x86/hevc/idct: Port ff_hevc_idct_4x4_dc_{8,10,12}_mmxext to SSE2 (PR #20788)
@ 2025-10-29 23:36 mkver via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2025-10-29 23:36 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: mkver

PR #20788 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20788
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20788.patch

Practically no change in benchmarks or in code size.

hevc_idct_4x4_dc_8_c:                                    7.8 ( 1.00x)
hevc_idct_4x4_dc_8_mmxext:                               6.9 ( 1.14x)
hevc_idct_4x4_dc_8_sse2:                                 6.8 ( 1.15x)
hevc_idct_4x4_dc_10_c:                                   7.9 ( 1.00x)
hevc_idct_4x4_dc_10_mmxext:                              6.9 ( 1.16x)
hevc_idct_4x4_dc_10_sse2:                                6.8 ( 1.16x)
hevc_idct_4x4_dc_12_c:                                   7.8 ( 1.00x)
hevc_idct_4x4_dc_12_mmxext:                              7.0 ( 1.13x)
hevc_idct_4x4_dc_12_sse2:                                6.8 ( 1.15x)


From e81edabc4d6dfb825e6432da48a0a827b69e6ade Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Thu, 30 Oct 2025 00:07:42 +0100
Subject: [PATCH] avcodec/x86/hevc/idct: Port
 ff_hevc_idct_4x4_dc_{8,10,12}_mmxext to SSE2

Practically no change in benchmarks or in code size.

hevc_idct_4x4_dc_8_c:                                    7.8 ( 1.00x)
hevc_idct_4x4_dc_8_mmxext:                               6.9 ( 1.14x)
hevc_idct_4x4_dc_8_sse2:                                 6.8 ( 1.15x)
hevc_idct_4x4_dc_10_c:                                   7.9 ( 1.00x)
hevc_idct_4x4_dc_10_mmxext:                              6.9 ( 1.16x)
hevc_idct_4x4_dc_10_sse2:                                6.8 ( 1.16x)
hevc_idct_4x4_dc_12_c:                                   7.8 ( 1.00x)
hevc_idct_4x4_dc_12_mmxext:                              7.0 ( 1.13x)
hevc_idct_4x4_dc_12_sse2:                                6.8 ( 1.15x)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/hevc/dsp_init.c | 11 ++++-------
 libavcodec/x86/hevc/idct.asm   | 19 ++++++-------------
 tests/checkasm/hevc_idct.c     |  2 +-
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index ba921e7299..6966340c42 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -65,7 +65,7 @@ void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
 void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
 
-IDCT_DC_FUNCS(4x4,   mmxext);
+IDCT_DC_FUNCS(4x4,   sse2);
 IDCT_DC_FUNCS(8x8,   sse2);
 IDCT_DC_FUNCS(16x16, sse2);
 IDCT_DC_FUNCS(32x32, sse2);
@@ -816,8 +816,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
 
     if (bit_depth == 8) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
-
             c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
@@ -832,6 +830,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             }
             SAO_BAND_INIT(8, sse2);
 
+            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_sse2;
             c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
             c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
             c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
@@ -998,7 +997,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
-            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
@@ -1013,6 +1011,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(10, sse2);
             SAO_EDGE_INIT(10, sse2);
 
+            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_sse2;
             c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
             c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
             c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
@@ -1218,9 +1217,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
         }
 #endif /* HAVE_AVX2_EXTERNAL */
     } else if (bit_depth == 12) {
-        if (EXTERNAL_MMXEXT(cpu_flags)) {
-            c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
-        }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
@@ -1231,6 +1227,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(12, sse2);
             SAO_EDGE_INIT(12, sse2);
 
+            c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_sse2;
             c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
             c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
             c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
diff --git a/libavcodec/x86/hevc/idct.asm b/libavcodec/x86/hevc/idct.asm
index 021e5dab14..088144171d 100644
--- a/libavcodec/x86/hevc/idct.asm
+++ b/libavcodec/x86/hevc/idct.asm
@@ -273,16 +273,11 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp
     sar               tmpd, (15 - %2)
     movd                m0, tmpd
     SPLATW              m0, xm0
-    mova [coeffq+mmsize*0], m0
-    mova [coeffq+mmsize*1], m0
-    mova [coeffq+mmsize*2], m0
-    mova [coeffq+mmsize*3], m0
-%if mmsize == 16
-    mova [coeffq+mmsize*4], m0
-    mova [coeffq+mmsize*5], m0
-    mova [coeffq+mmsize*6], m0
-    mova [coeffq+mmsize*7], m0
-%endif
+%assign %%offset 0
+%rep 2*%1*%1/mmsize
+    mova [coeffq+%%offset], m0
+    %assign %%offset %%offset+mmsize
+%endrep
     RET
 %endmacro
 
@@ -809,10 +804,8 @@ cglobal hevc_idct_32x32_%1, 1, 6, 16, 256, coeffs
 %endmacro
 
 %macro INIT_IDCT_DC 1
-INIT_MMX mmxext
-IDCT_DC_NL  4,      %1
-
 INIT_XMM sse2
+IDCT_DC_NL  4,      %1
 IDCT_DC_NL  8,      %1
 IDCT_DC    16,  4,  %1
 IDCT_DC    32, 16,  %1
diff --git a/tests/checkasm/hevc_idct.c b/tests/checkasm/hevc_idct.c
index 2bd7ae9409..139ae81727 100644
--- a/tests/checkasm/hevc_idct.c
+++ b/tests/checkasm/hevc_idct.c
@@ -69,7 +69,7 @@ static void check_idct_dc(HEVCDSPContext *h, int bit_depth)
     for (i = 2; i <= 5; i++) {
         int block_size = 1 << i;
         int size = block_size * block_size;
-        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, int16_t *coeffs);
+        declare_func(void, int16_t *coeffs);
 
         randomize_buffers(coeffs0, size);
         memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2025-10-29 23:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-29 23:36 [FFmpeg-devel] [PATCH] avcodec/x86/hevc/idct: Port ff_hevc_idct_4x4_dc_{8,10,12}_mmxext to SSE2 (PR #20788) mkver via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git