Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function (PR #21579)
Date: Mon, 26 Jan 2026 01:27:30 -0000
Message-ID: <176939085085.25.8307423681209595508@4457048688e7> (raw)

PR #21579 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21579
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21579.patch


>From 86e553bdda774c17c30b87192b198eddae9dd2ef Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 25 Jan 2026 23:23:36 +0100
Subject: [PATCH 1/4] avcodec/hevc/dsp_template: Optimize impossible branches
 away

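shift is 15 - BIT_DEPTH - log2_size with log2_size in 2..5, so it is
always positive for BIT_DEPTH <= 9 and never negative for BIT_DEPTH <= 10.
Spelling this out with compile-time BIT_DEPTH checks lets the compiler
drop the unreachable branches; this saves 1856B of .text here.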

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/hevc/dsp_template.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c
index 573cf9ee1e..f703f6d071 100644
--- a/libavcodec/hevc/dsp_template.c
+++ b/libavcodec/hevc/dsp_template.c
@@ -132,7 +132,7 @@ static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
     int x, y;
     int size = 1 << log2_size;
 
-    if (shift > 0) {
+    if (BIT_DEPTH <= 9 || shift > 0) {
         int offset = 1 << (shift - 1);
         for (y = 0; y < size; y++) {
             for (x = 0; x < size; x++) {
@@ -140,7 +140,7 @@ static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
                 coeffs++;
             }
         }
-    } else if (shift < 0) {
+    } else if (BIT_DEPTH > 10 && shift < 0) {
         for (y = 0; y < size; y++) {
             for (x = 0; x < size; x++) {
                 *coeffs = *(uint16_t*)coeffs << -shift;
-- 
2.52.0


>From 2e5ae4f840dea1a8cd3c2907d5a007616e7ed27b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 25 Jan 2026 23:32:14 +0100
Subject: [PATCH 2/4] avcodec/hevc/dsp: Add alignment for dequant

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/hevc/dsp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h
index a63586c3a2..b884cd36be 100644
--- a/libavcodec/hevc/dsp.h
+++ b/libavcodec/hevc/dsp.h
@@ -50,7 +50,7 @@ typedef struct HEVCDSPContext {
 
     void (*add_residual[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
 
-    void (*dequant)(int16_t *coeffs, int16_t log2_size);
+    void (*dequant)(int16_t *coeffs /* align 32 */, int16_t log2_size);
 
     void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);
 
-- 
2.52.0


>From 5edc6a6274f1592c3d2de62f9782f4e3b93d1842 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 26 Jan 2026 02:03:32 +0100
Subject: [PATCH 3/4] avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function

hevc_dequant_4x4_8_c (GCC):                             20.2 ( 1.00x)
hevc_dequant_4x4_8_c (Clang):                           21.7 ( 1.00x)
hevc_dequant_4x4_8_ssse3:                                5.8 ( 3.51x)
hevc_dequant_8x8_8_c (GCC):                             32.9 ( 1.00x)
hevc_dequant_8x8_8_c (Clang):                           78.7 ( 1.00x)
hevc_dequant_8x8_8_ssse3:                                6.8 ( 4.83x)
hevc_dequant_16x16_8_c (GCC):                          105.1 ( 1.00x)
hevc_dequant_16x16_8_c (Clang):                        151.1 ( 1.00x)
hevc_dequant_16x16_8_ssse3:                             19.3 ( 5.45x)
hevc_dequant_32x32_8_c (GCC):                          415.7 ( 1.00x)
hevc_dequant_32x32_8_c (Clang):                        602.3 ( 1.00x)
hevc_dequant_32x32_8_ssse3:                             78.2 ( 5.32x)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/hevc/Makefile    |  1 +
 libavcodec/x86/hevc/dequant.asm | 60 +++++++++++++++++++++++++++++++++
 libavcodec/x86/hevc/dsp_init.c  |  3 ++
 3 files changed, 64 insertions(+)
 create mode 100644 libavcodec/x86/hevc/dequant.asm

diff --git a/libavcodec/x86/hevc/Makefile b/libavcodec/x86/hevc/Makefile
index 74418a322c..d09c613a19 100644
--- a/libavcodec/x86/hevc/Makefile
+++ b/libavcodec/x86/hevc/Makefile
@@ -4,6 +4,7 @@ clean::
 X86ASM-OBJS-$(CONFIG_HEVC_DECODER)      += x86/hevc/dsp_init.o      \
                                            x86/hevc/add_res.o       \
                                            x86/hevc/deblock.o       \
+                                           x86/hevc/dequant.o       \
                                            x86/hevc/idct.o          \
                                            x86/hevc/mc.o            \
                                            x86/hevc/sao.o           \
diff --git a/libavcodec/x86/hevc/dequant.asm b/libavcodec/x86/hevc/dequant.asm
new file mode 100644
index 0000000000..f0453c940b
--- /dev/null
+++ b/libavcodec/x86/hevc/dequant.asm
@@ -0,0 +1,60 @@
+;*****************************************************************************
+;* SSSE3-optimized HEVC dequant code
+;*****************************************************************************
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_XMM ssse3
+; void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size)
+cglobal hevc_dequant_8, 2, 3+UNIX64, 3 ; UNIX64 needs a 4th GPR because t1 is mapped to r3 there
+; Rescales all (1 << log2_size)^2 int16 coefficients in place; coeffs is accessed with aligned mova.
+; t0 = coeffs, t1 = log2_size (kept in ecx so that t1b is cl, the count register of shl), t2 = tmp/size
+%if WIN64
+    DECLARE_REG_TMP 1,0,2
+    ; r0 is the shift register (ecx) on win64
+    xchg          r0, r1 ; afterwards r1 (= t0) holds coeffs and r0 (= t1) holds log2_size
+%elif ARCH_X86_64
+    DECLARE_REG_TMP 0,3,1
+    ; r3 is ecx
+    mov          t1d, r1d ; copy log2_size into ecx; r1 is then reused as t2
+%else
+    ; r1 is ecx
+    DECLARE_REG_TMP 0,1,2
+%endif
+
+    mov          t2d, 256
+    shl          t2d, t1b ; t2d = 256 << log2_size, the pmulhrsw multiplier
+    movd          m0, t2d
+    add          t1d, t1d ; t1d = 2 * log2_size
+    SPLATW        m0, m0 ; broadcast the multiplier to every word lane of m0
+    mov          t2d, 1
+    shl          t2d, t1b ; t2d = 1 << (2 * log2_size) = number of coefficients left to process
+.loop:
+    mova          m1, [t0]
+    mova          m2, [t0+mmsize]
+    pmulhrsw      m1, m0 ; (x * m + 0x4000) >> 15 == (x + (1 << (6 - log2_size))) >> (7 - log2_size)
+    pmulhrsw      m2, m0 ; NOTE(review): presumably equals the C path's (x + offset) >> shift for 8 bit - confirm
+    mova        [t0], m1
+    mova [t0+mmsize], m2
+    add           t0, 2*mmsize ; advance by the 2*mmsize bytes (16 coefficients) just written
+    sub          t2d, mmsize ; 16 coefficients per iteration; this equals mmsize only because mmsize is 16 (XMM)
+    jg         .loop
+    RET
diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index 5b2b10f33a..bd967eac67 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -30,6 +30,8 @@
 #include "libavcodec/x86/hevc/dsp.h"
 #include "libavcodec/x86/h26x/h2656dsp.h"
 
+void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size);
+
 #define LFC_FUNC(DIR, DEPTH, OPT) \
 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
 
@@ -847,6 +849,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
 #endif
+            c->dequant = ff_hevc_dequant_8_ssse3;
             SAO_EDGE_INIT(8, ssse3);
         }
 #if HAVE_SSE4_EXTERNAL && ARCH_X86_64
-- 
2.52.0


>From 3fbdf06a6d681a86578bca2812fd052c639f35f9 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 26 Jan 2026 02:16:47 +0100
Subject: [PATCH 4/4] tests/checkasm/hevc_dequant: Only init buffer when needed

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 tests/checkasm/hevc_dequant.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/checkasm/hevc_dequant.c b/tests/checkasm/hevc_dequant.c
index 20e322994a..5036662666 100644
--- a/tests/checkasm/hevc_dequant.c
+++ b/tests/checkasm/hevc_dequant.c
@@ -48,11 +48,11 @@ static void check_dequant(HEVCDSPContext *h, int bit_depth)
         int size = block_size * block_size;
         declare_func(void, int16_t *coeffs, int16_t log2_size);
 
-        randomize_buffers(coeffs0, size);
-        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
-
         if (check_func(h->dequant, "hevc_dequant_%dx%d_%d",
                        block_size, block_size, bit_depth)) {
+            randomize_buffers(coeffs0, size);
+            memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
+
             call_ref(coeffs0, i);
             call_new(coeffs1, i);
             if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2026-01-26  1:28 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=176939085085.25.8307423681209595508@4457048688e7 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git