* [FFmpeg-devel] [PR] avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function (PR #21579)
@ 2026-01-26 1:27 mkver via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2026-01-26 1:27 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: mkver
PR #21579 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21579
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21579.patch
>From 86e553bdda774c17c30b87192b198eddae9dd2ef Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 25 Jan 2026 23:23:36 +0100
Subject: [PATCH 1/4] avcodec/hevc/dsp_template: Optimize impossible branches away
Saves 1856B of .text here.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/hevc/dsp_template.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c
index 573cf9ee1e..f703f6d071 100644
--- a/libavcodec/hevc/dsp_template.c
+++ b/libavcodec/hevc/dsp_template.c
@@ -132,7 +132,7 @@ static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
int x, y;
int size = 1 << log2_size;
- if (shift > 0) {
+ if (BIT_DEPTH <= 9 || shift > 0) {
int offset = 1 << (shift - 1);
for (y = 0; y < size; y++) {
for (x = 0; x < size; x++) {
@@ -140,7 +140,7 @@ static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
coeffs++;
}
}
- } else if (shift < 0) {
+ } else if (BIT_DEPTH > 10 && shift < 0) {
for (y = 0; y < size; y++) {
for (x = 0; x < size; x++) {
*coeffs = *(uint16_t*)coeffs << -shift;
--
2.52.0
>From 2e5ae4f840dea1a8cd3c2907d5a007616e7ed27b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 25 Jan 2026 23:32:14 +0100
Subject: [PATCH 2/4] avcodec/hevc/dsp: Add alignment for dequant
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/hevc/dsp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h
index a63586c3a2..b884cd36be 100644
--- a/libavcodec/hevc/dsp.h
+++ b/libavcodec/hevc/dsp.h
@@ -50,7 +50,7 @@ typedef struct HEVCDSPContext {
void (*add_residual[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
- void (*dequant)(int16_t *coeffs, int16_t log2_size);
+ void (*dequant)(int16_t *coeffs /* align 32 */, int16_t log2_size);
void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);
--
2.52.0
>From 5edc6a6274f1592c3d2de62f9782f4e3b93d1842 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 26 Jan 2026 02:03:32 +0100
Subject: [PATCH 3/4] avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function
hevc_dequant_4x4_8_c (GCC): 20.2 ( 1.00x)
hevc_dequant_4x4_8_c (Clang): 21.7 ( 1.00x)
hevc_dequant_4x4_8_ssse3: 5.8 ( 3.51x)
hevc_dequant_8x8_8_c (GCC): 32.9 ( 1.00x)
hevc_dequant_8x8_8_c (Clang): 78.7 ( 1.00x)
hevc_dequant_8x8_8_ssse3: 6.8 ( 4.83x)
hevc_dequant_16x16_8_c (GCC): 105.1 ( 1.00x)
hevc_dequant_16x16_8_c (Clang): 151.1 ( 1.00x)
hevc_dequant_16x16_8_ssse3: 19.3 ( 5.45x)
hevc_dequant_32x32_8_c (GCC): 415.7 ( 1.00x)
hevc_dequant_32x32_8_c (Clang): 602.3 ( 1.00x)
hevc_dequant_32x32_8_ssse3: 78.2 ( 5.32x)
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/hevc/Makefile | 1 +
libavcodec/x86/hevc/dequant.asm | 60 +++++++++++++++++++++++++++++++++
libavcodec/x86/hevc/dsp_init.c | 3 ++
3 files changed, 64 insertions(+)
create mode 100644 libavcodec/x86/hevc/dequant.asm
diff --git a/libavcodec/x86/hevc/Makefile b/libavcodec/x86/hevc/Makefile
index 74418a322c..d09c613a19 100644
--- a/libavcodec/x86/hevc/Makefile
+++ b/libavcodec/x86/hevc/Makefile
@@ -4,6 +4,7 @@ clean::
X86ASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc/dsp_init.o \
x86/hevc/add_res.o \
x86/hevc/deblock.o \
+ x86/hevc/dequant.o \
x86/hevc/idct.o \
x86/hevc/mc.o \
x86/hevc/sao.o \
diff --git a/libavcodec/x86/hevc/dequant.asm b/libavcodec/x86/hevc/dequant.asm
new file mode 100644
index 0000000000..f0453c940b
--- /dev/null
+++ b/libavcodec/x86/hevc/dequant.asm
@@ -0,0 +1,60 @@
+;*****************************************************************************
+;* SSSE3-optimized HEVC dequant code
+;*****************************************************************************
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_XMM ssse3
+; void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size)
+cglobal hevc_dequant_8, 2, 3+UNIX64, 3
+
+; t0 = coeffs, t1 = log2_size (kept in ecx so that cl can be the shift count), t2 = tmp/size
+%if WIN64
+ DECLARE_REG_TMP 1,0,2
+ ; r0 is the shift register (ecx) on win64
+ xchg r0, r1
+%elif ARCH_X86_64
+ DECLARE_REG_TMP 0,3,1
+ ; r3 is ecx
+ mov t1d, r1d
+%else
+ ; r1 is ecx
+ DECLARE_REG_TMP 0,1,2
+%endif
+
+ mov t2d, 256
+ shl t2d, t1b ; multiplier = 256 << log2_size
+ movd m0, t2d
+ add t1d, t1d ; t1 = 2 * log2_size
+ SPLATW m0, m0 ; presumably broadcasts the multiplier to every word (macro from x86util.asm)
+ mov t2d, 1
+ shl t2d, t1b ; t2 = 1 << (2 * log2_size), the number of coefficients left to process
+.loop:
+ mova m1, [t0] ; aligned loads: two registers = 2*mmsize bytes per iteration
+ mova m2, [t0+mmsize]
+ pmulhrsw m1, m0 ; per word: (x * mul + 0x4000) >> 15 == (x + (1 << (6 - log2_size))) >> (7 - log2_size)
+ pmulhrsw m2, m0
+ mova [t0], m1 ; results are written back in place
+ mova [t0+mmsize], m2
+ add t0, 2*mmsize
+ sub t2d, mmsize ; 2*mmsize bytes hold mmsize int16 coefficients
+ jg .loop ; continue while coefficients remain
+ RET
diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index 5b2b10f33a..bd967eac67 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -30,6 +30,8 @@
#include "libavcodec/x86/hevc/dsp.h"
#include "libavcodec/x86/h26x/h2656dsp.h"
+void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size);
+
#define LFC_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, const int *tc, const uint8_t *no_p, const uint8_t *no_q);
@@ -847,6 +849,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
#endif
+ c->dequant = ff_hevc_dequant_8_ssse3;
SAO_EDGE_INIT(8, ssse3);
}
#if HAVE_SSE4_EXTERNAL && ARCH_X86_64
--
2.52.0
>From 3fbdf06a6d681a86578bca2812fd052c639f35f9 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 26 Jan 2026 02:16:47 +0100
Subject: [PATCH 4/4] tests/checkasm/hevc_dequant: Only init buffer when needed
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
tests/checkasm/hevc_dequant.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tests/checkasm/hevc_dequant.c b/tests/checkasm/hevc_dequant.c
index 20e322994a..5036662666 100644
--- a/tests/checkasm/hevc_dequant.c
+++ b/tests/checkasm/hevc_dequant.c
@@ -48,11 +48,11 @@ static void check_dequant(HEVCDSPContext *h, int bit_depth)
int size = block_size * block_size;
declare_func(void, int16_t *coeffs, int16_t log2_size);
- randomize_buffers(coeffs0, size);
- memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
-
if (check_func(h->dequant, "hevc_dequant_%dx%d_%d",
block_size, block_size, bit_depth)) {
+ randomize_buffers(coeffs0, size);
+ memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
+
call_ref(coeffs0, i);
call_new(coeffs1, i);
if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2026-01-26 1:28 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-26 1:27 [FFmpeg-devel] [PR] avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function (PR #21579) mkver via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git