From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] avcodec/x86/h264_deblock: Various improvements (PR #21535)
Date: Wed, 21 Jan 2026 01:02:48 -0000
Message-ID: <176895736905.25.1708576412784609975@4457048688e7> (raw)
PR #21535 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21535
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21535.patch
Also remove "h264" from H264DSPContext member names.
>From 9c68f3f25e3348e1f56daf1261c9a523fbabf6ed Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Tue, 6 Jan 2026 15:00:30 +0100
Subject: [PATCH 01/13] avcodec/x86/h264_deblock: Remove unused macros
Forgotten in 4618f36a2424a3a4d5760afabc2e9dd18d73f0a4.
Also remove a PASS8ROWS wrapper that appears to have never been
used.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 36 ---------------------------------
1 file changed, 36 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 479e6c3460..98a0867102 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -37,38 +37,6 @@ cextern pb_0
cextern pb_1
cextern pb_3
-%define PASS8ROWS(base, base3, stride, stride3, offset) \
- PASS8ROWS(base+offset, base3+offset, stride, stride3)
-
-; in: 8 rows of 4 bytes in %4..%11
-; out: 4 rows of 8 bytes in m0..m3
-%macro TRANSPOSE4x8_LOAD 11
- movh m0, %4
- movh m2, %5
- movh m1, %6
- movh m3, %7
- punpckl%1 m0, m2
- punpckl%1 m1, m3
- mova m2, m0
- punpckl%2 m0, m1
- punpckh%2 m2, m1
-
- movh m4, %8
- movh m6, %9
- movh m5, %10
- movh m7, %11
- punpckl%1 m4, m6
- punpckl%1 m5, m7
- mova m6, m4
- punpckl%2 m4, m5
- punpckh%2 m6, m5
-
- punpckh%3 m1, m0, m4
- punpckh%3 m3, m2, m6
- punpckl%3 m0, m4
- punpckl%3 m2, m6
-%endmacro
-
; in: 4 rows of 8 bytes in m0..m3
; out: 8 rows of 4 bytes in %1..%8
%macro TRANSPOSE8x4B_STORE 8
@@ -100,10 +68,6 @@ cextern pb_3
movh %8, m4
%endmacro
-%macro TRANSPOSE4x8B_LOAD 8
- TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
-%endmacro
-
%macro SBUTTERFLY3 4
punpckh%1 %4, %2, %3
punpckl%1 %2, %3
--
2.52.0
>From 2afa66816c6e8bf380fbb6883fc911af6cd6eec7 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 09:00:47 +0100
Subject: [PATCH 02/13] tests/checkasm/h264dsp: Don't test loop filter strength
0
These functions are not used with these parameters;
see the filter_mb_* functions in h264_loopfilter.c.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
tests/checkasm/h264dsp.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index acf4f61ebb..0bf01e072e 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -374,24 +374,26 @@ static void check_idct_dequant(void)
static void check_loop_filter(void)
{
+ enum {
+ N = 35,
+ };
LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
H264DSPContext h;
int bit_depth;
- int alphas[36], betas[36];
- int8_t tc0[36][4];
+ int alphas[N], betas[N];
+ int8_t tc0[N][4];
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
int alpha, int beta, int8_t *tc0);
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
- int i, j, a, c;
uint32_t mask = pixel_mask_lf[bit_depth - 8];
ff_h264dsp_init(&h, bit_depth, 1);
- for (i = 35, a = 255, c = 250; i >= 0; i--) {
+ for (int i = N, a = 255, c = 250; --i >= 0;) {
alphas[i] = a << (bit_depth - 8);
- betas[i] = (i + 1) / 2 << (bit_depth - 8);
+ betas[i] = (i + 2) / 2 << (bit_depth - 8);
tc0[i][0] = tc0[i][3] = (c + 6) / 10;
tc0[i][1] = (c + 7) / 15;
tc0[i][2] = (c + 9) / 20;
@@ -402,9 +404,9 @@ static void check_loop_filter(void)
#define CHECK_LOOP_FILTER(name, align, idc) \
do { \
if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
- for (j = 0; j < 36; j++) { \
+ for (int j = 0; j < N; ++j) { \
intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
- for (i = 0; i < 1024; i+=4) { \
+ for (int i = 0; i < 1024; i += 4) { \
AV_WN32A(dst + i, rnd() & mask); \
} \
memcpy(dst0, dst, 32 * 16 * 2); \
@@ -439,32 +441,34 @@ static void check_loop_filter(void)
static void check_loop_filter_intra(void)
{
+ enum {
+ N = 35,
+ };
LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
H264DSPContext h;
int bit_depth;
- int alphas[36], betas[36];
+ int alphas[N], betas[N];
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
int alpha, int beta);
for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
- int i, j, a;
uint32_t mask = pixel_mask_lf[bit_depth - 8];
ff_h264dsp_init(&h, bit_depth, 1);
- for (i = 35, a = 255; i >= 0; i--) {
+ for (int i = N, a = 255; --i >= 0;) {
alphas[i] = a << (bit_depth - 8);
- betas[i] = (i + 1) / 2 << (bit_depth - 8);
+ betas[i] = (i + 2) / 2 << (bit_depth - 8);
a = a*9/10;
}
#define CHECK_LOOP_FILTER(name, align, idc) \
do { \
if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) { \
- for (j = 0; j < 36; j++) { \
+ for (int j = 0; j < N; ++j) { \
intptr_t off = 8 * 32 + (j & 15) * 4 * !align; \
- for (i = 0; i < 1024; i+=4) { \
+ for (int i = 0; i < 1024; i += 4) { \
AV_WN32A(dst + i, rnd() & mask); \
} \
memcpy(dst0, dst, 32 * 16 * 2); \
--
2.52.0
>From fdc4aa940af0857e4d8ca53bf7d9f5174f7ba31b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 09:05:04 +0100
Subject: [PATCH 03/13] avcodec/x86/h264_deblock: Remove always-false branches
These functions are always called with alpha and beta > 0.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 3 ---
1 file changed, 3 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 98a0867102..075a6325e1 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -699,10 +699,8 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
dec r2d ; alpha-1
- jl .end
neg r4
dec r3d ; beta-1
- jl .end
add r4, r0 ; pix-4*stride
mova p1, [r4+2*r1]
mova p0, [r4+r5]
@@ -743,7 +741,6 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
LUMA_INTRA_P012 [r4+r5], [r4+2*r1], [r4+r1], [r4]
LUMA_INTRA_SWAP_PQ
LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
-.end:
RET
INIT_MMX cpuname
--
2.52.0
>From 56a6f73a2c6887f977e6528fd2650b15475c1a29 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 11:09:40 +0100
Subject: [PATCH 04/13] avcodec/x86/h264_deblock: Simplify splatting
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 075a6325e1..d5610691d2 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -182,10 +182,16 @@ cextern pb_3
%macro LOAD_MASK 2-3
movd m4, %1
movd m5, %2
+%if cpuflag(ssse3)
+ pxor m6, m6
+ pshufb m4, m6
+ pshufb m5, m6
+%else
SPLATW m4, m4
SPLATW m5, m5
packuswb m4, m4 ; 16x alpha-1
packuswb m5, m5 ; 16x beta-1
+%endif
%if %0>2
mova %3, m4
%endif
--
2.52.0
>From 3175f4d1c5eeab6b8c35ec36cef2927caa36a6a0 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 11:35:00 +0100
Subject: [PATCH 05/13] avcodec/x86/h264_deblock: Remove obsolete macro
parameters
They are a remnant of the MMX functions (which processed
only eight pixels at a time, so that each was called twice
via a wrapper; the actual MMX functions had "v8" in their names
instead of simply "v"), which were removed in commit
4618f36a2424a3a4d5760afabc2e9dd18d73f0a4.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 35 ++++++++++++---------------------
1 file changed, 13 insertions(+), 22 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index d5610691d2..3a343a2afb 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -445,12 +445,12 @@ DEBLOCK_LUMA
%else
-%macro DEBLOCK_LUMA 2
+%macro DEBLOCK_LUMA 1
;-----------------------------------------------------------------------------
-; void ff_deblock_v8_luma(uint8_t *pix, int stride, int alpha, int beta,
-; int8_t *tc0)
+; void ff_deblock_v_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
+; int8_t *tc0)
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_8, 5,5,8,2*%2
+cglobal deblock_v_luma_8, 5,5,8,2*%1
lea r4, [r1*3]
dec r2 ; alpha-1
neg r4
@@ -468,7 +468,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
movd m4, [r3] ; tc0
punpcklbw m4, m4
punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
- mova [esp+%2], m4 ; tc
+ mova [esp+%1], m4 ; tc
pcmpgtb m4, m3
mova m3, [r4] ; p2
pand m4, m7
@@ -476,7 +476,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
pand m6, m4
- pand m4, [esp+%2] ; tc
+ pand m4, [esp+%1] ; tc
psubb m7, m4, m6
pand m6, m4
LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
@@ -484,7 +484,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
mova m4, [r0+2*r1] ; q2
DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
pand m6, [esp] ; mask
- mova m5, [esp+%2] ; tc
+ mova m5, [esp+%1] ; tc
psubb m7, m6
pand m5, m6
mova m3, [r0+r1]
@@ -521,12 +521,7 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
PUSH dword r2m
PUSH dword 16
PUSH dword r0
- call deblock_%1_luma_8
-%ifidn %1, v8
- add dword [esp ], 8 ; pix_tmp+0x38
- add dword [esp+16], 2 ; tc0+2
- call deblock_%1_luma_8
-%endif
+ call deblock_v_luma_8
ADD esp, 20
; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
@@ -552,10 +547,10 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
%endmacro ; DEBLOCK_LUMA
INIT_XMM sse2
-DEBLOCK_LUMA v, 16
+DEBLOCK_LUMA 16
%if HAVE_AVX_EXTERNAL
INIT_XMM avx
-DEBLOCK_LUMA v, 16
+DEBLOCK_LUMA 16
%endif
%endif ; ARCH
@@ -698,9 +693,9 @@ DEBLOCK_LUMA v, 16
; void ff_deblock_v_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
;-----------------------------------------------------------------------------
%if WIN64
-cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
+cglobal deblock_v_luma_intra_8, 4,6,16,0x10
%else
-cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
+cglobal deblock_v_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
%endif
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
@@ -802,11 +797,7 @@ cglobal deblock_h_luma_intra_8, 2,4,8,0x80
PUSH dword r2m
PUSH dword 16
PUSH r0
- call deblock_%1_luma_intra_8
-%ifidn %1, v8
- add dword [rsp], 8 ; pix_tmp+8
- call deblock_%1_luma_intra_8
-%endif
+ call deblock_v_luma_intra_8
ADD esp, 16
mov r1, r1m
--
2.52.0
>From 888d08cc19f5f0ebb2d6c91dff7e4669a0c26586 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 12:46:22 +0100
Subject: [PATCH 06/13] avcodec/x86/h264_deblock_10bit: Remove custom stack
allocation code
Allocate the stack space via cglobal as usual. This makes the
SSE2/AVX functions available when HAVE_ALIGNED_STACK is false;
it also avoids modifying rsp unnecessarily in the
deblock_h_luma_intra_10 functions on Win64.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock_10bit.asm | 23 +++++------------------
libavcodec/x86/h264dsp_init.c | 4 ----
2 files changed, 5 insertions(+), 22 deletions(-)
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 033f2f4d55..1ea5ce4b28 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -153,14 +153,12 @@ cextern pw_1023
; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta,
; int8_t *tc0)
;-----------------------------------------------------------------------------
-cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
- %assign pad 5*mmsize+12-(stack_offset&15)
+cglobal deblock_v_luma_10, 5,5,8,-5*mmsize
%define tcm [rsp]
%define ms1 [rsp+mmsize]
%define ms2 [rsp+mmsize*2]
%define am [rsp+mmsize*3]
%define bm [rsp+mmsize*4]
- SUB rsp, pad
shl r2d, 2
shl r3d, 2
LOAD_AB m4, m5, r2d, r3d
@@ -205,11 +203,9 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
add r4, mmsize/8
dec r3
jg .loop
- ADD rsp, pad
RET
-cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
- %assign pad 7*mmsize+12-(stack_offset&15)
+cglobal deblock_h_luma_10, 5,6,8,-7*mmsize
%define tcm [rsp]
%define ms1 [rsp+mmsize]
%define ms2 [rsp+mmsize*2]
@@ -217,7 +213,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
%define p2m [rsp+mmsize*4]
%define am [rsp+mmsize*5]
%define bm [rsp+mmsize*6]
- SUB rsp, pad
shl r2d, 2
shl r3d, 2
LOAD_AB m4, m5, r2d, r3d
@@ -295,7 +290,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
lea r2, [r2+r1*(mmsize/2)]
dec r5
jg .loop
- ADD rsp, pad
RET
%endmacro
@@ -482,7 +476,6 @@ DEBLOCK_LUMA_64
%endmacro
%macro LUMA_INTRA_INIT 1
- %xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15)
%define t0 m4
%define t1 m5
%define t2 m6
@@ -492,7 +485,6 @@ DEBLOCK_LUMA_64
CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
%assign i i+1
%endrep
- SUB rsp, pad
%endmacro
; in: %1-%3=tmp, %4=p2, %5=q2
@@ -654,7 +646,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
; int beta)
;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_10, 4,7,16
+cglobal deblock_h_luma_intra_10, 4,7,16,mmsize
%define t0 m15
%define t1 m14
%define t2 m2
@@ -667,8 +659,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16
%define p2 m13
%define p3 m4
%define spill [rsp]
- %assign pad 24-(stack_offset&15)
- SUB rsp, pad
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
add r4, r0 ; pix+4*stride
@@ -709,7 +699,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16
lea r4, [r4+r1*8]
dec r6
jg .loop
- ADD rsp, pad
RET
%endmacro
@@ -727,7 +716,7 @@ DEBLOCK_LUMA_INTRA_64
; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha,
; int beta)
;-----------------------------------------------------------------------------
-cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
+cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize
LUMA_INTRA_INIT 3
lea r4, [r1*4]
lea r5, [r1*3]
@@ -749,14 +738,13 @@ cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
add r4, mmsize
dec r6
jg .loop
- ADD rsp, pad
RET
;-----------------------------------------------------------------------------
; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
; int beta)
;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
+cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
LUMA_INTRA_INIT 8
%if mmsize == 8
lea r4, [r1*3]
@@ -793,7 +781,6 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
dec r6
%endif
jg .loop
- ADD rsp, pad
RET
%endmacro
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 66c2f36908..a62de09577 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -314,12 +314,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
} else {
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
}
-#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
-#endif /* HAVE_ALIGNED_STACK */
}
if (EXTERNAL_SSE4(cpu_flags)) {
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
@@ -354,12 +352,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
} else {
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
}
-#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
-#endif /* HAVE_ALIGNED_STACK */
}
}
}
--
2.52.0
>From dfa0bbd5e15a13cd96a46db01d427bd20fa12227 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 13:07:55 +0100
Subject: [PATCH 07/13] avcodec/x86/h264_deblock_10bit: Remove mmxext functions
Now that the SSE2/AVX functions are no longer restricted
to those systems having an aligned stack, the MMXEXT functions
are always overridden (except for ancient systems without
SSE2), so remove them.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock_10bit.asm | 103 ++------------------------
libavcodec/x86/h264dsp_init.c | 11 ---
2 files changed, 7 insertions(+), 107 deletions(-)
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 1ea5ce4b28..ca5d9ff3b7 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -1,5 +1,5 @@
;*****************************************************************************
-;* MMX/SSE2/AVX-optimized 10-bit H.264 deblocking code
+;* SSE2/AVX-optimized 10-bit H.264 deblocking code
;*****************************************************************************
;* Copyright (C) 2005-2011 x264 project
;*
@@ -65,12 +65,8 @@ cextern pw_1023
%macro LOAD_TC 2
movd %1, [%2]
punpcklbw %1, %1
-%if mmsize == 8
- pshufw %1, %1, 0
-%else
pshuflw %1, %1, 01010000b
pshufd %1, %1, 01010000b
-%endif
psraw %1, 6
%endmacro
@@ -131,12 +127,6 @@ cextern pw_1023
%endmacro
%macro LUMA_H_STORE 2
-%if mmsize == 8
- movq [r0-4], m0
- movq [r0+r1-4], m1
- movq [r0+r1*2-4], m2
- movq [r0+%2-4], m3
-%else
movq [r0-4], m0
movhps [r0+r1-4], m0
movq [r0+r1*2-4], m1
@@ -145,7 +135,6 @@ cextern pw_1023
movhps [%1+r1*2-4], m2
movq [%1+%2-4], m3
movhps [%1+r1*4-4], m3
-%endif
%endmacro
%macro DEBLOCK_LUMA 0
@@ -222,24 +211,9 @@ cglobal deblock_h_luma_10, 5,6,8,-7*mmsize
mov r5, 32/mmsize
mova bm, m5
add r3, r1
-%if mmsize == 16
mov r2, r0
add r2, r3
-%endif
.loop:
-%if mmsize == 8
- movq m2, [r0-8] ; y q2 q1 q0
- movq m7, [r0+0]
- movq m5, [r0+r1-8]
- movq m3, [r0+r1+0]
- movq m0, [r0+r1*2-8]
- movq m6, [r0+r1*2+0]
- movq m1, [r0+r3-8]
- TRANSPOSE4x4W 2, 5, 0, 1, 4
- SWAP 2, 7
- movq m7, [r0+r3]
- TRANSPOSE4x4W 2, 3, 6, 7, 4
-%else
movu m5, [r0-8] ; y q2 q1 q0 p0 p1 p2 x
movu m0, [r0+r1-8]
movu m2, [r0+r1*2-8]
@@ -258,7 +232,6 @@ cglobal deblock_h_luma_10, 5,6,8,-7*mmsize
punpckhqdq m5, m4
SBUTTERFLY qdq, 0, 1, 7
SBUTTERFLY qdq, 2, 3, 7
-%endif
mova p2m, m6
LOAD_MASK m0, m1, m2, m3, am, bm, m7, m4, m6
@@ -515,23 +488,6 @@ DEBLOCK_LUMA_64
%endmacro
%macro LUMA_H_INTRA_LOAD 0
-%if mmsize == 8
- movu t0, [r0-8]
- movu t1, [r0+r1-8]
- movu m0, [r0+r1*2-8]
- movu m1, [r0+r4-8]
- TRANSPOSE4x4W 4, 5, 0, 1, 2
- mova t4, t0 ; p3
- mova t5, t1 ; p2
-
- movu m2, [r0]
- movu m3, [r0+r1]
- movu t0, [r0+r1*2]
- movu t1, [r0+r4]
- TRANSPOSE4x4W 2, 3, 4, 5, 6
- mova t6, t0 ; q2
- mova t7, t1 ; q3
-%else
movu t0, [r0-8]
movu t1, [r0+r1-8]
movu m0, [r0+r1*2-8]
@@ -545,24 +501,10 @@ DEBLOCK_LUMA_64
mova t5, t1 ; p2
mova t6, t2 ; q2
mova t7, t3 ; q3
-%endif
%endmacro
; in: %1=q3 %2=q2' %3=q1' %4=q0' %5=p0' %6=p1' %7=p2' %8=p3 %9=tmp
%macro LUMA_H_INTRA_STORE 9
-%if mmsize == 8
- TRANSPOSE4x4W %1, %2, %3, %4, %9
- movq [r0-8], m%1
- movq [r0+r1-8], m%2
- movq [r0+r1*2-8], m%3
- movq [r0+r4-8], m%4
- movq m%1, %8
- TRANSPOSE4x4W %5, %6, %7, %1, %9
- movq [r0], m%5
- movq [r0+r1], m%6
- movq [r0+r1*2], m%7
- movq [r0+r4], m%1
-%else
TRANSPOSE2x4x4W %1, %2, %3, %4, %9
movq [r0-8], m%1
movq [r0+r1-8], m%2
@@ -586,7 +528,6 @@ DEBLOCK_LUMA_64
movhps [r4+r1], m%6
movhps [r4+r1*2], m%7
movhps [r4+r5], m%1
-%endif
%endmacro
%if ARCH_X86_64
@@ -746,15 +687,10 @@ cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
LUMA_INTRA_INIT 8
-%if mmsize == 8
- lea r4, [r1*3]
- mov r5, 32/mmsize
-%else
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
add r4, r0 ; pix+4*stride
mov r6, 32/mmsize
-%endif
shl r2d, 2
shl r3d, 2
.loop:
@@ -774,22 +710,13 @@ cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
LUMA_H_INTRA_STORE 2, 0, 1, 3, 4, 6, 5, t7, 7
lea r0, [r0+r1*(mmsize/2)]
-%if mmsize == 8
- dec r5
-%else
lea r4, [r4+r1*(mmsize/2)]
dec r6
-%endif
jg .loop
RET
%endmacro
%if ARCH_X86_64 == 0
-%if HAVE_ALIGNED_STACK == 0
-INIT_MMX mmxext
-DEBLOCK_LUMA
-DEBLOCK_LUMA_INTRA
-%endif
INIT_XMM sse2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
@@ -876,37 +803,21 @@ DEBLOCK_LUMA_INTRA
%endmacro
; %1 = base + 3*stride
-; %2 = 3*stride (unused on mmx)
+; %2 = 3*stride
; %3, %4 = place to store p1 and q1 values
%macro CHROMA_H_LOAD 4
- %if mmsize == 8
- movq m0, [pix_q - 4]
- movq m1, [pix_q + stride_q - 4]
- movq m2, [pix_q + 2*stride_q - 4]
- movq m3, [%1 - 4]
- TRANSPOSE4x4W 0, 1, 2, 3, 4
- %else
- TRANSPOSE4x8W_LOAD PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
- %endif
+ TRANSPOSE4x8W_LOAD PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
mova %3, m0
mova %4, m3
%endmacro
; %1 = base + 3*stride
-; %2 = 3*stride (unused on mmx)
+; %2 = 3*stride
; %3, %4 = place to load p1 and q1 values
%macro CHROMA_H_STORE 4
mova m0, %3
mova m3, %4
- %if mmsize == 8
- TRANSPOSE4x4W 0, 1, 2, 3, 4
- movq [pix_q - 4], m0
- movq [pix_q + stride_q - 4], m1
- movq [pix_q + 2*stride_q - 4], m2
- movq [%1 - 4], m3
- %else
- TRANSPOSE8x4W_STORE PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
- %endif
+ TRANSPOSE8x4W_STORE PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
%endmacro
%macro CHROMA_V_LOAD_TC 2
@@ -921,7 +832,7 @@ DEBLOCK_LUMA_INTRA
; void ff_deblock_v_chroma_10(uint16_t *pix, int stride, int alpha, int beta,
; int8_t *tc0)
;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
+cglobal deblock_v_chroma_10, 5,6,8
mov r5, r0
sub r0, r1
sub r0, r1
@@ -943,7 +854,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
; void ff_deblock_v_chroma_intra_10(uint16_t *pix, int stride, int alpha,
; int beta)
;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
+cglobal deblock_v_chroma_intra_10, 4,5,8
mov r4, r0
sub r0, r1
sub r0, r1
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index a62de09577..1ee1ee4367 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -127,11 +127,6 @@ LF_FUNC(h, luma_mbaff, 8, avx)
LF_FUNCS(uint8_t, 8)
LF_FUNCS(uint16_t, 10)
-LF_FUNC(v, luma, 10, mmxext)
-LF_FUNC(h, luma, 10, mmxext)
-LF_IFUNC(v, luma_intra, 10, mmxext)
-LF_IFUNC(h, luma_intra, 10, mmxext)
-
/***********************************/
/* weighted prediction */
@@ -275,12 +270,6 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
-#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
-#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
--
2.52.0
>From e54aa2d15536fbe58d9a170871166fb6f7130fd1 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 13:32:29 +0100
Subject: [PATCH 08/13] avcodec/x86/h264_deblock_10bit: Simplify r0+4*r1
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock_10bit.asm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index ca5d9ff3b7..7b95754c89 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -600,9 +600,8 @@ cglobal deblock_h_luma_intra_10, 4,7,16,mmsize
%define p2 m13
%define p3 m4
%define spill [rsp]
- lea r4, [r1*4]
+ lea r4, [r0+r1*4] ; pix+4*stride
lea r5, [r1*3] ; 3*stride
- add r4, r0 ; pix+4*stride
mov r6, 2
mova m0, [pw_2]
shl r2d, 2
@@ -687,9 +686,8 @@ cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
LUMA_INTRA_INIT 8
- lea r4, [r1*4]
+ lea r4, [r0+r1*4]
lea r5, [r1*3] ; 3*stride
- add r4, r0 ; pix+4*stride
mov r6, 32/mmsize
shl r2d, 2
shl r3d, 2
--
2.52.0
>From 494821be5d9f93a7b553fe9047139c0da05da589 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 17:15:08 +0100
Subject: [PATCH 09/13] avcodec/x86/h264_deblock: Avoid reloading constant
No change in benchmarks.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 3a343a2afb..94d0771384 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -219,14 +219,14 @@ cextern pb_3
pavgb m3, m5
mova m6, [pb_A1]
paddusb m3, m4 ; d+128+33
- psubusb m6, m3
- psubusb m3, [pb_A1]
- pminub m6, m7
+ psubusb m4, m6, m3
+ psubusb m3, m6
+ pminub m4, m7
pminub m3, m7
- psubusb m1, m6
+ psubusb m1, m4
psubusb m2, m3
paddusb m1, m3
- paddusb m2, m6
+ paddusb m2, m4
%endmacro
; in: m1=p0 m2=q0
--
2.52.0
>From 707738d00097b9b9f27dd2cad9bc41459e6423fe Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 15:11:06 +0100
Subject: [PATCH 10/13] avcodec/x86/h264_deblock: Avoid MMX in deblock_h_luma_8
Old benchmarks:
h264_h_loop_filter_luma_8bpp_c: 59.9 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2: 67.9 ( 0.88x)
h264_h_loop_filter_luma_8bpp_avx: 67.4 ( 0.89x)
New benchmarks:
h264_h_loop_filter_luma_8bpp_c: 60.0 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2: 65.4 ( 0.92x)
h264_h_loop_filter_luma_8bpp_avx: 65.3 ( 0.92x)
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 107 ++++++++++++++++----------------
1 file changed, 53 insertions(+), 54 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 94d0771384..c81fb36494 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -37,35 +37,25 @@ cextern pb_0
cextern pb_1
cextern pb_3
-; in: 4 rows of 8 bytes in m0..m3
-; out: 8 rows of 4 bytes in %1..%8
-%macro TRANSPOSE8x4B_STORE 8
- punpckhdq m4, m0, m0
- punpckhdq m5, m1, m1
- punpckhdq m6, m2, m2
-
- punpcklbw m0, m1
- punpcklbw m2, m3
- punpcklwd m1, m0, m2
- punpckhwd m0, m2
- movh %1, m1
- punpckhdq m1, m1
- movh %2, m1
- movh %3, m0
- punpckhdq m0, m0
- movh %4, m0
-
- punpckhdq m3, m3
- punpcklbw m4, m5
- punpcklbw m6, m3
- punpcklwd m5, m4, m6
- punpckhwd m4, m6
- movh %5, m5
- punpckhdq m5, m5
- movh %6, m5
- movh %7, m4
- punpckhdq m4, m4
- movh %8, m4
+; in: 2 rows of 8 words in %1, %2
+; out: 8 rows of 4 bytes in %3..%10
+%macro TRANSPOSE8x4B_STORE 10
+ punpcklwd m6, %1, %2
+ movd %3, m6
+ pshufd m7, m6, 00110001b
+ punpckhqdq m6, m6
+ movd %4, m7
+ punpckhqdq m7, m7
+ punpckhwd %1, %2
+ movd %5, m6
+ movd %6, m7
+ pshufd m6, %1, 00110001b
+ movd %7, %1
+ punpckhqdq %1, %1
+ movd %8, m6
+ punpckhqdq m6, m6
+ movd %9, %1
+ movd %10, m6
%endmacro
%macro SBUTTERFLY3 4
@@ -325,25 +315,30 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
%endif
call deblock_v_luma_8
- ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
add r6, 2
add r5, 2
- movq m0, [pix_tmp+0x18]
- movq m1, [pix_tmp+0x28]
- movq m2, [pix_tmp+0x38]
- movq m3, [pix_tmp+0x48]
- TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
+ INIT_XMM cpuname
+
+ ; transpose 16x4 (only the middle 4 rows were changed by the filter)
+ mova m0, [pix_tmp+0x10]
+ mova m1, [pix_tmp+0x20]
+ mova m2, [pix_tmp+0x30]
+ mova m3, [pix_tmp+0x40]
+
+ punpckhbw m4, m0, m1
+ punpckhbw m5, m2, m3
+
+ TRANSPOSE8x4B_STORE m4, m5, PASS8ROWS(r6, r5, r7, r8)
+
+ punpcklbw m0, m1
+ punpcklbw m2, m3
shl r7, 3
sub r6, r7
sub r5, r7
shr r7, 3
- movq m0, [pix_tmp+0x10]
- movq m1, [pix_tmp+0x20]
- movq m2, [pix_tmp+0x30]
- movq m3, [pix_tmp+0x40]
- TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
+ TRANSPOSE8x4B_STORE m0, m2, PASS8ROWS(r6, r5, r7, r8)
RET
%endmacro
@@ -524,24 +519,28 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
call deblock_v_luma_8
ADD esp, 20
- ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
- mov r0, r0mp
- sub r0, 2
+ INIT_XMM cpuname
- movq m0, [pix_tmp+0x10]
- movq m1, [pix_tmp+0x20]
- lea r1, [r0+r4]
- movq m2, [pix_tmp+0x30]
- movq m3, [pix_tmp+0x40]
- TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4)
+ ; transpose 16x4 (only the middle 4 rows were changed by the filter)
+ mova m0, [pix_tmp+0x10]
+ mova m1, [pix_tmp+0x20]
+ mova m2, [pix_tmp+0x30]
+ mova m3, [pix_tmp+0x40]
+ mov r0, r0mp
+ punpcklbw m4, m0, m1
+ sub r0, 2
+ punpcklbw m5, m2, m3
+ lea r1, [r0+r4]
+
+ TRANSPOSE8x4B_STORE m4, m5, PASS8ROWS(r0, r1, r3, r4)
+
+ punpckhbw m0, m1
lea r0, [r0+r3*8]
+ punpckhbw m2, m3
lea r1, [r1+r3*8]
- movq m0, [pix_tmp+0x18]
- movq m1, [pix_tmp+0x28]
- movq m2, [pix_tmp+0x38]
- movq m3, [pix_tmp+0x48]
- TRANSPOSE8x4B_STORE PASS8ROWS(r0, r1, r3, r4)
+
+ TRANSPOSE8x4B_STORE m0, m2, PASS8ROWS(r0, r1, r3, r4)
RET
%endmacro ; DEBLOCK_LUMA
--
2.52.0
>From 94dc8e969e7314dcaa5f1e6fe79bf19aeaef24e5 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 16:27:33 +0100
Subject: [PATCH 11/13] avcodec/x86/h264_deblock: Avoid reload
Old benchmarks:
h264_h_loop_filter_luma_8bpp_c: 60.0 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2: 65.4 ( 0.92x)
h264_h_loop_filter_luma_8bpp_avx: 65.3 ( 0.92x)
New benchmarks:
h264_h_loop_filter_luma_8bpp_c: 60.4 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2: 62.0 ( 0.97x)
h264_h_loop_filter_luma_8bpp_avx: 61.7 ( 0.98x)
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index c81fb36494..d4c7ce73ee 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -322,8 +322,7 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
; transpose 16x4 (only the middle 4 rows were changed by the filter)
mova m0, [pix_tmp+0x10]
- mova m1, [pix_tmp+0x20]
- mova m2, [pix_tmp+0x30]
+ ; the two middle rows are still in the proper registers
mova m3, [pix_tmp+0x40]
punpckhbw m4, m0, m1
@@ -523,8 +522,7 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
; transpose 16x4 (only the middle 4 rows were changed by the filter)
mova m0, [pix_tmp+0x10]
- mova m1, [pix_tmp+0x20]
- mova m2, [pix_tmp+0x30]
+ ; the two middle rows are still in the proper registers
mova m3, [pix_tmp+0x40]
mov r0, r0mp
--
2.52.0
>From 16a2a9244ebc0c4a0d1f121332d93cf5353bda8c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Thu, 8 Jan 2026 15:20:18 +0100
Subject: [PATCH 12/13] avcodec/x86/h264_deblock: Don't sign-extend stride
Sign-extending the stride is unnecessary (and wrong) since
d5d699ab6e6f8a8290748d107416fd5c19757a1b.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h264_deblock.asm | 39 +++++++++++++++------------------
1 file changed, 18 insertions(+), 21 deletions(-)
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index d4c7ce73ee..d4a4033fc9 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -238,7 +238,7 @@ cextern pb_3
%if ARCH_X86_64
;-----------------------------------------------------------------------------
-; void ff_deblock_v_luma(uint8_t *pix, int stride, int alpha, int beta,
+; void ff_deblock_v_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
; int8_t *tc0)
;-----------------------------------------------------------------------------
%macro DEBLOCK_LUMA 0
@@ -284,15 +284,15 @@ cglobal deblock_v_luma_8, 5,5,10, pix_, stride_, alpha_, beta_, base3_
RET
;-----------------------------------------------------------------------------
-; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta,
+; void ff_deblock_h_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
; int8_t *tc0)
;-----------------------------------------------------------------------------
INIT_MMX cpuname
-cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
- movsxd r7, r1d
- lea r8, [r7+r7*2]
+cglobal deblock_h_luma_8, 5,9,8,0x60+16*WIN64
+ lea r8, [r1+r1*2]
lea r6, [r0-4]
lea r5, [r0-4+r8]
+ mov r7, r1
%if WIN64
%define pix_tmp rsp+0x30 ; shadow space + r4
%else
@@ -300,10 +300,10 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
%endif
; transpose 6x16 -> tmp space
- TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r7, r8), pix_tmp
- lea r6, [r6+r7*8]
- lea r5, [r5+r7*8]
- TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r7, r8), pix_tmp+8
+ TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r1, r8), pix_tmp
+ lea r6, [r6+r1*8]
+ lea r5, [r5+r1*8]
+ TRANSPOSE6x8_MEM PASS8ROWS(r6, r5, r1, r8), pix_tmp+8
; vertical filter
; alpha, beta, tc0 are still in r2d, r3d, r4
@@ -344,7 +344,6 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
%macro DEBLOCK_H_LUMA_MBAFF 0
cglobal deblock_h_luma_mbaff_8, 5, 9, 10, 8*16, pix_, stride_, alpha_, beta_, tc0_, base3_, stride3_
- movsxd stride_q, stride_d
dec alpha_d
dec beta_d
mov base3_q, pix_q
@@ -490,7 +489,7 @@ cglobal deblock_v_luma_8, 5,5,8,2*%1
RET
;-----------------------------------------------------------------------------
-; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta,
+; void ff_deblock_h_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
; int8_t *tc0)
;-----------------------------------------------------------------------------
INIT_MMX cpuname
@@ -687,7 +686,7 @@ DEBLOCK_LUMA 16
%endif
;-----------------------------------------------------------------------------
-; void ff_deblock_v_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
+; void ff_deblock_v_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
;-----------------------------------------------------------------------------
%if WIN64
cglobal deblock_v_luma_intra_8, 4,6,16,0x10
@@ -744,13 +743,13 @@ cglobal deblock_v_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
INIT_MMX cpuname
%if ARCH_X86_64
;-----------------------------------------------------------------------------
-; void ff_deblock_h_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
+; void ff_deblock_h_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_8, 4,9,0,0x80
- movsxd r7, r1d
- lea r8, [r7*3]
+ lea r8, [r1*3]
lea r6, [r0-4]
lea r5, [r0-4+r8]
+ mov r7, r1
%if WIN64
%define pix_tmp rsp+0x20 ; shadow space
%else
@@ -758,10 +757,10 @@ cglobal deblock_h_luma_intra_8, 4,9,0,0x80
%endif
; transpose 8x16 -> tmp space
- TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
- lea r6, [r6+r7*8]
- lea r5, [r5+r7*8]
- TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
+ TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r1, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
+ lea r6, [r6+r1*8]
+ lea r5, [r5+r1*8]
+ TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r1, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
lea r0, [pix_tmp+0x40]
mov r1, 0x10
@@ -899,7 +898,6 @@ DEBLOCK_LUMA_INTRA v
%endmacro
%macro CHROMA_V_START_XMM 1
- movsxdifnidn stride_q, stride_d
dec alpha_d
dec beta_d
mov %1, pix_q
@@ -908,7 +906,6 @@ DEBLOCK_LUMA_INTRA v
%endmacro
%macro CHROMA_H_START_XMM 2
- movsxdifnidn stride_q, stride_d
dec alpha_d
dec beta_d
lea %2, [3*stride_q]
--
2.52.0
>From 78ac51eebb725bfcc992f923e8b895d47879a0b2 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 7 Jan 2026 20:35:15 +0100
Subject: [PATCH 13/13] avcodec/h264dsp: Remove redundant h264 from H264DSPCtx
member names
These names are a remnant of dsputil, when the DSP functions of all codecs
were part of a single DSPContext.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/aarch64/h264dsp_init_aarch64.c | 72 +++----
libavcodec/arm/h264dsp_init_arm.c | 38 ++--
libavcodec/h264_loopfilter.c | 30 +--
libavcodec/h264_mb.c | 54 +++---
libavcodec/h264_mb_template.c | 28 +--
libavcodec/h264dsp.c | 78 ++++----
libavcodec/h264dsp.h | 114 +++++------
libavcodec/loongarch/h264dsp_init_loongarch.c | 86 ++++-----
libavcodec/mips/h264dsp_init_mips.c | 108 +++++------
libavcodec/ppc/h264dsp.c | 28 +--
libavcodec/riscv/h264dsp_init.c | 88 ++++-----
libavcodec/svq3.c | 8 +-
libavcodec/x86/h264dsp_init.c | 178 +++++++++---------
tests/checkasm/h264dsp.c | 62 +++---
14 files changed, 486 insertions(+), 486 deletions(-)
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
index 6bf3ecb8a1..c684574320 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -112,55 +112,55 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags) && bit_depth == 8) {
- c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
- c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
- c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
- c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
+ c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
+ c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
+ c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
+ c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
- c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
- c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
+ c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
+ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
- c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
- c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
+ c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
} else {
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
- c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon;
- c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon;
- c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon;
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
+ c->h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon;
+ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon;
+ c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon;
}
- c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
- c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
- c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
+ c->weight_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
+ c->weight_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
+ c->weight_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
- c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
- c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
- c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
+ c->biweight_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
+ c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
+ c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
- c->h264_idct_add = ff_h264_idct_add_neon;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
- c->h264_idct_add16 = ff_h264_idct_add16_neon;
- c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+ c->idct_add = ff_h264_idct_add_neon;
+ c->idct_dc_add = ff_h264_idct_dc_add_neon;
+ c->idct_add16 = ff_h264_idct_add16_neon;
+ c->idct_add16intra = ff_h264_idct_add16intra_neon;
if (chroma_format_idc <= 1)
- c->h264_idct_add8 = ff_h264_idct_add8_neon;
- c->h264_idct8_add = ff_h264_idct8_add_neon;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
- c->h264_idct8_add4 = ff_h264_idct8_add4_neon;
+ c->idct_add8 = ff_h264_idct_add8_neon;
+ c->idct8_add = ff_h264_idct8_add_neon;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_neon;
+ c->idct8_add4 = ff_h264_idct8_add4_neon;
} else if (have_neon(cpu_flags) && bit_depth == 10) {
- c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon_10;
- c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon_10;
+ c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon_10;
+ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon_10;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon_10;
- c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon_10;
- c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10;
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon_10;
+ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon_10;
+ c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10;
} else {
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon_10;
- c->h264_h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon_10;
- c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon_10;
- c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon_10;
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon_10;
+ c->h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon_10;
+ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon_10;
+ c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_intra_neon_10;
}
}
}
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 13d499fda2..050ceb90bf 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -76,32 +76,32 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
{
#if HAVE_NEON
if (bit_depth == 8) {
- c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
- c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
- c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
+ c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
+ c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
+ c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
if (chroma_format_idc <= 1)
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
else
- c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
- c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
- c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
- c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
+ c->weight_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
+ c->weight_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
+ c->weight_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
- c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
- c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
- c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
+ c->biweight_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
+ c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
+ c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
- c->h264_idct_add = ff_h264_idct_add_neon;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
- c->h264_idct_add16 = ff_h264_idct_add16_neon;
- c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+ c->idct_add = ff_h264_idct_add_neon;
+ c->idct_dc_add = ff_h264_idct_dc_add_neon;
+ c->idct_add16 = ff_h264_idct_add16_neon;
+ c->idct_add16intra = ff_h264_idct_add16intra_neon;
if (chroma_format_idc <= 1)
- c->h264_idct_add8 = ff_h264_idct_add8_neon;
- c->h264_idct8_add = ff_h264_idct8_add_neon;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
- c->h264_idct8_add4 = ff_h264_idct8_add4_neon;
+ c->idct_add8 = ff_h264_idct_add8_neon;
+ c->idct8_add = ff_h264_idct8_add_neon;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_neon;
+ c->idct8_add4 = ff_h264_idct8_add4_neon;
}
#endif // HAVE_NEON
}
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index c164a289b7..e2fc886bbf 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -113,9 +113,9 @@ static av_always_inline void filter_mb_edgev(uint8_t *pix, int stride,
tc[1] = tc0_table[index_a][bS[1]];
tc[2] = tc0_table[index_a][bS[2]];
tc[3] = tc0_table[index_a][bS[3]];
- h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
+ h->h264dsp.h_loop_filter_luma(pix, stride, alpha, beta, tc);
} else {
- h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
+ h->h264dsp.h_loop_filter_luma_intra(pix, stride, alpha, beta);
}
}
@@ -135,9 +135,9 @@ static av_always_inline void filter_mb_edgecv(uint8_t *pix, int stride,
tc[1] = tc0_table[index_a][bS[1]]+1;
tc[2] = tc0_table[index_a][bS[2]]+1;
tc[3] = tc0_table[index_a][bS[3]]+1;
- h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
+ h->h264dsp.h_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else {
- h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
+ h->h264dsp.h_loop_filter_chroma_intra(pix, stride, alpha, beta);
}
}
@@ -158,9 +158,9 @@ static av_always_inline void filter_mb_mbaff_edgev(const H264Context *h, uint8_t
tc[1] = tc0_table[index_a][bS[1*bsi]];
tc[2] = tc0_table[index_a][bS[2*bsi]];
tc[3] = tc0_table[index_a][bS[3*bsi]];
- h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
+ h->h264dsp.h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
} else {
- h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
+ h->h264dsp.h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
}
}
@@ -181,9 +181,9 @@ static av_always_inline void filter_mb_mbaff_edgecv(const H264Context *h,
tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
- h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
+ h->h264dsp.h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
} else {
- h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
+ h->h264dsp.h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
}
}
@@ -203,9 +203,9 @@ static av_always_inline void filter_mb_edgeh(uint8_t *pix, int stride,
tc[1] = tc0_table[index_a][bS[1]];
tc[2] = tc0_table[index_a][bS[2]];
tc[3] = tc0_table[index_a][bS[3]];
- h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
+ h->h264dsp.v_loop_filter_luma(pix, stride, alpha, beta, tc);
} else {
- h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
+ h->h264dsp.v_loop_filter_luma_intra(pix, stride, alpha, beta);
}
}
@@ -225,9 +225,9 @@ static av_always_inline void filter_mb_edgech(uint8_t *pix, int stride,
tc[1] = tc0_table[index_a][bS[1]]+1;
tc[2] = tc0_table[index_a][bS[2]]+1;
tc[3] = tc0_table[index_a][bS[3]]+1;
- h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
+ h->h264dsp.v_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else {
- h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
+ h->h264dsp.v_loop_filter_chroma_intra(pix, stride, alpha, beta);
}
}
@@ -368,8 +368,8 @@ static av_always_inline void h264_filter_mb_fast_internal(const H264Context *h,
int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
edges = 4 - 3*((mb_type>>3) & !(sl->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
- h->h264dsp.h264_loop_filter_strength(bS, sl->non_zero_count_cache, sl->ref_cache, sl->mv_cache,
- sl->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE(h));
+ h->h264dsp.loop_filter_strength(bS, sl->non_zero_count_cache, sl->ref_cache, sl->mv_cache,
+ sl->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE(h));
}
if( IS_INTRA(left_type) )
AV_WN64A(bS[0][0], 0x0004000400040004ULL);
@@ -419,7 +419,7 @@ void ff_h264_filter_mb_fast(const H264Context *h, H264SliceContext *sl,
unsigned int linesize, unsigned int uvlinesize)
{
av_assert2(!FRAME_MBAFF(h));
- if(!h->h264dsp.h264_loop_filter_strength || h->ps.pps->chroma_qp_diff) {
+ if (!h->h264dsp.loop_filter_strength || h->ps.pps->chroma_qp_diff) {
ff_h264_filter_mb(h, sl, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
return;
}
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 0d6562b583..67fa980de3 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -629,10 +629,10 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
if (IS_8x8DCT(mb_type)) {
if (transform_bypass) {
idct_dc_add =
- idct_add = h->h264dsp.h264_add_pixels8_clear;
+ idct_add = h->h264dsp.add_pixels8_clear;
} else {
- idct_dc_add = h->h264dsp.h264_idct8_dc_add;
- idct_add = h->h264dsp.h264_idct8_add;
+ idct_dc_add = h->h264dsp.idct8_dc_add;
+ idct_add = h->h264dsp.idct8_add;
}
for (i = 0; i < 16; i += 4) {
uint8_t *const ptr = dest_y + block_offset[i];
@@ -658,11 +658,11 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
}
} else {
if (transform_bypass) {
- idct_dc_add =
- idct_add = h->h264dsp.h264_add_pixels4_clear;
+ idct_dc_add =
+ idct_add = h->h264dsp.add_pixels4_clear;
} else {
- idct_dc_add = h->h264dsp.h264_idct_dc_add;
- idct_add = h->h264dsp.h264_idct_add;
+ idct_dc_add = h->h264dsp.idct_dc_add;
+ idct_add = h->h264dsp.idct_add;
}
for (i = 0; i < 16; i++) {
uint8_t *const ptr = dest_y + block_offset[i];
@@ -705,9 +705,9 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
if (!transform_bypass)
- h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift),
- sl->mb_luma_dc[p],
- h->ps.pps->dequant4_coeff[p][qscale][0]);
+ h->h264dsp.luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift),
+ sl->mb_luma_dc[p],
+ h->ps.pps->dequant4_coeff[p][qscale][0]);
else {
static const uint8_t dc_mapping[16] = {
0 * 16, 1 * 16, 4 * 16, 5 * 16,
@@ -749,21 +749,21 @@ static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264Sl
for (i = 0; i < 16; i++)
if (sl->non_zero_count_cache[scan8[i + p * 16]] ||
dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
- h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i],
- sl->mb + (i * 16 + p * 256 << pixel_shift),
- linesize);
+ h->h264dsp.add_pixels4_clear(dest_y + block_offset[i],
+ sl->mb + (i * 16 + p * 256 << pixel_shift),
+ linesize);
}
} else {
- h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
- sl->mb + (p * 256 << pixel_shift),
- linesize,
- sl->non_zero_count_cache + p * 5 * 8);
+ h->h264dsp.idct_add16intra(dest_y, block_offset,
+ sl->mb + (p * 256 << pixel_shift),
+ linesize,
+ sl->non_zero_count_cache + p * 5 * 8);
}
} else if (sl->cbp & 15) {
if (transform_bypass) {
const int di = IS_8x8DCT(mb_type) ? 4 : 1;
- idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear
- : h->h264dsp.h264_add_pixels4_clear;
+ idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.add_pixels8_clear
+ : h->h264dsp.add_pixels4_clear;
for (i = 0; i < 16; i += di)
if (sl->non_zero_count_cache[scan8[i + p * 16]])
idct_add(dest_y + block_offset[i],
@@ -771,15 +771,15 @@ static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264Sl
linesize);
} else {
if (IS_8x8DCT(mb_type))
- h->h264dsp.h264_idct8_add4(dest_y, block_offset,
- sl->mb + (p * 256 << pixel_shift),
- linesize,
- sl->non_zero_count_cache + p * 5 * 8);
+ h->h264dsp.idct8_add4(dest_y, block_offset,
+ sl->mb + (p * 256 << pixel_shift),
+ linesize,
+ sl->non_zero_count_cache + p * 5 * 8);
else
- h->h264dsp.h264_idct_add16(dest_y, block_offset,
- sl->mb + (p * 256 << pixel_shift),
- linesize,
- sl->non_zero_count_cache + p * 5 * 8);
+ h->h264dsp.idct_add16(dest_y, block_offset,
+ sl->mb + (p * 256 << pixel_shift),
+ linesize,
+ sl->non_zero_count_cache + p * 5 * 8);
}
}
}
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index d5ea26a6e3..ee8c81a10c 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -174,16 +174,16 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
h->h264chroma.put_h264_chroma_pixels_tab,
h->h264qpel.avg_h264_qpel_pixels_tab,
h->h264chroma.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab);
+ h->h264dsp.weight_pixels_tab,
+ h->h264dsp.biweight_pixels_tab);
} else {
FUNC(hl_motion_420)(h, sl, dest_y, dest_cb, dest_cr,
h->h264qpel.put_h264_qpel_pixels_tab,
h->h264chroma.put_h264_chroma_pixels_tab,
h->h264qpel.avg_h264_qpel_pixels_tab,
h->h264chroma.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab);
+ h->h264dsp.weight_pixels_tab,
+ h->h264dsp.biweight_pixels_tab);
}
}
@@ -206,7 +206,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
sl->mb + (16 * 16 * 2 << PIXEL_SHIFT),
uvlinesize);
} else {
- idct_add = h->h264dsp.h264_add_pixels4_clear;
+ idct_add = h->h264dsp.add_pixels4_clear;
for (j = 1; j < 3; j++) {
for (i = j * 16; i < j * 16 + 4; i++)
if (sl->non_zero_count_cache[scan8[i]] ||
@@ -234,14 +234,14 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
qp[1] = sl->chroma_qp[1];
}
if (sl->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
- h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + (16 * 16 * 1 << PIXEL_SHIFT),
- h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
+ h->h264dsp.chroma_dc_dequant_idct(sl->mb + (16 * 16 * 1 << PIXEL_SHIFT),
+ h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
if (sl->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
- h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + (16 * 16 * 2 << PIXEL_SHIFT),
- h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
- h->h264dsp.h264_idct_add8(dest, block_offset,
- sl->mb, uvlinesize,
- sl->non_zero_count_cache);
+ h->h264dsp.chroma_dc_dequant_idct(sl->mb + (16 * 16 * 2 << PIXEL_SHIFT),
+ h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
+ h->h264dsp.idct_add8(dest, block_offset,
+ sl->mb, uvlinesize,
+ sl->non_zero_count_cache);
}
}
}
@@ -341,8 +341,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(const H264Context *h, H264SliceCo
h->h264chroma.put_h264_chroma_pixels_tab,
h->h264qpel.avg_h264_qpel_pixels_tab,
h->h264chroma.avg_h264_chroma_pixels_tab,
- h->h264dsp.weight_h264_pixels_tab,
- h->h264dsp.biweight_h264_pixels_tab);
+ h->h264dsp.weight_pixels_tab,
+ h->h264dsp.biweight_pixels_tab);
}
for (p = 0; p < plane_count; p++)
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index f4c5238372..f612353596 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -70,13 +70,13 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
#define FUNC(a, depth) a ## _ ## depth ## _c
#define SET_PIXSIZE_FUNCS(depth) \
- c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+ c->luma_dc_dequant_idct = FUNC(ff_h264_luma_dc_dequant_idct, depth);\
if (chroma_format_idc <= 1)\
- c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
+ c->chroma_dc_dequant_idct = FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
else\
- c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
- c->h264_add_pixels4_clear = FUNC(ff_h264_add_pixels4, depth);\
- c->h264_add_pixels8_clear = FUNC(ff_h264_add_pixels8, depth)
+ c->chroma_dc_dequant_idct = FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
+ c->add_pixels4_clear = FUNC(ff_h264_add_pixels4, depth);\
+ c->add_pixels8_clear = FUNC(ff_h264_add_pixels8, depth)
if (bit_depth > 8 && bit_depth <= 16) {
SET_PIXSIZE_FUNCS(16);
@@ -85,52 +85,52 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
}
#define H264_DSP(depth) \
- c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
- c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
- c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
- c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
- c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\
- c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\
+ c->idct_add = FUNC(ff_h264_idct_add, depth);\
+ c->idct8_add = FUNC(ff_h264_idct8_add, depth);\
+ c->idct_dc_add = FUNC(ff_h264_idct_dc_add, depth);\
+ c->idct8_dc_add = FUNC(ff_h264_idct8_dc_add, depth);\
+ c->idct_add16 = FUNC(ff_h264_idct_add16, depth);\
+ c->idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\
if (chroma_format_idc <= 1)\
- c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\
+ c->idct_add8 = FUNC(ff_h264_idct_add8, depth);\
else\
- c->h264_idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\
- c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
+ c->idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\
+ c->idct_add16intra = FUNC(ff_h264_idct_add16intra, depth);\
\
- c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
- c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
- c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
- c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
- c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
- c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
- c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
- c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
+ c->weight_pixels_tab[0] = FUNC(weight_h264_pixels16, depth);\
+ c->weight_pixels_tab[1] = FUNC(weight_h264_pixels8, depth);\
+ c->weight_pixels_tab[2] = FUNC(weight_h264_pixels4, depth);\
+ c->weight_pixels_tab[3] = FUNC(weight_h264_pixels2, depth);\
+ c->biweight_pixels_tab[0] = FUNC(biweight_h264_pixels16, depth);\
+ c->biweight_pixels_tab[1] = FUNC(biweight_h264_pixels8, depth);\
+ c->biweight_pixels_tab[2] = FUNC(biweight_h264_pixels4, depth);\
+ c->biweight_pixels_tab[3] = FUNC(biweight_h264_pixels2, depth);\
\
- c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
- c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
- c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
- c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
- c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
- c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
- c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
+ c->v_loop_filter_luma = FUNC(h264_v_loop_filter_luma, depth);\
+ c->h_loop_filter_luma = FUNC(h264_h_loop_filter_luma, depth);\
+ c->h_loop_filter_luma_mbaff = FUNC(h264_h_loop_filter_luma_mbaff, depth);\
+ c->v_loop_filter_luma_intra = FUNC(h264_v_loop_filter_luma_intra, depth);\
+ c->h_loop_filter_luma_intra = FUNC(h264_h_loop_filter_luma_intra, depth);\
+ c->h_loop_filter_luma_mbaff_intra = FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
+ c->v_loop_filter_chroma = FUNC(h264_v_loop_filter_chroma, depth);\
if (chroma_format_idc <= 1)\
- c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
+ c->h_loop_filter_chroma = FUNC(h264_h_loop_filter_chroma, depth);\
else\
- c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\
+ c->h_loop_filter_chroma = FUNC(h264_h_loop_filter_chroma422, depth);\
if (chroma_format_idc <= 1)\
- c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
+ c->h_loop_filter_chroma_mbaff = FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
else\
- c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
- c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
+ c->h_loop_filter_chroma_mbaff = FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
+ c->v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
if (chroma_format_idc <= 1)\
- c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
+ c->h_loop_filter_chroma_intra = FUNC(h264_h_loop_filter_chroma_intra, depth);\
else\
- c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\
+ c->h_loop_filter_chroma_intra = FUNC(h264_h_loop_filter_chroma422_intra, depth);\
if (chroma_format_idc <= 1)\
- c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
+ c->h_loop_filter_chroma_mbaff_intra = FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
else\
- c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
- c->h264_loop_filter_strength= NULL;
+ c->h_loop_filter_chroma_mbaff_intra = FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
+ c->loop_filter_strength = NULL;
switch (bit_depth) {
case 9:
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 4a9cb1568d..f40d324a4e 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -41,71 +41,71 @@ typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src,
*/
typedef struct H264DSPContext {
/* weighted MC */
- h264_weight_func weight_h264_pixels_tab[4];
- h264_biweight_func biweight_h264_pixels_tab[4];
+ h264_weight_func weight_pixels_tab[4];
+ h264_biweight_func biweight_pixels_tab[4];
/* loop filter */
- void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
- int alpha, int beta, int8_t *tc0);
- void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride,
- int alpha, int beta, int8_t *tc0);
- void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
- int alpha, int beta, int8_t *tc0);
+ void (*v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
+ int alpha, int beta, int8_t *tc0);
/* v/h_loop_filter_luma_intra: align 16 */
- void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
- int alpha, int beta);
- void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
- int alpha, int beta);
- void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
- ptrdiff_t stride, int alpha, int beta);
- void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t stride,
- int alpha, int beta, int8_t *tc0);
- void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t stride,
- int alpha, int beta, int8_t *tc0);
- void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
- ptrdiff_t stride, int alpha, int beta,
- int8_t *tc0);
- void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
- ptrdiff_t stride, int alpha, int beta);
- void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
- ptrdiff_t stride, int alpha, int beta);
- void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
- ptrdiff_t stride, int alpha, int beta);
- // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
- void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
- int8_t ref[2][40], int16_t mv[2][40][2],
- int bidir, int edges, int step,
- int mask_mv0, int mask_mv1, int field);
+ void (*v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
+ int alpha, int beta);
+ void (*h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
+ int alpha, int beta);
+ void (*h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
+ ptrdiff_t stride, int alpha, int beta);
+ void (*v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
+ ptrdiff_t stride, int alpha, int beta,
+ int8_t *tc0);
+ void (*v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+ ptrdiff_t stride, int alpha, int beta);
+ void (*h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+ ptrdiff_t stride, int alpha, int beta);
+ void (*h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
+ ptrdiff_t stride, int alpha, int beta);
+ // loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
+ void (*loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
+ int8_t ref[2][40], int16_t mv[2][40][2],
+ int bidir, int edges, int step,
+ int mask_mv0, int mask_mv1, int field);
/* IDCT */
- void (*h264_idct_add)(uint8_t *dst /*align 4*/,
- int16_t *block /*align 16*/, int stride);
- void (*h264_idct8_add)(uint8_t *dst /*align 8*/,
- int16_t *block /*align 16*/, int stride);
- void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/,
- int16_t *block /*align 16*/, int stride);
- void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/,
- int16_t *block /*align 16*/, int stride);
+ void (*idct_add)(uint8_t *dst /*align 4*/,
+ int16_t *block /*align 16*/, int stride);
+ void (*idct8_add)(uint8_t *dst /*align 8*/,
+ int16_t *block /*align 16*/, int stride);
+ void (*idct_dc_add)(uint8_t *dst /*align 4*/,
+ int16_t *block /*align 16*/, int stride);
+ void (*idct8_dc_add)(uint8_t *dst /*align 8*/,
+ int16_t *block /*align 16*/, int stride);
- void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
- int16_t *block /*align 16*/, int stride,
- const uint8_t nnzc[5 * 8]);
- void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
- int16_t *block /*align 16*/, int stride,
- const uint8_t nnzc[5 * 8]);
- void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
- int16_t *block /*align 16*/, int stride,
- const uint8_t nnzc[15 * 8]);
- void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
- int16_t *block /*align 16*/,
- int stride, const uint8_t nnzc[5 * 8]);
- void (*h264_luma_dc_dequant_idct)(int16_t *output,
- int16_t *input /*align 16*/, int qmul);
- void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
+ void (*idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/, int stride,
+ const uint8_t nnzc[5 * 8]);
+ void (*idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/, int stride,
+ const uint8_t nnzc[5 * 8]);
+ void (*idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/, int stride,
+ const uint8_t nnzc[15 * 8]);
+ void (*idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/,
+ int stride, const uint8_t nnzc[5 * 8]);
+ void (*luma_dc_dequant_idct)(int16_t *output,
+ int16_t *input /*align 16*/, int qmul);
+ void (*chroma_dc_dequant_idct)(int16_t *block, int qmul);
/* bypass-transform */
- void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
- void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+ void (*add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
+ void (*add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
/**
* Search buf from the start for up to size bytes. Return the index
diff --git a/libavcodec/loongarch/h264dsp_init_loongarch.c b/libavcodec/loongarch/h264dsp_init_loongarch.c
index b70fe696d2..745915d5c6 100644
--- a/libavcodec/loongarch/h264dsp_init_loongarch.c
+++ b/libavcodec/loongarch/h264dsp_init_loongarch.c
@@ -30,67 +30,67 @@ av_cold void ff_h264dsp_init_loongarch(H264DSPContext *c, const int bit_depth,
if (have_lsx(cpu_flags)) {
if (chroma_format_idc <= 1)
- c->h264_loop_filter_strength = ff_h264_loop_filter_strength_lsx;
+ c->loop_filter_strength = ff_h264_loop_filter_strength_lsx;
if (bit_depth == 8) {
- c->h264_idct_add = ff_h264_idct_add_8_lsx;
- c->h264_idct8_add = ff_h264_idct8_add_8_lsx;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_8_lsx;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
+ c->idct_add = ff_h264_idct_add_8_lsx;
+ c->idct8_add = ff_h264_idct8_add_8_lsx;
+ c->idct_dc_add = ff_h264_idct_dc_add_8_lsx;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
if (chroma_format_idc <= 1) {
- c->h264_idct_add8 = ff_h264_idct_add8_8_lsx;
- c->h264_h_loop_filter_chroma = ff_h264_h_lpf_chroma_8_lsx;
- c->h264_h_loop_filter_chroma_intra = ff_h264_h_lpf_chroma_intra_8_lsx;
+ c->idct_add8 = ff_h264_idct_add8_8_lsx;
+ c->h_loop_filter_chroma = ff_h264_h_lpf_chroma_8_lsx;
+ c->h_loop_filter_chroma_intra = ff_h264_h_lpf_chroma_intra_8_lsx;
} else
- c->h264_idct_add8 = ff_h264_idct_add8_422_8_lsx;
+ c->idct_add8 = ff_h264_idct_add8_422_8_lsx;
- c->h264_idct_add16 = ff_h264_idct_add16_8_lsx;
- c->h264_idct8_add4 = ff_h264_idct8_add4_8_lsx;
- c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_lsx;
- c->h264_idct_add16intra = ff_h264_idct_add16_intra_8_lsx;
+ c->idct_add16 = ff_h264_idct_add16_8_lsx;
+ c->idct8_add4 = ff_h264_idct8_add4_8_lsx;
+ c->luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_lsx;
+ c->idct_add16intra = ff_h264_idct_add16_intra_8_lsx;
- c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_lsx;
- c->h264_add_pixels8_clear = ff_h264_add_pixels8_8_lsx;
- c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_8_lsx;
- c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_8_lsx;
- c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lsx;
- c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lsx;
- c->h264_v_loop_filter_chroma = ff_h264_v_lpf_chroma_8_lsx;
+ c->add_pixels4_clear = ff_h264_add_pixels4_8_lsx;
+ c->add_pixels8_clear = ff_h264_add_pixels8_8_lsx;
+ c->v_loop_filter_luma = ff_h264_v_lpf_luma_8_lsx;
+ c->h_loop_filter_luma = ff_h264_h_lpf_luma_8_lsx;
+ c->v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lsx;
+ c->h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lsx;
+ c->v_loop_filter_chroma = ff_h264_v_lpf_chroma_8_lsx;
- c->h264_v_loop_filter_chroma_intra = ff_h264_v_lpf_chroma_intra_8_lsx;
+ c->v_loop_filter_chroma_intra = ff_h264_v_lpf_chroma_intra_8_lsx;
- c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_lsx;
- c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_lsx;
- c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_lsx;
- c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_lsx;
- c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_lsx;
- c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels4_8_lsx;
- c->h264_idct8_add = ff_h264_idct8_add_8_lsx;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
+ c->biweight_pixels_tab[0] = ff_biweight_h264_pixels16_8_lsx;
+ c->biweight_pixels_tab[1] = ff_biweight_h264_pixels8_8_lsx;
+ c->biweight_pixels_tab[2] = ff_biweight_h264_pixels4_8_lsx;
+ c->weight_pixels_tab[0] = ff_weight_h264_pixels16_8_lsx;
+ c->weight_pixels_tab[1] = ff_weight_h264_pixels8_8_lsx;
+ c->weight_pixels_tab[2] = ff_weight_h264_pixels4_8_lsx;
+ c->idct8_add = ff_h264_idct8_add_8_lsx;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
}
}
#if HAVE_LASX
if (have_lasx(cpu_flags)) {
if (chroma_format_idc <= 1)
- c->h264_loop_filter_strength = ff_h264_loop_filter_strength_lasx;
+ c->loop_filter_strength = ff_h264_loop_filter_strength_lasx;
if (bit_depth == 8) {
- c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_lasx;
- c->h264_add_pixels8_clear = ff_h264_add_pixels8_8_lasx;
- c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_8_lasx;
- c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_8_lasx;
- c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lasx;
- c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lasx;
+ c->add_pixels4_clear = ff_h264_add_pixels4_8_lasx;
+ c->add_pixels8_clear = ff_h264_add_pixels8_8_lasx;
+ c->v_loop_filter_luma = ff_h264_v_lpf_luma_8_lasx;
+ c->h_loop_filter_luma = ff_h264_h_lpf_luma_8_lasx;
+ c->v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lasx;
+ c->h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lasx;
/* Weighted MC */
- c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_lasx;
- c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_lasx;
+ c->weight_pixels_tab[0] = ff_weight_h264_pixels16_8_lasx;
+ c->weight_pixels_tab[1] = ff_weight_h264_pixels8_8_lasx;
- c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_lasx;
- c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_lasx;
+ c->biweight_pixels_tab[0] = ff_biweight_h264_pixels16_8_lasx;
+ c->biweight_pixels_tab[1] = ff_biweight_h264_pixels8_8_lasx;
- c->h264_idct8_add = ff_h264_idct8_add_8_lasx;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lasx;
- c->h264_idct8_add4 = ff_h264_idct8_add4_8_lasx;
+ c->idct8_add = ff_h264_idct8_add_8_lasx;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_8_lasx;
+ c->idct8_add4 = ff_h264_idct8_add4_8_lasx;
}
}
#endif // #if HAVE_LASX
diff --git a/libavcodec/mips/h264dsp_init_mips.c b/libavcodec/mips/h264dsp_init_mips.c
index 72f42895e8..4d8c3a7a59 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -30,101 +30,101 @@ av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
if (have_mmi(cpu_flags)) {
if (bit_depth == 8) {
- c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
- c->h264_idct_add = ff_h264_idct_add_8_mmi;
- c->h264_idct8_add = ff_h264_idct8_add_8_mmi;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmi;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
- c->h264_idct_add16 = ff_h264_idct_add16_8_mmi;
- c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmi;
- c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmi;
+ c->add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
+ c->idct_add = ff_h264_idct_add_8_mmi;
+ c->idct8_add = ff_h264_idct8_add_8_mmi;
+ c->idct_dc_add = ff_h264_idct_dc_add_8_mmi;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
+ c->idct_add16 = ff_h264_idct_add16_8_mmi;
+ c->idct_add16intra = ff_h264_idct_add16intra_8_mmi;
+ c->idct8_add4 = ff_h264_idct8_add4_8_mmi;
if (chroma_format_idc <= 1)
- c->h264_idct_add8 = ff_h264_idct_add8_8_mmi;
+ c->idct_add8 = ff_h264_idct_add8_8_mmi;
else
- c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmi;
+ c->idct_add8 = ff_h264_idct_add8_422_8_mmi;
- c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
+ c->luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
- c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
- c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
- c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
+ c->weight_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
+ c->weight_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
+ c->weight_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
- c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
- c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
- c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
+ c->biweight_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
+ c->biweight_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
+ c->biweight_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
- c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmi;
- c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmi;
+ c->v_loop_filter_chroma = ff_deblock_v_chroma_8_mmi;
+ c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmi;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma =
+ c->h_loop_filter_chroma =
ff_deblock_h_chroma_8_mmi;
- c->h264_h_loop_filter_chroma_intra =
+ c->h_loop_filter_chroma_intra =
ff_deblock_h_chroma_intra_8_mmi;
}
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
+ c->v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
+ c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
+ c->h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
+ c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
}
}
if (have_msa(cpu_flags)) {
if (chroma_format_idc <= 1)
- c->h264_loop_filter_strength = ff_h264_loop_filter_strength_msa;
+ c->loop_filter_strength = ff_h264_loop_filter_strength_msa;
if (bit_depth == 8) {
- c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_inter_msa;
- c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_inter_msa;
- c->h264_h_loop_filter_luma_mbaff =
+ c->v_loop_filter_luma = ff_h264_v_lpf_luma_inter_msa;
+ c->h_loop_filter_luma = ff_h264_h_lpf_luma_inter_msa;
+ c->h_loop_filter_luma_mbaff =
ff_h264_h_loop_filter_luma_mbaff_msa;
- c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_msa;
- c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_msa;
- c->h264_h_loop_filter_luma_mbaff_intra =
+ c->v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_msa;
+ c->h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_msa;
+ c->h_loop_filter_luma_mbaff_intra =
ff_h264_h_loop_filter_luma_mbaff_intra_msa;
- c->h264_v_loop_filter_chroma = ff_h264_v_lpf_chroma_inter_msa;
+ c->v_loop_filter_chroma = ff_h264_v_lpf_chroma_inter_msa;
if (chroma_format_idc <= 1)
- c->h264_h_loop_filter_chroma = ff_h264_h_lpf_chroma_inter_msa;
+ c->h_loop_filter_chroma = ff_h264_h_lpf_chroma_inter_msa;
else
- c->h264_h_loop_filter_chroma =
+ c->h_loop_filter_chroma =
ff_h264_h_loop_filter_chroma422_msa;
if (chroma_format_idc > 1)
- c->h264_h_loop_filter_chroma_mbaff =
+ c->h_loop_filter_chroma_mbaff =
ff_h264_h_loop_filter_chroma422_mbaff_msa;
- c->h264_v_loop_filter_chroma_intra =
+ c->v_loop_filter_chroma_intra =
ff_h264_v_lpf_chroma_intra_msa;
if (chroma_format_idc <= 1)
- c->h264_h_loop_filter_chroma_intra =
+ c->h_loop_filter_chroma_intra =
ff_h264_h_lpf_chroma_intra_msa;
/* Weighted MC */
- c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_msa;
- c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_msa;
- c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels4_8_msa;
+ c->weight_pixels_tab[0] = ff_weight_h264_pixels16_8_msa;
+ c->weight_pixels_tab[1] = ff_weight_h264_pixels8_8_msa;
+ c->weight_pixels_tab[2] = ff_weight_h264_pixels4_8_msa;
- c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa;
- c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa;
- c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa;
+ c->biweight_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa;
+ c->biweight_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa;
+ c->biweight_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa;
- c->h264_idct_add = ff_h264_idct_add_msa;
- c->h264_idct8_add = ff_h264_idct8_addblk_msa;
- c->h264_idct_dc_add = ff_h264_idct4x4_addblk_dc_msa;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_addblk_msa;
- c->h264_idct_add16 = ff_h264_idct_add16_msa;
- c->h264_idct8_add4 = ff_h264_idct8_add4_msa;
+ c->idct_add = ff_h264_idct_add_msa;
+ c->idct8_add = ff_h264_idct8_addblk_msa;
+ c->idct_dc_add = ff_h264_idct4x4_addblk_dc_msa;
+ c->idct8_dc_add = ff_h264_idct8_dc_addblk_msa;
+ c->idct_add16 = ff_h264_idct_add16_msa;
+ c->idct8_add4 = ff_h264_idct8_add4_msa;
if (chroma_format_idc <= 1)
- c->h264_idct_add8 = ff_h264_idct_add8_msa;
+ c->idct_add8 = ff_h264_idct_add8_msa;
else
- c->h264_idct_add8 = ff_h264_idct_add8_422_msa;
+ c->idct_add8 = ff_h264_idct_add8_422_msa;
- c->h264_idct_add16intra = ff_h264_idct_add16_intra_msa;
- c->h264_luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa;
+ c->idct_add16intra = ff_h264_idct_add16_intra_msa;
+ c->luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa;
}
}
}
diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index 0650768d7b..22d3db64a1 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -793,22 +793,22 @@ av_cold void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
return;
if (bit_depth == 8) {
- c->h264_idct_add = h264_idct_add_altivec;
+ c->idct_add = h264_idct_add_altivec;
if (chroma_format_idc <= 1)
- c->h264_idct_add8 = h264_idct_add8_altivec;
- c->h264_idct_add16 = h264_idct_add16_altivec;
- c->h264_idct_add16intra = h264_idct_add16intra_altivec;
- c->h264_idct_dc_add= h264_idct_dc_add_altivec;
- c->h264_idct8_dc_add = h264_idct8_dc_add_altivec;
- c->h264_idct8_add = h264_idct8_add_altivec;
- c->h264_idct8_add4 = h264_idct8_add4_altivec;
- c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
- c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
+ c->idct_add8 = h264_idct_add8_altivec;
+ c->idct_add16 = h264_idct_add16_altivec;
+ c->idct_add16intra = h264_idct_add16intra_altivec;
+ c->idct_dc_add = h264_idct_dc_add_altivec;
+ c->idct8_dc_add = h264_idct8_dc_add_altivec;
+ c->idct8_add = h264_idct8_add_altivec;
+ c->idct8_add4 = h264_idct8_add4_altivec;
+ c->v_loop_filter_luma = h264_v_loop_filter_luma_altivec;
+ c->h_loop_filter_luma = h264_h_loop_filter_luma_altivec;
- c->weight_h264_pixels_tab[0] = weight_h264_pixels16_altivec;
- c->weight_h264_pixels_tab[1] = weight_h264_pixels8_altivec;
- c->biweight_h264_pixels_tab[0] = biweight_h264_pixels16_altivec;
- c->biweight_h264_pixels_tab[1] = biweight_h264_pixels8_altivec;
+ c->weight_pixels_tab[0] = weight_h264_pixels16_altivec;
+ c->weight_pixels_tab[1] = weight_h264_pixels8_altivec;
+ c->biweight_pixels_tab[0] = biweight_h264_pixels16_altivec;
+ c->biweight_pixels_tab[1] = biweight_h264_pixels8_altivec;
}
#endif /* HAVE_ALTIVEC */
}
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 06cb3c59de..5efec9eb5e 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -113,106 +113,106 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
if (bit_depth == 8) {
if (zvl128b) {
if (flags & AV_CPU_FLAG_RVB)
- dsp->weight_h264_pixels_tab[0] =
+ dsp->weight_pixels_tab[0] =
ff_h264_weight_funcs_8_rvv[0].weight;
- dsp->biweight_h264_pixels_tab[0] =
+ dsp->biweight_pixels_tab[0] =
ff_h264_weight_funcs_8_rvv[0].biweight;
}
if (flags & AV_CPU_FLAG_RVV_I64) {
- dsp->weight_h264_pixels_tab[1] =
+ dsp->weight_pixels_tab[1] =
ff_h264_weight_funcs_8_rvv[1].weight;
- dsp->biweight_h264_pixels_tab[1] =
+ dsp->biweight_pixels_tab[1] =
ff_h264_weight_funcs_8_rvv[1].biweight;
}
- dsp->weight_h264_pixels_tab[2] =
+ dsp->weight_pixels_tab[2] =
ff_h264_weight_funcs_8_rvv[2].weight;
- dsp->biweight_h264_pixels_tab[2] =
+ dsp->biweight_pixels_tab[2] =
ff_h264_weight_funcs_8_rvv[2].biweight;
- dsp->weight_h264_pixels_tab[3] =
+ dsp->weight_pixels_tab[3] =
ff_h264_weight_funcs_8_rvv[3].weight;
- dsp->biweight_h264_pixels_tab[3] =
+ dsp->biweight_pixels_tab[3] =
ff_h264_weight_funcs_8_rvv[3].biweight;
}
if (bit_depth == 8 && zvl128b) {
- dsp->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
- dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
- dsp->h264_h_loop_filter_luma_mbaff =
+ dsp->v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
+ dsp->h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
+ dsp->h_loop_filter_luma_mbaff =
ff_h264_h_loop_filter_luma_mbaff_8_rvv;
- dsp->h264_v_loop_filter_luma_intra =
+ dsp->v_loop_filter_luma_intra =
ff_h264_v_loop_filter_luma_intra_8_rvv;
- dsp->h264_h_loop_filter_luma_intra =
+ dsp->h_loop_filter_luma_intra =
ff_h264_h_loop_filter_luma_intra_8_rvv;
- dsp->h264_h_loop_filter_luma_mbaff_intra =
+ dsp->h_loop_filter_luma_mbaff_intra =
ff_h264_h_loop_filter_luma_mbaff_intra_8_rvv;
- dsp->h264_v_loop_filter_chroma =
+ dsp->v_loop_filter_chroma =
ff_h264_v_loop_filter_chroma_8_rvv;
- dsp->h264_v_loop_filter_chroma_intra =
+ dsp->v_loop_filter_chroma_intra =
ff_h264_v_loop_filter_chroma_intra_8_rvv;
if (chroma_format_idc <= 1) {
- dsp->h264_h_loop_filter_chroma =
+ dsp->h_loop_filter_chroma =
ff_h264_h_loop_filter_chroma_8_rvv;
- dsp->h264_h_loop_filter_chroma_mbaff =
+ dsp->h_loop_filter_chroma_mbaff =
ff_h264_h_loop_filter_chroma_mbaff_8_rvv;
- dsp->h264_h_loop_filter_chroma_intra =
+ dsp->h_loop_filter_chroma_intra =
ff_h264_h_loop_filter_chroma_intra_8_rvv;
- dsp->h264_h_loop_filter_chroma_mbaff_intra =
+ dsp->h_loop_filter_chroma_mbaff_intra =
ff_h264_h_loop_filter_chroma_mbaff_intra_8_rvv;
}
- dsp->h264_idct_add = ff_h264_idct_add_8_rvv;
- dsp->h264_idct8_add = ff_h264_idct8_add_8_rvv;
+ dsp->idct_add = ff_h264_idct_add_8_rvv;
+ dsp->idct8_add = ff_h264_idct8_add_8_rvv;
if (flags & AV_CPU_FLAG_RVB) {
- dsp->h264_idct_dc_add = ff_h264_idct4_dc_add_8_rvv;
- dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
- dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
+ dsp->idct_dc_add = ff_h264_idct4_dc_add_8_rvv;
+ dsp->idct_add16 = ff_h264_idct_add16_8_rvv;
+ dsp->idct_add16intra = ff_h264_idct_add16intra_8_rvv;
# if __riscv_xlen == 64
- dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
+ dsp->idct8_add4 = ff_h264_idct8_add4_8_rvv;
if (chroma_format_idc <= 1)
- dsp->h264_idct_add8 = ff_h264_idct4_add8_8_rvv;
+ dsp->idct_add8 = ff_h264_idct4_add8_8_rvv;
else
- dsp->h264_idct_add8 = ff_h264_idct4_add8_422_8_rvv;
+ dsp->idct_add8 = ff_h264_idct4_add8_422_8_rvv;
# endif
}
- dsp->h264_luma_dc_dequant_idct =
+ dsp->luma_dc_dequant_idct =
ff_h264_luma_dc_dequant_idct_8_rvv;
if (flags & AV_CPU_FLAG_RVV_I64) {
- dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+ dsp->add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
if (flags & AV_CPU_FLAG_RVB)
- dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_rvv;
+ dsp->idct8_dc_add = ff_h264_idct8_dc_add_8_rvv;
}
- dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
+ dsp->add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
}
#define IDCT_DEPTH(depth) \
if (bit_depth == depth) { \
if (zvl128b) { \
- dsp->h264_idct_add = ff_h264_idct_add_##depth##_rvv; \
- dsp->h264_luma_dc_dequant_idct = \
+ dsp->idct_add = ff_h264_idct_add_##depth##_rvv; \
+ dsp->luma_dc_dequant_idct = \
ff_h264_luma_dc_dequant_idct_9_rvv; \
} \
if (flags & AV_CPU_FLAG_RVB) \
- dsp->h264_idct8_add = ff_h264_idct8_add_##depth##_rvv; \
+ dsp->idct8_add = ff_h264_idct8_add_##depth##_rvv; \
if (zvl128b && (flags & AV_CPU_FLAG_RVB)) { \
- dsp->h264_idct_dc_add = ff_h264_idct4_dc_add_##depth##_rvv; \
- dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_##depth##_rvv; \
- dsp->h264_idct_add16 = ff_h264_idct_add16_##depth##_rvv; \
- dsp->h264_idct_add16intra = \
+ dsp->idct_dc_add = ff_h264_idct4_dc_add_##depth##_rvv; \
+ dsp->idct8_dc_add = ff_h264_idct8_dc_add_##depth##_rvv; \
+ dsp->idct_add16 = ff_h264_idct_add16_##depth##_rvv; \
+ dsp->idct_add16intra = \
ff_h264_idct_add16intra_##depth##_rvv; \
if (__riscv_xlen == 64) { \
if (chroma_format_idc <= 1) \
- dsp->h264_idct_add8 = \
+ dsp->idct_add8 = \
ff_h264_idct4_add8_##depth##_rvv; \
else \
- dsp->h264_idct_add8 = \
+ dsp->idct_add8 = \
ff_h264_idct4_add8_422_##depth##_rvv; \
} \
} \
if (__riscv_xlen == 64 && (flags & AV_CPU_FLAG_RVB)) \
- dsp->h264_idct8_add4 = ff_h264_idct8_add4_##depth##_rvv; \
+ dsp->idct8_add4 = ff_h264_idct8_add4_##depth##_rvv; \
}
IDCT_DEPTH(9)
@@ -221,9 +221,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
IDCT_DEPTH(14)
if (bit_depth > 8 && zvl128b) {
- dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_16_rvv;
+ dsp->add_pixels8_clear = ff_h264_add_pixels8_16_rvv;
if (flags & AV_CPU_FLAG_RVV_I64)
- dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+ dsp->add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
}
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index dfcfce77d3..296e81f322 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -696,10 +696,10 @@ static void hl_decode_mb(SVQ3Context *s)
if (s->cbp & 0x30) {
uint8_t *dest[2] = { dest_cb, dest_cr };
- s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
- s->dequant4_coeff[4][0]);
- s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
- s->dequant4_coeff[4][0]);
+ s->h264dsp.chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
+ s->dequant4_coeff[4][0]);
+ s->h264dsp.chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
+ s->dequant4_coeff[4][0]);
for (j = 1; j < 3; j++) {
for (i = j * 16; i < j * 16 + 4; i++)
if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 1ee1ee4367..add1344022 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -188,163 +188,163 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMXEXT(cpu_flags) && chroma_format_idc <= 1)
- c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
+ c->loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
if (bit_depth == 8) {
if (EXTERNAL_MMX(cpu_flags)) {
if (chroma_format_idc <= 1) {
} else {
- c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx;
+ c->idct_add8 = ff_h264_idct_add8_422_8_mmx;
}
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
- c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;
+ c->weight_pixels_tab[2] = ff_h264_weight_4_mmxext;
- c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;
+ c->biweight_pixels_tab[2] = ff_h264_biweight_4_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
- c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
+ c->idct8_add = ff_h264_idct8_add_8_sse2;
- c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
- c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
+ c->idct_add16 = ff_h264_idct_add16_8_sse2;
+ c->idct8_add4 = ff_h264_idct8_add4_8_sse2;
if (chroma_format_idc <= 1)
- c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
- c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
- c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
+ c->idct_add8 = ff_h264_idct_add8_8_sse2;
+ c->idct_add16intra = ff_h264_idct_add16intra_8_sse2;
+ c->luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
- c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
- c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
+ c->weight_pixels_tab[0] = ff_h264_weight_16_sse2;
+ c->weight_pixels_tab[1] = ff_h264_weight_8_sse2;
- c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
- c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
+ c->biweight_pixels_tab[0] = ff_h264_biweight_16_sse2;
+ c->biweight_pixels_tab[1] = ff_h264_biweight_8_sse2;
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
+ c->v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
+ c->h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
+ c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
+ c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#if ARCH_X86_64
- c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
+ c->h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
#endif
- c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_sse2;
- c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
+ c->v_loop_filter_chroma = ff_deblock_v_chroma_8_sse2;
+ c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_sse2;
- c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma_8_sse2;
+ c->h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
} else {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2;
- c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma422_8_sse2;
+ c->h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
}
- c->h264_idct_add = ff_h264_idct_add_8_sse2;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_8_sse2;
+ c->idct_add = ff_h264_idct_add_8_sse2;
+ c->idct_dc_add = ff_h264_idct_dc_add_8_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
- c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
- c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
+ c->biweight_pixels_tab[0] = ff_h264_biweight_16_ssse3;
+ c->biweight_pixels_tab[1] = ff_h264_biweight_8_ssse3;
}
if (EXTERNAL_AVX(cpu_flags)) {
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
+ c->v_loop_filter_luma = ff_deblock_v_luma_8_avx;
+ c->h_loop_filter_luma = ff_deblock_h_luma_8_avx;
+ c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
+ c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#if ARCH_X86_64
- c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
+ c->h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
#endif
- c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
- c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx;
+ c->v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
+ c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_avx;
- c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma_8_avx;
+ c->h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx;
} else {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_avx;
- c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma422_8_avx;
+ c->h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx;
}
- c->h264_idct_add = ff_h264_idct_add_8_avx;
- c->h264_idct_dc_add = ff_h264_idct_dc_add_8_avx;
+ c->idct_add = ff_h264_idct_add_8_avx;
+ c->idct_dc_add = ff_h264_idct_dc_add_8_avx;
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(cpu_flags)) {
- c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
+ c->idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) {
- c->h264_idct_add = ff_h264_idct_add_10_sse2;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
+ c->idct_add = ff_h264_idct_add_10_sse2;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
- c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
+ c->idct_add16 = ff_h264_idct_add16_10_sse2;
if (chroma_format_idc <= 1) {
- c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
+ c->idct_add8 = ff_h264_idct_add8_10_sse2;
} else {
- c->h264_idct_add8 = ff_h264_idct_add8_422_10_sse2;
+ c->idct_add8 = ff_h264_idct_add8_422_10_sse2;
}
- c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
+ c->idct_add16intra = ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
- c->h264_idct8_add = ff_h264_idct8_add_10_sse2;
- c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
+ c->idct8_add = ff_h264_idct8_add_10_sse2;
+ c->idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif /* HAVE_ALIGNED_STACK */
- c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
- c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
- c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
+ c->weight_pixels_tab[0] = ff_h264_weight_16_10_sse2;
+ c->weight_pixels_tab[1] = ff_h264_weight_8_10_sse2;
+ c->weight_pixels_tab[2] = ff_h264_weight_4_10_sse2;
- c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
- c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
- c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
+ c->biweight_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
+ c->biweight_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
+ c->biweight_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
- c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2;
- c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
+ c->v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2;
+ c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
} else {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
}
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
+ c->v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
+ c->h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
+ c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
+ c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
}
if (EXTERNAL_SSE4(cpu_flags)) {
- c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
- c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
- c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
+ c->weight_pixels_tab[0] = ff_h264_weight_16_10_sse4;
+ c->weight_pixels_tab[1] = ff_h264_weight_8_10_sse4;
+ c->weight_pixels_tab[2] = ff_h264_weight_4_10_sse4;
- c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
- c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
- c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
+ c->biweight_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
+ c->biweight_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
+ c->biweight_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
}
if (EXTERNAL_AVX(cpu_flags)) {
- c->h264_idct_dc_add =
- c->h264_idct_add = ff_h264_idct_add_10_avx;
- c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
+ c->idct_dc_add =
+ c->idct_add = ff_h264_idct_add_10_avx;
+ c->idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
- c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
+ c->idct_add16 = ff_h264_idct_add16_10_avx;
if (chroma_format_idc <= 1) {
- c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
+ c->idct_add8 = ff_h264_idct_add8_10_avx;
} else {
- c->h264_idct_add8 = ff_h264_idct_add8_422_10_avx;
+ c->idct_add8 = ff_h264_idct_add8_422_10_avx;
}
- c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
+ c->idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
- c->h264_idct8_add = ff_h264_idct8_add_10_avx;
- c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
+ c->idct8_add = ff_h264_idct8_add_10_avx;
+ c->idct8_add4 = ff_h264_idct8_add4_10_avx;
#endif /* HAVE_ALIGNED_STACK */
- c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx;
- c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
+ c->v_loop_filter_chroma = ff_deblock_v_chroma_10_avx;
+ c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
if (chroma_format_idc <= 1) {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
} else {
- c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
+ c->h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
}
- c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
- c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
- c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
- c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
+ c->v_loop_filter_luma = ff_deblock_v_luma_10_avx;
+ c->h_loop_filter_luma = ff_deblock_h_luma_10_avx;
+ c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
+ c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
}
}
}
diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index 0bf01e072e..5a5a553901 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -194,8 +194,8 @@ static void check_idct(void)
for (sz = 4; sz <= 8; sz += 4) {
void (*idct)(uint8_t *, int16_t *, int) = NULL;
const char fmts[3][28] = {
- "h264_idct%d_add_%dbpp", "h264_idct%d_dc_add_%dbpp",
- "h264_add_pixels%d_%dbpp",
+ "idct%d_add_%dbpp", "idct%d_dc_add_%dbpp",
+ "add_pixels%d_%dbpp",
};
randomize_buffers(i);
@@ -206,12 +206,12 @@ static void check_idct(void)
dct8x8(coef, bit_depth);
switch ((sz << 2) | dc) {
- case (4 << 2) | 0: idct = h.h264_idct_add; break;
- case (4 << 2) | 1: idct = h.h264_idct_dc_add; break;
- case (4 << 2) | 2: idct = h.h264_add_pixels4_clear; break;
- case (8 << 2) | 0: idct = h.h264_idct8_add; break;
- case (8 << 2) | 1: idct = h.h264_idct8_dc_add; break;
- case (8 << 2) | 2: idct = h.h264_add_pixels8_clear; break;
+ case (4 << 2) | 0: idct = h.idct_add; break;
+ case (4 << 2) | 1: idct = h.idct_dc_add; break;
+ case (4 << 2) | 2: idct = h.add_pixels4_clear; break;
+ case (8 << 2) | 0: idct = h.idct8_add; break;
+ case (8 << 2) | 1: idct = h.idct8_dc_add; break;
+ case (8 << 2) | 2: idct = h.add_pixels8_clear; break;
}
if (check_func(idct, fmts[dc], sz, bit_depth)) {
@@ -261,17 +261,17 @@ static void check_idct_multiple(void)
int block_offset[16] = { 0 };
switch (func) {
case 0:
- idct = h.h264_idct_add16;
- name = "h264_idct_add16";
+ idct = h.idct_add16;
+ name = "idct_add16";
break;
case 1:
- idct = h.h264_idct_add16intra;
- name = "h264_idct_add16intra";
+ idct = h.idct_add16intra;
+ name = "idct_add16intra";
intra = 1;
break;
case 2:
- idct = h.h264_idct8_add4;
- name = "h264_idct8_add4";
+ idct = h.idct8_add4;
+ name = "idct8_add4";
sz = 8;
break;
}
@@ -361,7 +361,7 @@ static void check_idct_dequant(void)
memset(dst_ref, 0, 16 * 16 * SIZEOF_COEF);
memset(dst_new, 0, 16 * 16 * SIZEOF_COEF);
- if (check_func(h.h264_luma_dc_dequant_idct, "h264_luma_dc_dequant_idct_%d", bit_depth)) {
+ if (check_func(h.luma_dc_dequant_idct, "luma_dc_dequant_idct_%d", bit_depth)) {
call_ref(dst_ref, src, qmul);
call_new(dst_new, src, qmul);
@@ -425,16 +425,16 @@ static void check_loop_filter(void)
} \
} while (0)
- CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);
- CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);
+ CHECK_LOOP_FILTER(v_loop_filter_luma, 1,);
+ CHECK_LOOP_FILTER(h_loop_filter_luma, 0,);
+ CHECK_LOOP_FILTER(h_loop_filter_luma_mbaff, 0,);
+ CHECK_LOOP_FILTER(v_loop_filter_chroma, 1,);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma, 0,);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff, 0,);
ff_h264dsp_init(&h, bit_depth, 2);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma, 0, 422);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff, 0, 422);
#undef CHECK_LOOP_FILTER
}
}
@@ -486,16 +486,16 @@ static void check_loop_filter_intra(void)
} \
} while (0)
- CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,);
- CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,);
+ CHECK_LOOP_FILTER(v_loop_filter_luma_intra, 1,);
+ CHECK_LOOP_FILTER(h_loop_filter_luma_intra, 0,);
+ CHECK_LOOP_FILTER(h_loop_filter_luma_mbaff_intra, 0,);
+ CHECK_LOOP_FILTER(v_loop_filter_chroma_intra, 1,);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma_intra, 0,);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff_intra, 0,);
ff_h264dsp_init(&h, bit_depth, 2);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422);
- CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma_intra, 0, 422);
+ CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff_intra, 0, 422);
#undef CHECK_LOOP_FILTER
}
}
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply | other threads: [~2026-01-21 1:04 UTC | newest]
Thread overview: [no followups] | expand [flat|nested] | mbox.gz | Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=176895736905.25.1708576412784609975@4457048688e7 \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git