* [FFmpeg-devel] [PATCH] lavc/x86: Deduplicate constants (PR #20590)
@ 2025-09-24 2:07 mkver via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2025-09-24 2:07 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: mkver
PR #20590 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20590
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20590.patch
>From 5328f198c2c21d9f2fc7bc0c2a560143134c3dca Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 00:45:24 +0200
Subject: [PATCH 1/7] avcodec/x86/vorbisdsp: Reuse constant
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/vorbisdsp.asm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index 9afe2eb352..5fa8b5f866 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -21,15 +21,13 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
-
-pdw_80000000: times 4 dd 0x80000000
+cextern ps_neg
SECTION .text
INIT_XMM sse
cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
- mova m5, [pdw_80000000]
+ mova m5, [ps_neg]
shl block_sized, 2
add magq, block_sizeq
add angq, block_sizeq
--
2.49.1
>From fda587ca778a525deffb42df639029f578b17886 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 01:42:19 +0200
Subject: [PATCH 2/7] avcodec/x86/cfhd{,enc}dsp: Deduplicate constants
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/cfhddsp.asm | 47 ++++++++++++++++++-----------------
libavcodec/x86/cfhdencdsp.asm | 12 ++++-----
libavcodec/x86/constants.c | 3 +++
libavcodec/x86/constants.h | 3 +++
4 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/libavcodec/x86/cfhddsp.asm b/libavcodec/x86/cfhddsp.asm
index 87c2df634a..aa27902f9b 100644
--- a/libavcodec/x86/cfhddsp.asm
+++ b/libavcodec/x86/cfhddsp.asm
@@ -23,15 +23,16 @@
SECTION_RODATA
-factor_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1,
-factor_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1,
+cextern pw_p1_m1
+cextern pw_m1_p1
factor_p11_n4: dw 11, -4, 11, -4, 11, -4, 11, -4,
factor_p5_p4: dw 5, 4, 5, 4, 5, 4, 5, 4,
-pd_4: times 4 dd 4
-pw_1: times 8 dw 1
-pw_0: times 8 dw 0
-pw_1023: times 8 dw 1023
-pw_4095: times 8 dw 4095
+cextern pd_4
+cextern pw_1
+cextern pb_0
+%define pw_0 pb_0
+cextern pw_1023
+cextern pw_4095
SECTION .text
@@ -79,8 +80,8 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height
%endif
%if ARCH_X86_64
- mova m8, [factor_p1_n1]
- mova m9, [factor_n1_p1]
+ mova m8, [pw_p1_m1]
+ mova m9, [pw_m1_p1]
mova m10, [pw_1]
mova m11, [pd_4]
%endif
@@ -158,10 +159,10 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height
paddd m6, m11
paddd m7, m11
%else
- pmaddwd m4, [factor_p1_n1]
- pmaddwd m5, [factor_p1_n1]
- pmaddwd m6, [factor_n1_p1]
- pmaddwd m7, [factor_n1_p1]
+ pmaddwd m4, [pw_p1_m1]
+ pmaddwd m5, [pw_p1_m1]
+ pmaddwd m6, [pw_m1_p1]
+ pmaddwd m7, [pw_m1_p1]
paddd m4, [pd_4]
paddd m5, [pd_4]
@@ -192,8 +193,8 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height
%else
pmaddwd m2, [pw_1]
pmaddwd m0, [pw_1]
- pmaddwd m1, [factor_p1_n1]
- pmaddwd m3, [factor_p1_n1]
+ pmaddwd m1, [pw_p1_m1]
+ pmaddwd m3, [pw_p1_m1]
%endif
paddd m2, m4
@@ -312,8 +313,8 @@ cglobal cfhd_vert_filter, 8, 11, 14, output, ostride, low, lwidth, high, hwidth,
dec heightd
- mova m8, [factor_p1_n1]
- mova m9, [factor_n1_p1]
+ mova m8, [pw_p1_m1]
+ mova m9, [pw_m1_p1]
mova m10, [pw_1]
mova m11, [pd_4]
mova m12, [factor_p11_n4]
@@ -485,10 +486,10 @@ cglobal cfhd_vert_filter, 7, 7, 8, output, x, low, y, high, pos, width, height
paddd m6, m11
paddd m7, m11
%else
- pmaddwd m4, [factor_p1_n1]
- pmaddwd m5, [factor_p1_n1]
- pmaddwd m6, [factor_n1_p1]
- pmaddwd m7, [factor_n1_p1]
+ pmaddwd m4, [pw_p1_m1]
+ pmaddwd m5, [pw_p1_m1]
+ pmaddwd m6, [pw_m1_p1]
+ pmaddwd m7, [pw_m1_p1]
paddd m4, [pd_4]
paddd m5, [pd_4]
@@ -524,8 +525,8 @@ cglobal cfhd_vert_filter, 7, 7, 8, output, x, low, y, high, pos, width, height
%else
pmaddwd m0, [pw_1]
pmaddwd m2, [pw_1]
- pmaddwd m1, [factor_p1_n1]
- pmaddwd m3, [factor_p1_n1]
+ pmaddwd m1, [pw_p1_m1]
+ pmaddwd m3, [pw_p1_m1]
%endif
paddd m0, m4
diff --git a/libavcodec/x86/cfhdencdsp.asm b/libavcodec/x86/cfhdencdsp.asm
index 4aaeb56972..7654f59643 100644
--- a/libavcodec/x86/cfhdencdsp.asm
+++ b/libavcodec/x86/cfhdencdsp.asm
@@ -23,13 +23,13 @@
SECTION_RODATA
-pw_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1
-pw_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1
+cextern pw_p1_m1
+cextern pw_m1_p1
pw_p5_n11: dw 5, -11, 5, -11, 5, -11, 5, -11
pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11
pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5
pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5
-pd_4: times 4 dd 4
+cextern pd_4
pw_n4: times 8 dw -4
cextern pw_m1
cextern pw_1
@@ -46,7 +46,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, istride, lwidth, hwid
mova m7, [pd_4]
mova m8, [pw_1]
mova m9, [pw_m1]
- mova m10,[pw_p1_n1]
+ mova m10,[pw_p1_m1]
movsxdifnidn yq, yd
movsxdifnidn widthq, widthd
neg yq
@@ -208,8 +208,8 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
mova m7, [pd_4]
mova m8, [pw_1]
mova m9, [pw_m1]
- mova m10,[pw_p1_n1]
- mova m11,[pw_n1_p1]
+ mova m10,[pw_p1_m1]
+ mova m11,[pw_m1_p1]
mova m12,[pw_4]
mova m13,[pw_n4]
.loopw:
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index c5f3c6428e..95a97db4e4 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -61,6 +61,8 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200
0x2000200020002000ULL, 0x2000200020002000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_m1_p1) = { 0x0001FFFF0001FFFFULL, 0x0001FFFF0001FFFFULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_p1_m1) = { 0xFFFF0001FFFF0001ULL, 0xFFFF0001FFFF0001ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL,
0x0000000000000000ULL, 0x0000000000000000ULL };
@@ -81,6 +83,7 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x800
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL,
0x0000000100000001ULL, 0x0000000100000001ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pd_4) = { 0x0000000400000004ULL, 0x0000000400000004ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_16) = { 0x0000001000000010ULL, 0x0000001000000010ULL,
0x0000001000000010ULL, 0x0000001000000010ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL,
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 4a55adb5b3..5badb2e104 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -50,6 +50,8 @@ extern const ymm_reg ff_pw_4095;
extern const ymm_reg ff_pw_4096;
extern const ymm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1;
+extern const xmm_reg ff_pw_m1_p1;
+extern const xmm_reg ff_pw_p1_m1;
extern const ymm_reg ff_pb_0;
extern const ymm_reg ff_pb_1;
@@ -62,6 +64,7 @@ extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_ps_neg;
extern const ymm_reg ff_pd_1;
+extern const xmm_reg ff_pd_4;
extern const ymm_reg ff_pd_16;
extern const ymm_reg ff_pd_32;
extern const ymm_reg ff_pd_64;
--
2.49.1
>From fb910b56e294f0a53a0d234f03b350a6791caa3f Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 02:01:52 +0200
Subject: [PATCH 3/7] avcodec/x86/rv40dsp: Deduplicate constants
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/rv40dsp.asm | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index dc520dbeb4..3abf29ee4d 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -25,8 +25,6 @@
SECTION_RODATA
-pw_1024: times 8 dw 1 << (16 - 6) ; pw_1024
-
sixtap_filter_hb_m: times 8 db 1, -5
times 8 db 52, 20
; multiplied by 2 to have the same shift
@@ -70,6 +68,7 @@ filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11
cextern pw_32
cextern pw_16
cextern pw_512
+cextern pw_1024
SECTION .text
--
2.49.1
>From f2fe18bd575cd437cc1fa19ab20c684eed31d7b3 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 02:11:45 +0200
Subject: [PATCH 4/7] avcodec/x86/h26x/h2656_inter: Deduplicate constant
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h26x/h2656_inter.asm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavcodec/x86/h26x/h2656_inter.asm b/libavcodec/x86/h26x/h2656_inter.asm
index 49a95d58fb..aea59a1c6a 100644
--- a/libavcodec/x86/h26x/h2656_inter.asm
+++ b/libavcodec/x86/h26x/h2656_inter.asm
@@ -22,12 +22,12 @@
; */
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA 32
cextern pw_255
cextern pw_512
cextern pw_2048
cextern pw_1023
cextern pw_1024
+cextern pw_4095
cextern pw_4096
cextern pw_8192
%define scale_8 pw_512
@@ -35,7 +35,7 @@ cextern pw_8192
%define scale_12 pw_8192
%define max_pixels_8 pw_255
%define max_pixels_10 pw_1023
-max_pixels_12: times 16 dw ((1 << 12)-1)
+%define max_pixels_12 pw_4095
cextern pb_0
SECTION .text
--
2.49.1
>From 2d9220e9d6a5420b9f0ab925726a47b73dfcfd95 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 03:05:52 +0200
Subject: [PATCH 5/7] avcodec/x86/h26x/h2656_sao{,_10bit}: Deduplicate
constants
These files are templates that are included by both HEVC and VVC
code, so that every constant in there is automatically duplicated.
pb_eo is also duplicated between the 8-bit and 10-bit versions of
the template, and for some reason the 10-bit file uses its own copies
of pw_1023 and pw_4095 (named pw_mask10 and pw_mask12).
All of this has been deduplicated. The constants unique to HEVC and VVC
have been put into h26x/h2656dsp.c instead of constants.c because
h2656dsp.c is compiled iff the constants are needed.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/h26x/h2656_sao.asm | 6 ++----
libavcodec/x86/h26x/h2656_sao_10bit.asm | 12 ++++++------
libavcodec/x86/h26x/h2656dsp.c | 10 ++++++++++
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/libavcodec/x86/h26x/h2656_sao.asm b/libavcodec/x86/h26x/h2656_sao.asm
index a80ee26178..e4cff790e5 100644
--- a/libavcodec/x86/h26x/h2656_sao.asm
+++ b/libavcodec/x86/h26x/h2656_sao.asm
@@ -23,10 +23,8 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA 32
-
-pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
+cextern pb_edge_shuffle
+cextern pb_eo
cextern pb_1
cextern pb_2
diff --git a/libavcodec/x86/h26x/h2656_sao_10bit.asm b/libavcodec/x86/h26x/h2656_sao_10bit.asm
index 052f2b1d16..82e702b451 100644
--- a/libavcodec/x86/h26x/h2656_sao_10bit.asm
+++ b/libavcodec/x86/h26x/h2656_sao_10bit.asm
@@ -23,15 +23,15 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA 32
-
-pw_m2: times 16 dw -2
-pw_mask10: times 16 dw 0x03FF
-pw_mask12: times 16 dw 0x0FFF
-pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
+cextern pb_eo
cextern pw_m1
+cextern pw_m2
cextern pw_1
cextern pw_2
+cextern pw_1023
+cextern pw_4095
+%define pw_mask10 pw_1023
+%define pw_mask12 pw_4095
SECTION .text
diff --git a/libavcodec/x86/h26x/h2656dsp.c b/libavcodec/x86/h26x/h2656dsp.c
index 1d8ec1898d..0a332ee55b 100644
--- a/libavcodec/x86/h26x/h2656dsp.c
+++ b/libavcodec/x86/h26x/h2656dsp.c
@@ -22,6 +22,16 @@
*/
#include "h2656dsp.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/x86/asm.h"
+
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m2) = { 0xFFFEFFFEFFFEFFFEULL, 0xFFFEFFFEFFFEFFFEULL,
+ 0xFFFEFFFEFFFEFFFEULL, 0xFFFEFFFEFFFEFFFEULL };
+
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_edge_shuffle) = { 0xFFFFFF0403000201ULL, 0xFFFFFFFFFFFFFFFFULL,
+ 0xFFFFFF0403000201ULL, 0xFFFFFFFFFFFFFFFFULL };
+const uint8_t ff_pb_eo[16] = { -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1 };
+
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \
--
2.49.1
>From e460b62c7f574e2669d2570507a278f4537853a4 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 03:19:29 +0200
Subject: [PATCH 6/7] avcodec/x86/hevc/deblock: Deduplicate constants
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/hevc/deblock.asm | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/libavcodec/x86/hevc/deblock.asm b/libavcodec/x86/hevc/deblock.asm
index 61b79f8079..9fc6bc5e1f 100644
--- a/libavcodec/x86/hevc/deblock.asm
+++ b/libavcodec/x86/hevc/deblock.asm
@@ -24,17 +24,16 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
-
cextern pw_1023
%define pw_pixel_max_10 pw_1023
-pw_pixel_max_12: times 8 dw ((1 << 12)-1)
-pw_m2: times 8 dw -2
-pd_1 : times 4 dd 1
+cextern pw_4095
+%define pw_pixel_max_12 pw_4095
+cextern pd_1
cextern pw_4
cextern pw_8
cextern pw_m1
+cextern pw_m2
SECTION .text
INIT_XMM sse2
--
2.49.1
>From 6b0ac9902f778ba23748348e68b65d0fed17514a Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 24 Sep 2025 03:39:12 +0200
Subject: [PATCH 7/7] avcodec/x86/hevc/mc: Deduplicate constants
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/hevc/mc.asm | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm
index 550f7a0e23..53397c2018 100644
--- a/libavcodec/x86/hevc/mc.asm
+++ b/libavcodec/x86/hevc/mc.asm
@@ -22,21 +22,23 @@
SECTION_RODATA 32
cextern pw_255
+cextern pw_256
cextern pw_512
cextern pw_2048
cextern pw_8192
cextern pw_1023
cextern pw_1024
+cextern pw_4095
cextern pw_4096
%define pw_8 pw_512
%define pw_10 pw_2048
%define pw_12 pw_8192
+%define pw_bi_8 pw_256
%define pw_bi_10 pw_1024
%define pw_bi_12 pw_4096
%define max_pixels_8 pw_255
%define max_pixels_10 pw_1023
-pw_bi_8: times 16 dw (1 << 8)
-max_pixels_12: times 16 dw ((1 << 12)-1)
+%define max_pixels_12 pw_4095
cextern pd_1
cextern pb_0
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-09-24 2:07 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-24 2:07 [FFmpeg-devel] [PATCH] lavc/x86: Deduplicate constants (PR #20590) mkver via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror http://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ http://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git