From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: mkver <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] lavc/x86: Deduplicate constants (PR #20590) Date: Wed, 24 Sep 2025 02:07:11 -0000 Message-ID: <175867963161.25.16896069750335050696@bf249f23a2c8> (raw) PR #20590 opened by mkver URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20590 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20590.patch >From 5328f198c2c21d9f2fc7bc0c2a560143134c3dca Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 00:45:24 +0200 Subject: [PATCH 1/7] avcodec/x86/vorbisdsp: Reuse constant Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/vorbisdsp.asm | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm index 9afe2eb352..5fa8b5f866 100644 --- a/libavcodec/x86/vorbisdsp.asm +++ b/libavcodec/x86/vorbisdsp.asm @@ -21,15 +21,13 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA - -pdw_80000000: times 4 dd 0x80000000 +cextern ps_neg SECTION .text INIT_XMM sse cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size - mova m5, [pdw_80000000] + mova m5, [ps_neg] shl block_sized, 2 add magq, block_sizeq add angq, block_sizeq -- 2.49.1 >From fda587ca778a525deffb42df639029f578b17886 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 01:42:19 +0200 Subject: [PATCH 2/7] avcodec/x86/cfhd{,enc}dsp: Deduplicate constants Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/cfhddsp.asm | 47 ++++++++++++++++++----------------- libavcodec/x86/cfhdencdsp.asm | 12 ++++----- libavcodec/x86/constants.c | 3 +++ libavcodec/x86/constants.h | 3 +++ 4 files changed, 36 insertions(+), 29 deletions(-) diff --git a/libavcodec/x86/cfhddsp.asm b/libavcodec/x86/cfhddsp.asm index 87c2df634a..aa27902f9b 100644 --- a/libavcodec/x86/cfhddsp.asm +++ b/libavcodec/x86/cfhddsp.asm @@ -23,15 +23,16 @@ SECTION_RODATA -factor_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1, -factor_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1, +cextern pw_p1_m1 +cextern pw_m1_p1 factor_p11_n4: dw 11, -4, 11, -4, 11, -4, 11, -4, factor_p5_p4: dw 5, 4, 5, 4, 5, 4, 5, 4, -pd_4: times 4 dd 4 -pw_1: times 8 dw 1 -pw_0: times 8 dw 0 -pw_1023: times 8 dw 1023 -pw_4095: times 8 dw 4095 +cextern pd_4 +cextern pw_1 +cextern pb_0 +%define pw_0 pb_0 +cextern pw_1023 +cextern pw_4095 SECTION .text @@ -79,8 +80,8 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height %endif %if ARCH_X86_64 - mova m8, [factor_p1_n1] - mova m9, [factor_n1_p1] + mova m8, [pw_p1_m1] + mova m9, [pw_m1_p1] mova m10, [pw_1] mova m11, [pd_4] %endif @@ -158,10 +159,10 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height paddd m6, m11 paddd m7, m11 %else - pmaddwd m4, [factor_p1_n1] - pmaddwd m5, [factor_p1_n1] - pmaddwd m6, [factor_n1_p1] - pmaddwd m7, [factor_n1_p1] + pmaddwd m4, [pw_p1_m1] + pmaddwd m5, [pw_p1_m1] + pmaddwd m6, [pw_m1_p1] + pmaddwd m7, [pw_m1_p1] paddd m4, [pd_4] paddd m5, [pd_4] @@ -192,8 +193,8 @@ cglobal cfhd_horiz_filter, 7, 7, 8, output, x, low, y, high, temp, width, height %else pmaddwd m2, [pw_1] pmaddwd m0, [pw_1] - pmaddwd m1, [factor_p1_n1] - pmaddwd m3, [factor_p1_n1] + pmaddwd m1, [pw_p1_m1] + pmaddwd m3, [pw_p1_m1] %endif paddd m2, m4 @@ -312,8 +313,8 @@ cglobal cfhd_vert_filter, 8, 11, 14, output, ostride, low, lwidth, high, hwidth, dec heightd - mova m8, [factor_p1_n1] - mova m9, [factor_n1_p1] + mova m8, [pw_p1_m1] + mova m9, [pw_m1_p1] mova m10, [pw_1] mova m11, [pd_4] mova m12, [factor_p11_n4] @@ -485,10 +486,10 @@ cglobal cfhd_vert_filter, 7, 7, 8, output, x, low, y, high, pos, width, height paddd m6, m11 paddd m7, m11 %else - pmaddwd m4, [factor_p1_n1] - pmaddwd m5, [factor_p1_n1] - pmaddwd m6, [factor_n1_p1] - pmaddwd m7, [factor_n1_p1] + pmaddwd m4, [pw_p1_m1] + pmaddwd m5, [pw_p1_m1] + pmaddwd m6, [pw_m1_p1] + pmaddwd m7, [pw_m1_p1] paddd m4, [pd_4] paddd m5, [pd_4] @@ -524,8 +525,8 @@ cglobal cfhd_vert_filter, 7, 7, 8, output, x, low, y, high, pos, width, height %else pmaddwd m0, [pw_1] pmaddwd m2, [pw_1] - pmaddwd m1, [factor_p1_n1] - pmaddwd m3, [factor_p1_n1] + pmaddwd m1, [pw_p1_m1] + pmaddwd m3, [pw_p1_m1] %endif paddd m0, m4 diff --git a/libavcodec/x86/cfhdencdsp.asm b/libavcodec/x86/cfhdencdsp.asm index 4aaeb56972..7654f59643 100644 --- a/libavcodec/x86/cfhdencdsp.asm +++ b/libavcodec/x86/cfhdencdsp.asm @@ -23,13 +23,13 @@ SECTION_RODATA -pw_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1 -pw_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1 +cextern pw_p1_m1 +cextern pw_m1_p1 pw_p5_n11: dw 5, -11, 5, -11, 5, -11, 5, -11 pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11 pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5 pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5 -pd_4: times 4 dd 4 +cextern pd_4 pw_n4: times 8 dw -4 cextern pw_m1 cextern pw_1 @@ -46,7 +46,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, istride, lwidth, hwid mova m7, [pd_4] mova m8, [pw_1] mova m9, [pw_m1] - mova m10,[pw_p1_n1] + mova m10,[pw_p1_m1] movsxdifnidn yq, yd movsxdifnidn widthq, widthd neg yq @@ -208,8 +208,8 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt mova m7, [pd_4] mova m8, [pw_1] mova m9, [pw_m1] - mova m10,[pw_p1_n1] - mova m11,[pw_n1_p1] + mova m10,[pw_p1_m1] + mova m11,[pw_m1_p1] mova m12,[pw_4] mova m13,[pw_n4] .loopw: diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c index c5f3c6428e..95a97db4e4 100644 --- a/libavcodec/x86/constants.c +++ b/libavcodec/x86/constants.c @@ -61,6 +61,8 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200 0x2000200020002000ULL, 0x2000200020002000ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_m1_p1) = { 0x0001FFFF0001FFFFULL, 0x0001FFFF0001FFFFULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_p1_m1) = { 0xFFFF0001FFFF0001ULL, 0xFFFF0001FFFF0001ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }; @@ -81,6 +83,7 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x800 DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL, 0x0000000100000001ULL, 0x0000000100000001ULL }; +DECLARE_ALIGNED(16, const xmm_reg, ff_pd_4) = { 0x0000000400000004ULL, 0x0000000400000004ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pd_16) = { 0x0000001000000010ULL, 0x0000001000000010ULL, 0x0000001000000010ULL, 0x0000001000000010ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL, diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h index 4a55adb5b3..5badb2e104 100644 --- a/libavcodec/x86/constants.h +++ b/libavcodec/x86/constants.h @@ -50,6 +50,8 @@ extern const ymm_reg ff_pw_4095; extern const ymm_reg ff_pw_4096; extern const ymm_reg ff_pw_8192; extern const ymm_reg ff_pw_m1; +extern const xmm_reg ff_pw_m1_p1; +extern const xmm_reg ff_pw_p1_m1; extern const ymm_reg ff_pb_0; extern const ymm_reg ff_pb_1; @@ -62,6 +64,7 @@ extern const uint64_t ff_pb_FC; extern const xmm_reg ff_ps_neg; extern const ymm_reg ff_pd_1; +extern const xmm_reg ff_pd_4; extern const ymm_reg ff_pd_16; extern const ymm_reg ff_pd_32; extern const ymm_reg ff_pd_64; -- 2.49.1 >From fb910b56e294f0a53a0d234f03b350a6791caa3f Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 02:01:52 +0200 Subject: [PATCH 3/7] avcodec/x86/rv40dsp: Deduplicate constants Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/rv40dsp.asm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index dc520dbeb4..3abf29ee4d 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -25,8 +25,6 @@ SECTION_RODATA -pw_1024: times 8 dw 1 << (16 - 6) ; pw_1024 - sixtap_filter_hb_m: times 8 db 1, -5 times 8 db 52, 20 ; multiplied by 2 to have the same shift @@ -70,6 +68,7 @@ filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11 cextern pw_32 cextern pw_16 cextern pw_512 +cextern pw_1024 SECTION .text -- 2.49.1 >From f2fe18bd575cd437cc1fa19ab20c684eed31d7b3 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 02:11:45 +0200 Subject: [PATCH 4/7] avcodec/x86/h26x/h2656_inter: Deduplicate constant Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/h26x/h2656_inter.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/x86/h26x/h2656_inter.asm b/libavcodec/x86/h26x/h2656_inter.asm index 49a95d58fb..aea59a1c6a 100644 --- a/libavcodec/x86/h26x/h2656_inter.asm +++ b/libavcodec/x86/h26x/h2656_inter.asm @@ -22,12 +22,12 @@ ; */ %include "libavutil/x86/x86util.asm" -SECTION_RODATA 32 cextern pw_255 cextern pw_512 cextern pw_2048 cextern pw_1023 cextern pw_1024 +cextern pw_4095 cextern pw_4096 cextern pw_8192 %define scale_8 pw_512 @@ -35,7 +35,7 @@ cextern pw_8192 %define scale_12 pw_8192 %define max_pixels_8 pw_255 %define max_pixels_10 pw_1023 -max_pixels_12: times 16 dw ((1 << 12)-1) +%define max_pixels_12 pw_4095 cextern pb_0 SECTION .text -- 2.49.1 >From 2d9220e9d6a5420b9f0ab925726a47b73dfcfd95 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 03:05:52 +0200 Subject: [PATCH 5/7] avcodec/x86/h26x/h2656_sao{,_10bit}: Deduplicate constants These files are templates that are included by both HEVC and VVC code, so that every constant in there is automatically duplicated. pb_eo is also duplicated between the two different bitversions and for some reason the 10bit file uses its own pw_1023 and pw_4095. All of this has been deduplicated. The constants unique to HEVC and VVC have been put into h26x/h2656dsp.c instead of constants.c because this file is compiled iff the constants are needed. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/h26x/h2656_sao.asm | 6 ++---- libavcodec/x86/h26x/h2656_sao_10bit.asm | 12 ++++++------ libavcodec/x86/h26x/h2656dsp.c | 10 ++++++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/libavcodec/x86/h26x/h2656_sao.asm b/libavcodec/x86/h26x/h2656_sao.asm index a80ee26178..e4cff790e5 100644 --- a/libavcodec/x86/h26x/h2656_sao.asm +++ b/libavcodec/x86/h26x/h2656_sao.asm @@ -23,10 +23,8 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA 32 - -pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1 +cextern pb_edge_shuffle +cextern pb_eo cextern pb_1 cextern pb_2 diff --git a/libavcodec/x86/h26x/h2656_sao_10bit.asm b/libavcodec/x86/h26x/h2656_sao_10bit.asm index 052f2b1d16..82e702b451 100644 --- a/libavcodec/x86/h26x/h2656_sao_10bit.asm +++ b/libavcodec/x86/h26x/h2656_sao_10bit.asm @@ -23,15 +23,15 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA 32 - -pw_m2: times 16 dw -2 -pw_mask10: times 16 dw 0x03FF -pw_mask12: times 16 dw 0x0FFF -pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1 +cextern pb_eo cextern pw_m1 +cextern pw_m2 cextern pw_1 cextern pw_2 +cextern pw_1023 +cextern pw_4095 +%define pw_mask10 pw_1023 +%define pw_mask12 pw_4095 SECTION .text diff --git a/libavcodec/x86/h26x/h2656dsp.c b/libavcodec/x86/h26x/h2656dsp.c index 1d8ec1898d..0a332ee55b 100644 --- a/libavcodec/x86/h26x/h2656dsp.c +++ b/libavcodec/x86/h26x/h2656dsp.c @@ -22,6 +22,16 @@ */ #include "h2656dsp.h" +#include "libavutil/mem_internal.h" +#include "libavutil/x86/asm.h" + +DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m2) = { 0xFFFEFFFEFFFEFFFEULL, 0xFFFEFFFEFFFEFFFEULL, + 0xFFFEFFFEFFFEFFFEULL, 0xFFFEFFFEFFFEFFFEULL }; + +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_edge_shuffle) = { 0xFFFFFF0403000201ULL, 0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFF0403000201ULL, 0xFFFFFFFFFFFFFFFFULL }; +const uint8_t ff_pb_eo[16] = { -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1 }; + #define mc_rep_func(name, bitd, step, W, opt) \ void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \ -- 2.49.1 >From e460b62c7f574e2669d2570507a278f4537853a4 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 03:19:29 +0200 Subject: [PATCH 6/7] avcodec/x86/hevc/deblock: Deduplicate constants Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/hevc/deblock.asm | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libavcodec/x86/hevc/deblock.asm b/libavcodec/x86/hevc/deblock.asm index 61b79f8079..9fc6bc5e1f 100644 --- a/libavcodec/x86/hevc/deblock.asm +++ b/libavcodec/x86/hevc/deblock.asm @@ -24,17 +24,16 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA - cextern pw_1023 %define pw_pixel_max_10 pw_1023 -pw_pixel_max_12: times 8 dw ((1 << 12)-1) -pw_m2: times 8 dw -2 -pd_1 : times 4 dd 1 +cextern pw_4095 +%define pw_pixel_max_12 pw_4095 +cextern pd_1 cextern pw_4 cextern pw_8 cextern pw_m1 +cextern pw_m2 SECTION .text INIT_XMM sse2 -- 2.49.1 >From 6b0ac9902f778ba23748348e68b65d0fed17514a Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Wed, 24 Sep 2025 03:39:12 +0200 Subject: [PATCH 7/7] avcodec/x86/hevc/mc: Deduplicate constants Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/hevc/mc.asm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm index 550f7a0e23..53397c2018 100644 --- a/libavcodec/x86/hevc/mc.asm +++ b/libavcodec/x86/hevc/mc.asm @@ -22,21 +22,23 @@ SECTION_RODATA 32 cextern pw_255 +cextern pw_256 cextern pw_512 cextern pw_2048 cextern pw_8192 cextern pw_1023 cextern pw_1024 +cextern pw_4095 cextern pw_4096 %define pw_8 pw_512 %define pw_10 pw_2048 %define pw_12 pw_8192 +%define pw_bi_8 pw_256 %define pw_bi_10 pw_1024 %define pw_bi_12 pw_4096 %define max_pixels_8 pw_255 %define max_pixels_10 pw_1023 -pw_bi_8: times 16 dw (1 << 8) -max_pixels_12: times 16 dw ((1 << 12)-1) +%define max_pixels_12 pw_4095 cextern pd_1 cextern pb_0 -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-09-24 2:07 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=175867963161.25.16896069750335050696@bf249f23a2c8 \ --to=ffmpeg-devel@ffmpeg.org \ --cc=code@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror http://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ http://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git