Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] Revert "avutil/tx_template: extend to 2M" (PR #21184)
@ 2025-12-13  7:31 Zhao Zhili via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: Zhao Zhili via ffmpeg-devel @ 2025-12-13  7:31 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Zhao Zhili

PR #21184 opened by Zhao Zhili (quink)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21184
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21184.patch

This reverts commit 8f48a62, 9af8782, and bd3e71b.

Commit 8f48a62 extends tx to 2M, resulting in the tx_float bss
section reaching a size of 4M.

This isn't a issue on devices with normal memory sizes and OS
supporting virtual memory. But it's a real issue for embedded devices
with realtime OS, which may not support virtual memory, e.g., Nuttx.
This 4M of bss section map to physical memory directly, which is a
scarce resource on embedded devices.


>From 07b26f98e9eb3a2429a439a72e90ac29f28c1658 Mon Sep 17 00:00:00 2001
From: Zhao Zhili <zhilizhao@tencent.com>
Date: Sat, 13 Dec 2025 12:53:58 +0800
Subject: [PATCH] Revert "avutil/tx_template: extend to 2M"

This reverts commit 8f48a62, 9af8782, and bd3e71b.

Commit 8f48a62 extends tx to 2M, resulting in the tx_float bss
section reaching a size of 4M.

This isn't a issue on devices with normal memory sizes and OS
supporting virtual memory. But it's a real issue for embedded devices
with realtime OS, which may not support virtual memory, e.g., Nuttx.
This 4M of bss section map to physical memory directly, which is a
scarce resource on embedded devices.
---
 libavutil/tx_template.c       | 12 ------------
 libavutil/x86/tx_float.asm    |  8 ++------
 libavutil/x86/tx_float_init.c | 18 +++++++++---------
 3 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index ec630954b8..ecee572e40 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -45,10 +45,6 @@
     SR_TABLE(32768)    \
     SR_TABLE(65536)    \
     SR_TABLE(131072)   \
-    SR_TABLE(262144)   \
-    SR_TABLE(524288)   \
-    SR_TABLE(1048576)   \
-    SR_TABLE(2097152)   \
 
 #define SR_TABLE(len) \
     TABLE_DEF(len, len/4 + 1);
@@ -724,10 +720,6 @@ DECL_SR_CODELET(16384,8192,4096)
 DECL_SR_CODELET(32768,16384,8192)
 DECL_SR_CODELET(65536,32768,16384)
 DECL_SR_CODELET(131072,65536,32768)
-DECL_SR_CODELET(262144,131072,65536)
-DECL_SR_CODELET(524288,262144,131072)
-DECL_SR_CODELET(1048576,524288,262144)
-DECL_SR_CODELET(2097152,1048576,524288)
 
 static av_cold int TX_NAME(ff_tx_fft_init)(AVTXContext *s,
                                            const FFTXCodelet *cd,
@@ -2160,10 +2152,6 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
     &TX_NAME(ff_tx_fft32768_ns_def),
     &TX_NAME(ff_tx_fft65536_ns_def),
     &TX_NAME(ff_tx_fft131072_ns_def),
-    &TX_NAME(ff_tx_fft262144_ns_def),
-    &TX_NAME(ff_tx_fft524288_ns_def),
-    &TX_NAME(ff_tx_fft1048576_ns_def),
-    &TX_NAME(ff_tx_fft2097152_ns_def),
 
     /* Prime factor codelets */
     &TX_NAME(ff_tx_fft3_ns_def),
diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index c030147ce8..87be21c2d6 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -46,7 +46,7 @@
 %endif
 
 %assign i 16
-%rep 18
+%rep 14
 cextern tab_ %+ i %+ _float ; ff_tab_i_float...
 %assign i (i << 1)
 %endrep
@@ -1385,11 +1385,7 @@ FFT_SPLIT_RADIX_DEF 8192,  .16384pt
 FFT_SPLIT_RADIX_DEF 16384, .32768pt
 FFT_SPLIT_RADIX_DEF 32768, .65536pt
 FFT_SPLIT_RADIX_DEF 65536, .131072pt
-FFT_SPLIT_RADIX_DEF 131072, .262144pt
-FFT_SPLIT_RADIX_DEF 262144, .524288pt
-FFT_SPLIT_RADIX_DEF 524288, .1048576pt
-FFT_SPLIT_RADIX_DEF 1048576, .2097152pt
-FFT_SPLIT_RADIX_DEF 2097152
+FFT_SPLIT_RADIX_DEF 131072
 
 ;===============================================================================
 ; Final synthesis + deinterleaving code
diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index 3e99c21eac..f69a5a6d77 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -271,15 +271,15 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = {
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW),
     TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr,    FFT, 64, 2097152, 2, 0, 256, b8_i2, avx, AVX,  0, AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
+    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX,  0, AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,  AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,  AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr,    FFT, 64, 2097152, 2, 0, 288, b8_i2, fma3,  FMA3,  0, AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3,  FMA3,
+    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 288, b8_i2, fma3,  FMA3,  0, AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3,  FMA3,  AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,  AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
 
     TX_DEF(fft15, FFT, 15, 15, 15, 0, 320, factor_init, avx2, AVX2,
@@ -287,11 +287,11 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = {
     TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384, factor_init, avx2, AVX2,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW),
 
-    TX_DEF(fft_sr,    FFT, 64, 2097152, 2, 0, 320, b8_i2, avx2, AVX2, 0,
+    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0,
            AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-    TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
+    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-    TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
 
     TX_DEF(fft_pfa_15xM, FFT, 60, TX_LEN_UNLIMITED, 15, 2, 320, fft_pfa_init, avx2, AVX2,
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2025-12-13  7:32 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-13  7:31 [FFmpeg-devel] [PATCH] Revert "avutil/tx_template: extend to 2M" (PR #21184) Zhao Zhili via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git