* [FFmpeg-devel] [PATCH] swresample/swresample_internal: Use union for float, int matrix (PR #20505)
@ 2025-09-12 16:38 mkver via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2025-09-12 16:38 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: mkver
PR #20505 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20505
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20505.patch
>From 92b22e11ad75397137aa4bdcab6a3bf1e680a258 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 17:53:27 +0200
Subject: [PATCH 1/4] swresample/swresample_internal: Use union for float, int
matrix
Saves 16KiB from SwrContext.
(FATE would also pass if one made the double matrix part of
the union, too, but I don't know whether this is truly correct,
because swri_rematrix() accesses the double matrix (to check whether
it is equal to 1.0) even when one of the other matrices is in use.)
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswresample/rematrix.c | 35 ++++++++++++++---------------
libswresample/swresample_internal.h | 8 +++++--
2 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index f55b85a52d..c88b22d8fb 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -69,13 +69,12 @@ int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride)
if (!s || s->in_convert) // s needs to be allocated but not initialized
return AVERROR(EINVAL);
memset(s->matrix, 0, sizeof(s->matrix));
- memset(s->matrix_flt, 0, sizeof(s->matrix_flt));
nb_in = s->user_in_chlayout.nb_channels;
nb_out = s->user_out_chlayout.nb_channels;
for (out = 0; out < nb_out; out++) {
for (in = 0; in < nb_in; in++)
- s->matrix_flt[out][in] = s->matrix[out][in] = matrix[in];
+ s->matrix[out][in] = matrix[in];
matrix += stride;
}
s->rematrix_custom = 1;
@@ -436,7 +435,6 @@ fail:
av_cold static int auto_matrix(SwrContext *s)
{
double maxval;
- int ret;
if (s->rematrix_maxval > 0) {
maxval = s->rematrix_maxval;
@@ -447,19 +445,10 @@ av_cold static int auto_matrix(SwrContext *s)
maxval = INT_MAX;
memset(s->matrix, 0, sizeof(s->matrix));
- ret = swr_build_matrix2(&s->in_ch_layout, &s->out_ch_layout,
- s->clev, s->slev, s->lfe_mix_level,
- maxval, s->rematrix_volume, (double*)s->matrix,
- s->matrix[1] - s->matrix[0], s->matrix_encoding, s);
-
- if (ret >= 0 && s->int_sample_fmt == AV_SAMPLE_FMT_FLTP) {
- int i, j;
- for (i = 0; i < FF_ARRAY_ELEMS(s->matrix[0]); i++)
- for (j = 0; j < FF_ARRAY_ELEMS(s->matrix[0]); j++)
- s->matrix_flt[i][j] = s->matrix[i][j];
- }
-
- return ret;
+ return swr_build_matrix2(&s->in_ch_layout, &s->out_ch_layout,
+ s->clev, s->slev, s->lfe_mix_level,
+ maxval, s->rematrix_volume, (double*)s->matrix,
+ s->matrix[1] - s->matrix[0], s->matrix_encoding, s);
}
av_cold int swri_rematrix_init(SwrContext *s){
@@ -554,9 +543,19 @@ av_cold int swri_rematrix_init(SwrContext *s){
for (i = 0; i < SWR_CH_MAX; i++) {
int ch_in=0;
for (j = 0; j < SWR_CH_MAX; j++) {
- s->matrix32[i][j]= lrintf(s->matrix[i][j] * 32768);
- if(s->matrix[i][j])
+ const double coeff = s->matrix[i][j];
+ if (coeff)
s->matrix_ch[i][++ch_in]= j;
+ switch (s->int_sample_fmt) {
+ case AV_SAMPLE_FMT_FLTP:
+ s->matrix_flt[i][j] = coeff;
+ break;
+ case AV_SAMPLE_FMT_DBLP:
+ break;
+ default:
+ s->matrix32[i][j] = lrintf(coeff * 32768);
+ break;
+ }
}
s->matrix_ch[i][0]= ch_in;
}
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index 21c9e33fa1..c2b5e18a2c 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -167,12 +167,16 @@ struct SwrContext {
struct Resampler const *resampler; ///< resampler virtual function table
double matrix[SWR_CH_MAX][SWR_CH_MAX]; ///< floating point rematrixing coefficients
- float matrix_flt[SWR_CH_MAX][SWR_CH_MAX]; ///< single precision floating point rematrixing coefficients
+ union {
+ float matrix_flt[SWR_CH_MAX][SWR_CH_MAX]; ///< single precision floating point rematrixing coefficients
+ ///< valid iff int_sample_fmt is AV_SAMPLE_FMT_FLTP
+ int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients
+ ///< valid iff int_sample_fmt is != AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP
+ };
uint8_t *native_matrix;
uint8_t *native_one;
uint8_t *native_simd_one;
uint8_t *native_simd_matrix;
- int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients
uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients
mix_1_1_func_type *mix_1_1_f;
mix_1_1_func_type *mix_1_1_simd;
--
2.49.1
>From 4ef6487459c479ba4247b6d57600c837cc3ad81e Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 18:19:48 +0200
Subject: [PATCH 2/4] swresample/rematrix: Avoid allocation for native_one
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswresample/rematrix.c | 25 ++++++++-----------------
libswresample/swresample.c | 4 ++--
libswresample/swresample_internal.h | 6 +++++-
libswresample/x86/rematrix_init.c | 2 +-
4 files changed, 16 insertions(+), 21 deletions(-)
diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index c88b22d8fb..906f6e0f34 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -466,8 +466,7 @@ av_cold int swri_rematrix_init(SwrContext *s){
if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
int maxsum = 0;
s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
- s->native_one = av_mallocz(sizeof(int));
- if (!s->native_matrix || !s->native_one)
+ if (!s->native_matrix)
return AVERROR(ENOMEM);
for (i = 0; i < nb_out; i++) {
double rem = 0;
@@ -481,7 +480,7 @@ av_cold int swri_rematrix_init(SwrContext *s){
}
maxsum = FFMAX(maxsum, sum);
}
- *((int*)s->native_one) = 32768;
+ s->native_one.i = 32768;
if (maxsum <= 32768) {
s->mix_1_1_f = copy_s16;
s->mix_2_1_f = sum2_s16;
@@ -493,37 +492,30 @@ av_cold int swri_rematrix_init(SwrContext *s){
}
}else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
s->native_matrix = av_calloc(nb_in * nb_out, sizeof(float));
- s->native_one = av_mallocz(sizeof(float));
- if (!s->native_matrix || !s->native_one)
+ if (!s->native_matrix)
return AVERROR(ENOMEM);
for (i = 0; i < nb_out; i++)
for (j = 0; j < nb_in; j++)
((float*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
- *((float*)s->native_one) = 1.0;
+ s->native_one.f = 1.0;
s->mix_1_1_f = copy_float;
s->mix_2_1_f = sum2_float;
s->mix_any_f = get_mix_any_func_float(s);
}else if(s->midbuf.fmt == AV_SAMPLE_FMT_DBLP){
s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
- s->native_one = av_mallocz(sizeof(double));
- if (!s->native_matrix || !s->native_one)
+ if (!s->native_matrix)
return AVERROR(ENOMEM);
for (i = 0; i < nb_out; i++)
for (j = 0; j < nb_in; j++)
((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
- *((double*)s->native_one) = 1.0;
+ s->native_one.d = 1.0;
s->mix_1_1_f = copy_double;
s->mix_2_1_f = sum2_double;
s->mix_any_f = get_mix_any_func_double(s);
}else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
- s->native_one = av_mallocz(sizeof(int));
- if (!s->native_one)
- return AVERROR(ENOMEM);
s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
- if (!s->native_matrix) {
- av_freep(&s->native_one);
+ if (!s->native_matrix)
return AVERROR(ENOMEM);
- }
for (i = 0; i < nb_out; i++) {
double rem = 0;
@@ -533,7 +525,7 @@ av_cold int swri_rematrix_init(SwrContext *s){
rem += target - ((int*)s->native_matrix)[i * nb_in + j];
}
}
- *((int*)s->native_one) = 32768;
+ s->native_one.i = 32768;
s->mix_1_1_f = copy_s32;
s->mix_2_1_f = sum2_s32;
s->mix_any_f = get_mix_any_func_s32(s);
@@ -569,7 +561,6 @@ av_cold int swri_rematrix_init(SwrContext *s){
av_cold void swri_rematrix_free(SwrContext *s){
av_freep(&s->native_matrix);
- av_freep(&s->native_one);
av_freep(&s->native_simd_matrix);
av_freep(&s->native_simd_one);
}
diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 0a4e216f9b..67fb7964b2 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -691,10 +691,10 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co
s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1);
if(out_count != len1)
for(ch=0; ch<preout->ch_count; ch++)
- s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, s->native_one, 0, 0, out_count - len1);
+ s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, &s->native_one, 0, 0, out_count - len1);
} else {
for(ch=0; ch<preout->ch_count; ch++)
- s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count);
+ s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, &s->native_one, 0, 0, out_count);
}
} else {
switch(s->int_sample_fmt) {
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index c2b5e18a2c..1c889272c8 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -173,8 +173,12 @@ struct SwrContext {
int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients
///< valid iff int_sample_fmt is != AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP
};
+ union {
+ int i;
+ float f;
+ double d;
+ } native_one;
uint8_t *native_matrix;
- uint8_t *native_one;
uint8_t *native_simd_one;
uint8_t *native_simd_matrix;
uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients
diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c
index 623e154f5d..5d2c7d9a37 100644
--- a/libswresample/x86/rematrix_init.c
+++ b/libswresample/x86/rematrix_init.c
@@ -79,7 +79,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
if (!s->native_simd_matrix || !s->native_simd_one)
return AVERROR(ENOMEM);
memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
- memcpy(s->native_simd_one, s->native_one, sizeof(float));
+ memcpy(s->native_simd_one, &s->native_one.f, sizeof(float));
}
#endif
--
2.49.1
>From b2fa00bfedee3ab7563fb348e379138dd683b37a Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 18:23:14 +0200
Subject: [PATCH 3/4] swresample/x86/rematrix_init: Avoid allocation for
native_simd_one
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswresample/rematrix.c | 1 -
libswresample/swresample.c | 2 +-
libswresample/swresample_internal.h | 5 ++++-
libswresample/x86/rematrix_init.c | 12 +++++-------
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index 906f6e0f34..76681e9229 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -562,7 +562,6 @@ av_cold int swri_rematrix_init(SwrContext *s){
av_cold void swri_rematrix_free(SwrContext *s){
av_freep(&s->native_matrix);
av_freep(&s->native_simd_matrix);
- av_freep(&s->native_simd_one);
}
int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){
diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 67fb7964b2..998ee7b73a 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -688,7 +688,7 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co
if(len1)
for(ch=0; ch<preout->ch_count; ch++)
- s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1);
+ s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, &s->native_simd_one, 0, 0, len1);
if(out_count != len1)
for(ch=0; ch<preout->ch_count; ch++)
s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, &s->native_one, 0, 0, out_count - len1);
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index 1c889272c8..b016ba3315 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -179,7 +179,10 @@ struct SwrContext {
double d;
} native_one;
uint8_t *native_matrix;
- uint8_t *native_simd_one;
+ union {
+ int16_t i16[2];
+ float f;
+ } native_simd_one;
uint8_t *native_simd_matrix;
uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients
mix_1_1_func_type *mix_1_1_f;
diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c
index 5d2c7d9a37..89ec362d62 100644
--- a/libswresample/x86/rematrix_init.c
+++ b/libswresample/x86/rematrix_init.c
@@ -48,8 +48,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
}
s->native_simd_matrix = av_calloc(num, 2 * sizeof(int16_t));
- s->native_simd_one = av_mallocz(2 * sizeof(int16_t));
- if (!s->native_simd_matrix || !s->native_simd_one)
+ if (!s->native_simd_matrix)
return AVERROR(ENOMEM);
for(i=0; i<nb_out; i++){
@@ -63,8 +62,8 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
}
}
- ((int16_t*)s->native_simd_one)[1] = 14;
- ((int16_t*)s->native_simd_one)[0] = 16384;
+ s->native_simd_one.i16[1] = 14;
+ s->native_simd_one.i16[0] = 16384;
} else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
if(EXTERNAL_SSE(mm_flags)) {
s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
@@ -75,11 +74,10 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
}
s->native_simd_matrix = av_calloc(num, sizeof(float));
- s->native_simd_one = av_mallocz(sizeof(float));
- if (!s->native_simd_matrix || !s->native_simd_one)
+ if (!s->native_simd_matrix)
return AVERROR(ENOMEM);
memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
- memcpy(s->native_simd_one, &s->native_one.f, sizeof(float));
+ s->native_simd_one.f = s->native_one.f;
}
#endif
--
2.49.1
>From b9e239738eef523a2216b210c5680d7fea76a539 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 18:33:12 +0200
Subject: [PATCH 4/4] swresample/rematrix_template: Constify get_mix_any_func
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswresample/rematrix_template.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libswresample/rematrix_template.c b/libswresample/rematrix_template.c
index b70e7489b0..450a276663 100644
--- a/libswresample/rematrix_template.c
+++ b/libswresample/rematrix_template.c
@@ -105,7 +105,7 @@ static void RENAME(mix8to2)(uint8_t *const *out_, const uint8_t *const *in_,
}
}
-static mix_any_func_type *RENAME(get_mix_any_func)(SwrContext *s)
+static mix_any_func_type *RENAME(get_mix_any_func)(const SwrContext *s)
{
if ( !av_channel_layout_compare(&s->out_ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO)
&& ( !av_channel_layout_compare(&s->in_ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1)
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-09-12 16:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-12 16:38 [FFmpeg-devel] [PATCH] swresample/swresample_internal: Use union for float, int matrix (PR #20505) mkver via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git