From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: mkver <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] swresample/swresample_internal: Use union for float, int matrix (PR #20505) Date: Fri, 12 Sep 2025 16:38:17 -0000 Message-ID: <175769509855.25.10365749526819116680@463a07221176> (raw) PR #20505 opened by mkver URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20505 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20505.patch >From 92b22e11ad75397137aa4bdcab6a3bf1e680a258 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Fri, 12 Sep 2025 17:53:27 +0200 Subject: [PATCH 1/4] swresample/swresample_internal: Use union for float, int matrix Saves 16KiB from SwrContext. (FATE would also pass if one made the double matrix part of the union, too, but I don't know whether this is truly correct, because swri_rematrix() accesses the double matrix (to check whether it is equal to 1.0) even when one of the other matrices is in use.) 
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswresample/rematrix.c | 35 ++++++++++++++--------------- libswresample/swresample_internal.h | 8 +++++-- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c index f55b85a52d..c88b22d8fb 100644 --- a/libswresample/rematrix.c +++ b/libswresample/rematrix.c @@ -69,13 +69,12 @@ int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride) if (!s || s->in_convert) // s needs to be allocated but not initialized return AVERROR(EINVAL); memset(s->matrix, 0, sizeof(s->matrix)); - memset(s->matrix_flt, 0, sizeof(s->matrix_flt)); nb_in = s->user_in_chlayout.nb_channels; nb_out = s->user_out_chlayout.nb_channels; for (out = 0; out < nb_out; out++) { for (in = 0; in < nb_in; in++) - s->matrix_flt[out][in] = s->matrix[out][in] = matrix[in]; + s->matrix[out][in] = matrix[in]; matrix += stride; } s->rematrix_custom = 1; @@ -436,7 +435,6 @@ fail: av_cold static int auto_matrix(SwrContext *s) { double maxval; - int ret; if (s->rematrix_maxval > 0) { maxval = s->rematrix_maxval; @@ -447,19 +445,10 @@ av_cold static int auto_matrix(SwrContext *s) maxval = INT_MAX; memset(s->matrix, 0, sizeof(s->matrix)); - ret = swr_build_matrix2(&s->in_ch_layout, &s->out_ch_layout, - s->clev, s->slev, s->lfe_mix_level, - maxval, s->rematrix_volume, (double*)s->matrix, - s->matrix[1] - s->matrix[0], s->matrix_encoding, s); - - if (ret >= 0 && s->int_sample_fmt == AV_SAMPLE_FMT_FLTP) { - int i, j; - for (i = 0; i < FF_ARRAY_ELEMS(s->matrix[0]); i++) - for (j = 0; j < FF_ARRAY_ELEMS(s->matrix[0]); j++) - s->matrix_flt[i][j] = s->matrix[i][j]; - } - - return ret; + return swr_build_matrix2(&s->in_ch_layout, &s->out_ch_layout, + s->clev, s->slev, s->lfe_mix_level, + maxval, s->rematrix_volume, (double*)s->matrix, + s->matrix[1] - s->matrix[0], s->matrix_encoding, s); } av_cold int swri_rematrix_init(SwrContext *s){ @@ -554,9 +543,19 @@ av_cold int 
swri_rematrix_init(SwrContext *s){ for (i = 0; i < SWR_CH_MAX; i++) { int ch_in=0; for (j = 0; j < SWR_CH_MAX; j++) { - s->matrix32[i][j]= lrintf(s->matrix[i][j] * 32768); - if(s->matrix[i][j]) + const double coeff = s->matrix[i][j]; + if (coeff) s->matrix_ch[i][++ch_in]= j; + switch (s->int_sample_fmt) { + case AV_SAMPLE_FMT_FLTP: + s->matrix_flt[i][j] = coeff; + break; + case AV_SAMPLE_FMT_DBLP: + break; + default: + s->matrix32[i][j] = lrintf(coeff * 32768); + break; + } } s->matrix_ch[i][0]= ch_in; } diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h index 21c9e33fa1..c2b5e18a2c 100644 --- a/libswresample/swresample_internal.h +++ b/libswresample/swresample_internal.h @@ -167,12 +167,16 @@ struct SwrContext { struct Resampler const *resampler; ///< resampler virtual function table double matrix[SWR_CH_MAX][SWR_CH_MAX]; ///< floating point rematrixing coefficients - float matrix_flt[SWR_CH_MAX][SWR_CH_MAX]; ///< single precision floating point rematrixing coefficients + union { + float matrix_flt[SWR_CH_MAX][SWR_CH_MAX]; ///< single precision floating point rematrixing coefficients + ///< valid iff int_sample_fmt is AV_SAMPLE_FMT_FLTP + int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients + ///< valid iff int_sample_fmt is != AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP + }; uint8_t *native_matrix; uint8_t *native_one; uint8_t *native_simd_one; uint8_t *native_simd_matrix; - int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients mix_1_1_func_type *mix_1_1_f; mix_1_1_func_type *mix_1_1_simd; -- 2.49.1 >From 4ef6487459c479ba4247b6d57600c837cc3ad81e Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Fri, 12 Sep 2025 18:19:48 +0200 Subject: [PATCH 2/4] swresample/rematrix: Avoid allocation for 
native_one Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswresample/rematrix.c | 25 ++++++++----------------- libswresample/swresample.c | 4 ++-- libswresample/swresample_internal.h | 6 +++++- libswresample/x86/rematrix_init.c | 2 +- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c index c88b22d8fb..906f6e0f34 100644 --- a/libswresample/rematrix.c +++ b/libswresample/rematrix.c @@ -466,8 +466,7 @@ av_cold int swri_rematrix_init(SwrContext *s){ if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ int maxsum = 0; s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int)); - s->native_one = av_mallocz(sizeof(int)); - if (!s->native_matrix || !s->native_one) + if (!s->native_matrix) return AVERROR(ENOMEM); for (i = 0; i < nb_out; i++) { double rem = 0; @@ -481,7 +480,7 @@ av_cold int swri_rematrix_init(SwrContext *s){ } maxsum = FFMAX(maxsum, sum); } - *((int*)s->native_one) = 32768; + s->native_one.i = 32768; if (maxsum <= 32768) { s->mix_1_1_f = copy_s16; s->mix_2_1_f = sum2_s16; @@ -493,37 +492,30 @@ av_cold int swri_rematrix_init(SwrContext *s){ } }else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ s->native_matrix = av_calloc(nb_in * nb_out, sizeof(float)); - s->native_one = av_mallocz(sizeof(float)); - if (!s->native_matrix || !s->native_one) + if (!s->native_matrix) return AVERROR(ENOMEM); for (i = 0; i < nb_out; i++) for (j = 0; j < nb_in; j++) ((float*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j]; - *((float*)s->native_one) = 1.0; + s->native_one.f = 1.0; s->mix_1_1_f = copy_float; s->mix_2_1_f = sum2_float; s->mix_any_f = get_mix_any_func_float(s); }else if(s->midbuf.fmt == AV_SAMPLE_FMT_DBLP){ s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double)); - s->native_one = av_mallocz(sizeof(double)); - if (!s->native_matrix || !s->native_one) + if (!s->native_matrix) return AVERROR(ENOMEM); for (i = 0; i < nb_out; i++) for (j = 0; j < nb_in; j++) ((double*)s->native_matrix)[i 
* nb_in + j] = s->matrix[i][j]; - *((double*)s->native_one) = 1.0; + s->native_one.d = 1.0; s->mix_1_1_f = copy_double; s->mix_2_1_f = sum2_double; s->mix_any_f = get_mix_any_func_double(s); }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){ - s->native_one = av_mallocz(sizeof(int)); - if (!s->native_one) - return AVERROR(ENOMEM); s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int)); - if (!s->native_matrix) { - av_freep(&s->native_one); + if (!s->native_matrix) return AVERROR(ENOMEM); - } for (i = 0; i < nb_out; i++) { double rem = 0; @@ -533,7 +525,7 @@ av_cold int swri_rematrix_init(SwrContext *s){ rem += target - ((int*)s->native_matrix)[i * nb_in + j]; } } - *((int*)s->native_one) = 32768; + s->native_one.i = 32768; s->mix_1_1_f = copy_s32; s->mix_2_1_f = sum2_s32; s->mix_any_f = get_mix_any_func_s32(s); @@ -569,7 +561,6 @@ av_cold int swri_rematrix_init(SwrContext *s){ av_cold void swri_rematrix_free(SwrContext *s){ av_freep(&s->native_matrix); - av_freep(&s->native_one); av_freep(&s->native_simd_matrix); av_freep(&s->native_simd_one); } diff --git a/libswresample/swresample.c b/libswresample/swresample.c index 0a4e216f9b..67fb7964b2 100644 --- a/libswresample/swresample.c +++ b/libswresample/swresample.c @@ -691,10 +691,10 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1); if(out_count != len1) for(ch=0; ch<preout->ch_count; ch++) - s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, s->native_one, 0, 0, out_count - len1); + s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, &s->native_one, 0, 0, out_count - len1); } else { for(ch=0; ch<preout->ch_count; ch++) - s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], 
s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count); + s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, &s->native_one, 0, 0, out_count); } } else { switch(s->int_sample_fmt) { diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h index c2b5e18a2c..1c889272c8 100644 --- a/libswresample/swresample_internal.h +++ b/libswresample/swresample_internal.h @@ -173,8 +173,12 @@ struct SwrContext { int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients ///< valid iff int_sample_fmt is != AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP }; + union { + int i; + float f; + double d; + } native_one; uint8_t *native_matrix; - uint8_t *native_one; uint8_t *native_simd_one; uint8_t *native_simd_matrix; uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c index 623e154f5d..5d2c7d9a37 100644 --- a/libswresample/x86/rematrix_init.c +++ b/libswresample/x86/rematrix_init.c @@ -79,7 +79,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){ if (!s->native_simd_matrix || !s->native_simd_one) return AVERROR(ENOMEM); memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); - memcpy(s->native_simd_one, s->native_one, sizeof(float)); + memcpy(s->native_simd_one, &s->native_one.f, sizeof(float)); } #endif -- 2.49.1 >From b2fa00bfedee3ab7563fb348e379138dd683b37a Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Fri, 12 Sep 2025 18:23:14 +0200 Subject: [PATCH 3/4] swresample/x86/rematrix_init: Avoid allocation for native_simd_one Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswresample/rematrix.c | 1 - libswresample/swresample.c | 2 +- libswresample/swresample_internal.h | 5 
++++- libswresample/x86/rematrix_init.c | 12 +++++------- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c index 906f6e0f34..76681e9229 100644 --- a/libswresample/rematrix.c +++ b/libswresample/rematrix.c @@ -562,7 +562,6 @@ av_cold int swri_rematrix_init(SwrContext *s){ av_cold void swri_rematrix_free(SwrContext *s){ av_freep(&s->native_matrix); av_freep(&s->native_simd_matrix); - av_freep(&s->native_simd_one); } int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){ diff --git a/libswresample/swresample.c b/libswresample/swresample.c index 67fb7964b2..998ee7b73a 100644 --- a/libswresample/swresample.c +++ b/libswresample/swresample.c @@ -688,7 +688,7 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co if(len1) for(ch=0; ch<preout->ch_count; ch++) - s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1); + s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, &s->native_simd_one, 0, 0, len1); if(out_count != len1) for(ch=0; ch<preout->ch_count; ch++) s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, &s->native_one, 0, 0, out_count - len1); diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h index 1c889272c8..b016ba3315 100644 --- a/libswresample/swresample_internal.h +++ b/libswresample/swresample_internal.h @@ -179,7 +179,10 @@ struct SwrContext { double d; } native_one; uint8_t *native_matrix; - uint8_t *native_simd_one; + union { + int16_t i16[2]; + float f; + } native_simd_one; uint8_t *native_simd_matrix; uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients mix_1_1_func_type 
*mix_1_1_f; diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c index 5d2c7d9a37..89ec362d62 100644 --- a/libswresample/x86/rematrix_init.c +++ b/libswresample/x86/rematrix_init.c @@ -48,8 +48,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){ s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2; } s->native_simd_matrix = av_calloc(num, 2 * sizeof(int16_t)); - s->native_simd_one = av_mallocz(2 * sizeof(int16_t)); - if (!s->native_simd_matrix || !s->native_simd_one) + if (!s->native_simd_matrix) return AVERROR(ENOMEM); for(i=0; i<nb_out; i++){ @@ -63,8 +62,8 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){ ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh; } } - ((int16_t*)s->native_simd_one)[1] = 14; - ((int16_t*)s->native_simd_one)[0] = 16384; + s->native_simd_one.i16[1] = 14; + s->native_simd_one.i16[0] = 16384; } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ if(EXTERNAL_SSE(mm_flags)) { s->mix_1_1_simd = ff_mix_1_1_a_float_sse; @@ -75,11 +74,10 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){ s->mix_2_1_simd = ff_mix_2_1_a_float_avx; } s->native_simd_matrix = av_calloc(num, sizeof(float)); - s->native_simd_one = av_mallocz(sizeof(float)); - if (!s->native_simd_matrix || !s->native_simd_one) + if (!s->native_simd_matrix) return AVERROR(ENOMEM); memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); - memcpy(s->native_simd_one, &s->native_one.f, sizeof(float)); + s->native_simd_one.f = s->native_one.f; } #endif -- 2.49.1 >From b9e239738eef523a2216b210c5680d7fea76a539 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Fri, 12 Sep 2025 18:33:12 +0200 Subject: [PATCH 4/4] swresample/rematrix_template: Constify get_mix_any_func Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswresample/rematrix_template.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libswresample/rematrix_template.c 
b/libswresample/rematrix_template.c index b70e7489b0..450a276663 100644 --- a/libswresample/rematrix_template.c +++ b/libswresample/rematrix_template.c @@ -105,7 +105,7 @@ static void RENAME(mix8to2)(uint8_t *const *out_, const uint8_t *const *in_, } } -static mix_any_func_type *RENAME(get_mix_any_func)(SwrContext *s) +static mix_any_func_type *RENAME(get_mix_any_func)(const SwrContext *s) { if ( !av_channel_layout_compare(&s->out_ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO) && ( !av_channel_layout_compare(&s->in_ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1) -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-09-12 16:38 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=175769509855.25.10365749526819116680@463a07221176 \ --to=ffmpeg-devel@ffmpeg.org \ --cc=code@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git