[FFmpeg-devel] [PATCH] swresample/swresample_internal: Use union for float, int matrix (PR #20505)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] swresample/swresample_internal: Use union for float, int matrix (PR #20505)
Date: Fri, 12 Sep 2025 16:38:17 -0000
Message-ID: <175769509855.25.10365749526819116680@463a07221176> (raw)

PR #20505 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20505
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20505.patch


>From 92b22e11ad75397137aa4bdcab6a3bf1e680a258 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 17:53:27 +0200
Subject: [PATCH 1/4] swresample/swresample_internal: Use union for float, int
 matrix

Saves 16KiB from SwrContext.
(FATE would also pass if one made the double matrix part of
the union, too, but I don't know whether this is truly correct,
because swri_rematrix() accesses the double matrix (to check whether
it is equal to 1.0) even when one of the other matrices is in use.)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswresample/rematrix.c            | 35 ++++++++++++++---------------
 libswresample/swresample_internal.h |  8 +++++--
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index f55b85a52d..c88b22d8fb 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -69,13 +69,12 @@ int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride)
     if (!s || s->in_convert) // s needs to be allocated but not initialized
         return AVERROR(EINVAL);
     memset(s->matrix, 0, sizeof(s->matrix));
-    memset(s->matrix_flt, 0, sizeof(s->matrix_flt));
 
     nb_in = s->user_in_chlayout.nb_channels;
     nb_out = s->user_out_chlayout.nb_channels;
     for (out = 0; out < nb_out; out++) {
         for (in = 0; in < nb_in; in++)
-            s->matrix_flt[out][in] = s->matrix[out][in] = matrix[in];
+            s->matrix[out][in] = matrix[in];
         matrix += stride;
     }
     s->rematrix_custom = 1;
@@ -436,7 +435,6 @@ fail:
 av_cold static int auto_matrix(SwrContext *s)
 {
     double maxval;
-    int ret;
 
     if (s->rematrix_maxval > 0) {
         maxval = s->rematrix_maxval;
@@ -447,19 +445,10 @@ av_cold static int auto_matrix(SwrContext *s)
         maxval = INT_MAX;
 
     memset(s->matrix, 0, sizeof(s->matrix));
-    ret = swr_build_matrix2(&s->in_ch_layout, &s->out_ch_layout,
-                           s->clev, s->slev, s->lfe_mix_level,
-                           maxval, s->rematrix_volume, (double*)s->matrix,
-                           s->matrix[1] - s->matrix[0], s->matrix_encoding, s);
-
-    if (ret >= 0 && s->int_sample_fmt == AV_SAMPLE_FMT_FLTP) {
-        int i, j;
-        for (i = 0; i < FF_ARRAY_ELEMS(s->matrix[0]); i++)
-            for (j = 0; j < FF_ARRAY_ELEMS(s->matrix[0]); j++)
-                s->matrix_flt[i][j] = s->matrix[i][j];
-    }
-
-    return ret;
+    return swr_build_matrix2(&s->in_ch_layout, &s->out_ch_layout,
+                             s->clev, s->slev, s->lfe_mix_level,
+                             maxval, s->rematrix_volume, (double*)s->matrix,
+                             s->matrix[1] - s->matrix[0], s->matrix_encoding, s);
 }
 
 av_cold int swri_rematrix_init(SwrContext *s){
@@ -554,9 +543,19 @@ av_cold int swri_rematrix_init(SwrContext *s){
     for (i = 0; i < SWR_CH_MAX; i++) {
         int ch_in=0;
         for (j = 0; j < SWR_CH_MAX; j++) {
-            s->matrix32[i][j]= lrintf(s->matrix[i][j] * 32768);
-            if(s->matrix[i][j])
+            const double coeff = s->matrix[i][j];
+            if (coeff)
                 s->matrix_ch[i][++ch_in]= j;
+            switch (s->int_sample_fmt) {
+            case AV_SAMPLE_FMT_FLTP:
+                s->matrix_flt[i][j] = coeff;
+                break;
+            case AV_SAMPLE_FMT_DBLP:
+                break;
+            default:
+                s->matrix32[i][j] = lrintf(coeff * 32768);
+                break;
+            }
         }
         s->matrix_ch[i][0]= ch_in;
     }
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index 21c9e33fa1..c2b5e18a2c 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -167,12 +167,16 @@ struct SwrContext {
     struct Resampler const *resampler;              ///< resampler virtual function table
 
     double matrix[SWR_CH_MAX][SWR_CH_MAX];          ///< floating point rematrixing coefficients
-    float matrix_flt[SWR_CH_MAX][SWR_CH_MAX];       ///< single precision floating point rematrixing coefficients
+    union {
+        float matrix_flt[SWR_CH_MAX][SWR_CH_MAX];   ///< single precision floating point rematrixing coefficients
+                                                    ///< valid iff int_sample_fmt is AV_SAMPLE_FMT_FLTP
+        int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX];   ///< 17.15 fixed point rematrixing coefficients
+                                                    ///< valid iff int_sample_fmt is != AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP
+    };
     uint8_t *native_matrix;
     uint8_t *native_one;
     uint8_t *native_simd_one;
     uint8_t *native_simd_matrix;
-    int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX];       ///< 17.15 fixed point rematrixing coefficients
     uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1];    ///< Lists of input channels per output channel that have non zero rematrixing coefficients
     mix_1_1_func_type *mix_1_1_f;
     mix_1_1_func_type *mix_1_1_simd;
-- 
2.49.1


>From 4ef6487459c479ba4247b6d57600c837cc3ad81e Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 18:19:48 +0200
Subject: [PATCH 2/4] swresample/rematrix: Avoid allocation for native_one

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswresample/rematrix.c            | 25 ++++++++-----------------
 libswresample/swresample.c          |  4 ++--
 libswresample/swresample_internal.h |  6 +++++-
 libswresample/x86/rematrix_init.c   |  2 +-
 4 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index c88b22d8fb..906f6e0f34 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -466,8 +466,7 @@ av_cold int swri_rematrix_init(SwrContext *s){
     if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
         int maxsum = 0;
         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
-        s->native_one    = av_mallocz(sizeof(int));
-        if (!s->native_matrix || !s->native_one)
+        if (!s->native_matrix)
             return AVERROR(ENOMEM);
         for (i = 0; i < nb_out; i++) {
             double rem = 0;
@@ -481,7 +480,7 @@ av_cold int swri_rematrix_init(SwrContext *s){
             }
             maxsum = FFMAX(maxsum, sum);
         }
-        *((int*)s->native_one) = 32768;
+        s->native_one.i = 32768;
         if (maxsum <= 32768) {
             s->mix_1_1_f = copy_s16;
             s->mix_2_1_f = sum2_s16;
@@ -493,37 +492,30 @@ av_cold int swri_rematrix_init(SwrContext *s){
         }
     }else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(float));
-        s->native_one    = av_mallocz(sizeof(float));
-        if (!s->native_matrix || !s->native_one)
+        if (!s->native_matrix)
             return AVERROR(ENOMEM);
         for (i = 0; i < nb_out; i++)
             for (j = 0; j < nb_in; j++)
                 ((float*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
-        *((float*)s->native_one) = 1.0;
+        s->native_one.f = 1.0;
         s->mix_1_1_f = copy_float;
         s->mix_2_1_f = sum2_float;
         s->mix_any_f = get_mix_any_func_float(s);
     }else if(s->midbuf.fmt == AV_SAMPLE_FMT_DBLP){
         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
-        s->native_one    = av_mallocz(sizeof(double));
-        if (!s->native_matrix || !s->native_one)
+        if (!s->native_matrix)
             return AVERROR(ENOMEM);
         for (i = 0; i < nb_out; i++)
             for (j = 0; j < nb_in; j++)
                 ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
-        *((double*)s->native_one) = 1.0;
+        s->native_one.d = 1.0;
         s->mix_1_1_f = copy_double;
         s->mix_2_1_f = sum2_double;
         s->mix_any_f = get_mix_any_func_double(s);
     }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
-        s->native_one    = av_mallocz(sizeof(int));
-        if (!s->native_one)
-            return AVERROR(ENOMEM);
         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
-        if (!s->native_matrix) {
-            av_freep(&s->native_one);
+        if (!s->native_matrix)
             return AVERROR(ENOMEM);
-        }
         for (i = 0; i < nb_out; i++) {
             double rem = 0;
 
@@ -533,7 +525,7 @@ av_cold int swri_rematrix_init(SwrContext *s){
                 rem += target - ((int*)s->native_matrix)[i * nb_in + j];
             }
         }
-        *((int*)s->native_one) = 32768;
+        s->native_one.i = 32768;
         s->mix_1_1_f = copy_s32;
         s->mix_2_1_f = sum2_s32;
         s->mix_any_f = get_mix_any_func_s32(s);
@@ -569,7 +561,6 @@ av_cold int swri_rematrix_init(SwrContext *s){
 
 av_cold void swri_rematrix_free(SwrContext *s){
     av_freep(&s->native_matrix);
-    av_freep(&s->native_one);
     av_freep(&s->native_simd_matrix);
     av_freep(&s->native_simd_one);
 }
diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 0a4e216f9b..67fb7964b2 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -691,10 +691,10 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co
                             s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1);
                     if(out_count != len1)
                         for(ch=0; ch<preout->ch_count; ch++)
-                            s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, s->native_one, 0, 0, out_count - len1);
+                            s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, &s->native_one, 0, 0, out_count - len1);
                 } else {
                     for(ch=0; ch<preout->ch_count; ch++)
-                        s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count);
+                        s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, &s->native_one, 0, 0, out_count);
                 }
             } else {
                 switch(s->int_sample_fmt) {
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index c2b5e18a2c..1c889272c8 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -173,8 +173,12 @@ struct SwrContext {
         int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX];   ///< 17.15 fixed point rematrixing coefficients
                                                     ///< valid iff int_sample_fmt is != AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP
     };
+    union {
+        int    i;
+        float  f;
+        double d;
+    } native_one;
     uint8_t *native_matrix;
-    uint8_t *native_one;
     uint8_t *native_simd_one;
     uint8_t *native_simd_matrix;
     uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1];    ///< Lists of input channels per output channel that have non zero rematrixing coefficients
diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c
index 623e154f5d..5d2c7d9a37 100644
--- a/libswresample/x86/rematrix_init.c
+++ b/libswresample/x86/rematrix_init.c
@@ -79,7 +79,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
         if (!s->native_simd_matrix || !s->native_simd_one)
             return AVERROR(ENOMEM);
         memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
-        memcpy(s->native_simd_one, s->native_one, sizeof(float));
+        memcpy(s->native_simd_one, &s->native_one.f, sizeof(float));
     }
 #endif
 
-- 
2.49.1


>From b2fa00bfedee3ab7563fb348e379138dd683b37a Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 18:23:14 +0200
Subject: [PATCH 3/4] swresample/x86/rematrix_init: Avoid allocation for
 native_simd_one

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswresample/rematrix.c            |  1 -
 libswresample/swresample.c          |  2 +-
 libswresample/swresample_internal.h |  5 ++++-
 libswresample/x86/rematrix_init.c   | 12 +++++-------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index 906f6e0f34..76681e9229 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -562,7 +562,6 @@ av_cold int swri_rematrix_init(SwrContext *s){
 av_cold void swri_rematrix_free(SwrContext *s){
     av_freep(&s->native_matrix);
     av_freep(&s->native_simd_matrix);
-    av_freep(&s->native_simd_one);
 }
 
 int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){
diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 67fb7964b2..998ee7b73a 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -688,7 +688,7 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co
 
                     if(len1)
                         for(ch=0; ch<preout->ch_count; ch++)
-                            s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1);
+                            s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, &s->native_simd_one, 0, 0, len1);
                     if(out_count != len1)
                         for(ch=0; ch<preout->ch_count; ch++)
                             s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off, &s->native_one, 0, 0, out_count - len1);
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index 1c889272c8..b016ba3315 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -179,7 +179,10 @@ struct SwrContext {
         double d;
     } native_one;
     uint8_t *native_matrix;
-    uint8_t *native_simd_one;
+    union {
+        int16_t i16[2];
+        float   f;
+    } native_simd_one;
     uint8_t *native_simd_matrix;
     uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1];    ///< Lists of input channels per output channel that have non zero rematrixing coefficients
     mix_1_1_func_type *mix_1_1_f;
diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c
index 5d2c7d9a37..89ec362d62 100644
--- a/libswresample/x86/rematrix_init.c
+++ b/libswresample/x86/rematrix_init.c
@@ -48,8 +48,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
             s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
         }
         s->native_simd_matrix = av_calloc(num,  2 * sizeof(int16_t));
-        s->native_simd_one    = av_mallocz(2 * sizeof(int16_t));
-        if (!s->native_simd_matrix || !s->native_simd_one)
+        if (!s->native_simd_matrix)
             return AVERROR(ENOMEM);
 
         for(i=0; i<nb_out; i++){
@@ -63,8 +62,8 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
                     ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
             }
         }
-        ((int16_t*)s->native_simd_one)[1] = 14;
-        ((int16_t*)s->native_simd_one)[0] = 16384;
+        s->native_simd_one.i16[1] = 14;
+        s->native_simd_one.i16[0] = 16384;
     } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
         if(EXTERNAL_SSE(mm_flags)) {
             s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
@@ -75,11 +74,10 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){
             s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
         }
         s->native_simd_matrix = av_calloc(num, sizeof(float));
-        s->native_simd_one = av_mallocz(sizeof(float));
-        if (!s->native_simd_matrix || !s->native_simd_one)
+        if (!s->native_simd_matrix)
             return AVERROR(ENOMEM);
         memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
-        memcpy(s->native_simd_one, &s->native_one.f, sizeof(float));
+        s->native_simd_one.f = s->native_one.f;
     }
 #endif
 
-- 
2.49.1


>From b9e239738eef523a2216b210c5680d7fea76a539 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 12 Sep 2025 18:33:12 +0200
Subject: [PATCH 4/4] swresample/rematrix_template: Constify get_mix_any_func

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswresample/rematrix_template.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswresample/rematrix_template.c b/libswresample/rematrix_template.c
index b70e7489b0..450a276663 100644
--- a/libswresample/rematrix_template.c
+++ b/libswresample/rematrix_template.c
@@ -105,7 +105,7 @@ static void RENAME(mix8to2)(uint8_t *const *out_, const uint8_t *const *in_,
     }
 }
 
-static mix_any_func_type *RENAME(get_mix_any_func)(SwrContext *s)
+static mix_any_func_type *RENAME(get_mix_any_func)(const SwrContext *s)
 {
     if (  !av_channel_layout_compare(&s->out_ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO)
        && (   !av_channel_layout_compare(&s->in_ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1)
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2025-09-12 16:38 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175769509855.25.10365749526819116680@463a07221176 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git