Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples
@ 2024-04-14 18:28 Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 2/6] avcodec/ac3enc: Avoid allocation for mdct_window Andreas Rheinhardt
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-14 18:28 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/ac3enc.c          |  4 ----
 libavcodec/ac3enc.h          |  7 ++++++-
 libavcodec/ac3enc_template.c | 12 +++++++++---
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 1ba4ba549e..a31b528597 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2184,7 +2184,6 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
 
     av_freep(&s->mdct_window);
-    av_freep(&s->windowed_samples);
     if (s->planar_samples)
         for (ch = 0; ch < s->channels; ch++)
             av_freep(&s->planar_samples[ch]);
@@ -2459,9 +2458,6 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
     int total_coefs    = AC3_MAX_COEFS * channel_blocks;
     const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
 
-    if (!(s->windowed_samples = av_malloc(sampletype_size * AC3_WINDOW_SIZE)))
-        return AVERROR(ENOMEM);
-
     if (!FF_ALLOCZ_TYPED_ARRAY(s->planar_samples,  s->channels))
         return AVERROR(ENOMEM);
 
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 227744d27f..618c952a18 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -30,6 +30,7 @@
 
 #include <stdint.h>
 
+#include "libavutil/mem_internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/tx.h"
 
@@ -232,7 +233,6 @@ typedef struct AC3EncodeContext {
     int frame_bits;                         ///< all frame bits except exponents and mantissas
     int exponent_bits;                      ///< number of bits used for exponents
 
-    void *windowed_samples;
     uint8_t **planar_samples;
     uint8_t *bap_buffer;
     uint8_t *bap1_buffer;
@@ -259,6 +259,11 @@ typedef struct AC3EncodeContext {
 
     /* AC-3 vs. E-AC-3 function pointers */
     void (*output_frame_header)(struct AC3EncodeContext *s);
+
+    union {
+        DECLARE_ALIGNED(32, float,   windowed_samples_float)[AC3_WINDOW_SIZE];
+        DECLARE_ALIGNED(32, int32_t, windowed_samples_fixed)[AC3_WINDOW_SIZE];
+    };
 } AC3EncodeContext;
 
 extern const AVChannelLayout ff_ac3_ch_layouts[19];
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index b0f9e69ee8..6070e14961 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -37,6 +37,11 @@
 #include "ac3enc.h"
 #include "eac3enc.h"
 
+#if AC3ENC_FLOAT
+#define RENAME(element) element ## _float
+#else
+#define RENAME(element) element ## _fixed
+#endif
 
 /*
  * Apply the MDCT to input samples to generate frequency coefficients.
@@ -51,15 +56,16 @@ static void apply_mdct(AC3EncodeContext *s)
         for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
             const SampleType *input_samples = (SampleType*)s->planar_samples[ch] + blk * AC3_BLOCK_SIZE;
+            SampleType *windowed_samples = s->RENAME(windowed_samples);
 
-            s->fdsp->vector_fmul(s->windowed_samples, input_samples,
+            s->fdsp->vector_fmul(windowed_samples, input_samples,
                                  s->mdct_window, AC3_BLOCK_SIZE);
-            s->fdsp->vector_fmul_reverse((SampleType*)s->windowed_samples + AC3_BLOCK_SIZE,
+            s->fdsp->vector_fmul_reverse(windowed_samples + AC3_BLOCK_SIZE,
                                          &input_samples[AC3_BLOCK_SIZE],
                                          s->mdct_window, AC3_BLOCK_SIZE);
 
             s->tx_fn(s->tx, block->mdct_coef[ch+1],
-                     s->windowed_samples, sizeof(float));
+                     windowed_samples, sizeof(*windowed_samples));
         }
     }
 }
-- 
2.40.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 2/6] avcodec/ac3enc: Avoid allocation for mdct_window
  2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
@ 2024-04-14 18:30 ` Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 3/6] avcodec/ac3enc: Avoid indirections, allocations of small arrays Andreas Rheinhardt
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-14 18:30 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/ac3enc.c          | 1 -
 libavcodec/ac3enc.h          | 5 ++++-
 libavcodec/ac3enc_fixed.c    | 7 +------
 libavcodec/ac3enc_float.c    | 7 +------
 libavcodec/ac3enc_template.c | 4 ++--
 5 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index a31b528597..a3a05b3ac8 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2183,7 +2183,6 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
     int blk, ch;
     AC3EncodeContext *s = avctx->priv_data;
 
-    av_freep(&s->mdct_window);
     if (s->planar_samples)
         for (ch = 0; ch < s->channels; ch++)
             av_freep(&s->planar_samples[ch]);
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 618c952a18..41b9a3a20b 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -169,7 +169,6 @@ typedef struct AC3EncodeContext {
     AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
     AVTXContext *tx;                        ///< FFT context for MDCT calculation
     av_tx_fn tx_fn;
-    const SampleType *mdct_window;          ///< MDCT window function array
 
     AC3Block blocks[AC3_MAX_BLOCKS];        ///< per-block info
 
@@ -260,6 +259,10 @@ typedef struct AC3EncodeContext {
     /* AC-3 vs. E-AC-3 function pointers */
     void (*output_frame_header)(struct AC3EncodeContext *s);
 
+    union {
+        DECLARE_ALIGNED(32, float,   mdct_window_float)[AC3_BLOCK_SIZE];
+        DECLARE_ALIGNED(32, int32_t, mdct_window_fixed)[AC3_BLOCK_SIZE];
+    };
     union {
         DECLARE_ALIGNED(32, float,   windowed_samples_float)[AC3_WINDOW_SIZE];
         DECLARE_ALIGNED(32, int32_t, windowed_samples_fixed)[AC3_WINDOW_SIZE];
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index d2f4cecd72..869e1f27a2 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -27,7 +27,6 @@
  */
 
 #define AC3ENC_FLOAT 0
-#include "libavutil/mem.h"
 #include "audiodsp.h"
 #include "ac3enc.h"
 #include "codec_internal.h"
@@ -79,16 +78,12 @@ static av_cold int ac3_fixed_mdct_init(AVCodecContext *avctx, AC3EncodeContext *
     float fwin[AC3_BLOCK_SIZE];
     const float scale = -1.0f;
 
-    int32_t *iwin = av_malloc_array(AC3_BLOCK_SIZE, sizeof(*iwin));
-    if (!iwin)
-        return AVERROR(ENOMEM);
+    int32_t *iwin = s->mdct_window_fixed;
 
     ff_kbd_window_init(fwin, 5.0, AC3_BLOCK_SIZE);
     for (int i = 0; i < AC3_BLOCK_SIZE; i++)
         iwin[i] = lrintf(fwin[i] * (1 << 22));
 
-    s->mdct_window = iwin;
-
     s->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index cfd233da09..94e8ebc42d 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -27,7 +27,6 @@
  */
 
 #define AC3ENC_FLOAT 1
-#include "libavutil/mem.h"
 #include "audiodsp.h"
 #include "ac3enc.h"
 #include "codec_internal.h"
@@ -87,12 +86,8 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
 static av_cold int ac3_float_mdct_init(AC3EncodeContext *s)
 {
     const float scale = -2.0 / AC3_WINDOW_SIZE;
-    float *window = av_malloc_array(AC3_BLOCK_SIZE, sizeof(*window));
-    if (!window)
-        return AVERROR(ENOMEM);
 
-    ff_kbd_window_init(window, 5.0, AC3_BLOCK_SIZE);
-    s->mdct_window = window;
+    ff_kbd_window_init(s->mdct_window_float, 5.0, AC3_BLOCK_SIZE);
 
     return av_tx_init(&s->tx, &s->tx_fn, AV_TX_FLOAT_MDCT, 0,
                       AC3_BLOCK_SIZE, &scale, 0);
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 6070e14961..698042ae5c 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -59,10 +59,10 @@ static void apply_mdct(AC3EncodeContext *s)
             SampleType *windowed_samples = s->RENAME(windowed_samples);
 
             s->fdsp->vector_fmul(windowed_samples, input_samples,
-                                 s->mdct_window, AC3_BLOCK_SIZE);
+                                 s->RENAME(mdct_window), AC3_BLOCK_SIZE);
             s->fdsp->vector_fmul_reverse(windowed_samples + AC3_BLOCK_SIZE,
                                          &input_samples[AC3_BLOCK_SIZE],
-                                         s->mdct_window, AC3_BLOCK_SIZE);
+                                         s->RENAME(mdct_window), AC3_BLOCK_SIZE);
 
             s->tx_fn(s->tx, block->mdct_coef[ch+1],
                      windowed_samples, sizeof(*windowed_samples));
-- 
2.40.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 3/6] avcodec/ac3enc: Avoid indirections, allocations of small arrays
  2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 2/6] avcodec/ac3enc: Avoid allocation for mdct_window Andreas Rheinhardt
@ 2024-04-14 18:30 ` Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 4/6] avcodec/ac3enc: Combine loops Andreas Rheinhardt
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-14 18:30 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/ac3enc.c | 44 ++++----------------------------------------
 libavcodec/ac3enc.h | 22 +++++++++++-----------
 2 files changed, 15 insertions(+), 51 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index a3a05b3ac8..1ef670622a 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2180,13 +2180,10 @@ static void dprint_options(AC3EncodeContext *s)
  */
 av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
 {
-    int blk, ch;
     AC3EncodeContext *s = avctx->priv_data;
 
-    if (s->planar_samples)
-        for (ch = 0; ch < s->channels; ch++)
-            av_freep(&s->planar_samples[ch]);
-    av_freep(&s->planar_samples);
+    for (int ch = 0; ch < s->channels; ch++)
+        av_freep(&s->planar_samples[ch]);
     av_freep(&s->bap_buffer);
     av_freep(&s->bap1_buffer);
     av_freep(&s->mdct_coef_buffer);
@@ -2200,19 +2197,6 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
     av_freep(&s->cpl_coord_exp_buffer);
     av_freep(&s->cpl_coord_mant_buffer);
     av_freep(&s->fdsp);
-    for (blk = 0; blk < s->num_blocks; blk++) {
-        AC3Block *block = &s->blocks[blk];
-        av_freep(&block->mdct_coef);
-        av_freep(&block->fixed_coef);
-        av_freep(&block->exp);
-        av_freep(&block->grouped_exp);
-        av_freep(&block->psd);
-        av_freep(&block->band_psd);
-        av_freep(&block->mask);
-        av_freep(&block->qmant);
-        av_freep(&block->cpl_coord_exp);
-        av_freep(&block->cpl_coord_mant);
-    }
 
     av_tx_uninit(&s->tx);
 
@@ -2457,9 +2441,6 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
     int total_coefs    = AC3_MAX_COEFS * channel_blocks;
     const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
 
-    if (!FF_ALLOCZ_TYPED_ARRAY(s->planar_samples,  s->channels))
-        return AVERROR(ENOMEM);
-
     for (int ch = 0; ch < s->channels; ch++) {
         s->planar_samples[ch] = av_mallocz((AC3_FRAME_SIZE + AC3_BLOCK_SIZE) *
                                                   sampletype_size);
@@ -2486,21 +2467,6 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
     for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
 
-        if (!FF_ALLOCZ_TYPED_ARRAY(block->mdct_coef,   channels) ||
-            !FF_ALLOCZ_TYPED_ARRAY(block->exp,         channels) ||
-            !FF_ALLOCZ_TYPED_ARRAY(block->grouped_exp, channels) ||
-            !FF_ALLOCZ_TYPED_ARRAY(block->psd,         channels) ||
-            !FF_ALLOCZ_TYPED_ARRAY(block->band_psd,    channels) ||
-            !FF_ALLOCZ_TYPED_ARRAY(block->mask,        channels) ||
-            !FF_ALLOCZ_TYPED_ARRAY(block->qmant,       channels))
-            return AVERROR(ENOMEM);
-
-        if (s->cpl_enabled) {
-            if (!FF_ALLOCZ_TYPED_ARRAY(block->cpl_coord_exp,  channels) ||
-                !FF_ALLOCZ_TYPED_ARRAY(block->cpl_coord_mant, channels))
-                return AVERROR(ENOMEM);
-        }
-
         for (ch = 0; ch < channels; ch++) {
             /* arrangement: block, channel, coeff */
             block->grouped_exp[ch] = &s->grouped_exp_buffer[128           * (blk * channels + ch)];
@@ -2524,16 +2490,14 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
             return AVERROR(ENOMEM);
         for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
-            if (!FF_ALLOCZ_TYPED_ARRAY(block->fixed_coef, channels))
-                return AVERROR(ENOMEM);
+
             for (ch = 0; ch < channels; ch++)
                 block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
         }
     } else {
         for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
-            if (!FF_ALLOCZ_TYPED_ARRAY(block->fixed_coef, channels))
-                return AVERROR(ENOMEM);
+
             for (ch = 0; ch < channels; ch++)
                 block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch];
         }
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 41b9a3a20b..54e14d43d9 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -127,16 +127,16 @@ typedef struct AC3EncOptions {
  * Data for a single audio block.
  */
 typedef struct AC3Block {
-    CoefType **mdct_coef;                       ///< MDCT coefficients
-    int32_t  **fixed_coef;                      ///< fixed-point MDCT coefficients
-    uint8_t  **exp;                             ///< original exponents
-    uint8_t  **grouped_exp;                     ///< grouped exponents
-    int16_t  **psd;                             ///< psd per frequency bin
-    int16_t  **band_psd;                        ///< psd per critical band
-    int16_t  **mask;                            ///< masking curve
-    uint16_t **qmant;                           ///< quantized mantissas
-    uint8_t  **cpl_coord_exp;                   ///< coupling coord exponents           (cplcoexp)
-    uint8_t  **cpl_coord_mant;                  ///< coupling coord mantissas           (cplcomant)
+    CoefType *mdct_coef[AC3_MAX_CHANNELS];      ///< MDCT coefficients
+    int32_t  *fixed_coef[AC3_MAX_CHANNELS];     ///< fixed-point MDCT coefficients
+    uint8_t  *exp[AC3_MAX_CHANNELS];            ///< original exponents
+    uint8_t  *grouped_exp[AC3_MAX_CHANNELS];    ///< grouped exponents
+    int16_t  *psd[AC3_MAX_CHANNELS];            ///< psd per frequency bin
+    int16_t  *band_psd[AC3_MAX_CHANNELS];       ///< psd per critical band
+    int16_t  *mask[AC3_MAX_CHANNELS];           ///< masking curve
+    uint16_t *qmant[AC3_MAX_CHANNELS];          ///< quantized mantissas
+    uint8_t  *cpl_coord_exp[AC3_MAX_CHANNELS];  ///< coupling coord exponents           (cplcoexp)
+    uint8_t  *cpl_coord_mant[AC3_MAX_CHANNELS]; ///< coupling coord mantissas           (cplcomant)
     uint8_t  new_rematrixing_strategy;          ///< send new rematrixing flags in this block
     int      num_rematrixing_bands;             ///< number of rematrixing bands
     uint8_t  rematrixing_flags[4];              ///< rematrixing flags
@@ -232,7 +232,7 @@ typedef struct AC3EncodeContext {
     int frame_bits;                         ///< all frame bits except exponents and mantissas
     int exponent_bits;                      ///< number of bits used for exponents
 
-    uint8_t **planar_samples;
+    uint8_t *planar_samples[AC3_MAX_CHANNELS - 1];
     uint8_t *bap_buffer;
     uint8_t *bap1_buffer;
     CoefType *mdct_coef_buffer;
-- 
2.40.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 4/6] avcodec/ac3enc: Combine loops
  2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 2/6] avcodec/ac3enc: Avoid allocation for mdct_window Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 3/6] avcodec/ac3enc: Avoid indirections, allocations of small arrays Andreas Rheinhardt
@ 2024-04-14 18:30 ` Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 5/6] avcodec/ac3enc: Combine cpl_coord buffers Andreas Rheinhardt
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-14 18:30 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/ac3enc.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 1ef670622a..eb878afc7b 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2459,6 +2459,10 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
         !FF_ALLOC_TYPED_ARRAY(s->qmant_buffer,       total_coefs))
         return AVERROR(ENOMEM);
 
+    if (!s->fixed_point) {
+        if (!FF_ALLOCZ_TYPED_ARRAY(s->fixed_coef_buffer, total_coefs))
+            return AVERROR(ENOMEM);
+    }
     if (s->cpl_enabled) {
         if (!FF_ALLOC_TYPED_ARRAY(s->cpl_coord_exp_buffer,  channel_blocks * 16) ||
             !FF_ALLOC_TYPED_ARRAY(s->cpl_coord_mant_buffer, channel_blocks * 16))
@@ -2482,24 +2486,10 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
             /* arrangement: channel, block, coeff */
             block->exp[ch]         = &s->exp_buffer        [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
             block->mdct_coef[ch]   = &s->mdct_coef_buffer  [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
-        }
-    }
-
-    if (!s->fixed_point) {
-        if (!FF_ALLOCZ_TYPED_ARRAY(s->fixed_coef_buffer, total_coefs))
-            return AVERROR(ENOMEM);
-        for (blk = 0; blk < s->num_blocks; blk++) {
-            AC3Block *block = &s->blocks[blk];
-
-            for (ch = 0; ch < channels; ch++)
-                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
-        }
-    } else {
-        for (blk = 0; blk < s->num_blocks; blk++) {
-            AC3Block *block = &s->blocks[blk];
-
-            for (ch = 0; ch < channels; ch++)
+            if (s->fixed_point)
                 block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch];
+            else
+                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
         }
     }
 
-- 
2.40.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 5/6] avcodec/ac3enc: Combine cpl_coord buffers
  2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
                   ` (2 preceding siblings ...)
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 4/6] avcodec/ac3enc: Combine loops Andreas Rheinhardt
@ 2024-04-14 18:30 ` Andreas Rheinhardt
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 6/6] avcodec/ac3enc: Avoid copying samples Andreas Rheinhardt
  2024-04-17 15:04 ` [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
  5 siblings, 0 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-14 18:30 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/ac3enc.c | 12 ++++++------
 libavcodec/ac3enc.h |  3 +--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index eb878afc7b..71d3026d40 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2194,8 +2194,7 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
     av_freep(&s->band_psd_buffer);
     av_freep(&s->mask_buffer);
     av_freep(&s->qmant_buffer);
-    av_freep(&s->cpl_coord_exp_buffer);
-    av_freep(&s->cpl_coord_mant_buffer);
+    av_freep(&s->cpl_coord_buffer);
     av_freep(&s->fdsp);
 
     av_tx_uninit(&s->tx);
@@ -2439,6 +2438,7 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
     int channels = s->channels + 1; /* includes coupling channel */
     int channel_blocks = channels * s->num_blocks;
     int total_coefs    = AC3_MAX_COEFS * channel_blocks;
+    uint8_t *cpl_coord_mant_buffer;
     const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
 
     for (int ch = 0; ch < s->channels; ch++) {
@@ -2464,9 +2464,9 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
             return AVERROR(ENOMEM);
     }
     if (s->cpl_enabled) {
-        if (!FF_ALLOC_TYPED_ARRAY(s->cpl_coord_exp_buffer,  channel_blocks * 16) ||
-            !FF_ALLOC_TYPED_ARRAY(s->cpl_coord_mant_buffer, channel_blocks * 16))
+        if (!FF_ALLOC_TYPED_ARRAY(s->cpl_coord_buffer, channel_blocks * 32))
             return AVERROR(ENOMEM);
+        cpl_coord_mant_buffer = s->cpl_coord_buffer + 16 * channel_blocks;
     }
     for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
@@ -2479,8 +2479,8 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
             block->mask[ch]        = &s->mask_buffer       [64            * (blk * channels + ch)];
             block->qmant[ch]       = &s->qmant_buffer      [AC3_MAX_COEFS * (blk * channels + ch)];
             if (s->cpl_enabled) {
-                block->cpl_coord_exp[ch]  = &s->cpl_coord_exp_buffer [16  * (blk * channels + ch)];
-                block->cpl_coord_mant[ch] = &s->cpl_coord_mant_buffer[16  * (blk * channels + ch)];
+                block->cpl_coord_exp[ch]  = &s->cpl_coord_buffer [16  * (blk * channels + ch)];
+                block->cpl_coord_mant[ch] = &cpl_coord_mant_buffer[16  * (blk * channels + ch)];
             }
 
             /* arrangement: channel, block, coeff */
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 54e14d43d9..4241a908a1 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -243,8 +243,7 @@ typedef struct AC3EncodeContext {
     int16_t *band_psd_buffer;
     int16_t *mask_buffer;
     int16_t *qmant_buffer;
-    uint8_t *cpl_coord_exp_buffer;
-    uint8_t *cpl_coord_mant_buffer;
+    uint8_t *cpl_coord_buffer;
 
     uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
     uint8_t frame_exp_strategy[AC3_MAX_CHANNELS];           ///< frame exp strategy index
-- 
2.40.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 6/6] avcodec/ac3enc: Avoid copying samples
  2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
                   ` (3 preceding siblings ...)
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 5/6] avcodec/ac3enc: Combine cpl_coord buffers Andreas Rheinhardt
@ 2024-04-14 18:30 ` Andreas Rheinhardt
  2024-04-17 15:04 ` [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
  5 siblings, 0 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-14 18:30 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Only the last 256 samples of each frame are used;
the encoder currently uses a buffer for 1536 + 256 samples
whose first 256 samples are the last 256 samples
from the previous frame and the next 1536 are the samples
of the current frame.
Yet since 238b2d4155d9779d770fccb3594076bb32742c82 all the
DSP functions only need 256 contiguous samples and this can
be achieved by only retaining the last 256 samples of each
frame. Doing so saves 6KiB per channel.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/ac3enc.c          | 29 ++---------------------------
 libavcodec/ac3enc.h          |  2 +-
 libavcodec/ac3enc_template.c | 20 ++++++++++++++------
 3 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 71d3026d40..1a869ab865 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -503,28 +503,6 @@ static void ac3_adjust_frame_size(AC3EncodeContext *s)
     s->samples_written += AC3_BLOCK_SIZE * s->num_blocks;
 }
 
-/*
- * Copy input samples.
- * Channels are reordered from FFmpeg's default order to AC-3 order.
- */
-static void copy_input_samples(AC3EncodeContext *s, uint8_t * const *samples)
-{
-    const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
-
-    /* copy and remap input samples */
-    for (int ch = 0; ch < s->channels; ch++) {
-        /* copy last 256 samples of previous frame to the start of the current frame */
-        memcpy(&s->planar_samples[ch][0],
-               s->planar_samples[ch] + AC3_BLOCK_SIZE * sampletype_size * s->num_blocks,
-               AC3_BLOCK_SIZE * sampletype_size);
-
-        /* copy new samples for current frame */
-        memcpy(s->planar_samples[ch] + AC3_BLOCK_SIZE * sampletype_size,
-               samples[s->channel_map[ch]],
-               sampletype_size * AC3_BLOCK_SIZE * s->num_blocks);
-    }
-}
-
 /**
  * Set the initial coupling strategy parameters prior to coupling analysis.
  *
@@ -2018,9 +1996,7 @@ int ff_ac3_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (s->bit_alloc.sr_code == 1 || s->eac3)
         ac3_adjust_frame_size(s);
 
-    copy_input_samples(s, frame->extended_data);
-
-    s->encode_frame(s);
+    s->encode_frame(s, frame->extended_data);
 
     ac3_apply_rematrixing(s);
 
@@ -2442,8 +2418,7 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
     const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
 
     for (int ch = 0; ch < s->channels; ch++) {
-        s->planar_samples[ch] = av_mallocz((AC3_FRAME_SIZE + AC3_BLOCK_SIZE) *
-                                                  sampletype_size);
+        s->planar_samples[ch] = av_mallocz(AC3_BLOCK_SIZE * sampletype_size);
         if (!s->planar_samples[ch])
             return AVERROR(ENOMEM);
     }
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 4241a908a1..30812617cc 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -253,7 +253,7 @@ typedef struct AC3EncodeContext {
     int ref_bap_set;                                         ///< indicates if ref_bap pointers have been set
 
     /** fixed vs. float function pointers */
-    void (*encode_frame)(struct AC3EncodeContext *s);
+    void (*encode_frame)(struct AC3EncodeContext *s, uint8_t * const *samples);
 
     /* AC-3 vs. E-AC-3 function pointers */
     void (*output_frame_header)(struct AC3EncodeContext *s);
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 698042ae5c..49fc6d7f37 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -48,25 +48,33 @@
  * This applies the KBD window and normalizes the input to reduce precision
  * loss due to fixed-point calculations.
  */
-static void apply_mdct(AC3EncodeContext *s)
+static void apply_mdct(AC3EncodeContext *s, uint8_t * const *samples)
 {
     int blk, ch;
 
     for (ch = 0; ch < s->channels; ch++) {
+        const SampleType *input_samples0 = (const SampleType*)s->planar_samples[ch];
+        /* Reorder channels from native order to AC-3 order. */
+        const SampleType *input_samples1 = (const SampleType*)samples[s->channel_map[ch]];
+
         for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
-            const SampleType *input_samples = (SampleType*)s->planar_samples[ch] + blk * AC3_BLOCK_SIZE;
             SampleType *windowed_samples = s->RENAME(windowed_samples);
 
-            s->fdsp->vector_fmul(windowed_samples, input_samples,
+            s->fdsp->vector_fmul(windowed_samples, input_samples0,
                                  s->RENAME(mdct_window), AC3_BLOCK_SIZE);
             s->fdsp->vector_fmul_reverse(windowed_samples + AC3_BLOCK_SIZE,
-                                         &input_samples[AC3_BLOCK_SIZE],
+                                         input_samples1,
                                          s->RENAME(mdct_window), AC3_BLOCK_SIZE);
 
             s->tx_fn(s->tx, block->mdct_coef[ch+1],
                      windowed_samples, sizeof(*windowed_samples));
+            input_samples0  = input_samples1;
+            input_samples1 += AC3_BLOCK_SIZE;
         }
+        /* Store last 256 samples of current frame */
+        memcpy(s->planar_samples[ch], input_samples0,
+               AC3_BLOCK_SIZE * sizeof(*input_samples0));
     }
 }
 
@@ -336,9 +344,9 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
 }
 
 
-static void encode_frame(AC3EncodeContext *s)
+static void encode_frame(AC3EncodeContext *s, uint8_t * const *samples)
 {
-    apply_mdct(s);
+    apply_mdct(s, samples);
 
     s->cpl_on = s->cpl_enabled;
     ff_ac3_compute_coupling_strategy(s);
-- 
2.40.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples
  2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
                   ` (4 preceding siblings ...)
  2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 6/6] avcodec/ac3enc: Avoid copying samples Andreas Rheinhardt
@ 2024-04-17 15:04 ` Andreas Rheinhardt
  5 siblings, 0 replies; 7+ messages in thread
From: Andreas Rheinhardt @ 2024-04-17 15:04 UTC (permalink / raw)
  To: ffmpeg-devel

Andreas Rheinhardt:
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
>  libavcodec/ac3enc.c          |  4 ----
>  libavcodec/ac3enc.h          |  7 ++++++-
>  libavcodec/ac3enc_template.c | 12 +++++++++---
>  3 files changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
> index 1ba4ba549e..a31b528597 100644
> --- a/libavcodec/ac3enc.c
> +++ b/libavcodec/ac3enc.c
> @@ -2184,7 +2184,6 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
>      AC3EncodeContext *s = avctx->priv_data;
>  
>      av_freep(&s->mdct_window);
> -    av_freep(&s->windowed_samples);
>      if (s->planar_samples)
>          for (ch = 0; ch < s->channels; ch++)
>              av_freep(&s->planar_samples[ch]);
> @@ -2459,9 +2458,6 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
>      int total_coefs    = AC3_MAX_COEFS * channel_blocks;
>      const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
>  
> -    if (!(s->windowed_samples = av_malloc(sampletype_size * AC3_WINDOW_SIZE)))
> -        return AVERROR(ENOMEM);
> -
>      if (!FF_ALLOCZ_TYPED_ARRAY(s->planar_samples,  s->channels))
>          return AVERROR(ENOMEM);
>  
> diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
> index 227744d27f..618c952a18 100644
> --- a/libavcodec/ac3enc.h
> +++ b/libavcodec/ac3enc.h
> @@ -30,6 +30,7 @@
>  
>  #include <stdint.h>
>  
> +#include "libavutil/mem_internal.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/tx.h"
>  
> @@ -232,7 +233,6 @@ typedef struct AC3EncodeContext {
>      int frame_bits;                         ///< all frame bits except exponents and mantissas
>      int exponent_bits;                      ///< number of bits used for exponents
>  
> -    void *windowed_samples;
>      uint8_t **planar_samples;
>      uint8_t *bap_buffer;
>      uint8_t *bap1_buffer;
> @@ -259,6 +259,11 @@ typedef struct AC3EncodeContext {
>  
>      /* AC-3 vs. E-AC-3 function pointers */
>      void (*output_frame_header)(struct AC3EncodeContext *s);
> +
> +    union {
> +        DECLARE_ALIGNED(32, float,   windowed_samples_float)[AC3_WINDOW_SIZE];
> +        DECLARE_ALIGNED(32, int32_t, windowed_samples_fixed)[AC3_WINDOW_SIZE];
> +    };
>  } AC3EncodeContext;
>  
>  extern const AVChannelLayout ff_ac3_ch_layouts[19];
> diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
> index b0f9e69ee8..6070e14961 100644
> --- a/libavcodec/ac3enc_template.c
> +++ b/libavcodec/ac3enc_template.c
> @@ -37,6 +37,11 @@
>  #include "ac3enc.h"
>  #include "eac3enc.h"
>  
> +#if AC3ENC_FLOAT
> +#define RENAME(element) element ## _float
> +#else
> +#define RENAME(element) element ## _fixed
> +#endif
>  
>  /*
>   * Apply the MDCT to input samples to generate frequency coefficients.
> @@ -51,15 +56,16 @@ static void apply_mdct(AC3EncodeContext *s)
>          for (blk = 0; blk < s->num_blocks; blk++) {
>              AC3Block *block = &s->blocks[blk];
>              const SampleType *input_samples = (SampleType*)s->planar_samples[ch] + blk * AC3_BLOCK_SIZE;
> +            SampleType *windowed_samples = s->RENAME(windowed_samples);
>  
> -            s->fdsp->vector_fmul(s->windowed_samples, input_samples,
> +            s->fdsp->vector_fmul(windowed_samples, input_samples,
>                                   s->mdct_window, AC3_BLOCK_SIZE);
> -            s->fdsp->vector_fmul_reverse((SampleType*)s->windowed_samples + AC3_BLOCK_SIZE,
> +            s->fdsp->vector_fmul_reverse(windowed_samples + AC3_BLOCK_SIZE,
>                                           &input_samples[AC3_BLOCK_SIZE],
>                                           s->mdct_window, AC3_BLOCK_SIZE);
>  
>              s->tx_fn(s->tx, block->mdct_coef[ch+1],
> -                     s->windowed_samples, sizeof(float));
> +                     windowed_samples, sizeof(*windowed_samples));
>          }
>      }
>  }

Will apply this patchset tomorrow unless there are objections.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-04-17 15:04 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-14 18:28 [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt
2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 2/6] avcodec/ac3enc: Avoid allocation for mdct_window Andreas Rheinhardt
2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 3/6] avcodec/ac3enc: Avoid indirections, allocations of small arrays Andreas Rheinhardt
2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 4/6] avcodec/ac3enc: Combine loops Andreas Rheinhardt
2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 5/6] avcodec/ac3enc: Combine cpl_coord buffers Andreas Rheinhardt
2024-04-14 18:30 ` [FFmpeg-devel] [PATCH 6/6] avcodec/ac3enc: Avoid copying samples Andreas Rheinhardt
2024-04-17 15:04 ` [FFmpeg-devel] [PATCH 1/6] avcodec/ac3enc: Avoid allocation for windowed_samples Andreas Rheinhardt

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git