Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
@ 2025-03-22 17:49 Massimo Eynard
  2025-03-23 17:31 ` Lynne
  2025-03-23 19:01 ` James Almer
  0 siblings, 2 replies; 11+ messages in thread
From: Massimo Eynard @ 2025-03-22 17:49 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Massimo Eynard

This patch adds support for decoding the fourth MLP substream
which contains the 16-channel presentation used for Atmos
audio objects.

By default only the first three substreams are decoded. The fourth
is decoded only when the new extract_objects option is enabled,
because the resulting presentation contains audio object feeds
instead of classic loudspeaker feeds.

As this introduces interpolation of primitive matrices, coefficient
precision has been increased to 2.18 fixed point. This requires a
DSP code upgrade, which has been done for the C and x86
implementations but not yet for the ARM implementation.

Adds two FATE tests using existing atmos.thd sample to reflect
changes.

Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
---
 libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
 libavcodec/arm/mlpdsp_init_arm.c |   3 +-
 libavcodec/mlp.h                 |  10 +-
 libavcodec/mlp_parse.c           |  31 ++-
 libavcodec/mlp_parse.h           |   1 +
 libavcodec/mlp_parser.c          |  11 +-
 libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
 libavcodec/mlpdsp.c              |  50 +++-
 libavcodec/mlpdsp.h              |  25 ++
 libavcodec/x86/mlpdsp.asm        |  19 +-
 tests/fate/truehd.mak            |  10 +
 11 files changed, 476 insertions(+), 75 deletions(-)

diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
index d31568611c..5210f391ab 100644
--- a/libavcodec/arm/mlpdsp_armv5te.S
+++ b/libavcodec/arm/mlpdsp_armv5te.S
@@ -21,7 +21,7 @@
 
 #include "libavutil/arm/asm.S"
 
-#define MAX_CHANNELS        8
+#define MAX_CHANNELS       16
 #define MAX_FIR_ORDER       8
 #define MAX_IIR_ORDER       4
 #define MAX_RATEFACTOR      4
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index 34a5f61e1d..50c3cf5488 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -113,6 +113,7 @@ static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
         ch_index = 2;
         break;
     default:
+        // max_matrix_channel > 7 requires &7 lossless buffer channel shift
         return ff_mlp_pack_output;
     }
 
@@ -139,7 +140,7 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
 
     if (have_armv5te(cpu_flags)) {
         c->mlp_filter_channel = ff_mlp_filter_channel_arm;
-        c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
+        // c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; // TODO: update to 2.18 coeff
     }
     if (have_armv6(cpu_flags))
         c->mlp_select_pack_output = mlp_select_pack_output_armv6;
diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
index bec414c680..264ef64cf1 100644
--- a/libavcodec/mlp.h
+++ b/libavcodec/mlp.h
@@ -31,19 +31,19 @@
 
 /** Last possible matrix channel for each codec */
 #define MAX_MATRIX_CHANNEL_MLP      5
-#define MAX_MATRIX_CHANNEL_TRUEHD   7
+#define MAX_MATRIX_CHANNEL_TRUEHD  15
 /** Maximum number of channels in a valid stream.
  *  MLP   : 5.1 + 2 noise channels -> 8 channels
- *  TrueHD: 7.1                    -> 8 channels
+ *  TrueHD: up to 16-ch pres       -> 16 channels
  */
-#define MAX_CHANNELS                8
+#define MAX_CHANNELS               16
 
 /** Maximum number of matrices used in decoding; most streams have one matrix
  *  per output channel, but some rematrix a channel (usually 0) more than once.
  */
 #define MAX_MATRICES_MLP            6
-#define MAX_MATRICES_TRUEHD         8
-#define MAX_MATRICES                8
+#define MAX_MATRICES_TRUEHD        16
+#define MAX_MATRICES               16
 
 /** Maximum number of substreams that can be decoded.
  *  MLP's limit is 2. TrueHD supports at least up to 3.
diff --git a/libavcodec/mlp_parse.c b/libavcodec/mlp_parse.c
index 924c731439..c94da860d0 100644
--- a/libavcodec/mlp_parse.c
+++ b/libavcodec/mlp_parse.c
@@ -85,7 +85,7 @@ static int mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
 
 int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
 {
-    int ratebits, channel_arrangement, header_size;
+    int ratebits, channel_arrangement, header_size, extra_ch_length;
     uint16_t checksum;
 
     av_assert1(get_bits_count(gb) == 0);
@@ -163,7 +163,34 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
     mh->extended_substream_info = get_bits(gb, 2);
     mh->substream_info = get_bits(gb, 8);
 
-    skip_bits_long(gb, (header_size - 18) * 8);
+    extra_ch_length = 0;
+    mh->channels_thd_stream3 = 0;
+
+    if (mh->stream_type == 0xba) {
+        skip_bits_long(gb, 63);
+
+        extra_ch_length = 64;
+        if (get_bits1(gb) && (mh->substream_info & 0x80)) {
+            /* 16ch_channel_meaning */
+            int length = (get_bits(gb, 4) + 1) << 1;
+            if (header_size - 26 < length) {
+                av_log(log, AV_LOG_ERROR, "packet too short, "
+                    "unable to read 16ch extra meaning in major sync %d %d\n",
+                    header_size, length);
+                return -1;
+            }
+
+            skip_bits_long(gb, 5+6); // dialogue norm/mix level
+            mh->channels_thd_stream3 = get_bits(gb, 5) + 1;
+            if (!get_bits1(gb)) { // dyn_object_only
+                avpriv_request_sample(log, "16ch presentation with a mixture of channels");
+                return AVERROR_PATCHWELCOME;
+            }
+            extra_ch_length += 21;
+        }
+    }
+
+    skip_bits_long(gb, (header_size - 18) * 8 - extra_ch_length);
 
     return 0;
 }
diff --git a/libavcodec/mlp_parse.h b/libavcodec/mlp_parse.h
index 5f1f953cfe..8011566ff7 100644
--- a/libavcodec/mlp_parse.h
+++ b/libavcodec/mlp_parse.h
@@ -47,6 +47,7 @@ typedef struct MLPHeaderInfo
     int channels_mlp;                       ///< Channel count for MLP streams
     int channels_thd_stream1;               ///< Channel count for substream 1 of TrueHD streams ("6-channel presentation")
     int channels_thd_stream2;               ///< Channel count for substream 2 of TrueHD streams ("8-channel presentation")
+    int channels_thd_stream3;               ///< Channel count for substream 3 of TrueHD streams ("16-channel presentation")
     uint64_t channel_layout_mlp;            ///< Channel layout for MLP streams
     uint64_t channel_layout_thd_stream1;    ///< Channel layout for substream 1 of TrueHD streams ("6-channel presentation")
     uint64_t channel_layout_thd_stream2;    ///< Channel layout for substream 2 of TrueHD streams ("8-channel presentation")
diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
index d391390dd5..f3d54c40e7 100644
--- a/libavcodec/mlp_parser.c
+++ b/libavcodec/mlp_parser.c
@@ -181,10 +181,15 @@ static int mlp_parse(AVCodecParserContext *s,
             av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_mlp);
         } else { /* mh.stream_type == 0xba */
             /* TrueHD stream */
-            if (!mh.channels_thd_stream2) {
-                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
-            } else {
+            if (mh.channels_thd_stream3) {
+                avctx->ch_layout = (AVChannelLayout) {
+                    AV_CHANNEL_ORDER_UNSPEC,
+                    mh.channels_thd_stream3
+                };
+            } else if (mh.channels_thd_stream2) {
                 av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream2);
+            } else {
+                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
             }
         }
 
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index e85dac36a7..14fb953265 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -61,8 +61,8 @@ typedef struct SubStream {
 
     //@{
     /** restart header data */
-    /// The type of noise to be used in the rematrix stage.
-    uint16_t    noise_type;
+    /// The type of substream given by the restart header sync word.
+    uint16_t    substream_type;
 
     /// The index of the first channel coded in this substream.
     uint8_t     min_channel;
@@ -88,6 +88,13 @@ typedef struct SubStream {
     /// The current seed value for the pseudorandom noise generator(s).
     uint32_t    noisegen_seed;
 
+    /// Maximum output_shift value.
+    int8_t      max_shift;
+    /// Maximum size of coded audio samples LSBs part.
+    int8_t      max_lsbs;
+    /// Maximum bit-depth of output audio samples.
+    int8_t      max_bits;
+
     /// Set if the substream contains extra info to check the size of VLC blocks.
     uint8_t     data_check_present;
 
@@ -104,11 +111,23 @@ typedef struct SubStream {
     /// matrix output channel
     uint8_t     matrix_out_ch[MAX_MATRICES];
 
-    /// Whether the LSBs of the matrix output are encoded in the bitstream.
+    /// Size of the LSBs of the matrix output encoded in the bitstream.
     uint8_t     lsb_bypass[MAX_MATRICES];
-    /// Matrix coefficients, stored as 2.14 fixed point.
+    /// Matrix coefficients fractional part size in bits.
+    uint8_t     matrix_coeff_frac_bits[MAX_MATRICES];
+    /// Matrix coefficients shift amount.
+    int8_t      matrix_coeff_shift[MAX_MATRICES];
+    /// Matrix coefficients presence mask.
+    uint16_t    matrix_coeff_mask[MAX_MATRICES];
+    /// Matrix coefficients, stored as 2.18 fixed point.
     DECLARE_ALIGNED(32, int32_t, matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
-    /// Left shift to apply to noise values in 0x31eb substreams.
+    /// Delta matrix coefficients size in bits for 0x31ec substreams.
+    uint8_t     delta_matrix_coeff_bits[MAX_MATRICES];
+    /// Delta matrix coefficients precision.
+    uint8_t     delta_matrix_coeff_prec[MAX_MATRICES];
+    /// Delta matrix coefficients, stored as 2.18 fixed point.
+    DECLARE_ALIGNED(32, int32_t, delta_matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
+    /// Left shift to apply to noise values in 0x31eb and 0x31ec substreams.
     uint8_t     matrix_noise_shift[MAX_MATRICES];
     //@}
 
@@ -133,6 +152,8 @@ typedef struct MLPDecodeContext {
     AVCodecContext *avctx;
 
     AVChannelLayout downmix_layout;
+    /// Set to enable decoding of non-loudspeaker feed (objects) audio channels
+    int extract_objects;
 
     /// Current access unit being read has a major sync.
     int         is_major_sync_unit;
@@ -267,14 +288,14 @@ static inline int read_huff_channels(MLPDecodeContext *m, GetBitContext *gbp,
 
     for (mat = 0; mat < s->num_primitive_matrices; mat++)
         if (s->lsb_bypass[mat])
-            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits1(gbp);
+            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits(gbp, s->lsb_bypass[mat]);
 
     for (channel = s->min_channel; channel <= s->max_channel; channel++) {
         ChannelParams *cp = &s->channel_params[channel];
         int codebook = cp->codebook;
         int quant_step_size = s->quant_step_size[channel];
         int lsb_bits = cp->huff_lsbs - quant_step_size;
-        int result = 0;
+        int32_t result = 0;
 
         if (codebook > 0)
             result = get_vlc2(gbp, huff_vlc[codebook-1].table,
@@ -410,8 +431,12 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
         m->avctx->profile     = AV_PROFILE_TRUEHD_ATMOS;
     }
 
-    /* limit to decoding 3 substreams, as the 4th is used by Dolby Atmos for non-audio data */
-    m->max_decoded_substream = FFMIN(m->num_substreams - 1, 2);
+    /* Limit to decoding the first 3 substreams (or allow the 4th for objects) */
+    m->max_decoded_substream = FFMIN(m->num_substreams - 1,
+        m->extract_objects ? 3 : 2);
+
+    av_log(m->avctx, AV_LOG_DEBUG, "decoding up to substream %" PRIu8 "\n",
+        m->max_decoded_substream);
 
     m->avctx->sample_rate    = mh.group1_samplerate;
     m->avctx->frame_size     = mh.access_unit_size;
@@ -531,23 +556,22 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
     int sync_word, tmp;
     uint8_t checksum;
     uint8_t lossless_check;
+    uint8_t max_bit_depth;
     int start_count = get_bits_count(gbp);
-    int min_channel, max_channel, max_matrix_channel, noise_type;
+    int min_channel, max_channel, max_matrix_channel;
     const int std_max_matrix_channel = m->avctx->codec_id == AV_CODEC_ID_MLP
                                      ? MAX_MATRIX_CHANNEL_MLP
                                      : MAX_MATRIX_CHANNEL_TRUEHD;
 
-    sync_word = get_bits(gbp, 13);
+    sync_word = get_bits(gbp, 14);
 
-    if (sync_word != 0x31ea >> 1) {
+    if (sync_word < 0x31ea || 0x31ec < sync_word) {
         av_log(m->avctx, AV_LOG_ERROR,
                "restart header sync incorrect (got 0x%04x)\n", sync_word);
         return AVERROR_INVALIDDATA;
     }
 
-    noise_type = get_bits1(gbp);
-
-    if (m->avctx->codec_id == AV_CODEC_ID_MLP && noise_type) {
+    if (m->avctx->codec_id == AV_CODEC_ID_MLP && 0x31ea != sync_word) {
         av_log(m->avctx, AV_LOG_ERROR, "MLP must have 0x31ea sync word.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -567,7 +591,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     /* This should happen for TrueHD streams with >6 channels and MLP's noise
      * type. It is not yet known if this is allowed. */
-    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && !noise_type) {
+    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && 0x31ea == sync_word) {
         avpriv_request_sample(m->avctx,
                               "%d channels (more than the "
                               "maximum supported by the decoder)",
@@ -582,7 +606,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
     s->max_channel        = max_channel;
     s->coded_channels     = ((1LL << (max_channel - min_channel + 1)) - 1) << min_channel;
     s->max_matrix_channel = max_matrix_channel;
-    s->noise_type         = noise_type;
+    s->substream_type     = sync_word;
 
     if (mlp_channel_layout_subset(&m->downmix_layout, s->mask) &&
         m->max_decoded_substream > substr) {
@@ -595,8 +619,28 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     s->noise_shift   = get_bits(gbp,  4);
     s->noisegen_seed = get_bits(gbp, 23);
+    s->max_shift     = get_bits(gbp,  4);
+    s->max_lsbs      = get_bits(gbp,  5);
+    s->max_bits      = get_bits(gbp,  5);
+
+    max_bit_depth = (0x31ec == sync_word) ? 31 : 24;
+    if (max_bit_depth < s->max_lsbs) {
+        av_log(m->avctx, AV_LOG_ERROR,
+               "Max LSB size %" PRIu8 " for substream %u exceeds "
+               "%" PRIu8 " bits.\n",
+               s->max_lsbs, substr, max_bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
 
-    skip_bits(gbp, 19);
+    if (max_bit_depth < s->max_bits) {
+        av_log(m->avctx, AV_LOG_ERROR,
+               "Max output bit-depth %" PRIu8 " for substream %u exceeds "
+               "%" PRIu8 " bits.\n",
+               s->max_bits, substr, max_bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
+
+    skip_bits(gbp, 5);
 
     s->data_check_present = get_bits1(gbp);
     lossless_check = get_bits(gbp, 8);
@@ -615,7 +659,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     for (ch = 0; ch <= s->max_matrix_channel; ch++) {
         int ch_assign = get_bits(gbp, 6);
-        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD) {
+        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD && s->mask) {
             AVChannelLayout l;
             enum AVChannel channel = thd_channel_layout_extract_channel(s->mask, ch_assign);
 
@@ -656,12 +700,19 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
         cp->huff_offset      = 0;
         cp->sign_huff_offset = -(1 << 23);
         cp->codebook         = 0;
-        cp->huff_lsbs        = 24;
+        cp->huff_lsbs        = (3 == substr) ? 31 : 24;
     }
 
     if (substr == m->max_decoded_substream) {
         av_channel_layout_uninit(&m->avctx->ch_layout);
-        av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
+        if (substr < 3) /* Loudspeaker feed channels */
+            av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
+        else /* Object channels */
+            m->avctx->ch_layout = (AVChannelLayout) {
+                AV_CHANNEL_ORDER_UNSPEC,
+                s->max_channel+1
+            };
+
         m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
                                                        s->output_shift,
                                                        s->max_matrix_channel,
@@ -760,17 +811,43 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
     return 0;
 }
 
-/** Read parameters for primitive matrices. */
+/** Get the maximum number of primitive matrices allowed. */
 
-static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
+static int get_max_nb_primitive_matrices(MLPDecodeContext *m, unsigned int substr)
+{
+    switch (substr) { // limit depends on which presentation substream_info says this substream carries
+    case 0: // substream 0 (up to 2 matrix channels)
+        return 2;
+    case 1: // substream 1
+        if (m->substream_info & 0x8) // 6-ch pres carried
+            return 6;
+        if (m->substream_info & 0x20) // 8-ch pres carried
+            return 8;
+        break;
+    case 2: // substream 2
+        if (m->substream_info & 0x40) // 8-ch pres carried
+            return 8;
+        break;
+    case 3: // substream 3
+        if (m->substream_info & 0x80) // 16-ch pres carried (hex mask, same bit the major sync parser tests)
+            return 16;
+        break;
+    }
+
+    return MAX_MATRICES_TRUEHD; // no presentation flag matched: fall back to the TrueHD-wide limit
+}
+
+/** Read parameters for primitive matrices (0x31ea and 0x31eb substreams). */
+
+static int read_31ea_31eb_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
 {
     SubStream *s = &m->substream[substr];
     unsigned int mat, ch;
     const int max_primitive_matrices = m->avctx->codec_id == AV_CODEC_ID_MLP
                                      ? MAX_MATRICES_MLP
-                                     : MAX_MATRICES_TRUEHD;
+                                     : get_max_nb_primitive_matrices(m, substr);
 
-    if (m->matrix_changed++ > 1) {
+    if (++m->matrix_changed > 1) {
         av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -779,8 +856,9 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
 
     if (s->num_primitive_matrices > max_primitive_matrices) {
         av_log(m->avctx, AV_LOG_ERROR,
-               "Number of primitive matrices cannot be greater than %d.\n",
-               max_primitive_matrices);
+               "Number of primitive matrices cannot be greater than %d "
+               "for substream %u of type 0x%04x.\n",
+               max_primitive_matrices, substr, s->substream_type);
         goto error;
     }
 
@@ -803,7 +881,7 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
         }
 
         max_chan = s->max_matrix_channel;
-        if (!s->noise_type)
+        if (0x31ea == s->substream_type)
             max_chan+=2;
 
         for (ch = 0; ch <= max_chan; ch++) {
@@ -811,10 +889,10 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
             if (get_bits1(gbp))
                 coeff_val = get_sbits(gbp, frac_bits + 2);
 
-            s->matrix_coeff[mat][ch] = coeff_val * (1 << (14 - frac_bits));
+            s->matrix_coeff[mat][ch] = coeff_val * (1 << ((14 + 4) - frac_bits));
         }
 
-        if (s->noise_type)
+        if (0x31eb == s->substream_type)
             s->matrix_noise_shift[mat] = get_bits(gbp, 4);
         else
             s->matrix_noise_shift[mat] = 0;
@@ -828,6 +906,124 @@ error:
     return AVERROR_INVALIDDATA;
 }
 
+/** Read seed and delta primitive matrix parameters (0x31ec substreams); returns 0 or AVERROR_INVALIDDATA. */
+
+static int read_31ec_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
+{
+    SubStream *s = &m->substream[substr];
+    unsigned int mat, ch;
+
+    const int max_primitive_matrices = get_max_nb_primitive_matrices(m, substr);
+
+    if (++m->matrix_changed > 1) { // a second matrix update is rejected
+        av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Seed primitive matrices (flag bit: new seed data present) */
+
+    if (get_bits1(gbp)) {
+        /* New seed primitive matrices */
+
+        if (get_bits1(gbp)) {
+            /* New seed matrix parameters: count, then per-matrix fields */
+            s->num_primitive_matrices = get_bits(gbp, 4) + 1;
+
+            if (s->num_primitive_matrices > max_primitive_matrices) {
+                av_log(m->avctx, AV_LOG_ERROR,
+                    "Number of primitive matrices cannot be greater than %d "
+                    "in substream %u of type 0x%04x.\n",
+                    max_primitive_matrices, substr, s->substream_type);
+                goto error;
+            }
+
+            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+                s->matrix_out_ch         [mat] = get_bits(gbp, 4);
+                s->matrix_coeff_frac_bits[mat] = get_bits(gbp, 4);
+                s->matrix_coeff_shift    [mat] = ((int) get_bits(gbp, 3)) - 1;
+                s->lsb_bypass            [mat] = get_bits(gbp, 2);
+                s->matrix_noise_shift    [mat] = get_bits(gbp, 4);
+                s->matrix_coeff_mask     [mat] = get_bits(gbp, s->max_matrix_channel + 1);
+
+                if (s->matrix_out_ch[mat] > s->max_matrix_channel) {
+                    av_log(m->avctx, AV_LOG_ERROR,
+                            "Invalid channel %d specified as output from matrix.\n",
+                            s->matrix_out_ch[mat]);
+                    goto error;
+                }
+                if (s->matrix_coeff_frac_bits[mat] > 14) {
+                    av_log(m->avctx, AV_LOG_ERROR,
+                            "Too many fractional bits specified.\n");
+                    goto error;
+                }
+            }
+        }
+
+        /* Seed matrix coefficients, read only for channels set in the mask */
+        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+            const int coeff_shift = s->matrix_coeff_shift[mat]
+                - s->matrix_coeff_frac_bits[mat];
+
+            memset(s->matrix_coeff[mat], 0, sizeof(s->matrix_coeff[mat]));
+
+            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
+                int64_t coeff_val;
+
+                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
+                    continue; // no coefficient coded for this channel; it stays 0
+
+                coeff_val = get_sbits(gbp, s->matrix_coeff_frac_bits[mat] + 2);
+                s->matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
+            }
+        }
+    }
+
+    if (!get_bits1(gbp)) {
+        /* No primitive matrix interpolation: clear every delta coefficient */
+        memset(s->delta_matrix_coeff, 0, sizeof(s->delta_matrix_coeff));
+    }
+    else if (get_bits1(gbp)) {
+        /* New delta primitive matrices */
+
+        if (get_bits1(gbp)) {
+            /* New delta primitive matrix parameters */
+
+            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+                s->delta_matrix_coeff_bits[mat] = get_bits(gbp, 4) + 1;
+                s->delta_matrix_coeff_prec[mat] = get_bits(gbp, 2);
+            }
+        }
+
+        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+            const int coeff_shift = s->matrix_coeff_shift[mat]
+                - s->delta_matrix_coeff_prec[mat]
+                - s->matrix_coeff_frac_bits[mat];
+
+            memset(s->delta_matrix_coeff[mat], 0, sizeof(s->delta_matrix_coeff[mat]));
+
+            if (s->delta_matrix_coeff_bits[mat] <= 1)
+                continue; // skip matrix: no delta coefficients are read for it
+
+            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
+                int64_t coeff_val;
+
+                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
+                    continue; // no delta coded for this channel; it stays 0
+
+                coeff_val = get_sbits(gbp, s->delta_matrix_coeff_bits[mat]);
+                s->delta_matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
+            }
+        }
+    }
+
+    return 0;
+error:
+    s->num_primitive_matrices = 0; // leave the substream with no matrices after a parse error
+    memset(s->matrix_out_ch, 0, sizeof(s->matrix_out_ch));
+
+    return AVERROR_INVALIDDATA;
+}
+
 /** Read channel parameters. */
 
 static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
@@ -875,8 +1071,10 @@ static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
     cp->codebook  = get_bits(gbp, 2);
     cp->huff_lsbs = get_bits(gbp, 5);
 
-    if (cp->codebook > 0 && cp->huff_lsbs > 24) {
-        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs.\n");
+    if (cp->codebook > 0 && cp->huff_lsbs > s->max_lsbs) {
+        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs=%" PRIu8 ", "
+               "exceeds max_lsbs=%" PRIu8 ".\n",
+               cp->huff_lsbs, s->max_lsbs);
         cp->huff_lsbs = 0;
         return AVERROR_INVALIDDATA;
     }
@@ -910,9 +1108,14 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
         }
 
     if (s->param_presence_flags & PARAM_MATRIX)
-        if (get_bits1(gbp))
-            if ((ret = read_matrix_params(m, substr, gbp)) < 0)
+        if (get_bits1(gbp)) {
+            if (0x31ec == s->substream_type)
+                ret = read_31ec_matrix_params(m, substr, gbp);
+            else
+                ret = read_31ea_31eb_matrix_params(m, substr, gbp);
+            if (ret < 0)
                 return ret;
+        }
 
     if (s->param_presence_flags & PARAM_OUTSHIFT)
         if (get_bits1(gbp)) {
@@ -922,6 +1125,10 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
                     avpriv_request_sample(m->avctx, "Negative output_shift");
                     s->output_shift[ch] = 0;
                 }
+                if (s->max_shift < s->output_shift[ch])
+                    av_log(m->avctx, AV_LOG_WARNING,
+                           "output_shift=%d exceeds max_shift=%d\n",
+                           s->output_shift[ch], s->max_shift);
             }
             if (substr == m->max_decoded_substream)
                 m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
@@ -1103,6 +1310,56 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
     s->noisegen_seed = seed;
 }
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+
+/** Check rematrix output for saturation (compiled only when ASSERT_LEVEL >= 2); logs warnings, modifies nothing. */
+
+static void check_rematrix_output(MLPDecodeContext *m, unsigned int substr)
+{
+    SubStream *s = &m->substream[substr];
+    unsigned int mat, sample;
+
+    for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+        unsigned int dest_ch = s->matrix_out_ch[mat];
+
+        const uint8_t shift = (3 <= substr && s->min_channel <= dest_ch) ? 31 : 23; // NOTE(review): 31 spans the whole 32-bit range - confirm the checks can still fire
+        const int32_t min_value = -(1u << shift);
+        const int32_t max_value =  (1u << shift) - 1;
+
+        for (sample = 0; sample < s->blockpos; sample++) {
+            if (m->sample_buffer[sample][dest_ch] < min_value)
+                av_log(m->avctx, AV_LOG_WARNING,
+                    "rematrix negative saturation substr=%u mat=%u sample=%u "
+                    "value=%" PRId32 "\n",
+                    substr, mat, sample, m->sample_buffer[sample][dest_ch]); // sample is unsigned, hence %u
+            if (m->sample_buffer[sample][dest_ch] > max_value)
+                av_log(m->avctx, AV_LOG_WARNING,
+                    "rematrix positive saturation substr=%u mat=%u sample=%u "
+                    "value=%" PRId32 "\n",
+                    substr, mat, sample, m->sample_buffer[sample][dest_ch]); // sample is unsigned, hence %u
+        }
+    }
+}
+
+/** Check output audio bit-depth against max_bits (compiled only when ASSERT_LEVEL >= 2); only logs a warning. */
+
+static void check_output_bit_depth(MLPDecodeContext *m, unsigned int substr)
+{
+    SubStream *s = &m->substream[substr];
+    uint32_t cumul_mask = 0; // OR of every sample magnitude in the block
+    unsigned int chan, sample;
+
+    for (chan = 0; chan <= s->max_matrix_channel; chan++)
+        for (sample = 0; sample < s->blockpos; sample++)
+            cumul_mask |= FFABSU(m->sample_buffer[sample][chan]); // unsigned abs: negating the most negative value must not overflow
+
+    if ((1u << s->max_bits) <= cumul_mask) // some magnitude has a bit set at or above max_bits
+        av_log(m->avctx, AV_LOG_WARNING, "output audio bit-depth exceeds "
+               "expected %u bits.\n",
+               s->max_bits);
+}
+#endif
+
 /** Write the audio data into the output buffer. */
 
 static int output_data(MLPDecodeContext *m, unsigned int substr,
@@ -1110,8 +1367,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
 {
     AVCodecContext *avctx = m->avctx;
     SubStream *s = &m->substream[substr];
-    unsigned int mat;
-    unsigned int maxchan;
+    unsigned int mat, chan, maxchan;
     int ret;
     int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
 
@@ -1126,7 +1382,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
     }
 
     maxchan = s->max_matrix_channel;
-    if (!s->noise_type) {
+    if (0x31ea == s->substream_type) {
         generate_2_noise_channels(m, substr);
         maxchan += 2;
     } else {
@@ -1137,19 +1393,45 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
      * samples. */
     for (mat = 0; mat < s->num_primitive_matrices; mat++) {
         unsigned int dest_ch = s->matrix_out_ch[mat];
-        m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
-                                    s->matrix_coeff[mat],
-                                    &m->bypassed_lsbs[0][mat],
-                                    m->noise_buffer,
-                                    s->num_primitive_matrices - mat,
-                                    dest_ch,
-                                    s->blockpos,
-                                    maxchan,
-                                    s->matrix_noise_shift[mat],
-                                    m->access_unit_size_pow2,
-                                    MSB_MASK(s->quant_step_size[dest_ch]));
+
+        if (substr < 3) {
+            /* Single primitive matrices */
+            m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
+                                        s->matrix_coeff[mat],
+                                        &m->bypassed_lsbs[0][mat],
+                                        m->noise_buffer,
+                                        s->num_primitive_matrices - mat,
+                                        dest_ch,
+                                        s->blockpos,
+                                        maxchan,
+                                        s->matrix_noise_shift[mat],
+                                        m->access_unit_size_pow2,
+                                        MSB_MASK(s->quant_step_size[dest_ch]));
+        }
+        else {
+            /* Interpolated primitive matrices */
+            m->dsp.mlp_rematrix_interp_channel(&m->sample_buffer[0][0],
+                                               s->matrix_coeff[mat],
+                                               s->delta_matrix_coeff[mat],
+                                               &m->bypassed_lsbs[0][mat],
+                                               m->noise_buffer,
+                                               s->num_primitive_matrices - mat,
+                                               dest_ch,
+                                               s->blockpos,
+                                               maxchan,
+                                               s->matrix_noise_shift[mat],
+                                               m->access_unit_size_pow2,
+                                               MSB_MASK(s->quant_step_size[dest_ch]));
+
+            for (chan = 0; chan <= maxchan; chan++)
+                s->matrix_coeff[mat][chan] += s->delta_matrix_coeff[mat][chan];
+        }
     }
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    check_rematrix_output(m, substr);
+#endif
+
     /* get output buffer */
     frame->nb_samples = s->blockpos;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
@@ -1163,6 +1445,10 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
                                             s->max_matrix_channel,
                                             is32);
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    check_output_bit_depth(m, substr);
+#endif
+
     /* Update matrix encoding side data */
     if (s->matrix_encoding != s->prev_matrix_encoding) {
         if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
@@ -1320,6 +1606,7 @@ static int read_access_unit(AVCodecContext *avctx, AVFrame *frame,
                  (avctx->ch_layout.nb_channels == 8 &&
                   ((m->substream_info >> 4) & 0x7) != 0x7 &&
                   ((m->substream_info >> 4) & 0x7) != 0x6 &&
+                  ((m->substream_info >> 4) & 0x7) != 0x4 &&
                   ((m->substream_info >> 4) & 0x7) != 0x3)) &&
                 substr > 0 && substr < m->max_decoded_substream &&
                 (s->min_channel <= m->substream[substr - 1].max_channel)) {
@@ -1429,8 +1716,10 @@ static void mlp_decode_flush(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(MLPDecodeContext, x)
 #define FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
 static const AVOption options[] = {
-    { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout),
-        AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
+    { "downmix", "Request a specific channel layout from the decoder",
+        OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
+    { "extract_objects", "Enable extraction of audio object channels",
+        OFFSET(extract_objects), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
     { NULL },
 };
 
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index cb40160f67..e28006f2c4 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -79,11 +79,51 @@ void ff_mlp_rematrix_channel(int32_t *samples,
 
         if (matrix_noise_shift) {
             index &= access_unit_size_pow2 - 1;
-            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7));
+            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
             index += index2;
         }
 
-        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
+        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
+        bypassed_lsbs += MAX_CHANNELS;
+        samples += MAX_CHANNELS;
+    }
+}
+
+/**
+ * Rematrix one output channel while linearly interpolating the primitive
+ * matrix: sample i uses seed_coeffs plus roughly i/blockpos of delta_coeffs.
+ * Coefficients are 2.18 fixed point, hence the final >> 18.
+ * The other parameters are used as in ff_mlp_rematrix_channel().
+ */
+void ff_mlp_rematrix_interp_channel(int32_t *samples,
+                                    const int32_t *seed_coeffs,
+                                    const int32_t *delta_coeffs,
+                                    const uint8_t *bypassed_lsbs,
+                                    const int8_t *noise_buffer, int index,
+                                    unsigned int dest_ch, uint16_t blockpos,
+                                    unsigned int maxchan, int matrix_noise_shift,
+                                    int access_unit_size_pow2, int32_t mask)
+{
+    unsigned int src_ch, i;
+    int index2 = 2 * index + 1;
+    /* 0.16 fixed-point step per sample; an empty block must not divide by zero */
+    int32_t delta_inc = blockpos ? (1 << 16) / blockpos : 0;
+
+    for (i = 0; i < blockpos; i++) {
+        int64_t accum = 0, delta_accum = 0;
+        for (src_ch = 0; src_ch <= maxchan; src_ch++) {
+            accum += (int64_t) samples[src_ch] * seed_coeffs[src_ch];
+            delta_accum += (int64_t) samples[src_ch] * delta_coeffs[src_ch];
+        }
+        accum += ((delta_accum >> 18) * i * delta_inc * (1 << 18)) >> 16;
+
+        if (matrix_noise_shift) {
+            index &= access_unit_size_pow2 - 1;
+            /* widen before scaling: an int8 value times 1 << (shift + 11) can overflow int */
+            accum += (int64_t) noise_buffer[index] * (1 << (matrix_noise_shift + 11));
+            index += index2;
+        }
+        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
         bypassed_lsbs += MAX_CHANNELS;
         samples += MAX_CHANNELS;
     }
@@ -115,9 +155,10 @@ int32_t ff_mlp_pack_output(int32_t lossless_check_data,
             int mat_ch = ch_assign[out_ch];
             int32_t sample = sample_buffer[i][mat_ch] *
                           (1U << output_shift[mat_ch]);
-            lossless_check_data ^= (sample & 0xffffff) << mat_ch;
+            lossless_check_data ^= (sample & 0xffffff) << (mat_ch & 7);
+
             if (is32)
-                *data_32++ = sample * 256U;
+                *data_32++ = sample * (1 << 8);
             else
                 *data_16++ = sample >> 8;
         }
@@ -129,6 +170,7 @@ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
 {
     c->mlp_filter_channel = mlp_filter_channel;
     c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
+    c->mlp_rematrix_interp_channel = ff_mlp_rematrix_interp_channel;
     c->mlp_select_pack_output = mlp_select_pack_output;
 #if ARCH_ARM
     ff_mlpdsp_init_arm(c);
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index 7a9ac228d3..fd29db10a7 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -37,6 +37,19 @@ void ff_mlp_rematrix_channel(int32_t *samples,
                              int access_unit_size_pow2,
                              int32_t mask);
 
+void ff_mlp_rematrix_interp_channel(int32_t *samples,
+                                    const int32_t *seed_coeffs,
+                                    const int32_t *delta_coeffs,
+                                    const uint8_t *bypassed_lsbs,
+                                    const int8_t *noise_buffer,
+                                    int index,
+                                    unsigned int dest_ch,
+                                    uint16_t blockpos,
+                                    unsigned int maxchan,
+                                    int matrix_noise_shift,
+                                    int access_unit_size_pow2,
+                                    int32_t mask);
+
 int32_t ff_mlp_pack_output(int32_t lossless_check_data,
                            uint16_t blockpos,
                            int32_t (*sample_buffer)[MAX_CHANNELS],
@@ -62,6 +75,18 @@ typedef struct MLPDSPContext {
                                  int matrix_noise_shift,
                                  int access_unit_size_pow2,
                                  int32_t mask);
+    void (*mlp_rematrix_interp_channel)(int32_t *samples,
+                                        const int32_t *seed_coeffs,
+                                        const int32_t *delta_coeffs,
+                                        const uint8_t *bypassed_lsbs,
+                                        const int8_t *noise_buffer,
+                                        int index,
+                                        unsigned int dest_ch,
+                                        uint16_t blockpos,
+                                        unsigned int maxchan,
+                                        int matrix_noise_shift,
+                                        int access_unit_size_pow2,
+                                        int32_t mask);
     int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
                                         int8_t *output_shift,
                                         uint8_t max_matrix_channel,
diff --git a/libavcodec/x86/mlpdsp.asm b/libavcodec/x86/mlpdsp.asm
index 3dc641e89e..3b232d4551 100644
--- a/libavcodec/x86/mlpdsp.asm
+++ b/libavcodec/x86/mlpdsp.asm
@@ -61,12 +61,12 @@ SECTION .text
     paddq        xm0, xm1
     movq      accumq, xm0
     movzx     blsbsd, byte [blsbs_ptrq]             ; load *bypassed_lsbs
-    sar       accumq, 14                            ; accum >>= 14
+    sar       accumq, 18                            ; accum >>= 18
     and       accumd, maskd                         ; accum &= mask
     add       accumd, blsbsd                        ; accum += *bypassed_lsbs
     mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
-    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
-    add     samplesq, 32                            ; samples += MAX_CHANNELS;
+    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
+    add     samplesq, 64                            ; samples += MAX_CHANNELS;
     cmp   blsbs_ptrq, cntq
 %endmacro
 
@@ -80,12 +80,12 @@ SECTION .text
     SHLX      noiseq, mns                           ; noise_buffer[index] <<= matrix_noise_shift
     add       accumq, noiseq                        ; accum += noise_buffer[index]
     movzx     noised, byte [blsbs_ptrq]             ; load *bypassed_lsbs (reuse tmp noise register)
-    sar       accumq, 14                            ; accum >>= 14
+    sar       accumq, 18                            ; accum >>= 18
     and       accumd, maskd                         ; accum &= mask
     add       accumd, noised                        ; accum += *bypassed_lsbs
     mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
-    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
-    add     samplesq, 32                            ; samples += MAX_CHANNELS;
+    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
+    add     samplesq, 64                            ; samples += MAX_CHANNELS;
     cmp   blsbs_ptrq, cntq
 %endmacro
 
@@ -106,7 +106,8 @@ cglobal mlp_rematrix_channel, 0, 13, 5, samples, coeffs, blsbs_ptr, blsbs, \
     mov     dest_chd, dest_chm                      ; load dest_chd (not needed on UNIX64)
 %endif
     shl     dest_chd, 2
-    lea         cntq, [blsbs_ptrq + blockposq*8]
+    lea         cntq, [blsbs_ptrq + blockposq*8]    ; loop end address (bypassed_lsbs + blockpos * MAX_CHANNELS)
+    lea         cntq, [cntq + blockposq*8]
     test        mnsd, mnsd                          ; is matrix_noise_shift != 0?
     jne .shift                                      ; jump if true
     cmp     maxchand, 4                             ; is maxchan < 4?
@@ -144,7 +145,7 @@ align 16
     DEFINE_ARGS samples, coeffs, blsbs_ptr, noise_buffer, \
                 index, dest_ch, accum, index2, mns, \
                 ausp, mask, cnt, noise
-    add         mnsd, 7              ; matrix_noise_shift += 7
+    add         mnsd, 11             ; matrix_noise_shift += 11
 %else ; sse4
     mov           r6, rcx            ; move rcx elsewhere so we can use cl for matrix_noise_shift
 %if WIN64
@@ -156,7 +157,7 @@ align 16
     DEFINE_ARGS samples, coeffs, blsbs_ptr, mns, index, dest_ch, noise_buffer, \
                 index2, accum, ausp, mask, cnt, noise
 %endif
-    lea         mnsd, [r8 + 7]       ; rcx = matrix_noise_shift + 7
+    lea         mnsd, [r8 + 11]      ; rcx = matrix_noise_shift + 11
 %endif ; cpuflag
     sub        auspd, 1              ; access_unit_size_pow2 -= 1
     cmp          r7d, 4              ; is maxchan < 4?
diff --git a/tests/fate/truehd.mak b/tests/fate/truehd.mak
index b0bc86a965..30c0e9628b 100644
--- a/tests/fate/truehd.mak
+++ b/tests/fate/truehd.mak
@@ -18,5 +18,15 @@ fate-truehd-mono1726: CMD = md5pipe -f truehd -i $(TARGET_SAMPLES)/truehd/ticket
 fate-truehd-mono1726: CMP = oneline
 fate-truehd-mono1726: REF = 9be9551fac418440bb02101bfdb11df9
 
+FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-no-obj
+fate-truehd-atmos-no-obj: CMD = md5pipe -f truehd -extract_objects 0 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
+fate-truehd-atmos-no-obj: CMP = oneline
+fate-truehd-atmos-no-obj: REF = 53da6ce35c778bcc2182ef2160bf16a2
+
+FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-obj
+fate-truehd-atmos-obj: CMD = md5pipe -f truehd -extract_objects 1 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
+fate-truehd-atmos-obj: CMP = oneline
+fate-truehd-atmos-obj: REF = f19f6f8ec8b040050aaa019f016f7ddc
+
 FATE_SAMPLES_AUDIO += $(FATE_TRUEHD-yes)
 fate-truehd: $(FATE_TRUEHD-yes)
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-22 17:49 [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data Massimo Eynard
@ 2025-03-23 17:31 ` Lynne
  2025-03-23 18:58   ` Massimo Eynard
  2025-03-23 19:01 ` James Almer
  1 sibling, 1 reply; 11+ messages in thread
From: Lynne @ 2025-03-23 17:31 UTC (permalink / raw)
  To: ffmpeg-devel

On 22/03/2025 18:49, Massimo Eynard wrote:
> This patch adds support for decoding the fourth MLP substream
> which contains the 16-channel presentation used for Atmos
> audio objects.
> 
> By default only the first three substreams are decoded
> unless the new extract_objects flag is enabled as the resulting
> presentation contains audio object feeds instead of classic
> loudspeaker feeds.
> 
> As this introduces interpolation of primitive matrices, precision
> has been increased to 2.18 fixed point. Therefore this requires
> DSP code upgrade which has been done for C and x86 implementations
> but not the ARM implementation.
> 
> Adds two FATE tests using existing atmos.thd sample to reflect
> changes.
> 
> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
> ---
>   libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>   libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>   libavcodec/mlp.h                 |  10 +-
>   libavcodec/mlp_parse.c           |  31 ++-
>   libavcodec/mlp_parse.h           |   1 +
>   libavcodec/mlp_parser.c          |  11 +-
>   libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>   libavcodec/mlpdsp.c              |  50 +++-
>   libavcodec/mlpdsp.h              |  25 ++
>   libavcodec/x86/mlpdsp.asm        |  19 +-
>   tests/fate/truehd.mak            |  10 +
>   11 files changed, 476 insertions(+), 75 deletions(-)
> 
> diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
> index d31568611c..5210f391ab 100644
> --- a/libavcodec/arm/mlpdsp_armv5te.S
> +++ b/libavcodec/arm/mlpdsp_armv5te.S
> @@ -21,7 +21,7 @@
>   
>   #include "libavutil/arm/asm.S"
>   
> -#define MAX_CHANNELS        8
> +#define MAX_CHANNELS       16
>   #define MAX_FIR_ORDER       8
>   #define MAX_IIR_ORDER       4
>   #define MAX_RATEFACTOR      4
> diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
> index 34a5f61e1d..50c3cf5488 100644
> --- a/libavcodec/arm/mlpdsp_init_arm.c
> +++ b/libavcodec/arm/mlpdsp_init_arm.c
> @@ -113,6 +113,7 @@ static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
>           ch_index = 2;
>           break;
>       default:
> +        // max_matrix_channel > 7 requires &7 lossless buffer channel shift
>           return ff_mlp_pack_output;
>       }
>   
> @@ -139,7 +140,7 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
>   
>       if (have_armv5te(cpu_flags)) {
>           c->mlp_filter_channel = ff_mlp_filter_channel_arm;
> -        c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
> +        // c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; // TODO: update to 2.18 coeff
>       }
>       if (have_armv6(cpu_flags))
>           c->mlp_select_pack_output = mlp_select_pack_output_armv6;
> diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
> index bec414c680..264ef64cf1 100644
> --- a/libavcodec/mlp.h
> +++ b/libavcodec/mlp.h
> @@ -31,19 +31,19 @@
>   
>   /** Last possible matrix channel for each codec */
>   #define MAX_MATRIX_CHANNEL_MLP      5
> -#define MAX_MATRIX_CHANNEL_TRUEHD   7
> +#define MAX_MATRIX_CHANNEL_TRUEHD  15
>   /** Maximum number of channels in a valid stream.
>    *  MLP   : 5.1 + 2 noise channels -> 8 channels
> - *  TrueHD: 7.1                    -> 8 channels
> + *  TrueHD: up to 16-ch pres       -> 16 channels
>    */
> -#define MAX_CHANNELS                8
> +#define MAX_CHANNELS               16
>   
>   /** Maximum number of matrices used in decoding; most streams have one matrix
>    *  per output channel, but some rematrix a channel (usually 0) more than once.
>    */
>   #define MAX_MATRICES_MLP            6
> -#define MAX_MATRICES_TRUEHD         8
> -#define MAX_MATRICES                8
> +#define MAX_MATRICES_TRUEHD        16
> +#define MAX_MATRICES               16
>   
>   /** Maximum number of substreams that can be decoded.
>    *  MLP's limit is 2. TrueHD supports at least up to 3.
> diff --git a/libavcodec/mlp_parse.c b/libavcodec/mlp_parse.c
> index 924c731439..c94da860d0 100644
> --- a/libavcodec/mlp_parse.c
> +++ b/libavcodec/mlp_parse.c
> @@ -85,7 +85,7 @@ static int mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
>   
>   int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
>   {
> -    int ratebits, channel_arrangement, header_size;
> +    int ratebits, channel_arrangement, header_size, extra_ch_length;
>       uint16_t checksum;
>   
>       av_assert1(get_bits_count(gb) == 0);
> @@ -163,7 +163,34 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
>       mh->extended_substream_info = get_bits(gb, 2);
>       mh->substream_info = get_bits(gb, 8);
>   
> -    skip_bits_long(gb, (header_size - 18) * 8);
> +    extra_ch_length = 0;
> +    mh->channels_thd_stream3 = 0;
> +
> +    if (mh->stream_type == 0xba) {
> +        skip_bits_long(gb, 63);
> +
> +        extra_ch_length = 64;
> +        if (get_bits1(gb) && (mh->substream_info & 0x80)) {
> +            /* 16ch_channel_meaning */
> +            int length = (get_bits(gb, 4) + 1) << 1;
> +            if (header_size - 26 < length) {
> +                av_log(log, AV_LOG_ERROR, "packet too short, "
> +                    "unable to read 16ch extra meaning in major sync %d %d\n",
> +                    header_size, length);
> +                return -1;
> +            }
> +
> +            skip_bits_long(gb, 5+6); // dialogue norm/mix level
> +            mh->channels_thd_stream3 = get_bits(gb, 5) + 1;
> +            if (!get_bits1(gb)) { // dyn_object_only
> +                avpriv_request_sample(log, "16ch presentation with a mixture of channels");
> +                return AVERROR_PATCHWELCOME;
> +            }
> +            extra_ch_length += 21;
> +        }
> +    }
> +
> +    skip_bits_long(gb, (header_size - 18) * 8 - extra_ch_length);
>   
>       return 0;
>   }
> diff --git a/libavcodec/mlp_parse.h b/libavcodec/mlp_parse.h
> index 5f1f953cfe..8011566ff7 100644
> --- a/libavcodec/mlp_parse.h
> +++ b/libavcodec/mlp_parse.h
> @@ -47,6 +47,7 @@ typedef struct MLPHeaderInfo
>       int channels_mlp;                       ///< Channel count for MLP streams
>       int channels_thd_stream1;               ///< Channel count for substream 1 of TrueHD streams ("6-channel presentation")
>       int channels_thd_stream2;               ///< Channel count for substream 2 of TrueHD streams ("8-channel presentation")
> +    int channels_thd_stream3;               ///< Channel count for substream 3 of TrueHD streams ("16-channel presentation")
>       uint64_t channel_layout_mlp;            ///< Channel layout for MLP streams
>       uint64_t channel_layout_thd_stream1;    ///< Channel layout for substream 1 of TrueHD streams ("6-channel presentation")
>       uint64_t channel_layout_thd_stream2;    ///< Channel layout for substream 2 of TrueHD streams ("8-channel presentation")
> diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
> index d391390dd5..f3d54c40e7 100644
> --- a/libavcodec/mlp_parser.c
> +++ b/libavcodec/mlp_parser.c
> @@ -181,10 +181,15 @@ static int mlp_parse(AVCodecParserContext *s,
>               av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_mlp);
>           } else { /* mh.stream_type == 0xba */
>               /* TrueHD stream */
> -            if (!mh.channels_thd_stream2) {
> -                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
> -            } else {
> +            if (mh.channels_thd_stream3) {
> +                avctx->ch_layout = (AVChannelLayout) {
> +                    AV_CHANNEL_ORDER_UNSPEC,
> +                    mh.channels_thd_stream3
> +                };

Is the order really unspecified? Surely there's some understanding which 
channels map to which position.

> +            } else if (mh.channels_thd_stream2) {
>                   av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream2);
> +            } else {
> +                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
>               }
>           }
>   
> diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
> index e85dac36a7..14fb953265 100644
> --- a/libavcodec/mlpdec.c
> +++ b/libavcodec/mlpdec.c
> @@ -61,8 +61,8 @@ typedef struct SubStream {
>   
>       //@{
>       /** restart header data */
> -    /// The type of noise to be used in the rematrix stage.
> -    uint16_t    noise_type;
> +    /// The type of substream given by the restart header sync word.
> +    uint16_t    substream_type;
>   
>       /// The index of the first channel coded in this substream.
>       uint8_t     min_channel;
> @@ -88,6 +88,13 @@ typedef struct SubStream {
>       /// The current seed value for the pseudorandom noise generator(s).
>       uint32_t    noisegen_seed;
>   
> +    /// Maximum output_shift value.
> +    int8_t      max_shift;
> +    /// Maximum size of coded audio samples LSBs part.
> +    int8_t      max_lsbs;
> +    /// Maximum bit-depth of output audio samples.
> +    int8_t      max_bits;
> +
>       /// Set if the substream contains extra info to check the size of VLC blocks.
>       uint8_t     data_check_present;
>   
> @@ -104,11 +111,23 @@ typedef struct SubStream {
>       /// matrix output channel
>       uint8_t     matrix_out_ch[MAX_MATRICES];
>   
> -    /// Whether the LSBs of the matrix output are encoded in the bitstream.
> +    /// Size of the LSBs of the matrix output encoded in the bitstream.
>       uint8_t     lsb_bypass[MAX_MATRICES];
> -    /// Matrix coefficients, stored as 2.14 fixed point.
> +    /// Matrix coefficients fractional part size in bits.
> +    uint8_t     matrix_coeff_frac_bits[MAX_MATRICES];
> +    /// Matrix coefficients shift amount.
> +    int8_t      matrix_coeff_shift[MAX_MATRICES];
> +    /// Matrix coefficients presence mask.
> +    uint16_t    matrix_coeff_mask[MAX_MATRICES];
> +    /// Matrix coefficients, stored as 2.18 fixed point.
>       DECLARE_ALIGNED(32, int32_t, matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
> -    /// Left shift to apply to noise values in 0x31eb substreams.
> +    /// Delta matrix coefficients size in bits for 0x31ec substreams.
> +    uint8_t     delta_matrix_coeff_bits[MAX_MATRICES];
> +    /// Delta matrix coefficients precision.
> +    uint8_t     delta_matrix_coeff_prec[MAX_MATRICES];
> +    /// Delta matrix coefficients, stored as 2.18 fixed point.
> +    DECLARE_ALIGNED(32, int32_t, delta_matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
> +    /// Left shift to apply to noise values in 0x31eb and 0x31ec substreams.
>       uint8_t     matrix_noise_shift[MAX_MATRICES];
>       //@}
>   
> @@ -133,6 +152,8 @@ typedef struct MLPDecodeContext {
>       AVCodecContext *avctx;
>   
>       AVChannelLayout downmix_layout;
> +    /// Set to enable decoding of non-loudspeaker feed (objects) audio channels
> +    int extract_objects;
>   
>       /// Current access unit being read has a major sync.
>       int         is_major_sync_unit;
> @@ -267,14 +288,14 @@ static inline int read_huff_channels(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       for (mat = 0; mat < s->num_primitive_matrices; mat++)
>           if (s->lsb_bypass[mat])
> -            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits1(gbp);
> +            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits(gbp, s->lsb_bypass[mat]);
>   
>       for (channel = s->min_channel; channel <= s->max_channel; channel++) {
>           ChannelParams *cp = &s->channel_params[channel];
>           int codebook = cp->codebook;
>           int quant_step_size = s->quant_step_size[channel];
>           int lsb_bits = cp->huff_lsbs - quant_step_size;
> -        int result = 0;
> +        int32_t result = 0;
>   
>           if (codebook > 0)
>               result = get_vlc2(gbp, huff_vlc[codebook-1].table,
> @@ -410,8 +431,12 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
>           m->avctx->profile     = AV_PROFILE_TRUEHD_ATMOS;
>       }
>   
> -    /* limit to decoding 3 substreams, as the 4th is used by Dolby Atmos for non-audio data */
> -    m->max_decoded_substream = FFMIN(m->num_substreams - 1, 2);
> +    /* Limit to decoding the first 3 substreams (or allow the 4th for objects) */
> +    m->max_decoded_substream = FFMIN(m->num_substreams - 1,
> +        m->extract_objects ? 3 : 2);
> +
> +    av_log(m->avctx, AV_LOG_DEBUG, "decoding up to substream %" PRIu8 "\n",
> +        m->max_decoded_substream);
>   
>       m->avctx->sample_rate    = mh.group1_samplerate;
>       m->avctx->frame_size     = mh.access_unit_size;
> @@ -531,23 +556,22 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>       int sync_word, tmp;
>       uint8_t checksum;
>       uint8_t lossless_check;
> +    uint8_t max_bit_depth;
>       int start_count = get_bits_count(gbp);
> -    int min_channel, max_channel, max_matrix_channel, noise_type;
> +    int min_channel, max_channel, max_matrix_channel;
>       const int std_max_matrix_channel = m->avctx->codec_id == AV_CODEC_ID_MLP
>                                        ? MAX_MATRIX_CHANNEL_MLP
>                                        : MAX_MATRIX_CHANNEL_TRUEHD;
>   
> -    sync_word = get_bits(gbp, 13);
> +    sync_word = get_bits(gbp, 14);
>   
> -    if (sync_word != 0x31ea >> 1) {
> +    if (sync_word < 0x31ea || 0x31ec < sync_word) {
>           av_log(m->avctx, AV_LOG_ERROR,
>                  "restart header sync incorrect (got 0x%04x)\n", sync_word);
>           return AVERROR_INVALIDDATA;
>       }
>   
> -    noise_type = get_bits1(gbp);
> -
> -    if (m->avctx->codec_id == AV_CODEC_ID_MLP && noise_type) {
> +    if (m->avctx->codec_id == AV_CODEC_ID_MLP && 0x31ea != sync_word) {
>           av_log(m->avctx, AV_LOG_ERROR, "MLP must have 0x31ea sync word.\n");
>           return AVERROR_INVALIDDATA;
>       }
> @@ -567,7 +591,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       /* This should happen for TrueHD streams with >6 channels and MLP's noise
>        * type. It is not yet known if this is allowed. */
> -    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && !noise_type) {
> +    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && 0x31ea == sync_word) {
>           avpriv_request_sample(m->avctx,
>                                 "%d channels (more than the "
>                                 "maximum supported by the decoder)",
> @@ -582,7 +606,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>       s->max_channel        = max_channel;
>       s->coded_channels     = ((1LL << (max_channel - min_channel + 1)) - 1) << min_channel;
>       s->max_matrix_channel = max_matrix_channel;
> -    s->noise_type         = noise_type;
> +    s->substream_type     = sync_word;
>   
>       if (mlp_channel_layout_subset(&m->downmix_layout, s->mask) &&
>           m->max_decoded_substream > substr) {
> @@ -595,8 +619,28 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       s->noise_shift   = get_bits(gbp,  4);
>       s->noisegen_seed = get_bits(gbp, 23);
> +    s->max_shift     = get_bits(gbp,  4);
> +    s->max_lsbs      = get_bits(gbp,  5);
> +    s->max_bits      = get_bits(gbp,  5);
> +
> +    max_bit_depth = (0x31ec == sync_word) ? 31 : 24;
> +    if (max_bit_depth < s->max_lsbs) {
> +        av_log(m->avctx, AV_LOG_ERROR,
> +               "Max LSB size %" PRIu8 " for substream %u exceeds "
> +               "%" PRIu8 " bits.\n",
> +               s->max_lsbs, substr, max_bit_depth);
> +        return AVERROR_INVALIDDATA;
> +    }
>   
> -    skip_bits(gbp, 19);
> +    if (max_bit_depth < s->max_bits) {
> +        av_log(m->avctx, AV_LOG_ERROR,
> +               "Max output bit-depth %" PRIu8 " for substream %u exceeds "
> +               "%" PRIu8 " bits.\n",
> +               s->max_bits, substr, max_bit_depth);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    skip_bits(gbp, 5);
>   
>       s->data_check_present = get_bits1(gbp);
>       lossless_check = get_bits(gbp, 8);
> @@ -615,7 +659,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>   
>       for (ch = 0; ch <= s->max_matrix_channel; ch++) {
>           int ch_assign = get_bits(gbp, 6);
> -        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD) {
> +        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD && s->mask) {
>               AVChannelLayout l;
>               enum AVChannel channel = thd_channel_layout_extract_channel(s->mask, ch_assign);
>   
> @@ -656,12 +700,19 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>           cp->huff_offset      = 0;
>           cp->sign_huff_offset = -(1 << 23);
>           cp->codebook         = 0;
> -        cp->huff_lsbs        = 24;
> +        cp->huff_lsbs        = (3 == substr) ? 31 : 24;
>       }
>   
>       if (substr == m->max_decoded_substream) {
>           av_channel_layout_uninit(&m->avctx->ch_layout);
> -        av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
> +        if (substr < 3) /* Loudspeaker feed channels */
> +            av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
> +        else /* Object channels */
> +            m->avctx->ch_layout = (AVChannelLayout) {
> +                AV_CHANNEL_ORDER_UNSPEC,
> +                s->max_channel+1
> +            };
> +
>           m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
>                                                          s->output_shift,
>                                                          s->max_matrix_channel,
> @@ -760,17 +811,43 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
>       return 0;
>   }
>   
> -/** Read parameters for primitive matrices. */
> +/** Get the maximum number of primitive matrices allowed. */
>   
> -static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
> +static int get_max_nb_primitive_matrices(MLPDecodeContext *m, unsigned int substr)
> +{
> +    switch (substr) { // limit depends on which presentation flag is set in substream_info
> +    case 0: // substream 0 (up to 2 matrix channels)
> +        return 2;
> +    case 1: // substream 1
> +        if (m->substream_info & 0x8) // 6-ch pres carried
> +            return 6;
> +        if (m->substream_info & 0x20) // 8-ch pres carried
> +            return 8;
> +        break;
> +    case 2: // substream 2
> +        if (m->substream_info & 0x40) // 8-ch pres carried
> +            return 8;
> +        break;
> +    case 3: // substream 3
> +        if (m->substream_info & 0x80) // 16-ch pres carried (hex flag bit, like the cases above)
> +            return 16;
> +        break;
> +    }
> +
> +    return MAX_MATRICES_TRUEHD; // no flag matched (or substr > 3): codec-wide limit
> +}
> +
> +/** Read parameters for primitive matrices (0x31ea and 0x31eb substreams). */
> +
> +static int read_31ea_31eb_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
>   {
>       SubStream *s = &m->substream[substr];
>       unsigned int mat, ch;
>       const int max_primitive_matrices = m->avctx->codec_id == AV_CODEC_ID_MLP
>                                        ? MAX_MATRICES_MLP
> -                                     : MAX_MATRICES_TRUEHD;
> +                                     : get_max_nb_primitive_matrices(m, substr);
>   
> -    if (m->matrix_changed++ > 1) {
> +    if (++m->matrix_changed > 1) {
>           av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
>           return AVERROR_INVALIDDATA;
>       }
> @@ -779,8 +856,9 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>   
>       if (s->num_primitive_matrices > max_primitive_matrices) {
>           av_log(m->avctx, AV_LOG_ERROR,
> -               "Number of primitive matrices cannot be greater than %d.\n",
> -               max_primitive_matrices);
> +               "Number of primitive matrices cannot be greater than %d "
> +               "for substream %u of type 0x%04x.\n",
> +               max_primitive_matrices, substr, s->substream_type);
>           goto error;
>       }
>   
> @@ -803,7 +881,7 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>           }
>   
>           max_chan = s->max_matrix_channel;
> -        if (!s->noise_type)
> +        if (0x31ea == s->substream_type)
>               max_chan+=2;
>   
>           for (ch = 0; ch <= max_chan; ch++) {
> @@ -811,10 +889,10 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>               if (get_bits1(gbp))
>                   coeff_val = get_sbits(gbp, frac_bits + 2);
>   
> -            s->matrix_coeff[mat][ch] = coeff_val * (1 << (14 - frac_bits));
> +            s->matrix_coeff[mat][ch] = coeff_val * (1 << ((14 + 4) - frac_bits));
>           }
>   
> -        if (s->noise_type)
> +        if (0x31eb == s->substream_type)
>               s->matrix_noise_shift[mat] = get_bits(gbp, 4);
>           else
>               s->matrix_noise_shift[mat] = 0;
> @@ -828,6 +906,124 @@ error:
>       return AVERROR_INVALIDDATA;
>   }
>   
> +/** Read parameters for primitive matrices (0x31ec substreams). */
> +
> +static int read_31ec_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
> +{
> +    SubStream *s = &m->substream[substr];
> +    unsigned int mat, ch;
> +
> +    const int max_primitive_matrices = get_max_nb_primitive_matrices(m, substr);
> +
> +    if (++m->matrix_changed > 1) {
> +        av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    /* Seed primitive matrices */
> +
> +    if (get_bits1(gbp)) {
> +        /* New seed primitive matrices */
> +
> +        if (get_bits1(gbp)) {
> +            /* New seed matrices parameters */
> +            s->num_primitive_matrices = get_bits(gbp, 4) + 1;
> +
> +            if (s->num_primitive_matrices > max_primitive_matrices) {
> +                av_log(m->avctx, AV_LOG_ERROR,
> +                    "Number of primitive matrices cannot be greater than %d "
> +                    "in substream %u of type 0x%04x.\n",
> +                    max_primitive_matrices, substr, s->substream_type);
> +                goto error;
> +            }
> +
> +            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +                s->matrix_out_ch         [mat] = get_bits(gbp, 4);
> +                s->matrix_coeff_frac_bits[mat] = get_bits(gbp, 4);
> +                s->matrix_coeff_shift    [mat] = ((int) get_bits(gbp, 3)) - 1;
> +                s->lsb_bypass            [mat] = get_bits(gbp, 2);
> +                s->matrix_noise_shift    [mat] = get_bits(gbp, 4);
> +                s->matrix_coeff_mask     [mat] = get_bits(gbp, s->max_matrix_channel + 1);
> +
> +                if (s->matrix_out_ch[mat] > s->max_matrix_channel) {
> +                    av_log(m->avctx, AV_LOG_ERROR,
> +                            "Invalid channel %d specified as output from matrix.\n",
> +                            s->matrix_out_ch[mat]);
> +                    goto error;
> +                }
> +                if (s->matrix_coeff_frac_bits[mat] > 14) {
> +                    av_log(m->avctx, AV_LOG_ERROR,
> +                            "Too many fractional bits specified.\n");
> +                    goto error;
> +                }
> +            }
> +        }
> +
> +        /* Seed matrices coefficients */
> +        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +            const int coeff_shift = s->matrix_coeff_shift[mat]
> +                - s->matrix_coeff_frac_bits[mat];
> +
> +            memset(s->matrix_coeff[mat], 0, sizeof(s->matrix_coeff[mat]));
> +
> +            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
> +                int64_t coeff_val;
> +
> +                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
> +                    continue; // skip channel
> +
> +                coeff_val = get_sbits(gbp, s->matrix_coeff_frac_bits[mat] + 2);
> +                s->matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
> +            }
> +        }
> +    }
> +
> +    if (!get_bits1(gbp)) {
> +        /* No primitive matrices interpolation */
> +        memset(s->delta_matrix_coeff, 0, sizeof(s->delta_matrix_coeff));
> +    }
> +    else if (get_bits1(gbp)) {
> +        /* New delta primitive matrices */
> +
> +        if (get_bits1(gbp)) {
> +            /* New delta primitive matrices parameters */
> +
> +            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +                s->delta_matrix_coeff_bits[mat] = get_bits(gbp, 4) + 1;
> +                s->delta_matrix_coeff_prec[mat] = get_bits(gbp, 2);
> +            }
> +        }
> +
> +        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +            const int coeff_shift = s->matrix_coeff_shift[mat]
> +                - s->delta_matrix_coeff_prec[mat]
> +                - s->matrix_coeff_frac_bits[mat];
> +
> +            memset(s->delta_matrix_coeff[mat], 0, sizeof(s->delta_matrix_coeff[mat]));
> +
> +            if (s->delta_matrix_coeff_bits[mat] <= 1)
> +                continue; // skip matrice
> +
> +            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
> +                int64_t coeff_val;
> +
> +                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
> +                    continue; // skip channel
> +
> +                coeff_val = get_sbits(gbp, s->delta_matrix_coeff_bits[mat]);
> +                s->delta_matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
> +            }
> +        }
> +    }
> +
> +    return 0;
> +error:
> +    s->num_primitive_matrices = 0;
> +    memset(s->matrix_out_ch, 0, sizeof(s->matrix_out_ch));
> +
> +    return AVERROR_INVALIDDATA;
> +}
> +
>   /** Read channel parameters. */
>   
>   static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
> @@ -875,8 +1071,10 @@ static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
>       cp->codebook  = get_bits(gbp, 2);
>       cp->huff_lsbs = get_bits(gbp, 5);
>   
> -    if (cp->codebook > 0 && cp->huff_lsbs > 24) {
> -        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs.\n");
> +    if (cp->codebook > 0 && cp->huff_lsbs > s->max_lsbs) {
> +        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs=%" PRIu8 ", "
> +               "exceeds max_lsbs=%" PRIu8 ".\n",
> +               cp->huff_lsbs, s->max_lsbs);
>           cp->huff_lsbs = 0;
>           return AVERROR_INVALIDDATA;
>       }
> @@ -910,9 +1108,14 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
>           }
>   
>       if (s->param_presence_flags & PARAM_MATRIX)
> -        if (get_bits1(gbp))
> -            if ((ret = read_matrix_params(m, substr, gbp)) < 0)
> +        if (get_bits1(gbp)) {
> +            if (0x31ec == s->substream_type)
> +                ret = read_31ec_matrix_params(m, substr, gbp);
> +            else
> +                ret = read_31ea_31eb_matrix_params(m, substr, gbp);
> +            if (ret < 0)
>                   return ret;
> +        }
>   
>       if (s->param_presence_flags & PARAM_OUTSHIFT)
>           if (get_bits1(gbp)) {
> @@ -922,6 +1125,10 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
>                       avpriv_request_sample(m->avctx, "Negative output_shift");
>                       s->output_shift[ch] = 0;
>                   }
> +                if (s->max_shift < s->output_shift[ch])
> +                    av_log(m->avctx, AV_LOG_WARNING,
> +                           "output_shift=%d exceeds max_shift=%d\n",
> +                           s->output_shift[ch], s->max_shift);
>               }
>               if (substr == m->max_decoded_substream)
>                   m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
> @@ -1103,6 +1310,56 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
>       s->noisegen_seed = seed;
>   }
>   
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +
> +/** Check matrices-based channel remapping output for saturation. */
> +
> +static void check_rematrix_output(MLPDecodeContext *m, unsigned int substr)
> +{
> +    SubStream *s = &m->substream[substr];
> +    unsigned int mat, sample;
> +
> +    for (mat = 0; mat < s->num_primitive_matrices; mat++) {
> +        unsigned int dest_ch = s->matrix_out_ch[mat];
> +
> +        const uint8_t shift = (3 <= substr && s->min_channel <= dest_ch) ? 31 : 23;
> +        const int32_t min_value = -(1u << shift);
> +        const int32_t max_value =  (1u << shift) - 1;
> +
> +        for (sample = 0; sample < s->blockpos; sample++) {
> +            if (m->sample_buffer[sample][dest_ch] < min_value)
> +                av_log(m->avctx, AV_LOG_WARNING,
> +                    "rematrix negative saturation substr=%u mat=%u sample=%d "
> +                    "value=%" PRId32 "\n",
> +                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
> +            if (m->sample_buffer[sample][dest_ch] > max_value)
> +                av_log(m->avctx, AV_LOG_WARNING,
> +                    "rematrix positive saturation substr=%u mat=%u sample=%d "
> +                    "value=%" PRId32 "\n",
> +                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
> +        }
> +    }
> +}
> +
> +/** Check output audio bit-depth. */
> +
> +static void check_output_bit_depth(MLPDecodeContext *m, unsigned int substr)
> +{
> +    SubStream *s = &m->substream[substr];
> +    uint32_t cumul_mask = 0;
> +    unsigned int chan, sample;
> +
> +    for (chan = 0; chan <= s->max_matrix_channel; chan++)
> +        for (sample = 0; sample < s->blockpos; sample++)
> +            cumul_mask |= FFABS(m->sample_buffer[sample][chan]);
> +
> +    if ((1u << s->max_bits) <= cumul_mask)
> +        av_log(m->avctx, AV_LOG_WARNING, "output audio bit-depth exceeds "
> +               "expected %u bits.\n",
> +               s->max_bits);
> +}
> +#endif
> +
>   /** Write the audio data into the output buffer. */
>   
>   static int output_data(MLPDecodeContext *m, unsigned int substr,
> @@ -1110,8 +1367,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>   {
>       AVCodecContext *avctx = m->avctx;
>       SubStream *s = &m->substream[substr];
> -    unsigned int mat;
> -    unsigned int maxchan;
> +    unsigned int mat, chan, maxchan;
>       int ret;
>       int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
>   
> @@ -1126,7 +1382,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>       }
>   
>       maxchan = s->max_matrix_channel;
> -    if (!s->noise_type) {
> +    if (0x31ea == s->substream_type) {
>           generate_2_noise_channels(m, substr);
>           maxchan += 2;
>       } else {
> @@ -1137,19 +1393,45 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>        * samples. */
>       for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>           unsigned int dest_ch = s->matrix_out_ch[mat];
> -        m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
> -                                    s->matrix_coeff[mat],
> -                                    &m->bypassed_lsbs[0][mat],
> -                                    m->noise_buffer,
> -                                    s->num_primitive_matrices - mat,
> -                                    dest_ch,
> -                                    s->blockpos,
> -                                    maxchan,
> -                                    s->matrix_noise_shift[mat],
> -                                    m->access_unit_size_pow2,
> -                                    MSB_MASK(s->quant_step_size[dest_ch]));
> +
> +        if (substr < 3) {
> +            /* Single primitive matrices */
> +            m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
> +                                        s->matrix_coeff[mat],
> +                                        &m->bypassed_lsbs[0][mat],
> +                                        m->noise_buffer,
> +                                        s->num_primitive_matrices - mat,
> +                                        dest_ch,
> +                                        s->blockpos,
> +                                        maxchan,
> +                                        s->matrix_noise_shift[mat],
> +                                        m->access_unit_size_pow2,
> +                                        MSB_MASK(s->quant_step_size[dest_ch]));
> +        }
> +        else {
> +            /* Interpolated primitive matrices */
> +            m->dsp.mlp_rematrix_interp_channel(&m->sample_buffer[0][0],
> +                                               s->matrix_coeff[mat],
> +                                               s->delta_matrix_coeff[mat],
> +                                               &m->bypassed_lsbs[0][mat],
> +                                               m->noise_buffer,
> +                                               s->num_primitive_matrices - mat,
> +                                               dest_ch,
> +                                               s->blockpos,
> +                                               maxchan,
> +                                               s->matrix_noise_shift[mat],
> +                                               m->access_unit_size_pow2,
> +                                               MSB_MASK(s->quant_step_size[dest_ch]));
> +
> +            for (chan = 0; chan <= maxchan; chan++)
> +                s->matrix_coeff[mat][chan] += s->delta_matrix_coeff[mat][chan];
> +        }
>       }
>   
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +    check_rematrix_output(m, substr);
> +#endif
> +
>       /* get output buffer */
>       frame->nb_samples = s->blockpos;
>       if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> @@ -1163,6 +1445,10 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>                                               s->max_matrix_channel,
>                                               is32);
>   
> +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> +    check_output_bit_depth(m, substr);
> +#endif
> +
>       /* Update matrix encoding side data */
>       if (s->matrix_encoding != s->prev_matrix_encoding) {
>           if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
> @@ -1320,6 +1606,7 @@ static int read_access_unit(AVCodecContext *avctx, AVFrame *frame,
>                    (avctx->ch_layout.nb_channels == 8 &&
>                     ((m->substream_info >> 4) & 0x7) != 0x7 &&
>                     ((m->substream_info >> 4) & 0x7) != 0x6 &&
> +                  ((m->substream_info >> 4) & 0x7) != 0x4 &&
>                     ((m->substream_info >> 4) & 0x7) != 0x3)) &&
>                   substr > 0 && substr < m->max_decoded_substream &&
>                   (s->min_channel <= m->substream[substr - 1].max_channel)) {
> @@ -1429,8 +1716,10 @@ static void mlp_decode_flush(AVCodecContext *avctx)
>   #define OFFSET(x) offsetof(MLPDecodeContext, x)
>   #define FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
>   static const AVOption options[] = {
> -    { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout),
> -        AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
> +    { "downmix", "Request a specific channel layout from the decoder",
> +        OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
> +    { "extract_objects", "Enable extraction of audio object channels",
> +        OFFSET(extract_objects), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
>       { NULL },

IMHO this should be enabled by default. All streams come with 
compatibility AC3 mixes too.

>   };
>   
> diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
> index cb40160f67..e28006f2c4 100644
> --- a/libavcodec/mlpdsp.c
> +++ b/libavcodec/mlpdsp.c
> @@ -79,11 +79,51 @@ void ff_mlp_rematrix_channel(int32_t *samples,
>   
>           if (matrix_noise_shift) {
>               index &= access_unit_size_pow2 - 1;
> -            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7));
> +            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
>               index += index2;
>           }
>   
> -        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
> +        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
> +        bypassed_lsbs += MAX_CHANNELS;
> +        samples += MAX_CHANNELS;
> +    }
> +}
> +
> +void ff_mlp_rematrix_interp_channel(int32_t *samples,
> +                                    const int32_t *seed_coeffs,
> +                                    const int32_t *delta_coeffs,
> +                                    const uint8_t *bypassed_lsbs,
> +                                    const int8_t *noise_buffer,
> +                                    int index,
> +                                    unsigned int dest_ch,
> +                                    uint16_t blockpos,
> +                                    unsigned int maxchan,
> +                                    int matrix_noise_shift,
> +                                    int access_unit_size_pow2,
> +                                    int32_t mask)
> +{
> +    unsigned int src_ch, i;
> +    int index2 = 2 * index + 1;
> +
> +    int32_t delta_inc = (1 << 16) / blockpos;
> +
> +    for (i = 0; i < blockpos; i++) {
> +        int64_t accum = 0, delta_accum = 0;
> +
> +        for (src_ch = 0; src_ch <= maxchan; src_ch++) {
> +            accum += (int64_t) samples[src_ch] * seed_coeffs[src_ch];
> +            delta_accum += (int64_t) samples[src_ch] * delta_coeffs[src_ch];
> +        }
> +
> +        accum += ((delta_accum >> 18) * i * delta_inc * (1 << 18)) >> 16;
> +
> +        if (matrix_noise_shift) {
> +            index &= access_unit_size_pow2 - 1;
> +            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
> +            index += index2;
> +        }
> +
> +        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
>           bypassed_lsbs += MAX_CHANNELS;
>           samples += MAX_CHANNELS;
>       }
> @@ -115,9 +155,10 @@ int32_t ff_mlp_pack_output(int32_t lossless_check_data,
>               int mat_ch = ch_assign[out_ch];
>               int32_t sample = sample_buffer[i][mat_ch] *
>                             (1U << output_shift[mat_ch]);
> -            lossless_check_data ^= (sample & 0xffffff) << mat_ch;
> +            lossless_check_data ^= (sample & 0xffffff) << (mat_ch & 7);
> +
>               if (is32)
> -                *data_32++ = sample * 256U;
> +                *data_32++ = sample * (1 << 8);
>               else
>                   *data_16++ = sample >> 8;
>           }
> @@ -129,6 +170,7 @@ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
>   {
>       c->mlp_filter_channel = mlp_filter_channel;
>       c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
> +    c->mlp_rematrix_interp_channel = ff_mlp_rematrix_interp_channel;
>       c->mlp_select_pack_output = mlp_select_pack_output;
>   #if ARCH_ARM
>       ff_mlpdsp_init_arm(c);
> diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
> index 7a9ac228d3..fd29db10a7 100644
> --- a/libavcodec/mlpdsp.h
> +++ b/libavcodec/mlpdsp.h
> @@ -37,6 +37,19 @@ void ff_mlp_rematrix_channel(int32_t *samples,
>                                int access_unit_size_pow2,
>                                int32_t mask);
>   
> +void ff_mlp_rematrix_interp_channel(int32_t *samples,
> +                                    const int32_t *seed_coeffs,
> +                                    const int32_t *delta_coeffs,
> +                                    const uint8_t *bypassed_lsbs,
> +                                    const int8_t *noise_buffer,
> +                                    int index,
> +                                    unsigned int dest_ch,
> +                                    uint16_t blockpos,
> +                                    unsigned int maxchan,
> +                                    int matrix_noise_shift,
> +                                    int access_unit_size_pow2,
> +                                    int32_t mask);
> +
>   int32_t ff_mlp_pack_output(int32_t lossless_check_data,
>                              uint16_t blockpos,
>                              int32_t (*sample_buffer)[MAX_CHANNELS],
> @@ -62,6 +75,18 @@ typedef struct MLPDSPContext {
>                                    int matrix_noise_shift,
>                                    int access_unit_size_pow2,
>                                    int32_t mask);
> +    void (*mlp_rematrix_interp_channel)(int32_t *samples,
> +                                        const int32_t *seed_coeffs,
> +                                        const int32_t *delta_coeffs,
> +                                        const uint8_t *bypassed_lsbs,
> +                                        const int8_t *noise_buffer,
> +                                        int index,
> +                                        unsigned int dest_ch,
> +                                        uint16_t blockpos,
> +                                        unsigned int maxchan,
> +                                        int matrix_noise_shift,
> +                                        int access_unit_size_pow2,
> +                                        int32_t mask);
>       int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
>                                           int8_t *output_shift,
>                                           uint8_t max_matrix_channel,
> diff --git a/libavcodec/x86/mlpdsp.asm b/libavcodec/x86/mlpdsp.asm
> index 3dc641e89e..3b232d4551 100644
> --- a/libavcodec/x86/mlpdsp.asm
> +++ b/libavcodec/x86/mlpdsp.asm
> @@ -61,12 +61,12 @@ SECTION .text
>       paddq        xm0, xm1
>       movq      accumq, xm0
>       movzx     blsbsd, byte [blsbs_ptrq]             ; load *bypassed_lsbs
> -    sar       accumq, 14                            ; accum >>= 14
> +    sar       accumq, 18                            ; accum >>= 18
>       and       accumd, maskd                         ; accum &= mask
>       add       accumd, blsbsd                        ; accum += *bypassed_lsbs
>       mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
> -    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
> -    add     samplesq, 32                            ; samples += MAX_CHANNELS;
> +    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
> +    add     samplesq, 64                            ; samples += MAX_CHANNELS;
>       cmp   blsbs_ptrq, cntq
>   %endmacro
>   
> @@ -80,12 +80,12 @@ SECTION .text
>       SHLX      noiseq, mns                           ; noise_buffer[index] <<= matrix_noise_shift
>       add       accumq, noiseq                        ; accum += noise_buffer[index]
>       movzx     noised, byte [blsbs_ptrq]             ; load *bypassed_lsbs (reuse tmp noise register)
> -    sar       accumq, 14                            ; accum >>= 14
> +    sar       accumq, 18                            ; accum >>= 18
>       and       accumd, maskd                         ; accum &= mask
>       add       accumd, noised                        ; accum += *bypassed_lsbs
>       mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
> -    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
> -    add     samplesq, 32                            ; samples += MAX_CHANNELS;
> +    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
> +    add     samplesq, 64                            ; samples += MAX_CHANNELS;
>       cmp   blsbs_ptrq, cntq
>   %endmacro
>   
> @@ -106,7 +106,8 @@ cglobal mlp_rematrix_channel, 0, 13, 5, samples, coeffs, blsbs_ptr, blsbs, \
>       mov     dest_chd, dest_chm                      ; load dest_chd (not needed on UNIX64)
>   %endif
>       shl     dest_chd, 2
> -    lea         cntq, [blsbs_ptrq + blockposq*8]
> +    lea         cntq, [blsbs_ptrq + blockposq*8]    ; loop end address (bypassed_lsbs + blockpos * MAX_CHANNELS)
> +    lea         cntq, [cntq + blockposq*8]
>       test        mnsd, mnsd                          ; is matrix_noise_shift != 0?
>       jne .shift                                      ; jump if true
>       cmp     maxchand, 4                             ; is maxchan < 4?
> @@ -144,7 +145,7 @@ align 16
>       DEFINE_ARGS samples, coeffs, blsbs_ptr, noise_buffer, \
>                   index, dest_ch, accum, index2, mns, \
>                   ausp, mask, cnt, noise
> -    add         mnsd, 7              ; matrix_noise_shift += 7
> +    add         mnsd, 11             ; matrix_noise_shift += 11
>   %else ; sse4
>       mov           r6, rcx            ; move rcx elsewhere so we can use cl for matrix_noise_shift
>   %if WIN64
> @@ -156,7 +157,7 @@ align 16
>       DEFINE_ARGS samples, coeffs, blsbs_ptr, mns, index, dest_ch, noise_buffer, \
>                   index2, accum, ausp, mask, cnt, noise
>   %endif
> -    lea         mnsd, [r8 + 7]       ; rcx = matrix_noise_shift + 7
> +    lea         mnsd, [r8 + 11]      ; rcx = matrix_noise_shift + 11
>   %endif ; cpuflag
>       sub        auspd, 1              ; access_unit_size_pow2 -= 1
>       cmp          r7d, 4              ; is maxchan < 4?

If the changes are that few, why not do the aarch64 version too?

> diff --git a/tests/fate/truehd.mak b/tests/fate/truehd.mak
> index b0bc86a965..30c0e9628b 100644
> --- a/tests/fate/truehd.mak
> +++ b/tests/fate/truehd.mak
> @@ -18,5 +18,15 @@ fate-truehd-mono1726: CMD = md5pipe -f truehd -i $(TARGET_SAMPLES)/truehd/ticket
>   fate-truehd-mono1726: CMP = oneline
>   fate-truehd-mono1726: REF = 9be9551fac418440bb02101bfdb11df9
>   
> +FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-no-obj
> +fate-truehd-atmos-no-obj: CMD = md5pipe -f truehd -extract_objects 0 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
> +fate-truehd-atmos-no-obj: CMP = oneline
> +fate-truehd-atmos-no-obj: REF = 53da6ce35c778bcc2182ef2160bf16a2
> +
> +FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-obj
> +fate-truehd-atmos-obj: CMD = md5pipe -f truehd -extract_objects 1 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
> +fate-truehd-atmos-obj: CMP = oneline
> +fate-truehd-atmos-obj: REF = f19f6f8ec8b040050aaa019f016f7ddc
> +
>   FATE_SAMPLES_AUDIO += $(FATE_TRUEHD-yes)
>   fate-truehd: $(FATE_TRUEHD-yes)
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 17:31 ` Lynne
@ 2025-03-23 18:58   ` Massimo Eynard
  0 siblings, 0 replies; 11+ messages in thread
From: Massimo Eynard @ 2025-03-23 18:58 UTC (permalink / raw)
  To: ffmpeg-devel

Hi!
First, thank you for your feedback.

On 23/03/2025 18:31, Lynne wrote:
> On 22/03/2025 18:49, Massimo Eynard wrote:
>> This patch adds support for decoding the fourth MLP substream
>> which contains the 16-channel presentation used for Atmos
>> audio objects.
>>
>> By default only the first three substreams are decoded
>> unless the new extract_objects flag is enabled as the resulting
>> presentation contains audio object feeds instead of classic
>> loudspeaker feeds.
>>
>> As this introduces interpolation of primitive matrices, precision
>> has been increased to 2.18 fixed point. Therefore this requires
>> DSP code upgrade which has been done for C and x86 implementations
>> but not the ARM implementation.
>>
>> Adds two FATE tests using existing atmos.thd sample to reflect
>> changes.
>>
>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>> ---
>>   libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>   libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>   libavcodec/mlp.h                 |  10 +-
>>   libavcodec/mlp_parse.c           |  31 ++-
>>   libavcodec/mlp_parse.h           |   1 +
>>   libavcodec/mlp_parser.c          |  11 +-
>>   libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>>   libavcodec/mlpdsp.c              |  50 +++-
>>   libavcodec/mlpdsp.h              |  25 ++
>>   libavcodec/x86/mlpdsp.asm        |  19 +-
>>   tests/fate/truehd.mak            |  10 +
>>   11 files changed, 476 insertions(+), 75 deletions(-)
>>
>> diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
>> index d31568611c..5210f391ab 100644
>> --- a/libavcodec/arm/mlpdsp_armv5te.S
>> +++ b/libavcodec/arm/mlpdsp_armv5te.S
>> @@ -21,7 +21,7 @@
>>     #include "libavutil/arm/asm.S"
>>   -#define MAX_CHANNELS        8
>> +#define MAX_CHANNELS       16
>>   #define MAX_FIR_ORDER       8
>>   #define MAX_IIR_ORDER       4
>>   #define MAX_RATEFACTOR      4
>> diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
>> index 34a5f61e1d..50c3cf5488 100644
>> --- a/libavcodec/arm/mlpdsp_init_arm.c
>> +++ b/libavcodec/arm/mlpdsp_init_arm.c
>> @@ -113,6 +113,7 @@ static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign,
>>           ch_index = 2;
>>           break;
>>       default:
>> +        // max_matrix_channel > 7 requires &7 lossless buffer channel shift
>>           return ff_mlp_pack_output;
>>       }
>>   @@ -139,7 +140,7 @@ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
>>         if (have_armv5te(cpu_flags)) {
>>           c->mlp_filter_channel = ff_mlp_filter_channel_arm;
>> -        c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
>> +        // c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; // TODO: update to 2.18 coeff
>>       }
>>       if (have_armv6(cpu_flags))
>>           c->mlp_select_pack_output = mlp_select_pack_output_armv6;
>> diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
>> index bec414c680..264ef64cf1 100644
>> --- a/libavcodec/mlp.h
>> +++ b/libavcodec/mlp.h
>> @@ -31,19 +31,19 @@
>>     /** Last possible matrix channel for each codec */
>>   #define MAX_MATRIX_CHANNEL_MLP      5
>> -#define MAX_MATRIX_CHANNEL_TRUEHD   7
>> +#define MAX_MATRIX_CHANNEL_TRUEHD  15
>>   /** Maximum number of channels in a valid stream.
>>    *  MLP   : 5.1 + 2 noise channels -> 8 channels
>> - *  TrueHD: 7.1                    -> 8 channels
>> + *  TrueHD: up to 16-ch pres       -> 16 channels
>>    */
>> -#define MAX_CHANNELS                8
>> +#define MAX_CHANNELS               16
>>     /** Maximum number of matrices used in decoding; most streams have one matrix
>>    *  per output channel, but some rematrix a channel (usually 0) more than once.
>>    */
>>   #define MAX_MATRICES_MLP            6
>> -#define MAX_MATRICES_TRUEHD         8
>> -#define MAX_MATRICES                8
>> +#define MAX_MATRICES_TRUEHD        16
>> +#define MAX_MATRICES               16
>>     /** Maximum number of substreams that can be decoded.
>>    *  MLP's limit is 2. TrueHD supports at least up to 3.
>> diff --git a/libavcodec/mlp_parse.c b/libavcodec/mlp_parse.c
>> index 924c731439..c94da860d0 100644
>> --- a/libavcodec/mlp_parse.c
>> +++ b/libavcodec/mlp_parse.c
>> @@ -85,7 +85,7 @@ static int mlp_get_major_sync_size(const uint8_t * buf, int bufsize)
>>     int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
>>   {
>> -    int ratebits, channel_arrangement, header_size;
>> +    int ratebits, channel_arrangement, header_size, extra_ch_length;
>>       uint16_t checksum;
>>         av_assert1(get_bits_count(gb) == 0);
>> @@ -163,7 +163,34 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
>>       mh->extended_substream_info = get_bits(gb, 2);
>>       mh->substream_info = get_bits(gb, 8);
>>   -    skip_bits_long(gb, (header_size - 18) * 8);
>> +    extra_ch_length = 0;
>> +    mh->channels_thd_stream3 = 0;
>> +
>> +    if (mh->stream_type == 0xba) {
>> +        skip_bits_long(gb, 63);
>> +
>> +        extra_ch_length = 64;
>> +        if (get_bits1(gb) && (mh->substream_info & 0x80)) {
>> +            /* 16ch_channel_meaning */
>> +            int length = (get_bits(gb, 4) + 1) << 1;
>> +            if (header_size - 26 < length) {
>> +                av_log(log, AV_LOG_ERROR, "packet too short, "
>> +                    "unable to read 16ch extra meaning in major sync %d %d\n",
>> +                    header_size, length);
>> +                return -1;
>> +            }
>> +
>> +            skip_bits_long(gb, 5+6); // dialogue norm/mix level
>> +            mh->channels_thd_stream3 = get_bits(gb, 5) + 1;
>> +            if (!get_bits1(gb)) { // dyn_object_only
>> +                avpriv_request_sample(log, "16ch presentation with a mixture of channels");
>> +                return AVERROR_PATCHWELCOME;
>> +            }
>> +            extra_ch_length += 21;
>> +        }
>> +    }
>> +
>> +    skip_bits_long(gb, (header_size - 18) * 8 - extra_ch_length);
>>         return 0;
>>   }
>> diff --git a/libavcodec/mlp_parse.h b/libavcodec/mlp_parse.h
>> index 5f1f953cfe..8011566ff7 100644
>> --- a/libavcodec/mlp_parse.h
>> +++ b/libavcodec/mlp_parse.h
>> @@ -47,6 +47,7 @@ typedef struct MLPHeaderInfo
>>       int channels_mlp;                       ///< Channel count for MLP streams
>>       int channels_thd_stream1;               ///< Channel count for substream 1 of TrueHD streams ("6-channel presentation")
>>       int channels_thd_stream2;               ///< Channel count for substream 2 of TrueHD streams ("8-channel presentation")
>> +    int channels_thd_stream3;               ///< Channel count for substream 3 of TrueHD streams ("16-channel presentation")
>>       uint64_t channel_layout_mlp;            ///< Channel layout for MLP streams
>>       uint64_t channel_layout_thd_stream1;    ///< Channel layout for substream 1 of TrueHD streams ("6-channel presentation")
>>       uint64_t channel_layout_thd_stream2;    ///< Channel layout for substream 2 of TrueHD streams ("8-channel presentation")
>> diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
>> index d391390dd5..f3d54c40e7 100644
>> --- a/libavcodec/mlp_parser.c
>> +++ b/libavcodec/mlp_parser.c
>> @@ -181,10 +181,15 @@ static int mlp_parse(AVCodecParserContext *s,
>>               av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_mlp);
>>           } else { /* mh.stream_type == 0xba */
>>               /* TrueHD stream */
>> -            if (!mh.channels_thd_stream2) {
>> -                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
>> -            } else {
>> +            if (mh.channels_thd_stream3) {
>> +                avctx->ch_layout = (AVChannelLayout) {
>> +                    AV_CHANNEL_ORDER_UNSPEC,
>> +                    mh.channels_thd_stream3
>> +                };
> 
> Is the order really unspecified? Surely there's some understanding which channels map to which position.
> 

Actually, in the current use of MLP/TrueHD, when decoding using the fourth substream, the reconstructed channels cannot be mapped to single loudspeakers or fixed positions; they act more like "tracks", which are feeds for audio objects.
These audio objects are indeed not fixed in space: their position is defined by additional time-varying metadata.

>> +            } else if (mh.channels_thd_stream2) {
>>                   av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream2);
>> +            } else {
>> +                av_channel_layout_from_mask(&avctx->ch_layout, mh.channel_layout_thd_stream1);
>>               }
>>           }
>>   diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
>> index e85dac36a7..14fb953265 100644
>> --- a/libavcodec/mlpdec.c
>> +++ b/libavcodec/mlpdec.c
>> @@ -61,8 +61,8 @@ typedef struct SubStream {
>>         //@{
>>       /** restart header data */
>> -    /// The type of noise to be used in the rematrix stage.
>> -    uint16_t    noise_type;
>> +    /// The type of substream given by the restart header sync word.
>> +    uint16_t    substream_type;
>>         /// The index of the first channel coded in this substream.
>>       uint8_t     min_channel;
>> @@ -88,6 +88,13 @@ typedef struct SubStream {
>>       /// The current seed value for the pseudorandom noise generator(s).
>>       uint32_t    noisegen_seed;
>>   +    /// Maximum output_shift value.
>> +    int8_t      max_shift;
>> +    /// Maximum size of coded audio samples LSBs part.
>> +    int8_t      max_lsbs;
>> +    /// Maximum bit-depth of output audio samples.
>> +    int8_t      max_bits;
>> +
>>       /// Set if the substream contains extra info to check the size of VLC blocks.
>>       uint8_t     data_check_present;
>>   @@ -104,11 +111,23 @@ typedef struct SubStream {
>>       /// matrix output channel
>>       uint8_t     matrix_out_ch[MAX_MATRICES];
>>   -    /// Whether the LSBs of the matrix output are encoded in the bitstream.
>> +    /// Size of the LSBs of the matrix output encoded in the bitstream.
>>       uint8_t     lsb_bypass[MAX_MATRICES];
>> -    /// Matrix coefficients, stored as 2.14 fixed point.
>> +    /// Matrix coefficients fractional part size in bits.
>> +    uint8_t     matrix_coeff_frac_bits[MAX_MATRICES];
>> +    /// Matrix coefficients shift amount.
>> +    int8_t      matrix_coeff_shift[MAX_MATRICES];
>> +    /// Matrix coefficients presence mask.
>> +    uint16_t    matrix_coeff_mask[MAX_MATRICES];
>> +    /// Matrix coefficients, stored as 2.18 fixed point.
>>       DECLARE_ALIGNED(32, int32_t, matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
>> -    /// Left shift to apply to noise values in 0x31eb substreams.
>> +    /// Delta matrix coefficients size in bits for 0x31ec substreams.
>> +    uint8_t     delta_matrix_coeff_bits[MAX_MATRICES];
>> +    /// Delta matrix coefficients precision.
>> +    uint8_t     delta_matrix_coeff_prec[MAX_MATRICES];
>> +    /// Delta matrix coefficients, stored as 2.18 fixed point.
>> +    DECLARE_ALIGNED(32, int32_t, delta_matrix_coeff)[MAX_MATRICES][MAX_CHANNELS];
>> +    /// Left shift to apply to noise values in 0x31eb and 0x31ec substreams.
>>       uint8_t     matrix_noise_shift[MAX_MATRICES];
>>       //@}
>>   @@ -133,6 +152,8 @@ typedef struct MLPDecodeContext {
>>       AVCodecContext *avctx;
>>         AVChannelLayout downmix_layout;
>> +    /// Set to enable decoding of non-loudspeaker feed (objects) audio channels
>> +    int extract_objects;
>>         /// Current access unit being read has a major sync.
>>       int         is_major_sync_unit;
>> @@ -267,14 +288,14 @@ static inline int read_huff_channels(MLPDecodeContext *m, GetBitContext *gbp,
>>         for (mat = 0; mat < s->num_primitive_matrices; mat++)
>>           if (s->lsb_bypass[mat])
>> -            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits1(gbp);
>> +            m->bypassed_lsbs[pos + s->blockpos][mat] = get_bits(gbp, s->lsb_bypass[mat]);
>>         for (channel = s->min_channel; channel <= s->max_channel; channel++) {
>>           ChannelParams *cp = &s->channel_params[channel];
>>           int codebook = cp->codebook;
>>           int quant_step_size = s->quant_step_size[channel];
>>           int lsb_bits = cp->huff_lsbs - quant_step_size;
>> -        int result = 0;
>> +        int32_t result = 0;
>>             if (codebook > 0)
>>               result = get_vlc2(gbp, huff_vlc[codebook-1].table,
>> @@ -410,8 +431,12 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
>>           m->avctx->profile     = AV_PROFILE_TRUEHD_ATMOS;
>>       }
>>   -    /* limit to decoding 3 substreams, as the 4th is used by Dolby Atmos for non-audio data */
>> -    m->max_decoded_substream = FFMIN(m->num_substreams - 1, 2);
>> +    /* Limit to decoding the first 3 substreams (or allow the 4th for objects) */
>> +    m->max_decoded_substream = FFMIN(m->num_substreams - 1,
>> +        m->extract_objects ? 3 : 2);
>> +
>> +    av_log(m->avctx, AV_LOG_DEBUG, "decoding up to substream %" PRIu8 "\n",
>> +        m->max_decoded_substream);
>>         m->avctx->sample_rate    = mh.group1_samplerate;
>>       m->avctx->frame_size     = mh.access_unit_size;
>> @@ -531,23 +556,22 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>>       int sync_word, tmp;
>>       uint8_t checksum;
>>       uint8_t lossless_check;
>> +    uint8_t max_bit_depth;
>>       int start_count = get_bits_count(gbp);
>> -    int min_channel, max_channel, max_matrix_channel, noise_type;
>> +    int min_channel, max_channel, max_matrix_channel;
>>       const int std_max_matrix_channel = m->avctx->codec_id == AV_CODEC_ID_MLP
>>                                        ? MAX_MATRIX_CHANNEL_MLP
>>                                        : MAX_MATRIX_CHANNEL_TRUEHD;
>>   -    sync_word = get_bits(gbp, 13);
>> +    sync_word = get_bits(gbp, 14);
>>   -    if (sync_word != 0x31ea >> 1) {
>> +    if (sync_word < 0x31ea || 0x31ec < sync_word) {
>>           av_log(m->avctx, AV_LOG_ERROR,
>>                  "restart header sync incorrect (got 0x%04x)\n", sync_word);
>>           return AVERROR_INVALIDDATA;
>>       }
>>   -    noise_type = get_bits1(gbp);
>> -
>> -    if (m->avctx->codec_id == AV_CODEC_ID_MLP && noise_type) {
>> +    if (m->avctx->codec_id == AV_CODEC_ID_MLP && 0x31ea != sync_word) {
>>           av_log(m->avctx, AV_LOG_ERROR, "MLP must have 0x31ea sync word.\n");
>>           return AVERROR_INVALIDDATA;
>>       }
>> @@ -567,7 +591,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>>         /* This should happen for TrueHD streams with >6 channels and MLP's noise
>>        * type. It is not yet known if this is allowed. */
>> -    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && !noise_type) {
>> +    if (max_matrix_channel > MAX_MATRIX_CHANNEL_MLP && 0x31ea == sync_word) {
>>           avpriv_request_sample(m->avctx,
>>                                 "%d channels (more than the "
>>                                 "maximum supported by the decoder)",
>> @@ -582,7 +606,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>>       s->max_channel        = max_channel;
>>       s->coded_channels     = ((1LL << (max_channel - min_channel + 1)) - 1) << min_channel;
>>       s->max_matrix_channel = max_matrix_channel;
>> -    s->noise_type         = noise_type;
>> +    s->substream_type     = sync_word;
>>         if (mlp_channel_layout_subset(&m->downmix_layout, s->mask) &&
>>           m->max_decoded_substream > substr) {
>> @@ -595,8 +619,28 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>>         s->noise_shift   = get_bits(gbp,  4);
>>       s->noisegen_seed = get_bits(gbp, 23);
>> +    s->max_shift     = get_bits(gbp,  4);
>> +    s->max_lsbs      = get_bits(gbp,  5);
>> +    s->max_bits      = get_bits(gbp,  5);
>> +
>> +    max_bit_depth = (0x31ec == sync_word) ? 31 : 24;
>> +    if (max_bit_depth < s->max_lsbs) {
>> +        av_log(m->avctx, AV_LOG_ERROR,
>> +               "Max LSB size %" PRIu8 " for substream %u exceeds "
>> +               "%" PRIu8 " bits.\n",
>> +               s->max_lsbs, substr, max_bit_depth);
>> +        return AVERROR_INVALIDDATA;
>> +    }
>>   -    skip_bits(gbp, 19);
>> +    if (max_bit_depth < s->max_bits) {
>> +        av_log(m->avctx, AV_LOG_ERROR,
>> +               "Max output bit-depth %" PRIu8 " for substream %u exceeds "
>> +               "%" PRIu8 " bits.\n",
>> +               s->max_bits, substr, max_bit_depth);
>> +        return AVERROR_INVALIDDATA;
>> +    }
>> +
>> +    skip_bits(gbp, 5);
>>         s->data_check_present = get_bits1(gbp);
>>       lossless_check = get_bits(gbp, 8);
>> @@ -615,7 +659,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>>         for (ch = 0; ch <= s->max_matrix_channel; ch++) {
>>           int ch_assign = get_bits(gbp, 6);
>> -        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD) {
>> +        if (m->avctx->codec_id == AV_CODEC_ID_TRUEHD && s->mask) {
>>               AVChannelLayout l;
>>               enum AVChannel channel = thd_channel_layout_extract_channel(s->mask, ch_assign);
>>   @@ -656,12 +700,19 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
>>           cp->huff_offset      = 0;
>>           cp->sign_huff_offset = -(1 << 23);
>>           cp->codebook         = 0;
>> -        cp->huff_lsbs        = 24;
>> +        cp->huff_lsbs        = (3 == substr) ? 31 : 24;
>>       }
>>         if (substr == m->max_decoded_substream) {
>>           av_channel_layout_uninit(&m->avctx->ch_layout);
>> -        av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
>> +        if (substr < 3) /* Loudspeaker feed channels */
>> +            av_channel_layout_from_mask(&m->avctx->ch_layout, s->mask);
>> +        else /* Object channels */
>> +            m->avctx->ch_layout = (AVChannelLayout) {
>> +                AV_CHANNEL_ORDER_UNSPEC,
>> +                s->max_channel+1
>> +            };
>> +
>>           m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
>>                                                          s->output_shift,
>>                                                          s->max_matrix_channel,
>> @@ -760,17 +811,43 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
>>       return 0;
>>   }
>>   -/** Read parameters for primitive matrices. */
>> +/** Get the maximum number of primitive matrices allowed. */
>>   -static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
>> +static int get_max_nb_primitive_matrices(MLPDecodeContext *m, unsigned int substr)
>> +{
>> +    switch (substr) {
>> +    case 0: // substream 0 (up to 2 matrix channels)
>> +        return 2;
>> +    case 1: // substream 1
>> +        if (m->substream_info & 0x8) // 6-ch pres carried
>> +            return 6;
>> +        if (m->substream_info & 0x20) // 8-ch pres carried
>> +            return 8;
>> +        break;
>> +    case 2: // substream 2
>> +        if (m->substream_info & 0x40) // 8-ch pres carried
>> +            return 8;
>> +        break;
>> +    case 3: // substream 3
>> +        if (m->substream_info & 80) // 16-ch pres carried
>> +            return 16;
>> +        break;
>> +    }
>> +
>> +    return MAX_MATRICES_TRUEHD;
>> +}
>> +
>> +/** Read parameters for primitive matrices (0x31ea and 0x31eb substreams). */
>> +
>> +static int read_31ea_31eb_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
>>   {
>>       SubStream *s = &m->substream[substr];
>>       unsigned int mat, ch;
>>       const int max_primitive_matrices = m->avctx->codec_id == AV_CODEC_ID_MLP
>>                                        ? MAX_MATRICES_MLP
>> -                                     : MAX_MATRICES_TRUEHD;
>> +                                     : get_max_nb_primitive_matrices(m, substr);
>>   -    if (m->matrix_changed++ > 1) {
>> +    if (++m->matrix_changed > 1) {
>>           av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
>>           return AVERROR_INVALIDDATA;
>>       }
>> @@ -779,8 +856,9 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>>         if (s->num_primitive_matrices > max_primitive_matrices) {
>>           av_log(m->avctx, AV_LOG_ERROR,
>> -               "Number of primitive matrices cannot be greater than %d.\n",
>> -               max_primitive_matrices);
>> +               "Number of primitive matrices cannot be greater than %d "
>> +               "for substream %u of type 0x%04x.\n",
>> +               max_primitive_matrices, substr, s->substream_type);
>>           goto error;
>>       }
>>   @@ -803,7 +881,7 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>>           }
>>             max_chan = s->max_matrix_channel;
>> -        if (!s->noise_type)
>> +        if (0x31ea == s->substream_type)
>>               max_chan+=2;
>>             for (ch = 0; ch <= max_chan; ch++) {
>> @@ -811,10 +889,10 @@ static int read_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitCo
>>               if (get_bits1(gbp))
>>                   coeff_val = get_sbits(gbp, frac_bits + 2);
>>   -            s->matrix_coeff[mat][ch] = coeff_val * (1 << (14 - frac_bits));
>> +            s->matrix_coeff[mat][ch] = coeff_val * (1 << ((14 + 4) - frac_bits));
>>           }
>>   -        if (s->noise_type)
>> +        if (0x31eb == s->substream_type)
>>               s->matrix_noise_shift[mat] = get_bits(gbp, 4);
>>           else
>>               s->matrix_noise_shift[mat] = 0;
>> @@ -828,6 +906,124 @@ error:
>>       return AVERROR_INVALIDDATA;
>>   }
>>   +/** Read parameters for primitive matrices (0x31ec substreams). */
>> +
>> +static int read_31ec_matrix_params(MLPDecodeContext *m, unsigned int substr, GetBitContext *gbp)
>> +{
>> +    SubStream *s = &m->substream[substr];
>> +    unsigned int mat, ch;
>> +
>> +    const int max_primitive_matrices = get_max_nb_primitive_matrices(m, substr);
>> +
>> +    if (++m->matrix_changed > 1) {
>> +        av_log(m->avctx, AV_LOG_ERROR, "Matrices may change only once per access unit.\n");
>> +        return AVERROR_INVALIDDATA;
>> +    }
>> +
>> +    /* Seed primitive matrices */
>> +
>> +    if (get_bits1(gbp)) {
>> +        /* New seed primitive matrices */
>> +
>> +        if (get_bits1(gbp)) {
>> +            /* New seed matrices parameters */
>> +            s->num_primitive_matrices = get_bits(gbp, 4) + 1;
>> +
>> +            if (s->num_primitive_matrices > max_primitive_matrices) {
>> +                av_log(m->avctx, AV_LOG_ERROR,
>> +                    "Number of primitive matrices cannot be greater than %d "
>> +                    "in substream %u of type 0x%04x.\n",
>> +                    max_primitive_matrices, substr, s->substream_type);
>> +                goto error;
>> +            }
>> +
>> +            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>> +                s->matrix_out_ch         [mat] = get_bits(gbp, 4);
>> +                s->matrix_coeff_frac_bits[mat] = get_bits(gbp, 4);
>> +                s->matrix_coeff_shift    [mat] = ((int) get_bits(gbp, 3)) - 1;
>> +                s->lsb_bypass            [mat] = get_bits(gbp, 2);
>> +                s->matrix_noise_shift    [mat] = get_bits(gbp, 4);
>> +                s->matrix_coeff_mask     [mat] = get_bits(gbp, s->max_matrix_channel + 1);
>> +
>> +                if (s->matrix_out_ch[mat] > s->max_matrix_channel) {
>> +                    av_log(m->avctx, AV_LOG_ERROR,
>> +                            "Invalid channel %d specified as output from matrix.\n",
>> +                            s->matrix_out_ch[mat]);
>> +                    goto error;
>> +                }
>> +                if (s->matrix_coeff_frac_bits[mat] > 14) {
>> +                    av_log(m->avctx, AV_LOG_ERROR,
>> +                            "Too many fractional bits specified.\n");
>> +                    goto error;
>> +                }
>> +            }
>> +        }
>> +
>> +        /* Seed matrices coefficients */
>> +        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>> +            const int coeff_shift = s->matrix_coeff_shift[mat]
>> +                - s->matrix_coeff_frac_bits[mat];
>> +
>> +            memset(s->matrix_coeff[mat], 0, sizeof(s->matrix_coeff[mat]));
>> +
>> +            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
>> +                int64_t coeff_val;
>> +
>> +                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
>> +                    continue; // skip channel
>> +
>> +                coeff_val = get_sbits(gbp, s->matrix_coeff_frac_bits[mat] + 2);
>> +                s->matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
>> +            }
>> +        }
>> +    }
>> +
>> +    if (!get_bits1(gbp)) {
>> +        /* No primitive matrices interpolation */
>> +        memset(s->delta_matrix_coeff, 0, sizeof(s->delta_matrix_coeff));
>> +    }
>> +    else if (get_bits1(gbp)) {
>> +        /* New delta primitive matrices */
>> +
>> +        if (get_bits1(gbp)) {
>> +            /* New delta primitive matrices parameters */
>> +
>> +            for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>> +                s->delta_matrix_coeff_bits[mat] = get_bits(gbp, 4) + 1;
>> +                s->delta_matrix_coeff_prec[mat] = get_bits(gbp, 2);
>> +            }
>> +        }
>> +
>> +        for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>> +            const int coeff_shift = s->matrix_coeff_shift[mat]
>> +                - s->delta_matrix_coeff_prec[mat]
>> +                - s->matrix_coeff_frac_bits[mat];
>> +
>> +            memset(s->delta_matrix_coeff[mat], 0, sizeof(s->delta_matrix_coeff[mat]));
>> +
>> +            if (s->delta_matrix_coeff_bits[mat] <= 1)
>> +                continue; // skip matrice
>> +
>> +            for (ch = 0; ch <= s->max_matrix_channel; ch++) {
>> +                int64_t coeff_val;
>> +
>> +                if (!((s->matrix_coeff_mask[mat] >> ch) & 0x1))
>> +                    continue; // skip channel
>> +
>> +                coeff_val = get_sbits(gbp, s->delta_matrix_coeff_bits[mat]);
>> +                s->delta_matrix_coeff[mat][ch] = coeff_val * (1 << (18 + coeff_shift));
>> +            }
>> +        }
>> +    }
>> +
>> +    return 0;
>> +error:
>> +    s->num_primitive_matrices = 0;
>> +    memset(s->matrix_out_ch, 0, sizeof(s->matrix_out_ch));
>> +
>> +    return AVERROR_INVALIDDATA;
>> +}
>> +
>>   /** Read channel parameters. */
>>     static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
>> @@ -875,8 +1071,10 @@ static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
>>       cp->codebook  = get_bits(gbp, 2);
>>       cp->huff_lsbs = get_bits(gbp, 5);
>>   -    if (cp->codebook > 0 && cp->huff_lsbs > 24) {
>> -        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs.\n");
>> +    if (cp->codebook > 0 && cp->huff_lsbs > s->max_lsbs) {
>> +        av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs=%" PRIu8 ", "
>> +               "exceeds max_lsbs=%" PRIu8 ".\n",
>> +               cp->huff_lsbs, s->max_lsbs);
>>           cp->huff_lsbs = 0;
>>           return AVERROR_INVALIDDATA;
>>       }
>> @@ -910,9 +1108,14 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
>>           }
>>         if (s->param_presence_flags & PARAM_MATRIX)
>> -        if (get_bits1(gbp))
>> -            if ((ret = read_matrix_params(m, substr, gbp)) < 0)
>> +        if (get_bits1(gbp)) {
>> +            if (0x31ec == s->substream_type)
>> +                ret = read_31ec_matrix_params(m, substr, gbp);
>> +            else
>> +                ret = read_31ea_31eb_matrix_params(m, substr, gbp);
>> +            if (ret < 0)
>>                   return ret;
>> +        }
>>         if (s->param_presence_flags & PARAM_OUTSHIFT)
>>           if (get_bits1(gbp)) {
>> @@ -922,6 +1125,10 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
>>                       avpriv_request_sample(m->avctx, "Negative output_shift");
>>                       s->output_shift[ch] = 0;
>>                   }
>> +                if (s->max_shift < s->output_shift[ch])
>> +                    av_log(m->avctx, AV_LOG_WARNING,
>> +                           "output_shift=%d exceeds max_shift=%d\n",
>> +                           s->output_shift[ch], s->max_shift);
>>               }
>>               if (substr == m->max_decoded_substream)
>>                   m->pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
>> @@ -1103,6 +1310,56 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
>>       s->noisegen_seed = seed;
>>   }
>>   +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
>> +
>> +/** Check matrices-based channel remapping output for saturation. */
>> +
>> +static void check_rematrix_output(MLPDecodeContext *m, unsigned int substr)
>> +{
>> +    SubStream *s = &m->substream[substr];
>> +    unsigned int mat, sample;
>> +
>> +    for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>> +        unsigned int dest_ch = s->matrix_out_ch[mat];
>> +
>> +        const uint8_t shift = (3 <= substr && s->min_channel <= dest_ch) ? 31 : 23;
>> +        const int32_t min_value = -(1u << shift);
>> +        const int32_t max_value =  (1u << shift) - 1;
>> +
>> +        for (sample = 0; sample < s->blockpos; sample++) {
>> +            if (m->sample_buffer[sample][dest_ch] < min_value)
>> +                av_log(m->avctx, AV_LOG_WARNING,
>> +                    "rematrix negative saturation substr=%u mat=%u sample=%d "
>> +                    "value=%" PRId32 "\n",
>> +                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
>> +            if (m->sample_buffer[sample][dest_ch] > max_value)
>> +                av_log(m->avctx, AV_LOG_WARNING,
>> +                    "rematrix positive saturation substr=%u mat=%u sample=%d "
>> +                    "value=%" PRId32 "\n",
>> +                    substr, mat, sample, m->sample_buffer[sample][dest_ch]);
>> +        }
>> +    }
>> +}
>> +
>> +/** Check output audio bit-depth. */
>> +
>> +static void check_output_bit_depth(MLPDecodeContext *m, unsigned int substr)
>> +{
>> +    SubStream *s = &m->substream[substr];
>> +    uint32_t cumul_mask = 0;
>> +    unsigned int chan, sample;
>> +
>> +    for (chan = 0; chan <= s->max_matrix_channel; chan++)
>> +        for (sample = 0; sample < s->blockpos; sample++)
>> +            cumul_mask |= FFABS(m->sample_buffer[sample][chan]);
>> +
>> +    if ((1u << s->max_bits) <= cumul_mask)
>> +        av_log(m->avctx, AV_LOG_WARNING, "output audio bit-depth exceeds "
>> +               "expected %u bits.\n",
>> +               s->max_bits);
>> +}
>> +#endif
>> +
>>   /** Write the audio data into the output buffer. */
>>     static int output_data(MLPDecodeContext *m, unsigned int substr,
>> @@ -1110,8 +1367,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>>   {
>>       AVCodecContext *avctx = m->avctx;
>>       SubStream *s = &m->substream[substr];
>> -    unsigned int mat;
>> -    unsigned int maxchan;
>> +    unsigned int mat, chan, maxchan;
>>       int ret;
>>       int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
>>   @@ -1126,7 +1382,7 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>>       }
>>         maxchan = s->max_matrix_channel;
>> -    if (!s->noise_type) {
>> +    if (0x31ea == s->substream_type) {
>>           generate_2_noise_channels(m, substr);
>>           maxchan += 2;
>>       } else {
>> @@ -1137,19 +1393,45 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>>        * samples. */
>>       for (mat = 0; mat < s->num_primitive_matrices; mat++) {
>>           unsigned int dest_ch = s->matrix_out_ch[mat];
>> -        m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
>> -                                    s->matrix_coeff[mat],
>> -                                    &m->bypassed_lsbs[0][mat],
>> -                                    m->noise_buffer,
>> -                                    s->num_primitive_matrices - mat,
>> -                                    dest_ch,
>> -                                    s->blockpos,
>> -                                    maxchan,
>> -                                    s->matrix_noise_shift[mat],
>> -                                    m->access_unit_size_pow2,
>> -                                    MSB_MASK(s->quant_step_size[dest_ch]));
>> +
>> +        if (substr < 3) {
>> +            /* Single primitive matrices */
>> +            m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
>> +                                        s->matrix_coeff[mat],
>> +                                        &m->bypassed_lsbs[0][mat],
>> +                                        m->noise_buffer,
>> +                                        s->num_primitive_matrices - mat,
>> +                                        dest_ch,
>> +                                        s->blockpos,
>> +                                        maxchan,
>> +                                        s->matrix_noise_shift[mat],
>> +                                        m->access_unit_size_pow2,
>> +                                        MSB_MASK(s->quant_step_size[dest_ch]));
>> +        }
>> +        else {
>> +            /* Interpolated primitive matrices */
>> +            m->dsp.mlp_rematrix_interp_channel(&m->sample_buffer[0][0],
>> +                                               s->matrix_coeff[mat],
>> +                                               s->delta_matrix_coeff[mat],
>> +                                               &m->bypassed_lsbs[0][mat],
>> +                                               m->noise_buffer,
>> +                                               s->num_primitive_matrices - mat,
>> +                                               dest_ch,
>> +                                               s->blockpos,
>> +                                               maxchan,
>> +                                               s->matrix_noise_shift[mat],
>> +                                               m->access_unit_size_pow2,
>> +                                               MSB_MASK(s->quant_step_size[dest_ch]));
>> +
>> +            for (chan = 0; chan <= maxchan; chan++)
>> +                s->matrix_coeff[mat][chan] += s->delta_matrix_coeff[mat][chan];
>> +        }
>>       }
>>   +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
>> +    check_rematrix_output(m, substr);
>> +#endif
>> +
>>       /* get output buffer */
>>       frame->nb_samples = s->blockpos;
>>       if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> @@ -1163,6 +1445,10 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
>>                                               s->max_matrix_channel,
>>                                               is32);
>>   +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
>> +    check_output_bit_depth(m, substr);
>> +#endif
>> +
>>       /* Update matrix encoding side data */
>>       if (s->matrix_encoding != s->prev_matrix_encoding) {
>>           if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
>> @@ -1320,6 +1606,7 @@ static int read_access_unit(AVCodecContext *avctx, AVFrame *frame,
>>                    (avctx->ch_layout.nb_channels == 8 &&
>>                     ((m->substream_info >> 4) & 0x7) != 0x7 &&
>>                     ((m->substream_info >> 4) & 0x7) != 0x6 &&
>> +                  ((m->substream_info >> 4) & 0x7) != 0x4 &&
>>                     ((m->substream_info >> 4) & 0x7) != 0x3)) &&
>>                   substr > 0 && substr < m->max_decoded_substream &&
>>                   (s->min_channel <= m->substream[substr - 1].max_channel)) {
>> @@ -1429,8 +1716,10 @@ static void mlp_decode_flush(AVCodecContext *avctx)
>>   #define OFFSET(x) offsetof(MLPDecodeContext, x)
>>   #define FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
>>   static const AVOption options[] = {
>> -    { "downmix", "Request a specific channel layout from the decoder", OFFSET(downmix_layout),
>> -        AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
>> +    { "downmix", "Request a specific channel layout from the decoder",
>> +        OFFSET(downmix_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL}, .flags = FLAGS },
>> +    { "extract_objects", "Enable extraction of audio object channels",
>> +        OFFSET(extract_objects), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
>>       { NULL },
> 
> IMHO this should be enabled by default. All streams come with compatibility AC3 mixes too.
> 

I assumed that this would not be a good idea, as audio object channels are not meant to be heard without rendering (as with other Atmos bitstreams, or those of any other object audio format such as MPEG-H Audio), and such rendering is not something currently implemented in FFmpeg.
When this option is disabled (the default), or without this patch, decoding is limited to the first three substreams, the third of which produces a proper backward-compatible 7.1 rendering (this is a strength of TrueHD w/ Atmos) suitable for playback.
If the option is enabled, playing back the resulting non-spatially-rendered audio stream would sound off to unaware users.

More details about the backward compatible nature of TrueHD w/ Atmos can be found in the following paper:
V. Melkote, M. Law and R. Wilson, "Hierarchical and Lossless Coding of audio objects in Dolby TrueHD," 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), South Brisbane, QLD, Australia, 2015, pp. 384-388, doi: 10.1109/ICASSP.2015.7177996.

>>   };
>>   diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
>> index cb40160f67..e28006f2c4 100644
>> --- a/libavcodec/mlpdsp.c
>> +++ b/libavcodec/mlpdsp.c
>> @@ -79,11 +79,51 @@ void ff_mlp_rematrix_channel(int32_t *samples,
>>             if (matrix_noise_shift) {
>>               index &= access_unit_size_pow2 - 1;
>> -            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7));
>> +            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
>>               index += index2;
>>           }
>>   -        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
>> +        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
>> +        bypassed_lsbs += MAX_CHANNELS;
>> +        samples += MAX_CHANNELS;
>> +    }
>> +}
>> +
>> +void ff_mlp_rematrix_interp_channel(int32_t *samples,
>> +                                    const int32_t *seed_coeffs,
>> +                                    const int32_t *delta_coeffs,
>> +                                    const uint8_t *bypassed_lsbs,
>> +                                    const int8_t *noise_buffer,
>> +                                    int index,
>> +                                    unsigned int dest_ch,
>> +                                    uint16_t blockpos,
>> +                                    unsigned int maxchan,
>> +                                    int matrix_noise_shift,
>> +                                    int access_unit_size_pow2,
>> +                                    int32_t mask)
>> +{
>> +    unsigned int src_ch, i;
>> +    int index2 = 2 * index + 1;
>> +
>> +    int32_t delta_inc = (1 << 16) / blockpos;
>> +
>> +    for (i = 0; i < blockpos; i++) {
>> +        int64_t accum = 0, delta_accum = 0;
>> +
>> +        for (src_ch = 0; src_ch <= maxchan; src_ch++) {
>> +            accum += (int64_t) samples[src_ch] * seed_coeffs[src_ch];
>> +            delta_accum += (int64_t) samples[src_ch] * delta_coeffs[src_ch];
>> +        }
>> +
>> +        accum += ((delta_accum >> 18) * i * delta_inc * (1 << 18)) >> 16;
>> +
>> +        if (matrix_noise_shift) {
>> +            index &= access_unit_size_pow2 - 1;
>> +            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 11));
>> +            index += index2;
>> +        }
>> +
>> +        samples[dest_ch] = ((accum >> 18) & mask) + *bypassed_lsbs;
>>           bypassed_lsbs += MAX_CHANNELS;
>>           samples += MAX_CHANNELS;
>>       }
>> @@ -115,9 +155,10 @@ int32_t ff_mlp_pack_output(int32_t lossless_check_data,
>>               int mat_ch = ch_assign[out_ch];
>>               int32_t sample = sample_buffer[i][mat_ch] *
>>                             (1U << output_shift[mat_ch]);
>> -            lossless_check_data ^= (sample & 0xffffff) << mat_ch;
>> +            lossless_check_data ^= (sample & 0xffffff) << (mat_ch & 7);
>> +
>>               if (is32)
>> -                *data_32++ = sample * 256U;
>> +                *data_32++ = sample * (1 << 8);
>>               else
>>                   *data_16++ = sample >> 8;
>>           }
>> @@ -129,6 +170,7 @@ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
>>   {
>>       c->mlp_filter_channel = mlp_filter_channel;
>>       c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
>> +    c->mlp_rematrix_interp_channel = ff_mlp_rematrix_interp_channel;
>>       c->mlp_select_pack_output = mlp_select_pack_output;
>>   #if ARCH_ARM
>>       ff_mlpdsp_init_arm(c);
>> diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
>> index 7a9ac228d3..fd29db10a7 100644
>> --- a/libavcodec/mlpdsp.h
>> +++ b/libavcodec/mlpdsp.h
>> @@ -37,6 +37,19 @@ void ff_mlp_rematrix_channel(int32_t *samples,
>>                                int access_unit_size_pow2,
>>                                int32_t mask);
>>   +void ff_mlp_rematrix_interp_channel(int32_t *samples,
>> +                                    const int32_t *seed_coeffs,
>> +                                    const int32_t *delta_coeffs,
>> +                                    const uint8_t *bypassed_lsbs,
>> +                                    const int8_t *noise_buffer,
>> +                                    int index,
>> +                                    unsigned int dest_ch,
>> +                                    uint16_t blockpos,
>> +                                    unsigned int maxchan,
>> +                                    int matrix_noise_shift,
>> +                                    int access_unit_size_pow2,
>> +                                    int32_t mask);
>> +
>>   int32_t ff_mlp_pack_output(int32_t lossless_check_data,
>>                              uint16_t blockpos,
>>                              int32_t (*sample_buffer)[MAX_CHANNELS],
>> @@ -62,6 +75,18 @@ typedef struct MLPDSPContext {
>>                                    int matrix_noise_shift,
>>                                    int access_unit_size_pow2,
>>                                    int32_t mask);
>> +    void (*mlp_rematrix_interp_channel)(int32_t *samples,
>> +                                        const int32_t *seed_coeffs,
>> +                                        const int32_t *delta_coeffs,
>> +                                        const uint8_t *bypassed_lsbs,
>> +                                        const int8_t *noise_buffer,
>> +                                        int index,
>> +                                        unsigned int dest_ch,
>> +                                        uint16_t blockpos,
>> +                                        unsigned int maxchan,
>> +                                        int matrix_noise_shift,
>> +                                        int access_unit_size_pow2,
>> +                                        int32_t mask);
>>       int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
>>                                           int8_t *output_shift,
>>                                           uint8_t max_matrix_channel,
>> diff --git a/libavcodec/x86/mlpdsp.asm b/libavcodec/x86/mlpdsp.asm
>> index 3dc641e89e..3b232d4551 100644
>> --- a/libavcodec/x86/mlpdsp.asm
>> +++ b/libavcodec/x86/mlpdsp.asm
>> @@ -61,12 +61,12 @@ SECTION .text
>>       paddq        xm0, xm1
>>       movq      accumq, xm0
>>       movzx     blsbsd, byte [blsbs_ptrq]             ; load *bypassed_lsbs
>> -    sar       accumq, 14                            ; accum >>= 14
>> +    sar       accumq, 18                            ; accum >>= 18
>>       and       accumd, maskd                         ; accum &= mask
>>       add       accumd, blsbsd                        ; accum += *bypassed_lsbs
>>       mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
>> -    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
>> -    add     samplesq, 32                            ; samples += MAX_CHANNELS;
>> +    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
>> +    add     samplesq, 64                            ; samples += MAX_CHANNELS;
>>       cmp   blsbs_ptrq, cntq
>>   %endmacro
>>   @@ -80,12 +80,12 @@ SECTION .text
>>       SHLX      noiseq, mns                           ; noise_buffer[index] <<= matrix_noise_shift
>>       add       accumq, noiseq                        ; accum += noise_buffer[index]
>>       movzx     noised, byte [blsbs_ptrq]             ; load *bypassed_lsbs (reuse tmp noise register)
>> -    sar       accumq, 14                            ; accum >>= 14
>> +    sar       accumq, 18                            ; accum >>= 18
>>       and       accumd, maskd                         ; accum &= mask
>>       add       accumd, noised                        ; accum += *bypassed_lsbs
>>       mov   [samplesq + dest_chq], accumd             ; samples[dest_ch] = accum
>> -    add   blsbs_ptrq, 8                             ; bypassed_lsbs += MAX_CHANNELS;
>> -    add     samplesq, 32                            ; samples += MAX_CHANNELS;
>> +    add   blsbs_ptrq, 16                            ; bypassed_lsbs += MAX_CHANNELS;
>> +    add     samplesq, 64                            ; samples += MAX_CHANNELS;
>>       cmp   blsbs_ptrq, cntq
>>   %endmacro
>>   @@ -106,7 +106,8 @@ cglobal mlp_rematrix_channel, 0, 13, 5, samples, coeffs, blsbs_ptr, blsbs, \
>>       mov     dest_chd, dest_chm                      ; load dest_chd (not needed on UNIX64)
>>   %endif
>>       shl     dest_chd, 2
>> -    lea         cntq, [blsbs_ptrq + blockposq*8]
>> +    lea         cntq, [blsbs_ptrq + blockposq*8]    ; loop end address (bypassed_lsbs + blockpos * MAX_CHANNELS)
>> +    lea         cntq, [cntq + blockposq*8]
>>       test        mnsd, mnsd                          ; is matrix_noise_shift != 0?
>>       jne .shift                                      ; jump if true
>>       cmp     maxchand, 4                             ; is maxchan < 4?
>> @@ -144,7 +145,7 @@ align 16
>>       DEFINE_ARGS samples, coeffs, blsbs_ptr, noise_buffer, \
>>                   index, dest_ch, accum, index2, mns, \
>>                   ausp, mask, cnt, noise
>> -    add         mnsd, 7              ; matrix_noise_shift += 7
>> +    add         mnsd, 11             ; matrix_noise_shift += 11
>>   %else ; sse4
>>       mov           r6, rcx            ; move rcx elsewhere so we can use cl for matrix_noise_shift
>>   %if WIN64
>> @@ -156,7 +157,7 @@ align 16
>>       DEFINE_ARGS samples, coeffs, blsbs_ptr, mns, index, dest_ch, noise_buffer, \
>>                   index2, accum, ausp, mask, cnt, noise
>>   %endif
>> -    lea         mnsd, [r8 + 7]       ; rcx = matrix_noise_shift + 7
>> +    lea         mnsd, [r8 + 11]      ; rcx = matrix_noise_shift + 11
>>   %endif ; cpuflag
>>       sub        auspd, 1              ; access_unit_size_pow2 -= 1
>>       cmp          r7d, 4              ; is maxchan < 4?
> 
> If the changes are that few, why not do the aarch64 version too?
> 

I am not familiar with aarch64 assembly, and at the moment I have neither any hint as to where the shifts are performed nor the hardware to test on :/

>> diff --git a/tests/fate/truehd.mak b/tests/fate/truehd.mak
>> index b0bc86a965..30c0e9628b 100644
>> --- a/tests/fate/truehd.mak
>> +++ b/tests/fate/truehd.mak
>> @@ -18,5 +18,15 @@ fate-truehd-mono1726: CMD = md5pipe -f truehd -i $(TARGET_SAMPLES)/truehd/ticket
>>   fate-truehd-mono1726: CMP = oneline
>>   fate-truehd-mono1726: REF = 9be9551fac418440bb02101bfdb11df9
>>   +FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-no-obj
>> +fate-truehd-atmos-no-obj: CMD = md5pipe -f truehd -extract_objects 0 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
>> +fate-truehd-atmos-no-obj: CMP = oneline
>> +fate-truehd-atmos-no-obj: REF = 53da6ce35c778bcc2182ef2160bf16a2
>> +
>> +FATE_TRUEHD-$(call DEMDEC, TRUEHD, TRUEHD) += fate-truehd-atmos-obj
>> +fate-truehd-atmos-obj: CMD = md5pipe -f truehd -extract_objects 1 -i $(TARGET_SAMPLES)/truehd/atmos.thd -f s32le
>> +fate-truehd-atmos-obj: CMP = oneline
>> +fate-truehd-atmos-obj: REF = f19f6f8ec8b040050aaa019f016f7ddc
>> +
>>   FATE_SAMPLES_AUDIO += $(FATE_TRUEHD-yes)
>>   fate-truehd: $(FATE_TRUEHD-yes)
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-22 17:49 [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data Massimo Eynard
  2025-03-23 17:31 ` Lynne
@ 2025-03-23 19:01 ` James Almer
  2025-03-23 19:33   ` Massimo Eynard
  2025-03-25 17:42   ` James Almer
  1 sibling, 2 replies; 11+ messages in thread
From: James Almer @ 2025-03-23 19:01 UTC (permalink / raw)
  To: ffmpeg-devel


[-- Attachment #1.1.1: Type: text/plain, Size: 1890 bytes --]

On 3/22/2025 2:49 PM, Massimo Eynard wrote:
> This patch adds support for decoding the fourth MLP substream
> which contains the 16-channel presentation used for Atmos
> audio objects.
> 
> By default only the first three substreams are decoded
> unless the new extract_objects flag is enabled as the resulting
> presentation contains audio object feeds instead of classic
> loudspeaker feeds.
> 
> As this introduces interpolation of primitive matrices, precision
> has been increased to 2.18 fixed point. Therefore this requires
> DSP code upgrade which has been done for C and x86 implementations
> but not the ARM implementation.
> 
> Adds two FATE tests using existing atmos.thd sample to reflect
> changes.
> 
> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
> ---
>   libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>   libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>   libavcodec/mlp.h                 |  10 +-
>   libavcodec/mlp_parse.c           |  31 ++-
>   libavcodec/mlp_parse.h           |   1 +
>   libavcodec/mlp_parser.c          |  11 +-
>   libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>   libavcodec/mlpdsp.c              |  50 +++-
>   libavcodec/mlpdsp.h              |  25 ++
>   libavcodec/x86/mlpdsp.asm        |  19 +-
>   tests/fate/truehd.mak            |  10 +
>   11 files changed, 476 insertions(+), 75 deletions(-)

With atmos.thd I get:

> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
> Input #0, truehd, from '../samples/truehd/atmos.thd':
>   Duration: N/A, start: 0.000000, bitrate: N/A
>   Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 7.1.4, s32 (24 bit)

Which is unlikely to be correct. The file has 11 (or 12) objects, which 
are exported as 12 channels in an unspecified layout, and automatically 
assumed to be a 7.1.4 fixed layout.


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 19:01 ` James Almer
@ 2025-03-23 19:33   ` Massimo Eynard
  2025-03-23 20:35     ` James Almer
  2025-03-25 17:42   ` James Almer
  1 sibling, 1 reply; 11+ messages in thread
From: Massimo Eynard @ 2025-03-23 19:33 UTC (permalink / raw)
  To: ffmpeg-devel

On 23/03/2025 20:01, James Almer wrote:
> On 3/22/2025 2:49 PM, Massimo Eynard wrote:
>> This patch adds support for decoding the fourth MLP substream
>> which contains the 16-channel presentation used for Atmos
>> audio objects.
>>
>> By default only the first three substreams are decoded
>> unless the new extract_objects flag is enabled as the resulting
>> presentation contains audio object feeds instead of classic
>> loudspeaker feeds.
>>
>> As this introduces interpolation of primitive matrices, precision
>> has been increased to 2.18 fixed point. Therefore this requires
>> DSP code upgrade which has been done for C and x86 implementations
>> but not the ARM implementation.
>>
>> Adds two FATE tests using existing atmos.thd sample to reflect
>> changes.
>>
>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>> ---
>>   libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>   libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>   libavcodec/mlp.h                 |  10 +-
>>   libavcodec/mlp_parse.c           |  31 ++-
>>   libavcodec/mlp_parse.h           |   1 +
>>   libavcodec/mlp_parser.c          |  11 +-
>>   libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>>   libavcodec/mlpdsp.c              |  50 +++-
>>   libavcodec/mlpdsp.h              |  25 ++
>>   libavcodec/x86/mlpdsp.asm        |  19 +-
>>   tests/fate/truehd.mak            |  10 +
>>   11 files changed, 476 insertions(+), 75 deletions(-)
> 
> With atmos.thd i get:
> 
>> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
>> Input #0, truehd, from '../samples/truehd/atmos.thd':
>>   Duration: N/A, start: 0.000000, bitrate: N/A
>>   Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 7.1.4, s32 (24 bit)
> 
> Which is unlikely to be correct. The file has 11 (or 12) objects, which is exported as 12 channels in an unspecified layout, and automatically assumed to be a 7.1.4 fixed layout.
> 

This is caused by `guess_input_channel_layout` (in `ffmpeg_demux.c`) which tries to assume a layout.
Would using `AV_CHANNEL_ORDER_CUSTOM` with all channels set to `AV_CHAN_UNKNOWN` (for unknown position, except LFE if present) be a better solution?

> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 19:33   ` Massimo Eynard
@ 2025-03-23 20:35     ` James Almer
  2025-03-23 21:47       ` Hendrik Leppkes
  2025-03-23 21:50       ` Marton Balint
  0 siblings, 2 replies; 11+ messages in thread
From: James Almer @ 2025-03-23 20:35 UTC (permalink / raw)
  To: ffmpeg-devel


[-- Attachment #1.1.1: Type: text/plain, Size: 2875 bytes --]

On 3/23/2025 4:33 PM, Massimo Eynard wrote:
> On 23/03/2025 20:01, James Almer wrote:
>> On 3/22/2025 2:49 PM, Massimo Eynard wrote:
>>> This patch adds support for decoding the fourth MLP substream
>>> which contains the 16-channel presentation used for Atmos
>>> audio objects.
>>>
>>> By default only the first three substreams are decoded
>>> unless the new extract_objects flag is enabled as the resulting
>>> presentation contains audio object feeds instead of classic
>>> loudspeaker feeds.
>>>
>>> As this introduces interpolation of primitive matrices, precision
>>> has been increased to 2.18 fixed point. Therefore this requires
>>> DSP code upgrade which has been done for C and x86 implementations
>>> but not the ARM implementation.
>>>
>>> Adds two FATE tests using existing atmos.thd sample to reflect
>>> changes.
>>>
>>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>>> ---
>>>    libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>>    libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>>    libavcodec/mlp.h                 |  10 +-
>>>    libavcodec/mlp_parse.c           |  31 ++-
>>>    libavcodec/mlp_parse.h           |   1 +
>>>    libavcodec/mlp_parser.c          |  11 +-
>>>    libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>>>    libavcodec/mlpdsp.c              |  50 +++-
>>>    libavcodec/mlpdsp.h              |  25 ++
>>>    libavcodec/x86/mlpdsp.asm        |  19 +-
>>>    tests/fate/truehd.mak            |  10 +
>>>    11 files changed, 476 insertions(+), 75 deletions(-)
>>
>> With atmos.thd i get:
>>
>>> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
>>> Input #0, truehd, from '../samples/truehd/atmos.thd':
>>>    Duration: N/A, start: 0.000000, bitrate: N/A
>>>    Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 7.1.4, s32 (24 bit)
>>
>> Which is unlikely to be correct. The file has 11 (or 12) objects, which is exported as 12 channels in an unspecified layout, and automatically assumed to be a 7.1.4 fixed layout.
>>
> 
> This is caused by `guess_input_channel_layout` (in `ffmpeg_demux.c`) which tries to assume a layout.
> Would using `AV_CHANNEL_ORDER_CUSTOM` with all channels set to `AV_CHAN_UNKNOWN` (for unknown position, except LFE if present) be a better solution?

Possibly, but it may make the stream undecodable unless you remap the 
channels (probably with a filter in the filterchain).

Is there no better representation for the output? What are these 12 
channels the sample exports? 16 channels (as you say the MLP substream 
contains) would match Ambisonics 3rd order, but I assume that doesn't 
apply here, unless you should also be outputting something else.


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 20:35     ` James Almer
@ 2025-03-23 21:47       ` Hendrik Leppkes
  2025-03-23 23:00         ` James Almer
  2025-03-23 21:50       ` Marton Balint
  1 sibling, 1 reply; 11+ messages in thread
From: Hendrik Leppkes @ 2025-03-23 21:47 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Sun, Mar 23, 2025 at 9:35 PM James Almer <jamrial@gmail.com> wrote:
>
> On 3/23/2025 4:33 PM, Massimo Eynard wrote:
> > On 23/03/2025 20:01, James Almer wrote:
> >> On 3/22/2025 2:49 PM, Massimo Eynard wrote:
> >>> This patch adds support for decoding the fourth MLP substream
> >>> which contains the 16-channel presentation used for Atmos
> >>> audio objects.
> >>>
> >>> By default only the first three substreams are decoded
> >>> unless the new extract_objects flag is enabled as the resulting
> >>> presentation contains audio object feeds instead of classic
> >>> loudspeaker feeds.
> >>>
> >>> As this introduces interpolation of primitive matrices, precision
> >>> has been increased to 2.18 fixed point. Therefore this requires
> >>> DSP code upgrade which has been done for C and x86 implementations
> >>> but not the ARM implementation.
> >>>
> >>> Adds two FATE tests using existing atmos.thd sample to reflect
> >>> changes.
> >>>
> >>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
> >>> ---
> >>>    libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
> >>>    libavcodec/arm/mlpdsp_init_arm.c |   3 +-
> >>>    libavcodec/mlp.h                 |  10 +-
> >>>    libavcodec/mlp_parse.c           |  31 ++-
> >>>    libavcodec/mlp_parse.h           |   1 +
> >>>    libavcodec/mlp_parser.c          |  11 +-
> >>>    libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
> >>>    libavcodec/mlpdsp.c              |  50 +++-
> >>>    libavcodec/mlpdsp.h              |  25 ++
> >>>    libavcodec/x86/mlpdsp.asm        |  19 +-
> >>>    tests/fate/truehd.mak            |  10 +
> >>>    11 files changed, 476 insertions(+), 75 deletions(-)
> >>
> >> With atmos.thd i get:
> >>
> >>> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
> >>> Input #0, truehd, from '../samples/truehd/atmos.thd':
> >>>    Duration: N/A, start: 0.000000, bitrate: N/A
> >>>    Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 7.1.4, s32 (24 bit)
> >>
> >> Which is unlikely to be correct. The file has 11 (or 12) objects, which is exported as 12 channels in an unspecified layout, and automatically assumed to be a 7.1.4 fixed layout.
> >>
> >
> > This is caused by `guess_input_channel_layout` (in `ffmpeg_demux.c`) which tries to assume a layout.
> > Would using `AV_CHANNEL_ORDER_CUSTOM` with all channels set to `AV_CHAN_UNKNOWN` (for unknown position, except LFE if present) be a better solution?
>
> Possibly, but it may make the stream undecodable unless you remap the
> channels (probably with a filter in the filterchain).
>
> Is there no better representation for the output? What are these 12
> channels the sample exports? 16 channels (as you say the MLP substream
> contains) would match Ambisonics 3rd order, but i assume that doesn't
> apply here, unless you should also be outputting something else.
>

It's object-based audio. Every extra "channel" represents an audio
object at any arbitrary position in space, as defined by separate
metadata, which you are then supposed to mix together for your final
speaker configuration.
Typically, the "bed" channels (eg. the base 7.1) will contain audio
that doesn't require much localization information, music, background
noises, and the objects will contain audio which is more relevant to
have full spatial localization. A mixer is then tasked with mixing the
objects for an ideal spatial representation, based on the spatial
metadata and knowledge of the physical speaker configuration.

We don't have a channel layout that would identify this sort of setup
as of yet, never mind a mixer that could actually deal with it, or even
exporting the metadata from the TrueHD stream, but baby steps I
suppose.

FWIW, taking all this into account, I fully agree that it should by
default output the 7.1 representation that everyone can actually
process, because the bed+objects representation is rather unexpected
and unhandleable at this time.

- Hendrik
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 20:35     ` James Almer
  2025-03-23 21:47       ` Hendrik Leppkes
@ 2025-03-23 21:50       ` Marton Balint
  1 sibling, 0 replies; 11+ messages in thread
From: Marton Balint @ 2025-03-23 21:50 UTC (permalink / raw)
  To: FFmpeg development discussions and patches



On Sun, 23 Mar 2025, James Almer wrote:

> On 3/23/2025 4:33 PM, Massimo Eynard wrote:
>>  On 23/03/2025 20:01, James Almer wrote:
>>>  On 3/22/2025 2:49 PM, Massimo Eynard wrote:
>>>>  This patch adds support for decoding the fourth MLP substream
>>>>  which contains the 16-channel presentation used for Atmos
>>>>  audio objects.
>>>>
>>>>  By default only the first three substreams are decoded
>>>>  unless the new extract_objects flag is enabled as the resulting
>>>>  presentation contains audio object feeds instead of classic
>>>>  loudspeaker feeds.
>>>>
>>>>  As this introduces interpolation of primitive matrices, precision
>>>>  has been increased to 2.18 fixed point. Therefore this requires
>>>>  DSP code upgrade which has been done for C and x86 implementations
>>>>  but not the ARM implementation.
>>>>
>>>>  Adds two FATE tests using existing atmos.thd sample to reflect
>>>>  changes.
>>>>
>>>>  Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>>>>  ---
>>>>     libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>>>     libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>>>     libavcodec/mlp.h                 |  10 +-
>>>>     libavcodec/mlp_parse.c           |  31 ++-
>>>>     libavcodec/mlp_parse.h           |   1 +
>>>>     libavcodec/mlp_parser.c          |  11 +-
>>>>     libavcodec/mlpdec.c              | 389
>>>>   +++++++++++++++++++++++++++----
>>>>     libavcodec/mlpdsp.c              |  50 +++-
>>>>     libavcodec/mlpdsp.h              |  25 ++
>>>>     libavcodec/x86/mlpdsp.asm        |  19 +-
>>>>     tests/fate/truehd.mak            |  10 +
>>>>     11 files changed, 476 insertions(+), 75 deletions(-)
>>>
>>>  With atmos.thd i get:
>>>
>>>>  [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
>>>>  Input #0, truehd, from '../samples/truehd/atmos.thd':
>>>>     Duration: N/A, start: 0.000000, bitrate: N/A
>>>>     Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz,
>>>>   7.1.4, s32 (24 bit)
>>>
>>>  Which is unlikely to be correct. The file has 11 (or 12) objects, which
>>>  is exported as 12 channels in an unspecified layout, and automatically
>>>  assumed to be a 7.1.4 fixed layout.
>>>
>>
>>  This is caused by `guess_input_channel_layout` (in `ffmpeg_demux.c`) which
>>  tries to assume a layout.

I wonder if the default value for guess_layout_max should be set to 8. An 
unspecified layout with more than 8 channels seems unlikely. (unless it is 
indeed unspecified/unknown)

>>  Would using `AV_CHANNEL_ORDER_CUSTOM` with all channels set to
>>  `AV_CHAN_UNKNOWN` (for unknown position, except LFE if present) be a
>>  better solution?
>

AV_CHANNEL_ORDER_CUSTOM with all channels being AV_CHAN_UNKNOWN is 
equivalent to AV_CHANNEL_ORDER_UNSPEC. So unless there is a channel which 
is not AV_CHAN_UNKNOWN, there is no reason to use AV_CHANNEL_ORDER_CUSTOM.

Regards,
Marton
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 21:47       ` Hendrik Leppkes
@ 2025-03-23 23:00         ` James Almer
  2025-03-24 19:07           ` Massimo Eynard
  0 siblings, 1 reply; 11+ messages in thread
From: James Almer @ 2025-03-23 23:00 UTC (permalink / raw)
  To: ffmpeg-devel


[-- Attachment #1.1.1: Type: text/plain, Size: 4353 bytes --]

On 3/23/2025 6:47 PM, Hendrik Leppkes wrote:
> On Sun, Mar 23, 2025 at 9:35 PM James Almer <jamrial@gmail.com> wrote:
>>
>> On 3/23/2025 4:33 PM, Massimo Eynard wrote:
>>> On 23/03/2025 20:01, James Almer wrote:
>>>> On 3/22/2025 2:49 PM, Massimo Eynard wrote:
>>>>> This patch adds support for decoding the fourth MLP substream
>>>>> which contains the 16-channel presentation used for Atmos
>>>>> audio objects.
>>>>>
>>>>> By default only the first three substreams are decoded
>>>>> unless the new extract_objects flag is enabled as the resulting
>>>>> presentation contains audio object feeds instead of classic
>>>>> loudspeaker feeds.
>>>>>
>>>>> As this introduces interpolation of primitive matrices, precision
>>>>> has been increased to 2.18 fixed point. Therefore this requires
>>>>> DSP code upgrade which has been done for C and x86 implementations
>>>>> but not the ARM implementation.
>>>>>
>>>>> Adds two FATE tests using existing atmos.thd sample to reflect
>>>>> changes.
>>>>>
>>>>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>>>>> ---
>>>>>     libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>>>>     libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>>>>     libavcodec/mlp.h                 |  10 +-
>>>>>     libavcodec/mlp_parse.c           |  31 ++-
>>>>>     libavcodec/mlp_parse.h           |   1 +
>>>>>     libavcodec/mlp_parser.c          |  11 +-
>>>>>     libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>>>>>     libavcodec/mlpdsp.c              |  50 +++-
>>>>>     libavcodec/mlpdsp.h              |  25 ++
>>>>>     libavcodec/x86/mlpdsp.asm        |  19 +-
>>>>>     tests/fate/truehd.mak            |  10 +
>>>>>     11 files changed, 476 insertions(+), 75 deletions(-)
>>>>
>>>> With atmos.thd i get:
>>>>
>>>>> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
>>>>> Input #0, truehd, from '../samples/truehd/atmos.thd':
>>>>>     Duration: N/A, start: 0.000000, bitrate: N/A
>>>>>     Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 7.1.4, s32 (24 bit)
>>>>
>>>> Which is unlikely to be correct. The file has 11 (or 12) objects, which is exported as 12 channels in an unspecified layout, and automatically assumed to be a 7.1.4 fixed layout.
>>>>
>>>
>>> This is caused by `guess_input_channel_layout` (in `ffmpeg_demux.c`) which tries to assume a layout.
>>> Would using `AV_CHANNEL_ORDER_CUSTOM` with all channels set to `AV_CHAN_UNKNOWN` (for unknown position, except LFE if present) be a better solution?
>>
>> Possibly, but it may make the stream undecodable unless you remap the
>> channels (probably with a filter in the filterchain).
>>
>> Is there no better representation for the output? What are these 12
>> channels the sample exports? 16 channels (as you say the MLP substream
>> contains) would match Ambisonics 3rd order, but i assume that doesn't
>> apply here, unless you should also be outputting something else.
>>
> 
> Its object-based audio. Every extra "channel" represents an audio
> object at any arbitrary position in space, as defined by separate
> metadata, which you are then supposed to mix together for your final
> speaker configuration.
> Typically, the "bed" channels (eg. the base 7.1) will contain audio
> that doesn't require much localization information, music, background
> noises, and the objects will contain audio which is more relevant to
> have full spatial localization. A mixer is then tasked based on the
> spatial metadata and knowledge of the physical speaker configuration
> to mix the objects for ideal spatial representation.
> 
> We don't have a channel layout that would identify this sort of setup
> as of yet, nevermind a mixer that could actually deal with it, or even
> exporting the metadata from the TrueHD stream, but baby steps I
> suppose.

So we'd need a new layout (or pseudo-channel) where you set arbitrary 
coordinates? Sort of like what Apple defined in 
https://developer.apple.com/documentation/coreaudiotypes/audio-channel-coordinates

> 
> FWIW, taking all this into account, I fully agree that it should by
> default output the 7.1 representation that everyone can actually
> process, because the bed+objects representation is rather unexpected
> and unhandleable at this time.
Agree.


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 23:00         ` James Almer
@ 2025-03-24 19:07           ` Massimo Eynard
  0 siblings, 0 replies; 11+ messages in thread
From: Massimo Eynard @ 2025-03-24 19:07 UTC (permalink / raw)
  To: ffmpeg-devel

On 24/03/2025 00:00, James Almer wrote:
> On 3/23/2025 6:47 PM, Hendrik Leppkes wrote:
>> On Sun, Mar 23, 2025 at 9:35 PM James Almer <jamrial@gmail.com> wrote:
>>>
>>> On 3/23/2025 4:33 PM, Massimo Eynard wrote:
>>>> On 23/03/2025 20:01, James Almer wrote:
>>>>> On 3/22/2025 2:49 PM, Massimo Eynard wrote:
>>>>>> This patch adds support for decoding the fourth MLP substream
>>>>>> which contains the 16-channel presentation used for Atmos
>>>>>> audio objects.
>>>>>>
>>>>>> By default only the first three substreams are decoded
>>>>>> unless the new extract_objects flag is enabled as the resulting
>>>>>> presentation contains audio object feeds instead of classic
>>>>>> loudspeaker feeds.
>>>>>>
>>>>>> As this introduces interpolation of primitive matrices, precision
>>>>>> has been increased to 2.18 fixed point. Therefore this requires
>>>>>> DSP code upgrade which has been done for C and x86 implementations
>>>>>> but not the ARM implementation.
>>>>>>
>>>>>> Adds two FATE tests using existing atmos.thd sample to reflect
>>>>>> changes.
>>>>>>
>>>>>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>>>>>> ---
>>>>>>     libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>>>>>     libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>>>>>     libavcodec/mlp.h                 |  10 +-
>>>>>>     libavcodec/mlp_parse.c           |  31 ++-
>>>>>>     libavcodec/mlp_parse.h           |   1 +
>>>>>>     libavcodec/mlp_parser.c          |  11 +-
>>>>>>     libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>>>>>>     libavcodec/mlpdsp.c              |  50 +++-
>>>>>>     libavcodec/mlpdsp.h              |  25 ++
>>>>>>     libavcodec/x86/mlpdsp.asm        |  19 +-
>>>>>>     tests/fate/truehd.mak            |  10 +
>>>>>>     11 files changed, 476 insertions(+), 75 deletions(-)
>>>>>
>>>>> With atmos.thd i get:
>>>>>
>>>>>> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
>>>>>> Input #0, truehd, from '../samples/truehd/atmos.thd':
>>>>>>     Duration: N/A, start: 0.000000, bitrate: N/A
>>>>>>     Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 7.1.4, s32 (24 bit)
>>>>>
>>>>> Which is unlikely to be correct. The file has 11 (or 12) objects, which is exported as 12 channels in an unspecified layout, and automatically assumed to be a 7.1.4 fixed layout.
>>>>>
>>>>
>>>> This is caused by `guess_input_channel_layout` (in `ffmpeg_demux.c`) which tries to assume a layout.
>>>> Would using `AV_CHANNEL_ORDER_CUSTOM` with all channels set to `AV_CHAN_UNKNOWN` (for unknown position, except LFE if present) be a better solution?
>>>
>>> Possibly, but it may make the stream undecodable unless you remap the
>>> channels (probably with a filter in the filterchain).
>>>
>>> Is there no better representation for the output? What are these 12
>>> channels the sample exports? 16 channels (as you say the MLP substream
>>> contains) would match Ambisonics 3rd order, but i assume that doesn't
>>> apply here, unless you should also be outputting something else.
>>>
>>
>> Its object-based audio. Every extra "channel" represents an audio
>> object at any arbitrary position in space, as defined by separate
>> metadata, which you are then supposed to mix together for your final
>> speaker configuration.
>> Typically, the "bed" channels (eg. the base 7.1) will contain audio
>> that doesn't require much localization information, music, background
>> noises, and the objects will contain audio which is more relevant to
>> have full spatial localization. A mixer is then tasked based on the
>> spatial metadata and knowledge of the physical speaker configuration
>> to mix the objects for ideal spatial representation.
>>
>> We don't have a channel layout that would identify this sort of setup
>> as of yet, nevermind a mixer that could actually deal with it, or even
>> exporting the metadata from the TrueHD stream, but baby steps I
>> suppose.
> 
> So we'd need a new layout (or pseudo-channel) where you set arbitrary coordinates? Sort of like what Apple defined in https://developer.apple.com/documentation/coreaudiotypes/audio-channel-coordinates
> 

That would be the best approach I guess. Atmos in TrueHD is the same as in E-AC-3 (except for the audio coding part of course) which is described in section 4 of ETSI TS 103 420.
In the specification, the audio "channels" for objects are called "audio object essences" which are supplied to a mixer/renderer alongside the metadata.
Section 4.4 describes the metadata interface.

However, the purpose of this patch is only to decode the essences. What should I do for now?

>>
>> FWIW, taking all this into account, I fully agree that it should by
>> default output the 7.1 representation that everyone can actually
>> process, because the bed+objects representation is rather unexpected
>> and unhandleable at this time.
> Agree.
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data
  2025-03-23 19:01 ` James Almer
  2025-03-23 19:33   ` Massimo Eynard
@ 2025-03-25 17:42   ` James Almer
  1 sibling, 0 replies; 11+ messages in thread
From: James Almer @ 2025-03-25 17:42 UTC (permalink / raw)
  To: ffmpeg-devel


[-- Attachment #1.1.1: Type: text/plain, Size: 2281 bytes --]



On 3/23/2025 4:01 PM, James Almer wrote:
> On 3/22/2025 2:49 PM, Massimo Eynard wrote:
>> This patch adds support for decoding the fourth MLP substream
>> which contains the 16-channel presentation used for Atmos
>> audio objects.
>>
>> By default only the first three substreams are decoded
>> unless the new extract_objects flag is enabled as the resulting
>> presentation contains audio object feeds instead of classic
>> loudspeaker feeds.
>>
>> As this introduces interpolation of primitive matrices, precision
>> has been increased to 2.18 fixed point. Therefore this requires
>> DSP code upgrade which has been done for C and x86 implementations
>> but not the ARM implementation.
>>
>> Adds two FATE tests using existing atmos.thd sample to reflect
>> changes.
>>
>> Signed-off-by: Massimo Eynard <eynard.massimo@gmail.com>
>> ---
>>   libavcodec/arm/mlpdsp_armv5te.S  |   2 +-
>>   libavcodec/arm/mlpdsp_init_arm.c |   3 +-
>>   libavcodec/mlp.h                 |  10 +-
>>   libavcodec/mlp_parse.c           |  31 ++-
>>   libavcodec/mlp_parse.h           |   1 +
>>   libavcodec/mlp_parser.c          |  11 +-
>>   libavcodec/mlpdec.c              | 389 +++++++++++++++++++++++++++----
>>   libavcodec/mlpdsp.c              |  50 +++-
>>   libavcodec/mlpdsp.h              |  25 ++
>>   libavcodec/x86/mlpdsp.asm        |  19 +-
>>   tests/fate/truehd.mak            |  10 +
>>   11 files changed, 476 insertions(+), 75 deletions(-)
> 
> With atmos.thd i get:
> 
>> [aist#0:0/truehd @ 00000209caf3ee00] Guessed Channel Layout: 7.1.4
>> Input #0, truehd, from '../samples/truehd/atmos.thd':
>>   Duration: N/A, start: 0.000000, bitrate: N/A
>>   Stream #0:0: Audio: truehd (Dolby TrueHD + Dolby Atmos), 48000 Hz, 
>> 7.1.4, s32 (24 bit)
> 
> Which is unlikely to be correct. The file has 11 (or 12) objects, which 
> is exported as 12 channels in an unspecified layout, and automatically 
> assumed to be a 7.1.4 fixed layout.

Actually, it may be ok to let the guess code select that layout. I see 
Apple defines kAudioChannelLayoutTag_Atmos_7_1_4 for 12 channel Atmos 
presentations.


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-03-25 17:42 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-22 17:49 [FFmpeg-devel] [PATCH] avcodec/mlpdec: Add decoding of object audio data Massimo Eynard
2025-03-23 17:31 ` Lynne
2025-03-23 18:58   ` Massimo Eynard
2025-03-23 19:01 ` James Almer
2025-03-23 19:33   ` Massimo Eynard
2025-03-23 20:35     ` James Almer
2025-03-23 21:47       ` Hendrik Leppkes
2025-03-23 23:00         ` James Almer
2025-03-24 19:07           ` Massimo Eynard
2025-03-23 21:50       ` Marton Balint
2025-03-25 17:42   ` James Almer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git