Re: [FFmpeg-devel] [PATCH] avformat/avcodec: Add DTS-UHD demuxer and parser, movenc support.

From: Roy Funderburk <royffmpeg@funderburk.us>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avformat/avcodec: Add DTS-UHD demuxer and parser, movenc support.
Date: Thu, 17 Aug 2023 14:47:55 -0700
Message-ID: <918f8aa0-854e-755c-85b0-5b4034da7fd4@funderburk.us> (raw)
In-Reply-To: <61fca341-57cc-abe5-225e-561f5e2a9b4b@funderburk.us>

[-- Attachment #1: Type: text/plain, Size: 36 bytes --]

Updated for master branch changes.


[-- Attachment #2: 0001-avcodec-Add-DTS-UHD-parser.eml --]
[-- Type: message/rfc822, Size: 42744 bytes --]

From: Roy Funderburk <royffmpeg@funderburk.us>
To: ffmpeg-devel@ffmpeg.org
Subject: [PATCH] avcodec: Add DTS-UHD parser.
Date: Sat, 15 Apr 2023 13:04:42 -0700

Parsing of DTS-UHD input files per ETSI TS 103 491 is added
as parser for codec id AV_CODEC_ID_DTSUHD.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 libavcodec/Makefile        |   1 +
 libavcodec/codec_desc.c    |   7 +
 libavcodec/codec_id.h      |   1 +
 libavcodec/dtsuhd_common.c | 982 +++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |  83 ++++
 libavcodec/dtsuhd_parser.c | 141 ++++++
 libavcodec/parsers.c       |   1 +
 libavcodec/version.h       |   2 +-
 8 files changed, 1217 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3c16b51462..583abd1f88 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1167,6 +1167,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 4406dd8318..e6af7f2e99 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3413,6 +3413,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("AC-4"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index a5a0cb8525..3e87aa1fe5 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -543,6 +543,7 @@ enum AVCodecID {
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
     AV_CODEC_ID_AC4,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..3d6b4ab4e0
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,982 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+#define DTSUHD_CRC_SEED 0xFFFF
+
+enum RepType { /* Values of the 3-bit rep_type field read in parse_object_metadata(). */
+    REP_TYPE_CH_MASK_BASED,         /* handled as channel-mask based */
+    REP_TYPE_MTRX2D_CH_MASK_BASED,  /* handled as channel-mask based */
+    REP_TYPE_MTRX3D_CH_MASK_BASED,  /* handled as channel-mask based */
+    REP_TYPE_BINAURAL,              /* channel-mask based, fixed mask table index 1 */
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,      /* sets the 3D object metadata flag */
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF, /* sets the 3D object metadata flag */
+};
+
+typedef struct MDObject {
+    int started;  /* Object seen since last reset. */
+    int pres_index;       /* Presentation index passed to parse_md01(). */
+    int rep_type;         /* enum RepType value read from the bitstream. */
+    int ch_activity_mask; /* Channel activity mask set by parse_ch_mask_params(). */
+} MDObject;
+
+typedef struct MD01 {
+    GetBitContext gb;     /* Bit reader over 'buf'. */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id;         /* Lookup key used by chunk_find_md01(). */
+    int object_list[256]; int object_list_count; /* Object ids listed by this chunk. */
+    int packets_acquired;      /* Static metadata packets copied into 'buf' so far. */
+    int static_md_extracted;   /* Set once the static metadata was parsed completely. */
+    int static_md_packets;     /* Number of packets making up the static metadata. */
+    int static_md_packet_size; /* Bytes per static metadata packet. */
+    int static_md_update_flag; /* When set, static metadata is parsed again. */
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */
+} MD01;
+
+typedef struct NAVI { /* One audio chunk entry of the navigation list. */
+    int bytes;   /* Payload size in bytes; 0 with present == 0 marks a reusable slot. */
+    int id;      /* Audio chunk id; 256 until one is read from the stream. */
+    int index;   /* Lookup key used by navi_find_index(). */
+    int present; /* Entry was referenced by the frame being parsed. */
+} NAVI;
+
+typedef struct UHDAudio { /* State of one audio presentation. */
+    int mask;       /* Bit mask over lower-indexed presentations (parse_aud_pres_params). */
+    int selectable; /* Non-zero when the presentation was flagged selectable. */
+} UHDAudio;
+
+typedef struct UHDChunk { /* One metadata chunk sized by parse_chunk_navi(). */
+    int crc_flag; /* Non-zero when parse_chunks() must verify the chunk CRC. */
+    int bytes;    /* Chunk size in bytes. */
+} UHDChunk;
+
+struct DTSUHD {
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    const AVCRC *crc;              /* CRC table obtained in av_dtsuhd_create(). */
+    GetBitContext gb;              /* Bit reader over 'data'. */
+    MD01 *md01; int md01_count;    /* MD01 chunk states, grown one entry at a time. */
+    NAVI *navi; int navi_alloc, navi_count; /* Audio chunk navigation list. */
+    UHDAudio audio[256];           /* Per audio presentation state. */
+    UHDChunk *chunk; int chunk_alloc, chunk_count; /* Metadata chunks of the current frame. */
+    int chunk_bytes;               /* Sum of all chunk sizes of the current frame. */
+    int clock_rate;                /* 32000, 44100 or 48000 once a sync frame was parsed. */
+    int frame_bytes;               /* ftoc_bytes + chunk_bytes. */
+    int frame_duration;            /* Base duration * (frame_duration_code + 1). */
+    int frame_duration_code;       /* 3-bit code read from the sync frame. */
+    int ftoc_bytes;                /* FTOC size in bytes. */
+    int major_version;             /* 2 for full channel mix, else stream value + 2. */
+    int num_audio_pres;            /* Number of audio presentations. */
+    int sample_rate;               /* clock_rate * (1 << sample_rate_mod). */
+    int sample_rate_mod;           /* 2-bit code read from the sync frame. */
+    unsigned full_channel_mix_flag:1;
+    unsigned interactive_obj_limits_present:1;
+    unsigned is_sync_frame:1;      /* Current frame started with DTSUHD_SYNCWORD. */
+    unsigned saw_sync:1;           /* A sync frame was seen on this handle. */
+};
+
+/* Read 'bits' bits from the MD01 buffer (if present), else from the frame buffer.
+   get_bits() only handles 1-25 bits but callers ask for up to 36: use get_bits64(). */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *gb = md01->buf ? &md01->gb : &h->gb;
+    return (int)get_bits64(gb, bits);
+}
+
+/* Read a variable-length unsigned value (function based on Table 5-2).
+
+   A prefix of one to three bits (0, 10, 110 or 111) selects entry 0..3 of
+   'table', which holds the width in bits of the value that follows.  Entries
+   4..6 are offsets: the offsets of every lower selection are summed and added
+   to the value read, which is how the specification's 'add' (extractAndAdd)
+   parameter is expressed here; a table with zero offsets disables it.
+   A selected width of zero yields 0 without reading any further bits.
+*/
+static int get_bits_var(GetBitContext *gb, const int table[])
+{
+    static const int prefix_len[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
+    static const int prefix_sel[8] = { 0, 0, 0, 0, 1, 1, 2, 3 };
+    const int prefix = show_bits(gb, 3); /* peek; only prefix_len bits are consumed */
+    const int sel = prefix_sel[prefix];
+    int offset = 0;
+
+    skip_bits(gb, prefix_len[prefix]);
+    if (!table[sel])
+        return 0;
+
+    /* Accumulate the offsets belonging to the selections below 'sel'. */
+    for (int k = 0; k < sel; k++)
+        offset += table[4 + k];
+
+    return get_bits_long(gb, table[sel]) + offset;
+}
+
+/* Implied by Table 6-2: append a zeroed MD01 entry; NULL (list left intact) on OOM. */
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *md01 = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+    if (!md01) /* old array untouched, so md01_count and chunk_reset() stay valid */
+        return NULL;
+    h->md01 = md01;
+    memset(md01 + h->md01_count, 0, sizeof(*md01));
+    md01[h->md01_count].chunk_id = id;
+    return md01 + h->md01_count++;
+}
+
+/* Return the MD01 chunk whose chunk_id equals 'id', or NULL if there is none. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int pos = 0;
+
+    for (; pos < h->md01_count; pos++)
+        if (h->md01[pos].chunk_id == id)
+            return &h->md01[pos];
+
+    return NULL;
+}
+
+/* Table 6-3: release every MD01 entry's static-data buffer, then the list itself. */
+static void chunk_reset(DTSUHD *h)
+{
+    int remaining = h->md01_count;
+
+    while (remaining-- > 0)
+        av_freep(&h->md01[remaining].buf);
+    h->md01_count = 0;
+    av_freep(&h->md01);
+}
+
+/* Pick the started object with the lowest selectable presentation index
+   from the first MD01 chunk that has one; NULL when nothing qualifies. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    for (int c = 0; c < h->md01_count; c++) {
+        MDObject *const list = h->md01[c].object;
+        MDObject *best = NULL;
+
+        for (int o = 0; o < 257; o++) {
+            MDObject *cand = list + o;
+            if (!cand->started || !h->audio[cand->pres_index].selectable)
+                continue;
+            if (!best || cand->pres_index < best->pres_index)
+                best = cand;
+        }
+        if (best)
+            return best;
+    }
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info; no-op when
+   'object' is NULL.  ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+*/
+static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        { 0x000400, 0x00060000, AV_CHAN_WIDE_LEFT | AV_CHAN_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT }, // NOTE(review): only entry with two activity bits (0x040000 expected?) - confirm against Table 7-28
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (object) {
+        /* OR together the masks of every map entry sharing a bit with the activity mask. */
+        for (i = 0; activity_map[i].activity_mask; i++) {
+            if (activity_map[i].activity_mask & object->ch_activity_mask) {
+                info->channel_mask |= activity_map[i].channel_mask;
+                info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+            }
+        }
+        info->channel_count = av_popcount(info->channel_mask); /* bits set in the ETSI mask */
+        info->rep_type = object->rep_type;
+    }
+}
+
+/* Fill 'info' with the values needed for the MP4 Sample Entry box.  The sample
+   size is always 16 bits.  The coding name (the SampleEntry sub-box name) is
+   'dtsx' unless the bitstream major version is above 2, in which case it is
+   'dtsy' and max_payload_code becomes 1 instead of 0.
+*/
+static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info)
+{
+    const int newer = h->major_version > 2;
+
+    memset(info, 0, sizeof(*info));
+    memcpy(info->coding_name, newer ? "dtsy" : "dtsx", 5);
+    extract_object_info(find_default_audio(h), info);
+    info->valid = 1;
+    info->sample_size = 16;
+    info->sample_rate = h->sample_rate;
+    info->sample_rate_mod = h->sample_rate_mod;
+    info->num_pres_code = h->num_audio_pres - 1;
+    info->max_payload_code = newer;
+    info->frame_duration_code = h->frame_duration_code;
+    info->decoder_profile_code = h->major_version - 2;
+    info->base_sample_freq_code = (h->sample_rate == 48000);
+}
+
+/* Table 6-17 p47: for each presentation below 'index' whose bit is set in
+   'mask', consume the optional explicit object list.  Always returns 0. */
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const int table[7] = { 4, 8, 16, 32, 16, 256, 65536 };
+    GetBitContext *gb = &h->gb;
+
+    for (int bit = 0; bit < index; bit++) {
+        if (!((mask >> bit) & 0x01))
+            continue;
+        /* Sync frames always carry the list; otherwise a flag bit announces it. */
+        if (h->is_sync_frame || get_bits1(gb))
+            get_bits_var(gb, table);
+    }
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45: audio presentation parameters.
+   Returns 0 on success, 1 on error or when the stream announces more
+   presentations than the 32-bit dependency mask can describe. */
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    static const int table[7] = { 0, 2, 4, 5, 1, 4, 16 };
+
+    if (h->is_sync_frame) {
+        h->num_audio_pres = h->full_channel_mix_flag ? 1 : get_bits_var(gb, table) + 1;
+        /* The table can encode up to 53 presentations, but each mask needs one
+           bit per lower presentation and 'mask' is a 32-bit int. */
+        if (h->num_audio_pres > 32) {
+            h->num_audio_pres = 0;
+            return 1;
+        }
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (int audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame)
+            h->audio[audio].selectable = h->full_channel_mix_flag || get_bits1(gb);
+
+        if (!h->audio[audio].selectable) {
+            h->audio[audio].mask = 0;
+            continue;
+        }
+
+        if (h->is_sync_frame) {
+            /* get_bits() stops at 25 bits; get_bits_long() accepts 0..32. */
+            unsigned read_mask = get_bits_long(gb, audio);
+            unsigned mask = 0;
+            for (int i = 0; read_mask; i++, read_mask >>= 1)
+                if (read_mask & 0x01)
+                    mask |= (unsigned)get_bits1(gb) << i;
+            h->audio[audio].mask = mask;
+        }
+
+        if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+            return 1;
+    }
+
+    return 0;
+}
+
+/* Table 6-12 p 40: short (3+3 bit) or long (6+6 bit) version field.  Only the
+   first half is kept, biased by 2, as major_version; the second is skipped. */
+static void decode_version(DTSUHD *h)
+{
+    const int width = get_bits1(&h->gb) ? 3 : 6;
+    const int major = get_bits(&h->gb, width);
+    skip_bits(&h->gb, width);
+    h->major_version = major + 2;
+}
+
+/* Table 6-12 p 40: stream parameters.  Returns 0 on success, 1 on CRC or bitstream error. */
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame; /* only non-sync full-mix frames skip the check */
+    if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes))
+        return 1; /* non-zero CRC result over the FTOC bytes */
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1);
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod);
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52: zero the size of every entry the current frame did not use. */
+static void navi_purge(DTSUHD *h)
+{
+    int slot = h->navi_count;
+
+    while (slot-- > 0)
+        if (h->navi[slot].present == 0)
+            h->navi[slot].bytes = 0;
+}
+
+/* Table 6-21 p50: wipe the entries in use and mark the list empty. */
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL)
+        memset(h->navi, 0, h->navi_count * sizeof(*h->navi));
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51: drop the 'present' mark from every entry in the list. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int slot = 0;
+
+    while (slot < h->navi_count)
+        h->navi[slot++].present = 0;
+}
+
+/* Table 6-23 p51.  Find the entry for 'desired_index', or claim a free or new
+   slot for it.  Returns 0 with the slot in *list_index, 1 if out of memory.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i; /* first unused slot, in case the index is not listed */
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            /* Grow via a temporary: on failure navi, navi_count, navi_alloc stay valid. */
+            int navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            NAVI *navi = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
+            if (!navi)
+                return 1;
+            h->navi = navi;
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+    return 0;
+}
+
+/* Table 6-20 p48: read the metadata chunk sizes and the audio chunk navigation
+   list, summing every payload size into h->chunk_bytes.
+   Returns 0 on success, 1 if out of memory. */
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id_present, index, list_index;
+    static const int table2468[7] = { 2, 4, 6, 8, 4, 16, 64 };
+    static const int table_audio_chunk_sizes[7] = { 9, 11, 13, 16, 512, 2048, 8192 };
+    static const int table_chunk_sizes[7] = { 6, 9, 12, 15, 64, 512, 4096 };
+
+    h->chunk_bytes = 0;
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, table2468);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        /* Grow via a temporary: a failed allocation leaves h->chunk valid. */
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        UHDChunk *chunk = av_realloc_array(h->chunk, chunk_alloc, sizeof(*h->chunk));
+        if (!chunk)
+            return 1;
+        h->chunk = chunk;
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, table2468);
+
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, table2468);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present)
+            h->navi[list_index].id = get_bits_var(gb, table2468);
+
+        bytes = get_bits_var(gb, table_audio_chunk_sizes);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.  Returns 1 if more object ids are listed than object_list[] holds. */
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    static const int table1[7] = { 3, 4, 6, 8, 8, 16, 64 };
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+    } else {
+        md01->object_list_count = get_bits_var(gb, table1); /* table1 can encode up to 343 */
+        if (md01->object_list_count > (int)FF_ARRAY_ELEMS(md01->object_list))
+            return 1; /* would write past the end of object_list[] */
+        for (int i = 0; i < md01->object_list_count; i++)
+            md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4);
+    }
+    return 0;
+}
+
+/* Table 7-9: discard one loudness set: 6 bits, then 2 (nominal) or 5 + 4 bits. */
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    const int tail_bits = nominal_flag ? 2 : 4;
+    get_bits_md01(h, md01, 6); /* rLoudness */
+    if (!nominal_flag)
+        get_bits_md01(h, md01, 5);
+    get_bits_md01(h, md01, tail_bits);
+}
+
+/* Table 7-8: consume the static metadata parameters.  With 'only_first' set,
+   stop after the loudness sets.  Always returns 0. */
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        get_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                get_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) /* smooth md present */
+            get_bits_md01(h, md01, 6 * 6);
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* Skip to the buffer end; the packet size counts bytes, the reader bits. */
+        i = md01->static_md_packets * md01->static_md_packet_size * 8 - get_bits_count(&md01->gb);
+        skip_bits(&md01->gb, i);
+    }
+    md01->static_md_extracted = 1;
+    return 0;
+}
+
+/* Table 7-7: collect the static metadata packets of an MD01 chunk into
+   md01->buf and parse them: the loudness part after the first packet, all of
+   it once every packet has arrived.  Returns 0 on success, 1 on error. */
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n, last;
+    static const int table1[7] = { 0, 6, 9, 12, 1, 64, 512 };
+    static const int table2[7] = { 5, 7, 9, 11, 32, 128, 512 };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, table1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, table2) + 3;
+        }
+
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            /* av_reallocp() frees the buffer on failure: forget its size first.
+               Zeroed padding keeps the bit reader from reading unowned memory. */
+            md01->buf_bytes = 0;
+            if (av_reallocp(&md01->buf, n + AV_INPUT_BUFFER_PADDING_SIZE))
+                return 1;
+            memset(md01->buf + n, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        md01->static_md_update_flag = md01->static_md_packets > 1 ? get_bits1(gb) : 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        /* Full parse once the last packet is in, partial parse after the first. */
+        last = md01->packets_acquired == md01->static_md_packets;
+        if ((last || md01->packets_acquired == 1) &&
+            (md01->static_md_update_flag || !md01->static_md_extracted) &&
+            parse_static_md_params(h, md01, !last))
+            return 1;
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  Returns 1 when the object should be parsed, 0 when it is
+   rejected (its render data is skipped).  Ids of 224 and above
+   (OBJGROUPIDSTART, Sec 7.8.7 p75) are accepted without reading a flag. */
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const int table[7] = { 8, 10, 12, 14, 256, 1024, 4096 };
+    GetBitContext *gb = &h->gb;
+    int suitable = object_id >= 224 || get_bits1(gb);
+
+    if (!suitable) {
+        skip_bits1(gb);
+        skip_bits(gb, get_bits_var(gb, table));
+    }
+    return suitable;
+}
+
+/* Table 7-26: pick a predefined mask (Table 7-27) or read an explicit one. */
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    static const int predefined[14] = {
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+    GetBitContext *gb = &h->gb;
+    int selector = 1; /* binaural objects carry no selector field */
+    if (object->rep_type != REP_TYPE_BINAURAL)
+        selector = get_bits(gb, 4);
+    switch (selector) {
+    case 14: object->ch_activity_mask = get_bits(gb, 16);      break;
+    case 15: object->ch_activity_mask = get_bits_long(gb, 32); break;
+    default: object->ch_activity_mask = predefined[selector];  break;
+    }
+}
+
+/* Table 7-22: read rep_type and, for channel-mask based objects, the activity mask.  Always returns 0. */
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const int table2[7] = { 1, 4, 4, 8, 2, 16, 16 };
+    static const int table3[7] = { 3, 3, 4, 8, 8, 8, 16 };
+
+    skip_bits(gb, object_id != 256); /* skips one bit unless object_id is 256 */
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                get_bits_var(gb, table2); /* value read and discarded */
+                get_bits_var(gb, table3); /* value read and discarded */
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag); /* flag is always 0 in this branch */
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4: parse one MD01 chunk for presentation 'pres_index'.  Returns 0 on success, 1 on error. */
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01)) /* flag bit gates the multi-frame metadata */
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    memset(md01->object, 0, sizeof(md01->object)); /* every object starts out as not 'started' */
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) {
+            skip_bits(gb, id != 256); /* skips one bit unless id is 256 */
+            start_flag = md01->object[id].started = 1;
+        }
+
+        if ((id < 224 || id > 255) && /* ids 224..255 get no object metadata parse */
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        break; /* only the first suitable object is examined */
+    }
+
+    return 0;
+}
+
+/* Table 6-2: walk the metadata chunks of the frame, parsing MD01 chunks
+   (id 1) and skipping all others.  Returns 0 on success, 1 on error. */
+static int parse_chunks(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    MD01 *md01;
+    const uint8_t *byte_start;
+    int bit_next, pres_index;
+    static const int table_aud_pres[7] = { 0, 2, 4, 4, 1, 4, 16 };
+    uint32_t id;
+
+    for (int i = 0; i < h->chunk_count; i++) {
+        bit_next = get_bits_count(gb) + h->chunk[i].bytes * 8;
+        byte_start = h->data + get_bits_count(gb) / 8;
+        if (h->chunk[i].crc_flag && av_crc(h->crc, DTSUHD_CRC_SEED, byte_start, h->chunk[i].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            pres_index = get_bits_var(gb, table_aud_pres);
+            if (pres_index > 255)
+                return 1;
+            md01 = chunk_find_md01(h, id);
+            if (md01 == NULL)
+                md01 = chunk_append_md01(h, id);
+            if (md01 == NULL)
+                return 1;
+            if (parse_md_chunk_list(h, md01))
+                return 1;
+            if (parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        /* Jump to the chunk end no matter how much of it was parsed. */
+        skip_bits(gb, bit_next - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/** Allocate parsing handle.  The parsing handle should be used to parse
+    one DTS:X Profile 2 Audio stream, then freed by calling av_dtsuhd_destroy().
+    Do not use the same parsing handle to parse multiple audio streams.
+
+  @return Parsing handle for use with other functions, or NULL on failure.
+*/
+DTSUHD *av_dtsuhd_create(void)
+{
+    DTSUHD *h = av_calloc(1, sizeof(DTSUHD));
+    if (h)
+        h->crc = av_crc_get_table(AV_CRC_16_CCITT); /* table used for the FTOC and chunk CRC checks */
+    return h;
+}
+
+/** Free all resources used by the parsing handle.
+
+  @param[in] h Handle allocated by av_dtsuhd_create, may be NULL
+*/
+void av_dtsuhd_destroy(DTSUHD *h)
+{
+    if (h) {
+        chunk_reset(h); /* frees the MD01 list and its buffers */
+        av_freep(&h->chunk);
+        av_freep(&h->navi);
+        av_freep(&h); /* only clears the local copy of the pointer */
+    }
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+  @param[in] h Handle allocated by av_dtsuhd_create
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @param[out] di Results of descriptor parsing (sync frames only), may be NULL
+  @return DTSUHD_OK on success, another DTSUHDStatus value on error
+*/
+int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                    DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
+{
+    GetBitContext *gb;
+    int fraction = 1;
+    int syncword;
+    static const int table_payload[7] = { 5, 8, 10, 12, 32, 256, 1024 };
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */
+
+    gb = &h->gb;
+    if (data_bytes > INT_MAX / 8 || init_get_bits(gb, data, data_bytes * 8) < 0)
+        return DTSUHD_INVALID_FRAME; /* Buffer too large for the bit reader */
+    h->data = data;
+    h->data_bytes = data_bytes;
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    h->saw_sync |= h->is_sync_frame;
+    if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD))
+        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync frame. */
+
+    h->ftoc_bytes = get_bits_var(gb, table_payload) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire FTOC */
+
+    if (parse_stream_params(h))
+        return DTSUHD_INVALID_FRAME;
+    if (parse_aud_pres_params(h))
+        return DTSUHD_INVALID_FRAME;
+    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    if (di && h->is_sync_frame) {
+        /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+           following the FTOC CRC.
+        */
+        skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb));
+        if (parse_chunks(h))
+            return DTSUHD_INVALID_FRAME;
+        update_descriptor(h, di);
+    }
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (int i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].present) {
+            if (h->navi[i].id == 3)
+                fraction = 2;
+            else if (h->navi[i].id == 4)
+                fraction = 4;
+        }
+    }
+
+    if (fi) {
+        /* saw_sync is set before the sync frame is validated, so clock_rate can
+           still be 0 here if that frame was rejected: never divide by it. */
+        if (!h->clock_rate)
+            return DTSUHD_NOSYNC;
+        fi->sync = h->is_sync_frame;
+        fi->frame_bytes = h->frame_bytes;
+        fi->sample_rate = h->sample_rate;
+        fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction);
+    }
+    return DTSUHD_OK;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that payload.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload, may be NULL
+  @return STRMDATA payload offset, 0 if the buffer is not a DTSHDHDR file or
+          holds no STRMDATA header, AVERROR_INVALIDDATA on a bad chunk size
+*/
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    int pos = 0; /* offset of the chunk header being examined */
+
+    if (data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+
+    while (data_size - pos >= DTSUHD_CHUNK_HEADER) {
+        const uint8_t *data = data_start + pos;
+        uint64_t chunk_size = AV_RB64(data + 8);
+        if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61))
+            return AVERROR_INVALIDDATA;
+        if (!memcmp(data, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return pos + DTSUHD_CHUNK_HEADER;
+        }
+        /* A chunk reaching past the buffer ends the search; never step out of bounds. */
+        if (chunk_size > (uint64_t)(data_size - pos - DTSUHD_CHUNK_HEADER))
+            break;
+        pos += (int)chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..10280cd203
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,83 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from av_dtsuhd_frame */
+enum DTSUHDStatus {
+    DTSUHD_OK,
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Return stream information from an audio frame parsed by av_dtsuhd_frame. */
+typedef struct DTSUHDDescriptorInfo {
+    unsigned valid:1; /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code;
+    int channel_count;
+    int decoder_profile_code;
+    int frame_duration_code; /* The demuxer derives frame_size as 512 << code. */
+    int max_payload_code;
+    int num_pres_code;
+    int rep_type;
+    int sample_rate;
+    int sample_rate_mod;
+    int sample_size;
+    int channel_mask; /* Mask the demuxer stores as-is in the udts box. */
+    uint64_t ffmpeg_channel_mask; /* Mask the demuxer assigns to ch_layout.u.mask. */
+} DTSUHDDescriptorInfo;
+
+/* Return frame information from an audio frame parsed by av_dtsuhd_frame. */
+typedef struct DTSUHDFrameInfo {
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    unsigned sync:1;  /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+struct DTSUHD *av_dtsuhd_create(void);
+void av_dtsuhd_destroy(DTSUHD*);
+int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                    DTSUHDFrameInfo*, DTSUHDDescriptorInfo*);
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                               size_t *strmdata_size);
+
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    return syncword == DTSUHD_NONSYNCWORD || syncword == DTSUHD_SYNCWORD; /* Nonzero for either frame marker. */
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..e8058e1701
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd; // Frame parser state obtained from av_dtsuhd_create().
+    int buf_offset; // Start of the not-yet-returned data within buf.
+    int buf_bytes; // Number of bytes currently stored in buf.
+    int frame_bytes; // Bytes handed out by the last parse call; skipped on the next one.
+    uint8_t *buf; // Contiguous copy of the input, DTSUHD_BUFFER_SIZE bytes plus padding.
+} DTSUHDParseContext;
+
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    pc->dtsuhd = pc->buf ? av_dtsuhd_create() : NULL;
+    if (!pc->dtsuhd) // parser_close is not run after a failed init, so free here.
+        av_freep(&pc->buf);
+
+    return pc->dtsuhd ? 0 : AVERROR(ENOMEM);
+}
+
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    av_dtsuhd_destroy(pc->dtsuhd); // Release what parser_init allocated.
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+    ff_parse_close(s);
+}
+
+// Keep data in contiguous buffer as required by av_dtsuhd_frame.  Returns nonzero
+// when input was consumed but less than one maximum-size frame is buffered.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    pc->buf_offset += pc->frame_bytes; // Step past what the previous call handed out.
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        pc->buf_bytes -= pc->buf_offset; // Only the bytes past buf_offset are still valid.
+        memmove(pc->buf, pc->buf + pc->buf_offset, pc->buf_bytes);
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
+        *poutbuf = NULL; // Not enough data buffered yet, ask for more input.
+        *poutbuf_size = 0;
+        return input_consumed;
+    }
+
+    switch (av_dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        buf_size = FFMIN(1, buf_size); // A parser may not return AVERROR: drop one byte and resync.
+        /* fallthrough */
+    case DTSUHD_INCOMPLETE:
+        pc->frame_bytes = buf_size; // Bytes to discard on the next call; nothing is output now.
+        buf = NULL;
+        buf_size = 0;
+        break;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return input_consumed;
+}
+
+const AVCodecParser ff_dtsuhd_parser = { // const, matching the extern declaration in parsers.c
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index 285f81a901..6030a68e73 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 728ab8839d..e0fe2eb7b8 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  23
+#define LIBAVCODEC_VERSION_MINOR  24
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
-- 
2.17.1

[-- Attachment #3: 0001-avformat-Add-DTS-UHD-demuxer-and-movenc-support.eml --]
[-- Type: message/rfc822, Size: 14483 bytes --]

From: Roy Funderburk <royffmpeg@funderburk.us>
To: ffmpeg-devel@ffmpeg.org
Subject: [PATCH] avformat: Add DTS-UHD demuxer and movenc support.
Date: Sat, 15 Apr 2023 13:04:42 -0700

Demuxing of DTS-UHD input files per ETSI TS 102 114 is added as
demuxer "dtsuhd".  movenc supports DTS-UHD audio track.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 Changelog                 |   2 +-
 configure                 |   1 +
 doc/general_contents.texi |   1 +
 libavformat/Makefile      |   1 +
 libavformat/allformats.c  |   1 +
 libavformat/dtshddec.c    |   2 +-
 libavformat/dtsuhddec.c   | 216 ++++++++++++++++++++++++++++++++++++++
 libavformat/movenc.c      |  32 ++++++
 libavformat/version.h     |   2 +-
 9 files changed, 255 insertions(+), 3 deletions(-)
 create mode 100644 libavformat/dtsuhddec.c

diff --git a/Changelog b/Changelog
index c010e86159..625fa2da39 100644
--- a/Changelog
+++ b/Changelog
@@ -30,7 +30,7 @@ version <next>:
 - support for the P_SKIP hinting to speed up libx264 encoding
 - Support HEVC,VP9,AV1 codec in enhanced flv format
 - apsnr and asisdr audio filters
-
+- DTS-UHD demuxer
 
 version 6.0:
 - Radiance HDR image support
diff --git a/configure b/configure
index ff6a5c3600..bc6d315e88 100755
--- a/configure
+++ b/configure
@@ -3493,6 +3493,7 @@ dash_demuxer_deps="libxml2"
 dirac_demuxer_select="dirac_parser"
 dts_demuxer_select="dca_parser"
 dtshd_demuxer_select="dca_parser"
+dtsuhd_demuxer_select="dtsuhd_parser"
 dv_demuxer_select="dvprofile"
 dv_muxer_select="dvprofile"
 dxa_demuxer_select="riffdec"
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 8ac121dee1..a450285459 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -597,6 +597,7 @@ library:
 @item raw DNxHD                 @tab X @tab X
 @item raw DTS                   @tab X @tab X
 @item raw DTS-HD                @tab   @tab X
+@item raw DTS-UHD               @tab   @tab X
 @item raw E-AC-3                @tab X @tab X
 @item raw FLAC                  @tab X @tab X
 @item raw GSM                   @tab   @tab X
diff --git a/libavformat/Makefile b/libavformat/Makefile
index bd78c206b9..fe47f5f0ba 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -188,6 +188,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
 OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
 OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
 OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
+OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
 OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
 OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 6324952bd2..1637eeeebc 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -146,6 +146,7 @@ extern const AVInputFormat  ff_dss_demuxer;
 extern const AVInputFormat  ff_dts_demuxer;
 extern const FFOutputFormat ff_dts_muxer;
 extern const AVInputFormat  ff_dtshd_demuxer;
+extern const AVInputFormat  ff_dtsuhd_demuxer;
 extern const AVInputFormat  ff_dv_demuxer;
 extern const FFOutputFormat ff_dv_muxer;
 extern const AVInputFormat  ff_dvbsub_demuxer;
diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
index a3dea0668f..6e9e78a335 100644
--- a/libavformat/dtshddec.c
+++ b/libavformat/dtshddec.c
@@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext {
 static int dtshd_probe(const AVProbeData *p)
 {
     if (AV_RB64(p->buf) == DTSHDHDR)
-        return AVPROBE_SCORE_MAX;
+        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have this signature.
     return 0;
 }
 
diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
new file mode 100644
index 0000000000..d840c0a033
--- /dev/null
+++ b/libavformat/dtsuhddec.c
@@ -0,0 +1,216 @@
+/*
+ * DTS-UHD audio demuxer
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Report DTS-UHD audio stream configuration and extract raw packet data.
+ */
+
+#include "internal.h"
+#include "libavcodec/dtsuhd_common.h"
+#include "libavcodec/put_bits.h"
+#include "libavutil/intreadwrite.h"
+
+#define DTSUHD_BUFFER_SIZE (1024 * 1024)
+
+typedef struct DTSUHDDemuxContext {
+    size_t data_end; // File offset at which read_packet stops delivering data.
+    struct DTSUHD *dtsuhd; // Frame parser state obtained from av_dtsuhd_create().
+    uint8_t *buf; // DTSUHD_BUFFER_SIZE bytes filled by the first read in read_header.
+} DTSUHDDemuxContext;
+
+static int probe(const AVProbeData *p)
+{
+    int offset = av_dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL); // 0 when no DTSHDHDR/STRMDATA chunk was found
+    int score = 0;
+    struct DTSUHD *h = av_dtsuhd_create();
+
+    if (h && offset >= 0) { // A negative offset means a chunk header was rejected.
+        for (; offset + 4 < p->buf_size; offset++) {
+            if (dtsuhd_is_syncword(AV_RB32(p->buf + offset))) {
+                if (DTSUHD_OK == av_dtsuhd_frame(h, p->buf + offset, p->buf_size - offset, NULL, NULL)) {
+                    score = AVPROBE_SCORE_MAX - 3; // The first frame that parses cleanly decides the score.
+                    break;
+                }
+            }
+        }
+    }
+
+    av_dtsuhd_destroy(h);
+    return score;
+}
+
+static av_cold int read_close(AVFormatContext *s)
+{
+    DTSUHDDemuxContext *dtsxs = s->priv_data;
+
+    av_freep(&dtsxs->buf); // Release what read_header allocated.
+    av_dtsuhd_destroy(dtsxs->dtsuhd);
+    dtsxs->dtsuhd = NULL;
+
+    return 0;
+}
+
+static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
+{
+    while (data_start + 4 < DTSUHD_BUFFER_SIZE && // NOTE(review): bounded by buffer capacity, not by the bytes actually read
+        !dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + data_start))) {
+        data_start++;
+    }
+
+    return data_start; // Offset of the first syncword, or DTSUHD_BUFFER_SIZE - 4 if none was seen.
+}
+
+static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo *di)
+{
+    PutBitContext pbc;
+    int ret;
+    int size;
+    uint8_t udts[32]; // Scratch space in which the udts box is serialized.
+
+    init_put_bits(&pbc, udts, sizeof(udts));
+    put_bits32(&pbc, 0); // udts box size placeholder, patched below once known
+    put_bits32(&pbc, AV_RB32("udts")); // udts box signature
+    put_bits(&pbc, 6, di->decoder_profile_code);
+    put_bits(&pbc, 2, di->frame_duration_code);
+    put_bits(&pbc, 3, di->max_payload_code);
+    put_bits(&pbc, 5, di->num_pres_code);
+    put_bits32(&pbc,  di->channel_mask);
+    put_bits(&pbc, 1, di->base_sample_freq_code);
+    put_bits(&pbc, 2, di->sample_rate_mod);
+    put_bits(&pbc, 3, di->rep_type);
+    put_bits(&pbc, 3, 0);
+    put_bits(&pbc, 1, 0);
+    put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for each presentation.
+    flush_put_bits(&pbc); // byte align
+    size = put_bytes_output(&pbc);
+    AV_WB32(udts, size); // Overwrite the placeholder with the real box size.
+
+    ret = ff_alloc_extradata(par, size);
+    if (ret < 0)
+        return ret;
+
+    memcpy(par->extradata, udts, size); // The serialized box becomes the stream extradata.
+
+    return 0;
+}
+
+static int read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    AVStream *st = avformat_new_stream(s, NULL);
+    DTSUHDDemuxContext *dtsuhd = s->priv_data;
+    DTSUHDDescriptorInfo di;
+    DTSUHDFrameInfo fi;
+    int buf_bytes;
+    int ret = DTSUHD_INVALID_FRAME;
+    int data_start;
+
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
+        return AVERROR(EIO);
+
+    dtsuhd->buf = av_mallocz(DTSUHD_BUFFER_SIZE); // Zeroed: find_first_syncword scans the full capacity.
+    dtsuhd->dtsuhd = av_dtsuhd_create();
+    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st) // NOTE(review): error returns rely on read_close for cleanup - confirm it runs
+        return AVERROR(ENOMEM);
+
+    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
+    if (buf_bytes < 0)
+        return buf_bytes;
+
+    data_start = av_dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, &dtsuhd->data_end);
+    if (data_start < 0)
+        return data_start;
+
+    dtsuhd->data_end += data_start;
+    if (data_start == 0)
+        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, decode frames to end of file.
+
+    data_start = find_first_syncword(dtsuhd, data_start);
+    if (avio_seek(pb, data_start, SEEK_SET) < 0)
+        return AVERROR(EINVAL);
+
+    if (data_start < buf_bytes) // Otherwise no syncword lies within the bytes read; ret stays DTSUHD_INVALID_FRAME.
+        ret = av_dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start, buf_bytes - data_start, &fi, &di);
+    if (ret != DTSUHD_OK || !di.valid) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ffstream(st)->need_parsing          = AVSTREAM_PARSE_FULL_RAW;
+    st->codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id              = s->iformat->raw_codec_id;
+    st->codecpar->ch_layout.order       = AV_CHANNEL_ORDER_NATIVE;
+    st->codecpar->ch_layout.nb_channels = di.channel_count;
+    st->codecpar->ch_layout.u.mask      = di.ffmpeg_channel_mask;
+    st->codecpar->codec_tag             = AV_RL32(di.coding_name);
+    st->codecpar->frame_size            = 512 << di.frame_duration_code;
+    st->codecpar->sample_rate           = di.sample_rate;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    st->codecpar->channels       = di.channel_count;
+    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ret = write_extradata(st->codecpar, &di); // The udts box built from the descriptor becomes extradata.
+    if (ret < 0)
+        return ret;
+
+    if (st->codecpar->sample_rate)
+        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    return 0;
+}
+
+static int read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    DTSUHDDemuxContext *dtsuhd = s->priv_data;
+    int64_t size, left;
+    int ret;
+
+    left = dtsuhd->data_end - avio_tell(s->pb); // Bytes remaining before data_end.
+    size = FFMIN(left, DTSUHD_MAX_FRAME_SIZE); // At most DTSUHD_MAX_FRAME_SIZE bytes per packet.
+    if (size <= 0)
+        return AVERROR_EOF;
+
+    ret = av_get_packet(s->pb, pkt, size);
+    if (ret < 0)
+        return ret;
+
+    pkt->stream_index = 0;
+
+    return ret; // NOTE(review): returns av_get_packet's non-negative result rather than 0.
+}
+
+const AVInputFormat ff_dtsuhd_demuxer = { // const, matching the extern declaration in allformats.c
+    .name           = "dtsuhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
+    .priv_data_size = sizeof(DTSUHDDemuxContext),
+    .read_probe     = probe,
+    .read_header    = read_header,
+    .read_packet    = read_packet,
+    .read_close     = read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "dtsx",
+    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
+};
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 7ef6cef46a..f382a00c3f 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -763,6 +763,24 @@ static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
     return update_size(pb, pos);
 }
 
+static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
+{
+    if (track->vos_len < 12) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before DTS-UHD packets."
+               " Set the delay_moov flag to fix this.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Write vos_data only if it is a udts box; otherwise write nothing. */
+    if (memcmp(track->vos_data + 4, "udts", 4) == 0) {
+        avio_write(pb, track->vos_data, track->vos_len);
+        return track->vos_len;
+    }
+
+    return 0;
+}
+
 static int mov_pcm_le_gt16(enum AVCodecID codec_id)
 {
     return codec_id == AV_CODEC_ID_PCM_S24LE ||
@@ -1373,6 +1391,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
         ret = mov_write_dops_tag(s, pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
         ret = mov_write_dmlp_tag(s, pb, track);
+    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
+        ret = mov_write_udts_tag(pb, track);
     else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
         if (track->par->ch_layout.nb_channels > 1)
             ret = mov_write_chnl_tag(s, pb, track);
@@ -2817,6 +2837,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
          track->par->codec_id == AV_CODEC_ID_TRUEHD ||
          track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
+         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
         track->has_keyframes && track->has_keyframes < track->entry)
         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
@@ -5712,6 +5733,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
     }
 }
 
+static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
+{
+    if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) { /* DTS-UHD sync frame syncword */
+        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; /* Sync frames become sync samples. */
+        trk->has_keyframes++;
+    }
+}
+
 static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
 {
     int length;
@@ -6383,6 +6412,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
         mov_parse_vc1_frame(pkt, trk);
     } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
         mov_parse_truehd_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
+        mov_parse_dtsuhd_frame(pkt, trk);
     } else if (pkt->flags & AV_PKT_FLAG_KEY) {
         if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
             trk->entry > 0) { // force sync sample for the first key frame
@@ -7841,6 +7872,7 @@ static const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
     { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
     { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
+    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
     { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
     { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
     { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
diff --git a/libavformat/version.h b/libavformat/version.h
index 979952183c..1055753772 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@
 
 #include "version_major.h"
 
-#define LIBAVFORMAT_VERSION_MINOR  10
+#define LIBAVFORMAT_VERSION_MINOR  11
 #define LIBAVFORMAT_VERSION_MICRO 100
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
-- 
2.17.1


[-- Attachment #4: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".