From: Roy Funderburk <royffmpeg@funderburk.us> To: ffmpeg-devel@ffmpeg.org Subject: Re: [FFmpeg-devel] [PATCH] avformat/avcodec: Add DTS-UHD demuxer and parser, movenc support. Date: Thu, 17 Aug 2023 14:47:55 -0700 Message-ID: <918f8aa0-854e-755c-85b0-5b4034da7fd4@funderburk.us> (raw) In-Reply-To: <61fca341-57cc-abe5-225e-561f5e2a9b4b@funderburk.us> [-- Attachment #1: Type: text/plain, Size: 36 bytes --] Updated for master branch changes. [-- Attachment #2: 0001-avcodec-Add-DTS-UHD-parser.eml --] [-- Type: message/rfc822, Size: 42744 bytes --] From: Roy Funderburk <royffmpeg@funderburk.us> To: ffmpeg-devel@ffmpeg.org Subject: [PATCH] avcodec: Add DTS-UHD parser. Date: Sat, 15 Apr 2023 13:04:42 -0700 Parsing of DTS-UHD input files per ETSI TS 103 491 is added as parser for codec id AV_CODEC_ID_DTSUHD. Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com> --- libavcodec/Makefile | 1 + libavcodec/codec_desc.c | 7 + libavcodec/codec_id.h | 1 + libavcodec/dtsuhd_common.c | 982 +++++++++++++++++++++++++++++++++++++ libavcodec/dtsuhd_common.h | 83 ++++ libavcodec/dtsuhd_parser.c | 141 ++++++ libavcodec/parsers.c | 1 + libavcodec/version.h | 2 +- 8 files changed, 1217 insertions(+), 1 deletion(-) create mode 100644 libavcodec/dtsuhd_common.c create mode 100644 libavcodec/dtsuhd_common.h create mode 100644 libavcodec/dtsuhd_parser.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3c16b51462..583abd1f88 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1167,6 +1167,7 @@ OBJS-$(CONFIG_DIRAC_PARSER) += dirac_parser.o OBJS-$(CONFIG_DNXHD_PARSER) += dnxhd_parser.o dnxhddata.o OBJS-$(CONFIG_DOLBY_E_PARSER) += dolby_e_parser.o dolby_e_parse.o OBJS-$(CONFIG_DPX_PARSER) += dpx_parser.o +OBJS-$(CONFIG_DTSUHD_PARSER) += dtsuhd_parser.o dtsuhd_common.o OBJS-$(CONFIG_DVAUDIO_PARSER) += dvaudio_parser.o OBJS-$(CONFIG_DVBSUB_PARSER) += dvbsub_parser.o OBJS-$(CONFIG_DVD_NAV_PARSER) += dvd_nav_parser.o diff --git a/libavcodec/codec_desc.c 
b/libavcodec/codec_desc.c index 4406dd8318..e6af7f2e99 100644 --- a/libavcodec/codec_desc.c +++ b/libavcodec/codec_desc.c @@ -3413,6 +3413,13 @@ static const AVCodecDescriptor codec_descriptors[] = { .long_name = NULL_IF_CONFIG_SMALL("AC-4"), .props = AV_CODEC_PROP_LOSSY, }, + { + .id = AV_CODEC_ID_DTSUHD, + .type = AVMEDIA_TYPE_AUDIO, + .name = "dtsuhd", + .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"), + .props = AV_CODEC_PROP_LOSSY, + }, /* subtitle codecs */ { diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h index a5a0cb8525..3e87aa1fe5 100644 --- a/libavcodec/codec_id.h +++ b/libavcodec/codec_id.h @@ -543,6 +543,7 @@ enum AVCodecID { AV_CODEC_ID_WAVARC, AV_CODEC_ID_RKA, AV_CODEC_ID_AC4, + AV_CODEC_ID_DTSUHD, /* subtitle codecs */ AV_CODEC_ID_FIRST_SUBTITLE = 0x17000, ///< A dummy ID pointing at the start of subtitle codecs. diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c new file mode 100644 index 0000000000..3d6b4ab4e0 --- /dev/null +++ b/libavcodec/dtsuhd_common.c @@ -0,0 +1,982 @@ +/* + * DTS-UHD common audio frame parsing code + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Parse DTS-UHD audio frame headers, report frame sizes and configuration. + * Specification: ETSI TS 103 491 V1.2.1 + */ + +#include <string.h> + +#include "dtsuhd_common.h" +#include "get_bits.h" +#include "libavutil/channel_layout.h" +#include "libavutil/crc.h" + +#define DTSUHD_ALLOC_INCREMENT 16 +#define DTSUHD_CHUNK_HEADER 16 +#define DTSUHD_CRC_SEED 0xFFFF + +enum RepType { + REP_TYPE_CH_MASK_BASED, + REP_TYPE_MTRX2D_CH_MASK_BASED, + REP_TYPE_MTRX3D_CH_MASK_BASED, + REP_TYPE_BINAURAL, + REP_TYPE_AMBISONIC, + REP_TYPE_AUDIO_TRACKS, + REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF, + REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF, +}; + +typedef struct MDObject { + int started; /* Object seen since last reset. */ + int pres_index; + int rep_type; + int ch_activity_mask; +} MDObject; + +typedef struct MD01 { + GetBitContext gb; + MDObject object[257]; /* object id max value is 256 */ + int chunk_id; + int object_list[256]; int object_list_count; + int packets_acquired; + int static_md_extracted; + int static_md_packets; + int static_md_packet_size; + int static_md_update_flag; + uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */ +} MD01; + +typedef struct NAVI { + int bytes; + int id; + int index; + int present; +} NAVI; + +typedef struct UHDAudio { + int mask; + int selectable; +} UHDAudio; + +typedef struct UHDChunk { + int crc_flag; + int bytes; +} UHDChunk; + +struct DTSUHD { + const uint8_t *data; int data_bytes; /* Original audio frame buffer. 
*/ + const AVCRC *crc; + GetBitContext gb; + MD01 *md01; int md01_count; + NAVI *navi; int navi_alloc, navi_count; + UHDAudio audio[256]; + UHDChunk *chunk; int chunk_alloc, chunk_count; + int chunk_bytes; + int clock_rate; + int frame_bytes; + int frame_duration; + int frame_duration_code; + int ftoc_bytes; + int major_version; + int num_audio_pres; + int sample_rate; + int sample_rate_mod; + unsigned full_channel_mix_flag:1; + unsigned interactive_obj_limits_present:1; + unsigned is_sync_frame:1; + unsigned saw_sync:1; +}; + +/* Read from the MD01 buffer (if present), falling back to the frame buffer */ +static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits) +{ + if (md01->buf) + return get_bits(&md01->gb, bits); + return get_bits(&h->gb, bits); +} + +/* In the specification, the pseudo code defaults the 'add' parameter to true. + Table 7-30 shows passing an explicit false, most other calls do not + pass the extractAndAdd parameter. In this code, the add parameter is + applied to the input table itself, the last three entries being the bit + shifts of the first four entries (which would be zero if add was not set). + + Function based on code in Table 5-2 +*/ +static int get_bits_var(GetBitContext *gb, const int table[]) +{ + static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 }; + static const int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 }; + int code = show_bits(gb, 3); /* value range is [0, 7] */ + int i; + int index = index_table[code]; + int value = 0; + + skip_bits(gb, bits_used[code]); + if (table[index] == 0) + return 0; + + /* The 'code' read from the bit context determines which additional values to add. 
*/ + for (i = 0; i < index; i++) + value += table[4 + i]; + + return get_bits_long(gb, table[index]) + value; +} + +/* Implied by Table 6-2, MD01 chunk objects appended in for loop */ +static MD01 *chunk_append_md01(DTSUHD *h, int id) +{ + int md01_alloc = h->md01_count + 1; + if (av_reallocp_array(&h->md01, md01_alloc, sizeof(*h->md01))) + return NULL; + + memset(h->md01 + h->md01_count, 0, sizeof(*h->md01)); + h->md01[h->md01_count].chunk_id = id; + return h->md01 + h->md01_count++; +} + +/* Return existing MD01 chunk based on chunkID */ +static MD01 *chunk_find_md01(DTSUHD *h, int id) +{ + int i; + + for (i = 0; i < h->md01_count; i++) + if (id == h->md01[i].chunk_id) + return h->md01 + i; + + return NULL; +} + +/* Table 6-3 */ +static void chunk_reset(DTSUHD *h) +{ + int i; + + for (i = 0; i < h->md01_count; i++) + av_freep(&h->md01[i].buf); + av_freep(&h->md01); + h->md01_count = 0; +} + +static MDObject *find_default_audio(DTSUHD *h) +{ + MDObject *object; + int i, j; + int obj_index = -1; + + for (i = 0; i < h->md01_count; i++) { + for (j = 0; j < 257; j++) { + object = h->md01[i].object + j; + if (object->started && h->audio[object->pres_index].selectable) { + if (obj_index < 0 || (object->pres_index < h->md01[i].object[obj_index].pres_index)) + obj_index = j; + } + } + if (obj_index >= 0) + return h->md01[i].object + obj_index; + } + + return NULL; +} + +/* Save channel mask, count, and rep type to descriptor info. + ETSI TS 103 491 Table 7-28 channel activity mask bits + mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4. Convert activity mask and + representation type to channel mask and channel counts. 
+*/ +static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info) +{ + int i; + static const struct { + uint32_t activity_mask; + uint32_t channel_mask; // Mask as defined by ETSI TS 103 491 + uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg + } activity_map[] = { + // act mask | chan mask | ffmpeg channel mask + { 0x000001, 0x00000001, AV_CH_FRONT_CENTER }, + { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT }, + { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT }, + { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY }, + { 0x000010, 0x00000040, AV_CH_BACK_CENTER }, + { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT }, + { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT }, + { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER }, + { 0x000100, 0x00080000, AV_CH_TOP_CENTER }, + { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER }, + { 0x000400, 0x00060000, AV_CHAN_WIDE_LEFT | AV_CHAN_WIDE_RIGHT }, + { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT }, + { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 }, + { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT }, + { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER }, + { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT }, + { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER }, + { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT }, + { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT }, + { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT }, + { 0 } // Terminator + }; + + if (object) { + for (i = 0; activity_map[i].activity_mask; i++) { + if (activity_map[i].activity_mask & object->ch_activity_mask) { + info->channel_mask |= activity_map[i].channel_mask; + info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask; + } + } + info->channel_count = av_popcount(info->channel_mask); + info->rep_type = 
object->rep_type; + } +} + +/* Assemble information for MP4 Sample Entry box. Sample Size is always + 16 bits. The coding name is the name of the SampleEntry sub-box and is + 'dtsx' unless the version of the bitstream is > 2. + If DecoderProfile == 2, then MaxPayloadCode will be zero. +*/ +static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info) +{ + static const char *coding_name[] = { "dtsx", "dtsy" }; + + memset(info, 0, sizeof(*info)); + memcpy(info->coding_name, coding_name[h->major_version > 2], 5); + extract_object_info(find_default_audio(h), info); + info->base_sample_freq_code = h->sample_rate == 48000; + info->decoder_profile_code = h->major_version - 2; + info->frame_duration_code = h->frame_duration_code; + info->max_payload_code = 0 + (h->major_version > 2); + info->num_pres_code = h->num_audio_pres - 1; + info->sample_rate = h->sample_rate; + info->sample_rate_mod = h->sample_rate_mod; + info->sample_size = 16; + info->valid = 1; +} + +/* Table 6-17 p47 */ +static int parse_explicit_object_lists(DTSUHD *h, int mask, int index) +{ + GetBitContext *gb = &h->gb; + int i; + static const int table[7] = { 4, 8, 16, 32, 16, 256, 65536 }; + + for (i = 0; i < index; i++) { + if ((mask >> i) & 0x01) { + if (h->is_sync_frame || get_bits1(gb)) + get_bits_var(gb, table); + } + } + + return 0; +} + +/* Table 6-15 p44, Table 6-16 p45 */ +static int parse_aud_pres_params(DTSUHD *h) +{ + GetBitContext *gb = &h->gb; + int audio; + int i; + int read_mask; + static const int table[7] = { 0, 2, 4, 5, 1, 4, 16 }; + + if (h->is_sync_frame) { + if (h->full_channel_mix_flag) + h->num_audio_pres = 1; + else + h->num_audio_pres = get_bits_var(gb, table) + 1; + memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres); + } + + for (audio = 0; audio < h->num_audio_pres; audio++) { + if (h->is_sync_frame) { + if (h->full_channel_mix_flag) + h->audio[audio].selectable = 1; + else + h->audio[audio].selectable = get_bits1(gb); + } + + if (h->audio[audio].selectable) { 
+ if (h->is_sync_frame) { + read_mask = (audio > 0) ? get_bits(gb, audio) : 0; + h->audio[audio].mask = 0; + for (i = 0; read_mask; i++, read_mask >>= 1) { + if (read_mask & 0x01) + h->audio[audio].mask |= get_bits1(gb) << i; + } + } + + if (parse_explicit_object_lists(h, h->audio[audio].mask, audio)) + return 1; + } else { + h->audio[audio].mask = 0; + } + } + + return 0; +} + +/* Table 6-12 p 40 */ +static void decode_version(DTSUHD *h) +{ + GetBitContext *gb = &h->gb; + int bits = get_bits1(gb) ? 3 : 6; + + h->major_version = get_bits(gb, bits) + 2; + skip_bits(gb, bits); +} + +/* Table 6-12 p 40 */ +static int parse_stream_params(DTSUHD *h) +{ + GetBitContext *gb = &h->gb; + int has_ftoc_crc; + static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 }; + static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 }; + + if (h->is_sync_frame) + h->full_channel_mix_flag = get_bits1(gb); + + has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame; + if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes)) + return 1; + + if (h->is_sync_frame) { + if (h->full_channel_mix_flag) + h->major_version = 2; + else + decode_version(h); + + h->frame_duration = table_base_duration[get_bits(gb, 2)]; + h->frame_duration_code = get_bits(gb, 3); + h->frame_duration *= (h->frame_duration_code + 1); + h->clock_rate = table_clock_rate[get_bits(gb, 2)]; + if (h->frame_duration == 0 || h->clock_rate == 0) + return 1; /* bitstream error */ + + skip_bits(gb, 36 * get_bits1(gb)); /* bTimeStampPresent */ + h->sample_rate_mod = get_bits(gb, 2); + h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod); + + if (h->full_channel_mix_flag) { + h->interactive_obj_limits_present = 0; + } else { + skip_bits1(gb); /* reserved flag. 
*/ + h->interactive_obj_limits_present = get_bits1(gb); + } + } + + return 0; +} + +/* Table 6-24 p52 */ +static void navi_purge(DTSUHD *h) +{ + int i; + + for (i = 0; i < h->navi_count; i++) + if (!h->navi[i].present) + h->navi[i].bytes = 0; +} + +/* Table 6-21 p50 */ +static void navi_clear(DTSUHD *h) +{ + if (h->navi) + memset(h->navi, 0, sizeof(h->navi[0]) * h->navi_count); + h->navi_count = 0; +} + +/* Table 6-22 p51 */ +static void navi_clear_present(DTSUHD *h) +{ + int i; + + for (i = 0; i < h->navi_count; i++) + h->navi[i].present = 0; +} + +/* Table 6-23 p51. Return 0 on success, and the index is returned in + the *listIndex parameter. +*/ +static int navi_find_index(DTSUHD *h, int desired_index, int *list_index) +{ + int avail_index = h->navi_count; + int i; + int navi_alloc; + + for (i = 0; i < h->navi_count; i++) { + if (h->navi[i].index == desired_index) { + *list_index = i; + h->navi[i].present = 1; + return 0; + } + + if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i)) + avail_index = i; + } + + if (avail_index >= h->navi_count) { + if (h->navi_count >= h->navi_alloc) { + navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT; + if (av_reallocp_array(&h->navi, navi_alloc, sizeof(*h->navi))) + return 1; + h->navi_alloc = navi_alloc; + } + h->navi_count++; + } + + *list_index = avail_index; + h->navi[avail_index].bytes = 0; + h->navi[avail_index].present = 1; + h->navi[avail_index].id = 256; + h->navi[avail_index].index = desired_index; + + return 0; +} + +/* Table 6-20 p48 */ +static int parse_chunk_navi(DTSUHD *h) +{ + GetBitContext *gb = &h->gb; + int audio_chunks = 1; + int bytes; + int i; + int id; + int id_present; + int index; + int list_index; + static const int table2468[7] = { 2, 4, 6, 8, 4, 16, 64 }; + static const int table_audio_chunk_sizes[7] = { 9, 11, 13, 16, 512, 2048, 8192 }; + static const int table_chunk_sizes[7] = { 6, 9, 12, 15, 64, 512, 4096 }; + + h->chunk_bytes = 0; + if (h->full_channel_mix_flag) + 
h->chunk_count = h->is_sync_frame; + else + h->chunk_count = get_bits_var(gb, table2468); + + if (h->chunk_count >= h->chunk_alloc) { + int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT; + if (av_reallocp_array(&h->chunk, chunk_alloc, sizeof(*h->chunk))) + return 1; + h->chunk_alloc = chunk_alloc; + } + + for (i = 0; i < h->chunk_count; i++) { + h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes); + if (h->full_channel_mix_flag) + h->chunk[i].crc_flag = 0; + else + h->chunk[i].crc_flag = get_bits1(gb); + } + + if (!h->full_channel_mix_flag) + audio_chunks = get_bits_var(gb, table2468); + + if (h->is_sync_frame) + navi_clear(h); + else + navi_clear_present(h); + + for (i = 0; i < audio_chunks; i++) { + if (h->full_channel_mix_flag) + index = 0; + else + index = get_bits_var(gb, table2468); + + if (navi_find_index(h, index, &list_index)) + return 1; + + if (h->is_sync_frame) + id_present = 1; + else if (h->full_channel_mix_flag) + id_present = 0; + else + id_present = get_bits1(gb); + + if (id_present) { + id = get_bits_var(gb, table2468); + h->navi[list_index].id = id; + } + + bytes = get_bits_var(gb, table_audio_chunk_sizes); + h->chunk_bytes += bytes; + h->navi[list_index].bytes = bytes; + } + + navi_purge(h); + + return 0; +} + + +/* Table 6-6 */ +static int parse_md_chunk_list(DTSUHD *h, MD01 *md01) +{ + GetBitContext *gb = &h->gb; + static const int table1[7] = { 3, 4, 6, 8, 8, 16, 64 }; + int i; + + if (h->full_channel_mix_flag) { + md01->object_list_count = 1; + md01->object_list[0] = 256; + } else { + md01->object_list_count = get_bits_var(gb, table1); + for (i = 0; i < md01->object_list_count; i++) + md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4); + } + + return 0; +} + +/* Table 7-9 */ +static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag) +{ + get_bits_md01(h, md01, 6); /* rLoudness */ + if (nominal_flag == 0) + get_bits_md01(h, md01, 5); + + get_bits_md01(h, md01, nominal_flag ? 
2 : 4); +} + +/* Table 7-8 */ +static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first) +{ + int i; + int loudness_sets = 1; + int nominal_flag = 1; + + if (h->full_channel_mix_flag == 0) + nominal_flag = get_bits_md01(h, md01, 1); + + if (nominal_flag) { + if (h->full_channel_mix_flag == 0) + loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1; + } else { + loudness_sets = get_bits_md01(h, md01, 4) + 1; + } + + for (i = 0; i < loudness_sets; i++) + skip_mp_param_set(h, md01, nominal_flag); + + if (only_first) + return 0; + + if (nominal_flag == 0) + get_bits_md01(h, md01, 1); + + for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */ + if (get_bits_md01(h, md01, 1)) { + if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */ + get_bits_md01(h, md01, 15); + } + if (get_bits_md01(h, md01, 1)) /* smooth md present */ + get_bits_md01(h, md01, 6 * 6); + } + + if (h->full_channel_mix_flag == 0) { + i = md01->static_md_packets * md01->static_md_packet_size - get_bits_count(&md01->gb); + skip_bits(&md01->gb, i); + } + md01->static_md_extracted = 1; + + return 0; +} + +/* Table 7-7 */ +static int parse_multi_frame_md(DTSUHD *h, MD01 *md01) +{ + GetBitContext *gb = &h->gb; + int i, n; + static const int table1[7] = { 0, 6, 9, 12, 1, 64, 512 }; + static const int table2[7] = { 5, 7, 9, 11, 32, 128, 512 }; + + if (h->is_sync_frame) { + md01->packets_acquired = 0; + if (h->full_channel_mix_flag) { + md01->static_md_packets = 1; + md01->static_md_packet_size = 0; + } else { + md01->static_md_packets = get_bits_var(gb, table1) + 1; + md01->static_md_packet_size = get_bits_var(gb, table2) + 3; + } + + n = md01->static_md_packets * md01->static_md_packet_size; + if (n > md01->buf_bytes) { + if (av_reallocp(&md01->buf, n)) + return 1; + md01->buf_bytes = n; + } + + init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8); + if (md01->static_md_packets > 1) + md01->static_md_update_flag = get_bits1(gb); + else + md01->static_md_update_flag = 1; + } + + if 
(md01->packets_acquired < md01->static_md_packets) { + n = md01->packets_acquired * md01->static_md_packet_size; + for (i = 0; i < md01->static_md_packet_size; i++) + md01->buf[n + i] = get_bits(gb, 8); + md01->packets_acquired++; + + if (md01->packets_acquired == md01->static_md_packets) { + if (md01->static_md_update_flag || !md01->static_md_extracted) + if (parse_static_md_params(h, md01, 0)) + return 1; + } else if (md01->packets_acquired == 1) { + if (md01->static_md_update_flag || !md01->static_md_extracted) + if (parse_static_md_params(h, md01, 1)) + return 1; + } + } + + return 0; +} + +/* Return 1 if suitable, 0 if not. Table 7-18. OBJGROUPIDSTART=224 Sec 7.8.7 p75 */ +static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id) +{ + GetBitContext *gb = &h->gb; + static const int table[7] = { 8, 10, 12, 14, 256, 1024, 4096 }; + + if (object_id >= 224 || get_bits1(gb)) + return 1; + + /* Reject the render and skip the render data. */ + skip_bits1(gb); + skip_bits(gb, get_bits_var(gb, table)); + + return 0; +} + +/* Table 7-26 */ +static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object) +{ + GetBitContext *gb = &h->gb; + const int ch_index = object->rep_type == REP_TYPE_BINAURAL ? 
1 : get_bits(gb, 4); + static const int mask_table[14] = { /* Table 7-27 */ + 0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F, + 0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843, + }; + + if (ch_index == 14) + object->ch_activity_mask = get_bits(gb, 16); + else if (ch_index == 15) + object->ch_activity_mask = get_bits_long(gb, 32); + else + object->ch_activity_mask = mask_table[ch_index]; +} + +/* Table 7-22 */ +static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object, + int start_frame_flag, int object_id) +{ + GetBitContext *gb = &h->gb; + int ch_mask_object_flag = 0; + int object_3d_metadata_flag = 0; + static const int table2[7] = { 1, 4, 4, 8, 2, 16, 16 }; + static const int table3[7] = { 3, 3, 4, 8, 8, 8, 16 }; + + skip_bits(gb, object_id != 256); + + if (start_frame_flag) { + object->rep_type = get_bits(gb, 3); + switch (object->rep_type) { + case REP_TYPE_BINAURAL: + case REP_TYPE_CH_MASK_BASED: + case REP_TYPE_MTRX2D_CH_MASK_BASED: + case REP_TYPE_MTRX3D_CH_MASK_BASED: + ch_mask_object_flag = 1; + break; + + case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF: + case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF: + object_3d_metadata_flag = 1; + break; + } + + if (ch_mask_object_flag) { + if (object_id != 256) { + skip_bits(gb, 3); /* Object Importance Level */ + if (get_bits1(gb)) + skip_bits(gb, get_bits1(gb) ? 3 : 5); + + get_bits_var(gb, table2); + get_bits_var(gb, table3); + + /* Skip optional Loudness block. */ + if (get_bits1(gb)) + skip_bits(gb, 8); + + /* Skip optional Object Interactive MD (Table 7-25). 
*/ + if (get_bits1(gb) && h->interactive_obj_limits_present) { + if (get_bits1(gb)) + skip_bits(gb, 5 + 6 * object_3d_metadata_flag); + } + } + + parse_ch_mask_params(h, md01, object); + } + } + + /* Skip rest of object */ + return 0; +} + +/* Table 7-4 */ +static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index) +{ + GetBitContext *gb = &h->gb; + uint32_t i; + uint32_t id; + uint32_t start_flag; + + if (h->audio[pres_index].selectable) { + for (i = 0; i < 4; i++) /* Table 7-5. Scaling data. */ + skip_bits(gb, 5 * get_bits1(gb)); + + if (get_bits1(gb) && parse_multi_frame_md(h, md01)) + return 1; + } + + /* Table 7-16: Object metadata. */ + memset(md01->object, 0, sizeof(md01->object)); + if (!h->full_channel_mix_flag) + skip_bits(gb, 11 * get_bits1(gb)); + + for (i = 0; i < md01->object_list_count; i++) { + id = md01->object_list[i]; + if (!is_suitable_for_render(h, md01, id)) + continue; + + md01->object[id].pres_index = pres_index; + start_flag = 0; + if (!md01->object[id].started) { + skip_bits(gb, id != 256); + start_flag = md01->object[id].started = 1; + } + + if ((id < 224 || id > 255) && + parse_object_metadata(h, md01, md01->object + id, start_flag, id)) { + return 1; + } + + break; + } + + return 0; +} + +/* Table 6-2 */ +static int parse_chunks(DTSUHD *h) +{ + GetBitContext *gb = &h->gb; + MD01 *md01; + const uint8_t *byte_start; + int bit_next; + int i; + static const int table_aud_pres[7] = { 0, 2, 4, 4, 1, 4, 16 }; + int pres_index; + uint32_t id; + + for (i = 0; i < h->chunk_count; i++) { + bit_next = get_bits_count(gb) + h->chunk[i].bytes * 8; + byte_start = h->data + get_bits_count(gb) / 8; + if (h->chunk[i].crc_flag && av_crc(h->crc, DTSUHD_CRC_SEED, byte_start, h->chunk[i].bytes)) + return 1; + + id = get_bits(gb, 8); + if (id == 1) { + pres_index = get_bits_var(gb, table_aud_pres); + if (pres_index > 255) + return 1; + md01 = chunk_find_md01(h, id); + if (md01 == NULL) + md01 = chunk_append_md01(h, id); + if (md01 == NULL) + return 1; + if 
(parse_md_chunk_list(h, md01)) + return 1; + if (parse_md01(h, md01, pres_index)) + return 1; + } + + skip_bits(gb, bit_next - get_bits_count(gb)); + } + + return 0; +} + +/** Allocate parsing handle. The parsing handle should be used to parse + one DTS:X Profile 2 Audio stream, then freed by calling av_dtsuhd_destroy(). + Do not use the same parsing handle to parse multiple audio streams. + + @return Parsing handle for use with other functions, or NULL on failure. +*/ +DTSUHD *av_dtsuhd_create(void) +{ + DTSUHD *h = av_calloc(1, sizeof(DTSUHD)); + if (h) + h->crc = av_crc_get_table(AV_CRC_16_CCITT); + return h; +} + +/** Free all resources used by the parsing handle. + + @param[in] h Handle allocated by av_dtsuhd_create +*/ +void av_dtsuhd_destroy(DTSUHD *h) +{ + if (h) { + chunk_reset(h); + av_freep(&h->chunk); + av_freep(&h->navi); + av_freep(&h); + } +} + +/** Parse a single DTS:X Profile 2 frame. + The frame must start at the first byte of the data buffer, and enough + of the frame must be present to decode the majority of the FTOC. + From Table 6-11 p40. + + A sync frame must be the first frame provided, before any non-sync frames. + Signatures: sync=0x40411BF2, non-sync=0x71C442E8. 
+ + @param[in] h Handle allocated by av_dtsuhd_create + @param[in] data First byte of a buffer containing the frame to parse + @param[in] data_bytes Number of valid bytes in 'data' + @param[out] fi Results of frame parsing, may be NULL + @param[out] di Results of descriptor parsing, may be NULL + @return 0 on success, DTSUHDStatus enumeration on error +*/ +int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes, + DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di) +{ + GetBitContext *gb; + int fraction = 1; + int i; + int syncword; + static const int table_payload[7] = { 5, 8, 10, 12, 32, 256, 1024 }; + + if (!h || !data) + return DTSUHD_NULL; + + if (data_bytes < 4) + return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */ + + h->data = data; + h->data_bytes = data_bytes; + gb = &h->gb; + init_get_bits(gb, data, data_bytes * 8); + + syncword = get_bits_long(gb, 32); + h->is_sync_frame = syncword == DTSUHD_SYNCWORD; + h->saw_sync |= h->is_sync_frame; + if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD)) + return DTSUHD_NOSYNC; /* Invalid frame or have not parsed sync frame. */ + + h->ftoc_bytes = get_bits_var(gb, table_payload) + 1; + if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes) + return DTSUHD_INCOMPLETE; /* Data buffer does not contain entire FTOC */ + + if (parse_stream_params(h)) + return DTSUHD_INVALID_FRAME; + + if (parse_aud_pres_params(h)) + return DTSUHD_INVALID_FRAME; + + if (parse_chunk_navi(h)) /* AudioChunkTypes and payload sizes. */ + return DTSUHD_INVALID_FRAME; + + /* At this point in the parsing, we can calculate the size of the frame. */ + h->frame_bytes = h->ftoc_bytes + h->chunk_bytes; + if (h->frame_bytes > data_bytes) + return DTSUHD_INCOMPLETE; + + if (di && h->is_sync_frame) { + /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately + following the FTOC CRC. 
+ */ + skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb)); + if (parse_chunks(h)) + return DTSUHD_INVALID_FRAME; + update_descriptor(h, di); + } + + /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */ + for (i = 0; i < h->navi_count; i++) { + if (h->navi[i].present) { + if (h->navi[i].id == 3) + fraction = 2; + else if (h->navi[i].id == 4) + fraction = 4; + } + } + + if (fi) { + fi->sync = h->is_sync_frame; + fi->frame_bytes = h->frame_bytes; + fi->sample_rate = h->sample_rate; + fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction); + } + + return DTSUHD_OK; +} + +/** Return the offset of the first UHD audio frame. + When supplied a buffer containing DTSHDHDR file content, the DTSHD + headers are skipped and the offset to the first byte of the STRMDATA + chunk is returned, along with the size of that chunk. + + @param[in] data_start DTS:X Profile 2 file content to parse + @param[in] data_size Number of valid bytes in 'data_start' + @param[out] Number of leading DTS:X Profile 2 audio frames to discard, + may be NULL + @param[out] strmdata_size Size of STRMDATA payload, may be NULL + @return STRMDATA payload offset, 0 if not a valid DTS:X Profile 2 file, or AVERROR_INVALIDDATA if a chunk size is out of range +*/ +int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size) +{ + const uint8_t *data = data_start; + const uint8_t *data_end = data + data_size; + uint64_t chunk_size = 0; + + if (data + DTSUHD_CHUNK_HEADER >= data_end || memcmp(data, "DTSHDHDR", 8)) + return 0; + + for (; data + DTSUHD_CHUNK_HEADER <= data_end; data += chunk_size + DTSUHD_CHUNK_HEADER) { + chunk_size = AV_RB64(data + 8); + if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61)) + return AVERROR_INVALIDDATA; + + if (!memcmp(data, "STRMDATA", 8)) { + if (strmdata_size) + *strmdata_size = chunk_size; + return (int)(data - data_start) + DTSUHD_CHUNK_HEADER; + } + } + + return 0; +} diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h new file mode 
100644 index 0000000000..10280cd203 --- /dev/null +++ b/libavcodec/dtsuhd_common.h @@ -0,0 +1,83 @@ +/* + * DTS-UHD common audio frame parsing code + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DTSUHD_COMMON_H +#define AVCODEC_DTSUHD_COMMON_H + +#include <stdint.h> +#include <stdlib.h> + +#define DTSUHD_NONSYNCWORD 0x71C442E8 +#define DTSUHD_SYNCWORD 0x40411BF2 + +#define DTSUHD_MAX_FRAME_SIZE 0x1000 + +/* Return codes from dtsuhd_frame */ +enum DTSUHDStatus { + DTSUHD_OK, + DTSUHD_INCOMPLETE, /* Entire frame not in buffer. */ + DTSUHD_INVALID_FRAME, /* Error parsing frame. */ + DTSUHD_NOSYNC, /* No sync frame prior to non-sync frame. */ + DTSUHD_NULL, /* Function parameter may not be NULL. */ +}; + +/* Return stream information from an audio frame parsed by dtsuhd_frame, */ +typedef struct DTSUHDDescriptorInfo { + unsigned valid:1; /* True if descriptor info is valid. */ + char coding_name[5]; /* Four character, null term SampleEntry box name. 
*/ + int base_sample_freq_code; + int channel_count; + int decoder_profile_code; + int frame_duration_code; + int max_payload_code; + int num_pres_code; + int rep_type; + int sample_rate; + int sample_rate_mod; + int sample_size; + int channel_mask; + uint64_t ffmpeg_channel_mask; +} DTSUHDDescriptorInfo; + +/* Return frame information from an audio frame parsed by dtsuhd_frame. */ +typedef struct DTSUHDFrameInfo { + int frame_bytes; /* Size of entire frame in bytes. */ + int sample_count; /* Number of samples in frame (samples per frame). */ + int sample_rate; /* Sample rate of frame (samples per second). */ + unsigned sync:1; /* True if frame is a sync frame. */ +} DTSUHDFrameInfo; + +struct DTSUHD; +typedef struct DTSUHD DTSUHD; + +struct DTSUHD *av_dtsuhd_create(void); +void av_dtsuhd_destroy(DTSUHD*); +int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData, + DTSUHDFrameInfo*, DTSUHDDescriptorInfo*); +int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, + size_t *strmdata_size); + +static inline int dtsuhd_is_syncword(uint32_t syncword) +{ + return syncword == DTSUHD_NONSYNCWORD || syncword == DTSUHD_SYNCWORD; +} + +#endif /* AVCODEC_DTSUHD_COMMON_H */ diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c new file mode 100644 index 0000000000..e8058e1701 --- /dev/null +++ b/libavcodec/dtsuhd_parser.c @@ -0,0 +1,141 @@ +/* + * DTS-UHD audio frame parsing code + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Parse raw DTS-UHD audio frame input and return individual audio frames. + */ + +#include "dtsuhd_common.h" +#include "libavutil/intreadwrite.h" +#include "parser.h" + +#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128) + +typedef struct DTSUHDParseContext { + DTSUHD *dtsuhd; + int buf_offset; + int buf_bytes; + int frame_bytes; + uint8_t *buf; +} DTSUHDParseContext; + +static av_cold int parser_init(AVCodecParserContext *s) +{ + DTSUHDParseContext *pc = s->priv_data; + + pc->dtsuhd = av_dtsuhd_create(); + pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1); + if (!pc->dtsuhd || !pc->buf) + return AVERROR(ENOMEM); + + return 0; +} + +static void parser_close(AVCodecParserContext *s) +{ + DTSUHDParseContext *pc = s->priv_data; + + av_dtsuhd_destroy(pc->dtsuhd); + pc->dtsuhd = NULL; + av_freep(&pc->buf); + ff_parse_close(s); +} + +// Keep data in contiguous buffer as required by dtsuhd_frame. +static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed) +{ + int copy_bytes; + + pc->buf_offset += pc->frame_bytes; + pc->frame_bytes = 0; + + // Buffer almost full, move partial frame to start of buffer for more space. + if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) { + memmove(pc->buf, pc->buf + pc->buf_offset, pc->buf_bytes); + pc->buf_bytes -= pc->buf_offset; + pc->buf_offset = 0; + } + + copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size)); + + // Append input buffer to our context. 
+ if (copy_bytes) { + memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes); + pc->buf_bytes += copy_bytes; + } + + // Ensure buffer starts with a syncword + while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset))) + pc->buf_offset++; + + *input_consumed = copy_bytes; + *buf = pc->buf + pc->buf_offset; + *buf_size = pc->buf_bytes - pc->buf_offset; + + return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE; +} + +static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + DTSUHDParseContext *pc = s->priv_data; + DTSUHDFrameInfo fi; + int input_consumed = 0; + + if (append_buffer(pc, &buf, &buf_size, &input_consumed)) { + *poutbuf = NULL; + *poutbuf_size = 0; + return input_consumed; + } + + switch (av_dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) { + case DTSUHD_OK: + if (fi.sample_count) + s->duration = fi.sample_count; + if (fi.sample_rate) + avctx->sample_rate = fi.sample_rate; + buf_size = pc->frame_bytes = fi.frame_bytes; + break; + case DTSUHD_INCOMPLETE: + pc->frame_bytes = buf_size; + buf = NULL; + buf_size = 0; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. 
File may be invalid.\n"); + return AVERROR_INVALIDDATA; + } + + *poutbuf = buf; + *poutbuf_size = buf_size; + + return input_consumed; +} + +AVCodecParser ff_dtsuhd_parser = { + .codec_ids = { AV_CODEC_ID_DTSUHD }, + .priv_data_size = sizeof(DTSUHDParseContext), + .parser_init = parser_init, + .parser_parse = parser_parse, + .parser_close = parser_close, +}; diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c index 285f81a901..6030a68e73 100644 --- a/libavcodec/parsers.c +++ b/libavcodec/parsers.c @@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser; extern const AVCodecParser ff_dnxhd_parser; extern const AVCodecParser ff_dolby_e_parser; extern const AVCodecParser ff_dpx_parser; +extern const AVCodecParser ff_dtsuhd_parser; extern const AVCodecParser ff_dvaudio_parser; extern const AVCodecParser ff_dvbsub_parser; extern const AVCodecParser ff_dvdsub_parser; diff --git a/libavcodec/version.h b/libavcodec/version.h index 728ab8839d..e0fe2eb7b8 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #include "version_major.h" -#define LIBAVCODEC_VERSION_MINOR 23 +#define LIBAVCODEC_VERSION_MINOR 24 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ -- 2.17.1 [-- Attachment #3: 0001-avformat-Add-DTS-UHD-demuxer-and-movenc-support.eml --] [-- Type: message/rfc822, Size: 14483 bytes --] From: Roy Funderburk <royffmpeg@funderburk.us> To: ffmpeg-devel@ffmpeg.org Subject: [PATCH] avformat: Add DTS-UHD demuxer and movenc support. Date: Sat, 15 Apr 2023 13:04:42 -0700 Demuxing of DTS-UHD input files per ETSI TS 102 114 is added as demuxer "dtsuhd". movenc supports DTS-UHD audio track. 
Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com> --- Changelog | 2 +- configure | 1 + doc/general_contents.texi | 1 + libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/dtshddec.c | 2 +- libavformat/dtsuhddec.c | 216 ++++++++++++++++++++++++++++++++++++++ libavformat/movenc.c | 32 ++++++ libavformat/version.h | 2 +- 9 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 libavformat/dtsuhddec.c diff --git a/Changelog b/Changelog index c010e86159..625fa2da39 100644 --- a/Changelog +++ b/Changelog @@ -30,7 +30,7 @@ version <next>: - support for the P_SKIP hinting to speed up libx264 encoding - Support HEVC,VP9,AV1 codec in enhanced flv format - apsnr and asisdr audio filters - +- DTS-UHD demuxer version 6.0: - Radiance HDR image support diff --git a/configure b/configure index ff6a5c3600..bc6d315e88 100755 --- a/configure +++ b/configure @@ -3493,6 +3493,7 @@ dash_demuxer_deps="libxml2" dirac_demuxer_select="dirac_parser" dts_demuxer_select="dca_parser" dtshd_demuxer_select="dca_parser" +dtsuhd_demuxer_select="dtsuhd_parser" dv_demuxer_select="dvprofile" dv_muxer_select="dvprofile" dxa_demuxer_select="riffdec" diff --git a/doc/general_contents.texi b/doc/general_contents.texi index 8ac121dee1..a450285459 100644 --- a/doc/general_contents.texi +++ b/doc/general_contents.texi @@ -597,6 +597,7 @@ library: @item raw DNxHD @tab X @tab X @item raw DTS @tab X @tab X @item raw DTS-HD @tab @tab X +@item raw DTS-UHD @tab @tab @item raw E-AC-3 @tab X @tab X @item raw FLAC @tab X @tab X @item raw GSM @tab @tab X diff --git a/libavformat/Makefile b/libavformat/Makefile index bd78c206b9..fe47f5f0ba 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -188,6 +188,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER) += dsicin.o OBJS-$(CONFIG_DSS_DEMUXER) += dss.o OBJS-$(CONFIG_DTSHD_DEMUXER) += dtshddec.o OBJS-$(CONFIG_DTS_DEMUXER) += dtsdec.o rawdec.o +OBJS-$(CONFIG_DTSUHD_DEMUXER) += dtsuhddec.o OBJS-$(CONFIG_DTS_MUXER) += rawenc.o 
OBJS-$(CONFIG_DV_MUXER) += dvenc.o OBJS-$(CONFIG_DVBSUB_DEMUXER) += dvbsub.o rawdec.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index 6324952bd2..1637eeeebc 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -146,6 +146,7 @@ extern const AVInputFormat ff_dss_demuxer; extern const AVInputFormat ff_dts_demuxer; extern const FFOutputFormat ff_dts_muxer; extern const AVInputFormat ff_dtshd_demuxer; +extern const AVInputFormat ff_dtsuhd_demuxer; extern const AVInputFormat ff_dv_demuxer; extern const FFOutputFormat ff_dv_muxer; extern const AVInputFormat ff_dvbsub_demuxer; diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c index a3dea0668f..6e9e78a335 100644 --- a/libavformat/dtshddec.c +++ b/libavformat/dtshddec.c @@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext { static int dtshd_probe(const AVProbeData *p) { if (AV_RB64(p->buf) == DTSHDHDR) - return AVPROBE_SCORE_MAX; + return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have this signature. return 0; } diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c new file mode 100644 index 0000000000..d840c0a033 --- /dev/null +++ b/libavformat/dtsuhddec.c @@ -0,0 +1,216 @@ +/* + * DTS-UHD audio demuxer + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Report DTS-UHD audio stream configuration and extract raw packet data. + */ + +#include "internal.h" +#include "libavcodec/dtsuhd_common.h" +#include "libavcodec/put_bits.h" +#include "libavutil/intreadwrite.h" + +#define DTSUHD_BUFFER_SIZE (1024 * 1024) + +typedef struct DTSUHDDemuxContext { + size_t data_end; + struct DTSUHD *dtsuhd; + uint8_t *buf; +} DTSUHDDemuxContext; + +static int probe(const AVProbeData *p) +{ + int offset = av_dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL); + int score = 0; + struct DTSUHD *h = av_dtsuhd_create(); + + if (h && offset >= 0) { + for (; offset + 4 < p->buf_size; offset++) { + if (dtsuhd_is_syncword(AV_RB32(p->buf + offset))) { + if (DTSUHD_OK == av_dtsuhd_frame(h, p->buf + offset, p->buf_size - offset, NULL, NULL)) { + score = AVPROBE_SCORE_MAX - 3; + break; + } + } + } + } + + av_dtsuhd_destroy(h); + return score; +} + +static av_cold int read_close(AVFormatContext *s) +{ + DTSUHDDemuxContext *dtsxs = s->priv_data; + + av_freep(&dtsxs->buf); + av_dtsuhd_destroy(dtsxs->dtsuhd); + dtsxs->dtsuhd = NULL; + + return 0; +} + +static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start) +{ + while (data_start + 4 < DTSUHD_BUFFER_SIZE && + !dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + data_start))) { + data_start++; + } + + return data_start; +} + +static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo *di) +{ + PutBitContext pbc; + int ret; + int size; + uint8_t udts[32]; + + init_put_bits(&pbc, udts, sizeof(udts)); + put_bits32(&pbc, 0); // udts box size + put_bits32(&pbc, AV_RB32("udts")); // udts box signature + put_bits(&pbc, 6, di->decoder_profile_code); + put_bits(&pbc, 2, di->frame_duration_code); + put_bits(&pbc, 3, di->max_payload_code); + 
put_bits(&pbc, 5, di->num_pres_code); + put_bits32(&pbc, di->channel_mask); + put_bits(&pbc, 1, di->base_sample_freq_code); + put_bits(&pbc, 2, di->sample_rate_mod); + put_bits(&pbc, 3, di->rep_type); + put_bits(&pbc, 3, 0); + put_bits(&pbc, 1, 0); + put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for each presentation. + flush_put_bits(&pbc); // byte align + size = put_bytes_output(&pbc); + AV_WB32(udts, size); + + ret = ff_alloc_extradata(par, size); + if (ret < 0) + return ret; + + memcpy(par->extradata, udts, size); + + return 0; +} + +static int read_header(AVFormatContext *s) +{ + AVIOContext *pb = s->pb; + AVStream *st = avformat_new_stream(s, NULL); + DTSUHDDemuxContext *dtsuhd = s->priv_data; + DTSUHDDescriptorInfo di; + DTSUHDFrameInfo fi; + int buf_bytes; + int ret = DTSUHD_INVALID_FRAME; + int data_start; + + if (!(pb->seekable & AVIO_SEEKABLE_NORMAL)) + return AVERROR(EIO); + + dtsuhd->buf = av_malloc(DTSUHD_BUFFER_SIZE); + dtsuhd->dtsuhd = av_dtsuhd_create(); + if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st) + return AVERROR(ENOMEM); + + buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE); + if (buf_bytes < 0) + return buf_bytes; + + data_start = av_dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, &dtsuhd->data_end); + if (data_start < 0) + return data_start; + + dtsuhd->data_end += data_start; + if (data_start == 0) + dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, decode frames to end of file. + + data_start = find_first_syncword(dtsuhd, data_start); + if (avio_seek(pb, data_start, SEEK_SET) < 0) + return AVERROR(EINVAL); + + ret = av_dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start, + buf_bytes - data_start, &fi, &di); + if (ret != DTSUHD_OK || !di.valid) { + av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. 
File may be invalid.\n"); + return AVERROR_INVALIDDATA; + } + + ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW; + st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + st->codecpar->codec_id = s->iformat->raw_codec_id; + st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE; + st->codecpar->ch_layout.nb_channels = di.channel_count; + st->codecpar->ch_layout.u.mask = di.ffmpeg_channel_mask; + st->codecpar->codec_tag = AV_RL32(di.coding_name); + st->codecpar->frame_size = 512 << di.frame_duration_code; + st->codecpar->sample_rate = di.sample_rate; + +#if FF_API_OLD_CHANNEL_LAYOUT +FF_DISABLE_DEPRECATION_WARNINGS + st->codecpar->channels = di.channel_count; + st->codecpar->channel_layout = di.ffmpeg_channel_mask; +FF_ENABLE_DEPRECATION_WARNINGS +#endif + + ret = write_extradata(st->codecpar, &di); + if (ret < 0) + return ret; + + if (st->codecpar->sample_rate) + avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); + + return 0; +} + +static int read_packet(AVFormatContext *s, AVPacket *pkt) +{ + DTSUHDDemuxContext *dtsuhd = s->priv_data; + int64_t size, left; + int ret; + + left = dtsuhd->data_end - avio_tell(s->pb); + size = FFMIN(left, DTSUHD_MAX_FRAME_SIZE); + if (size <= 0) + return AVERROR_EOF; + + ret = av_get_packet(s->pb, pkt, size); + if (ret < 0) + return ret; + + pkt->stream_index = 0; + + return ret; +} + +AVInputFormat ff_dtsuhd_demuxer = { + .name = "dtsuhd", + .long_name = NULL_IF_CONFIG_SMALL("DTS-UHD"), + .priv_data_size = sizeof(DTSUHDDemuxContext), + .read_probe = probe, + .read_header = read_header, + .read_packet = read_packet, + .read_close = read_close, + .flags = AVFMT_GENERIC_INDEX, + .extensions = "dtsx", + .raw_codec_id = AV_CODEC_ID_DTSUHD, +}; diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 7ef6cef46a..f382a00c3f 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -763,6 +763,24 @@ static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic return update_size(pb, pos); } +static int 
mov_write_udts_tag(AVIOContext *pb, MOVTrack *track) +{ + if (track->vos_len < 12) { + av_log(pb, AV_LOG_ERROR, + "Cannot write moov atom before DTS-UHD packets." + " Set the delay_moov flag to fix this.\n"); + return AVERROR(EINVAL); + } + + /* Write vos_data if it is a udts box. */ + if (memcmp(track->vos_data + 4, "udts", 4) == 0) { + avio_write(pb, track->vos_data, track->vos_len); + return track->vos_len; + } + + return 0; +} + static int mov_pcm_le_gt16(enum AVCodecID codec_id) { return codec_id == AV_CODEC_ID_PCM_S24LE || @@ -1373,6 +1391,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex ret = mov_write_dops_tag(s, pb, track); else if (track->par->codec_id == AV_CODEC_ID_TRUEHD) ret = mov_write_dmlp_tag(s, pb, track); + else if (track->par->codec_id == AV_CODEC_ID_DTSUHD) + ret = mov_write_udts_tag(pb, track); else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) { if (track->par->ch_layout.nb_channels > 1) ret = mov_write_chnl_tag(s, pb, track); @@ -2817,6 +2837,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO || track->par->codec_id == AV_CODEC_ID_TRUEHD || track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO || + track->par->codec_id == AV_CODEC_ID_DTSUHD || track->par->codec_tag == MKTAG('r','t','p',' ')) && track->has_keyframes && track->has_keyframes < track->entry) mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE); @@ -5712,6 +5733,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk) } } +static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk) +{ + if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) { + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; + trk->has_keyframes++; + } + } + static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk) { int length; @@ -6383,6 +6412,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) mov_parse_vc1_frame(pkt, trk); } else if (par->codec_id == 
AV_CODEC_ID_TRUEHD) { mov_parse_truehd_frame(pkt, trk); + } else if (par->codec_id == AV_CODEC_ID_DTSUHD) { + mov_parse_dtsuhd_frame(pkt, trk); } else if (pkt->flags & AV_PKT_FLAG_KEY) { if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO && trk->entry > 0) { // force sync sample for the first key frame @@ -7841,6 +7872,7 @@ static const AVCodecTag codec_mp4_tags[] = { { AV_CODEC_ID_AC3, MKTAG('a', 'c', '-', '3') }, { AV_CODEC_ID_EAC3, MKTAG('e', 'c', '-', '3') }, { AV_CODEC_ID_DTS, MKTAG('m', 'p', '4', 'a') }, + { AV_CODEC_ID_DTSUHD, MKTAG('d', 't', 's', 'x') }, { AV_CODEC_ID_TRUEHD, MKTAG('m', 'l', 'p', 'a') }, { AV_CODEC_ID_FLAC, MKTAG('f', 'L', 'a', 'C') }, { AV_CODEC_ID_OPUS, MKTAG('O', 'p', 'u', 's') }, diff --git a/libavformat/version.h b/libavformat/version.h index 979952183c..1055753772 100644 --- a/libavformat/version.h +++ b/libavformat/version.h @@ -31,7 +31,7 @@ #include "version_major.h" -#define LIBAVFORMAT_VERSION_MINOR 10 +#define LIBAVFORMAT_VERSION_MINOR 11 #define LIBAVFORMAT_VERSION_MICRO 100 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ -- 2.17.1 [-- Attachment #4: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-08-17 21:48 UTC|newest] Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-04-14 15:39 Roy Funderburk 2023-04-14 16:40 ` Hendrik Leppkes 2023-04-14 16:48 ` Roy Funderburk 2023-04-15 14:56 ` Michael Niedermayer 2023-04-15 20:04 ` [FFmpeg-devel] [PATCH v2] " Roy Funderburk 2023-04-16 19:55 ` Michael Niedermayer 2023-04-16 21:52 ` Roy Funderburk 2023-04-17 4:12 ` [FFmpeg-devel] [PATCH v3 1/2] " Roy Funderburk 2023-05-08 17:49 ` Roy Funderburk 2023-05-13 7:28 ` Paul B Mahol 2023-05-15 14:50 ` Roy Funderburk 2023-05-15 20:35 ` Michael Niedermayer 2023-05-15 21:14 ` Roy Funderburk 2023-06-13 14:26 ` Paul B Mahol 2023-06-13 17:43 ` Roy Funderburk 2023-06-13 18:09 ` Paul B Mahol 2023-06-13 18:20 ` Roy Funderburk 2023-06-13 19:04 ` Anton Khirnov 2023-04-17 4:13 ` [FFmpeg-devel] [PATCH v3 2/2] " Roy Funderburk 2023-06-13 18:32 ` Paul B Mahol 2023-04-15 20:20 ` [FFmpeg-devel] [PATCH] " Roy Funderburk 2023-06-13 18:35 ` Paul B Mahol 2023-06-14 0:00 ` Roy Funderburk 2023-06-14 5:37 ` Paul B Mahol 2023-06-14 6:01 ` Paul B Mahol 2023-06-14 6:06 ` Paul B Mahol 2023-06-14 6:11 ` Paul B Mahol 2023-06-14 18:24 ` Roy Funderburk 2023-06-14 20:01 ` Roy Funderburk 2023-06-15 15:46 ` Paul B Mahol 2023-06-15 18:44 ` Roy Funderburk 2023-06-18 12:18 ` Paul B Mahol 2023-06-20 17:05 ` Roy Funderburk 2023-08-17 21:47 ` Roy Funderburk [this message] 2023-08-17 22:31 ` Paul B Mahol 2023-08-17 22:51 ` Roy Funderburk 2024-01-16 21:02 ` Roy Funderburk
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=918f8aa0-854e-755c-85b0-5b4034da7fd4@funderburk.us \ --to=royffmpeg@funderburk.us \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git