From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTP id 3B928496AD for ; Sat, 17 Feb 2024 22:04:57 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 27EBC68D31D; Sun, 18 Feb 2024 00:03:53 +0200 (EET) Received: from mail-pl1-f173.google.com (mail-pl1-f173.google.com [209.85.214.173]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id E66E068D308 for ; Sun, 18 Feb 2024 00:03:45 +0200 (EET) Received: by mail-pl1-f173.google.com with SMTP id d9443c01a7336-1d780a392fdso18781625ad.3 for ; Sat, 17 Feb 2024 14:03:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1708207424; x=1708812224; darn=ffmpeg.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:to:from:from:to:cc:subject:date:message-id :reply-to; bh=PinQSbH8TbYN5OczIWG1puGmUAhjsBEli+7Xkq62v+c=; b=gtnZd8T3L1jK80Sqn6a2OKPXWVId53ceN/yI4P/Y5GxmMao9/XGkTjSI10LTfZBMIU mr79fFaMnSPEM31R4HcWBXXlvx6AnoMDlPHxX+1efOjWgHRGV615w9nhUc9wD7iy2ymH Q5v64u8cDYl0o0WWShhMQ08qb06rKTHvqcOJzH9xLGaQqpYOTZYv4JBQOQc3YCKvtHCw R7AfZ2HR4qvBhyioDAfOZAR3LQ1D+hi3D3cLX0P1YqDDXOP8Zm1fVNKqx9e6bm9AI9vY rLtcE7ARKF/RMlBfGCmIUcx/rouoYng0mZSBuG+VjLIBgwqwJXd5Na1hqoNArOO9LTfe zuHQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1708207424; x=1708812224; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=PinQSbH8TbYN5OczIWG1puGmUAhjsBEli+7Xkq62v+c=; b=Ej827MHjKasxIReRKpTgwPjc+/y/Fq4b/hejyARViQ6J7za5+DOTqLHOntsdIOnijT og9WmOkCVpIsAXokZU1O6yIiZSdkk1rjRzBQfuvjASFwtO/fKBxDn5DcWQOzVo5eq6rs hffyW80Rjgd/cuHDojkrYxpYJF9Xu1zKCLEQtxtAPXSJ12+ppAhmnrzFm9JXrkYu68dD UNw84eHDMxZUSim5K9TwD0g/oygvw86EuO8EnVQHQ423gr+CLkDrheaCx8fSIEd2jycC 9UIxjypWXQA4kfs+3l1uJwVbZnEEzhrXaxuxeJNUmmfJN/E9uF8E7I5LGUqiQJZw1kQo uhaA== X-Gm-Message-State: AOJu0YzAI1D2NtQoJsqYp0JcaonH8LJzsCFPN0X/tgRXgreK4VlmSEOH ENBcdTTomnhJgWxVSXNGOn+JFhRvWrNnBKBqnXGIzrNhPmZPjQYpEy28+XKO X-Google-Smtp-Source: AGHT+IEguC5Ek9bInHiEwhVGd6P5IAurUXVF5XE082boo8zFHhmbYJFKa1poCLyj0q4by503BCklHw== X-Received: by 2002:a17:902:6546:b0:1db:ccd0:e77e with SMTP id d6-20020a170902654600b001dbccd0e77emr2390422pln.35.1708207423444; Sat, 17 Feb 2024 14:03:43 -0800 (PST) Received: from localhost.localdomain (host197.190-225-105.telecom.net.ar. [190.225.105.197]) by smtp.gmail.com with ESMTPSA id e6-20020a17090301c600b001d77a0e1374sm1850326plh.151.2024.02.17.14.03.42 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sat, 17 Feb 2024 14:03:42 -0800 (PST) From: James Almer To: ffmpeg-devel@ffmpeg.org Date: Sat, 17 Feb 2024 19:02:41 -0300 Message-ID: <20240217220242.62035-7-jamrial@gmail.com> X-Mailer: git-send-email 2.43.1 In-Reply-To: <20240217220242.62035-1-jamrial@gmail.com> References: <20240217220242.62035-1-jamrial@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 7/8] avformat/movenc: add support for Immersive Audio Model and Formats in ISOBMFF X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: Signed-off-by: James Almer --- libavformat/movenc.c | 349 +++++++++++++++++++++++++++++++++++-------- libavformat/movenc.h | 6 + 2 files changed, 293 insertions(+), 62 deletions(-) diff --git a/libavformat/movenc.c b/libavformat/movenc.c index c71a9983ed..cd63b353b8 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -32,6 +32,7 @@ #include "dovi_isom.h" #include "riff.h" #include "avio.h" +#include "iamf_writer.h" #include "isom.h" #include "av1.h" #include "avc.h" @@ -41,12 +42,14 @@ #include "libavcodec/flac.h" #include "libavcodec/get_bits.h" +#include "libavcodec/bsf.h" #include "libavcodec/internal.h" #include "libavcodec/put_bits.h" #include "libavcodec/vc1_common.h" #include "libavcodec/raw.h" #include "internal.h" #include "libavutil/avstring.h" +#include "libavutil/bprint.h" #include "libavutil/channel_layout.h" #include "libavutil/csp.h" #include "libavutil/intfloat.h" @@ -316,6 +319,32 @@ static int mov_write_sdtp_tag(AVIOContext *pb, MOVTrack *track) return update_size(pb, pos); } +static int mov_write_iacb_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track) +{ + AVIOContext *dyn_bc; + int64_t pos = avio_tell(pb); + uint8_t *dyn_buf = NULL; + int dyn_size; + int ret = avio_open_dyn_buf(&dyn_bc); + if (ret < 0) + return ret; + + avio_wb32(pb, 0); + ffio_wfourcc(pb, "iacb"); + avio_w8(pb, 1); // configurationVersion + + ret = ff_iamf_write_descriptors(track->iamf, dyn_bc, s); + if (ret < 0) + return ret; + + dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf); + ffio_write_leb(pb, dyn_size); + avio_write(pb, dyn_buf, dyn_size); + av_free(dyn_buf); + + return update_size(pb, pos); +} + static int mov_write_amr_tag(AVIOContext *pb, MOVTrack *track) { avio_wb32(pb, 0x11); /* size */ @@ -1358,6 +1387,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex ret = mov_write_wave_tag(s, pb, track); else if (track->tag == MKTAG('m','p','4','a')) ret = mov_write_esds_tag(pb, track); + else if (track->tag == MKTAG('i','a','m','f')) + ret = mov_write_iacb_tag(mov->fc, pb, track); else if (track->par->codec_id == AV_CODEC_ID_AMR_NB) ret = mov_write_amr_tag(pb, track); else if (track->par->codec_id == AV_CODEC_ID_AC3) @@ -2529,7 +2560,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex if (track->mode == MODE_AVIF) { mov_write_ccst_tag(pb); - if (s->nb_streams > 0 && track == &mov->tracks[1]) + if (mov->nb_streams > 0 && track == &mov->tracks[1]) mov_write_aux_tag(pb, "auxi"); } @@ -3124,9 +3155,9 @@ static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte avio_wb32(pb, 0); /* Version & flags */ avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */ avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */ - avio_wb16(pb, s->nb_streams); /* item_count */ + avio_wb16(pb, mov->nb_streams); /* item_count */ - for (int i = 0; i < s->nb_streams; i++) { + for (int i = 0; i < mov->nb_streams; i++) { avio_wb16(pb, i + 1); /* item_id */ avio_wb16(pb, 0); /* data_reference_index */ avio_wb16(pb, 1); /* extent_count */ @@ -3145,9 +3176,9 @@ static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte avio_wb32(pb, 0); /* size */ ffio_wfourcc(pb, "iinf"); avio_wb32(pb, 0); /* Version & flags */ - avio_wb16(pb, s->nb_streams); /* entry_count */ + avio_wb16(pb, mov->nb_streams); /* entry_count */ - for (int i = 0; i < s->nb_streams; i++) { + for (int i = 0; i < mov->nb_streams; i++) { int64_t infe_pos = avio_tell(pb); avio_wb32(pb, 0); /* size */ ffio_wfourcc(pb, "infe"); @@ -3216,7 +3247,7 @@ static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte int64_t pos = avio_tell(pb); avio_wb32(pb, 0); /* size */ ffio_wfourcc(pb, "ipco"); - for (int i = 0; i < s->nb_streams; i++) { + for (int i = 0; i < mov->nb_streams; i++) { mov_write_ispe_tag(pb, mov, s, i); mov_write_pixi_tag(pb, mov, s, i); mov_write_av1c_tag(pb, &mov->tracks[i]); @@ -3234,9 +3265,9 @@ static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatConte avio_wb32(pb, 0); /* size */ ffio_wfourcc(pb, "ipma"); avio_wb32(pb, 0); /* Version & flags */ - avio_wb32(pb, s->nb_streams); /* entry_count */ + avio_wb32(pb, mov->nb_streams); /* entry_count */ - for (int i = 0, index = 1; i < s->nb_streams; i++) { + for (int i = 0, index = 1; i < mov->nb_streams; i++) { avio_wb16(pb, i + 1); /* item_ID */ avio_w8(pb, 4); /* association_count */ @@ -4213,7 +4244,7 @@ static int mov_write_covr(AVIOContext *pb, AVFormatContext *s) int64_t pos = 0; int i; - for (i = 0; i < s->nb_streams; i++) { + for (i = 0; i < mov->nb_streams; i++) { MOVTrack *trk = &mov->tracks[i]; if (!is_cover_image(trk->st) || trk->cover_image->size <= 0) @@ -4360,7 +4391,7 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov, mov_write_pitm_tag(pb, 1); mov_write_iloc_tag(pb, mov, s); mov_write_iinf_tag(pb, mov, s); - if (s->nb_streams > 1) + if (mov->nb_streams > 1) mov_write_iref_tag(pb, mov, s); mov_write_iprp_tag(pb, mov, s); } else { @@ -4611,16 +4642,17 @@ static int mov_setup_track_ids(MOVMuxContext *mov, AVFormatContext *s) if (mov->use_stream_ids_as_track_ids) { int next_generated_track_id = 0; - for (i = 0; i < s->nb_streams; i++) { - if (s->streams[i]->id > next_generated_track_id) - next_generated_track_id = s->streams[i]->id; + for (i = 0; i < mov->nb_streams; i++) { + AVStream *st = mov->tracks[i].st; + if (st->id > next_generated_track_id) + next_generated_track_id = st->id; } for (i = 0; i < mov->nb_tracks; i++) { if (mov->tracks[i].entry <= 0 && !(mov->flags & FF_MOV_FLAG_FRAGMENT)) continue; - mov->tracks[i].track_id = i >= s->nb_streams ? ++next_generated_track_id : s->streams[i]->id; + mov->tracks[i].track_id = i >= mov->nb_streams ? ++next_generated_track_id : mov->tracks[i].st->id; } } else { for (i = 0; i < mov->nb_tracks; i++) { @@ -4657,7 +4689,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov, } if (mov->chapter_track) - for (i = 0; i < s->nb_streams; i++) { + for (i = 0; i < mov->nb_streams; i++) { mov->tracks[i].tref_tag = MKTAG('c','h','a','p'); mov->tracks[i].tref_id = mov->tracks[mov->chapter_track].track_id; } @@ -4697,7 +4729,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov, for (i = 0; i < mov->nb_tracks; i++) { if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) { - int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL); + int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < mov->nb_streams ? mov->tracks[i].st : NULL); if (ret < 0) return ret; } @@ -5489,10 +5521,20 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s) MOVMuxContext *mov = s->priv_data; int64_t pos = avio_tell(pb); int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0; + int has_iamf = 0; int i; - for (i = 0; i < s->nb_streams; i++) { - AVStream *st = s->streams[i]; + for (i = 0; i < s->nb_stream_groups; i++) { + const AVStreamGroup *stg = s->stream_groups[i]; + + if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT || + stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION) { + has_iamf = 1; + break; + } + } + for (i = 0; i < mov->nb_streams; i++) { + AVStream *st = mov->tracks[i].st; if (is_cover_image(st)) continue; if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) @@ -5560,6 +5602,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s) ffio_wfourcc(pb, "av01"); if (has_dolby) ffio_wfourcc(pb, "dby1"); + if (has_iamf) + ffio_wfourcc(pb, "iamf"); } else { if (mov->flags & FF_MOV_FLAG_FRAGMENT) ffio_wfourcc(pb, "iso6"); @@ -5667,8 +5711,8 @@ static int mov_write_identification(AVIOContext *pb, AVFormatContext *s) mov_write_ftyp_tag(pb,s); if (mov->mode == MODE_PSP) { int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb = 0; - for (i = 0; i < s->nb_streams; i++) { - AVStream *st = s->streams[i]; + for (i = 0; i < mov->nb_streams; i++) { + AVStream *st = mov->tracks[i].st; if (is_cover_image(st)) continue; if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) @@ -5855,7 +5899,7 @@ static int mov_write_squashed_packets(AVFormatContext *s) { MOVMuxContext *mov = s->priv_data; - for (int i = 0; i < s->nb_streams; i++) { + for (int i = 0; i < mov->nb_streams; i++) { MOVTrack *track = &mov->tracks[i]; int ret = AVERROR_BUG; @@ -5896,7 +5940,7 @@ static int mov_flush_fragment(AVFormatContext *s, int force) // of fragments was triggered automatically by an AVPacket, we // already have reliable info for the end of that track, but other // tracks may need to be filled in. - for (i = 0; i < s->nb_streams; i++) { + for (i = 0; i < mov->nb_streams; i++) { MOVTrack *track = &mov->tracks[i]; if (!track->end_reliable) { const AVPacket *pkt = ff_interleaved_peek(s, i); @@ -6097,10 +6141,8 @@ static int mov_auto_flush_fragment(AVFormatContext *s, int force) return ret; } -static int check_pkt(AVFormatContext *s, AVPacket *pkt) +static int check_pkt(AVFormatContext *s, MOVTrack *trk, AVPacket *pkt) { - MOVMuxContext *mov = s->priv_data; - MOVTrack *trk = &mov->tracks[pkt->stream_index]; int64_t ref; uint64_t duration; @@ -6138,15 +6180,21 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) { MOVMuxContext *mov = s->priv_data; AVIOContext *pb = s->pb; - MOVTrack *trk = &mov->tracks[pkt->stream_index]; - AVCodecParameters *par = trk->par; + MOVTrack *trk; + AVCodecParameters *par; AVProducerReferenceTime *prft; unsigned int samples_in_chunk = 0; int size = pkt->size, ret = 0, offset = 0; size_t prft_size; uint8_t *reformatted_data = NULL; - ret = check_pkt(s, pkt); + if (pkt->stream_index < s->nb_streams) + trk = s->streams[pkt->stream_index]->priv_data; + else // Timecode or chapter + trk = &mov->tracks[pkt->stream_index]; + par = trk->par; + + ret = check_pkt(s, trk, pkt); if (ret < 0) return ret; @@ -6236,7 +6284,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) if (par->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 && (AV_RB16(pkt->data) & 0xfff0) == 0xfff0) { - if (!s->streams[pkt->stream_index]->nb_frames) { + if (!trk->st->nb_frames) { av_log(s, AV_LOG_ERROR, "Malformed AAC bitstream detected: " "use the audio bitstream filter 'aac_adtstoasc' to fix it " "('-bsf:a aac_adtstoasc' option with ffmpeg)\n"); @@ -6498,18 +6546,18 @@ err: static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) { MOVMuxContext *mov = s->priv_data; - MOVTrack *trk = &mov->tracks[pkt->stream_index]; + MOVTrack *trk = s->streams[pkt->stream_index]->priv_data; AVCodecParameters *par = trk->par; int64_t frag_duration = 0; int size = pkt->size; - int ret = check_pkt(s, pkt); + int ret = check_pkt(s, trk, pkt); if (ret < 0) return ret; if (mov->flags & FF_MOV_FLAG_FRAG_DISCONT) { int i; - for (i = 0; i < s->nb_streams; i++) + for (i = 0; i < mov->nb_streams; i++) mov->tracks[i].frag_discont = 1; mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT; } @@ -6551,7 +6599,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) return 0; /* Discard 0 sized packets */ } - if (trk->entry && pkt->stream_index < s->nb_streams) + if (trk->entry && pkt->stream_index < mov->nb_streams) frag_duration = av_rescale_q(pkt->dts - trk->cluster[0].dts, s->streams[pkt->stream_index]->time_base, AV_TIME_BASE_Q); @@ -6606,17 +6654,80 @@ static int mov_write_subtitle_end_packet(AVFormatContext *s, return ret; } +static int mov_build_iamf_packet(AVFormatContext *s, MOVTrack *trk, AVPacket *pkt) +{ + int ret; + + if (pkt->stream_index == trk->first_iamf_idx) { + ret = ff_iamf_write_parameter_blocks(trk->iamf, trk->iamf_buf, pkt, s); + if (ret < 0) + return ret; + } + + ret = ff_iamf_write_audio_frame(trk->iamf, trk->iamf_buf, + s->streams[pkt->stream_index]->id, pkt); + if (ret < 0) + return ret; + + if (pkt->stream_index == trk->last_iamf_idx) { + uint8_t *data; + + ret = avio_close_dyn_buf(trk->iamf_buf, &data); + trk->iamf_buf = NULL; + + if (!ret) { + if (pkt->size) { + // Either all or none of the packets for a single + // IA Sample may be empty. + av_log(s, AV_LOG_ERROR, "Unexpected packet from " + "stream #%d\n", pkt->stream_index); + ret = AVERROR_INVALIDDATA; + } + av_free(data); + return ret; + } + av_buffer_unref(&pkt->buf); + pkt->buf = av_buffer_create(data, ret, NULL, NULL, 0); + if (!pkt->buf) { + av_free(data); + return AVERROR(ENOMEM); + } + pkt->data = data; + pkt->size = ret; + pkt->stream_index = trk->first_iamf_idx; + + ret = avio_open_dyn_buf(&trk->iamf_buf); + if (ret < 0) + return ret; + } else + ret = AVERROR(EAGAIN); + + return ret; +} + static int mov_write_packet(AVFormatContext *s, AVPacket *pkt) { MOVMuxContext *mov = s->priv_data; MOVTrack *trk; + int ret; if (!pkt) { mov_flush_fragment(s, 1); return 1; } - trk = &mov->tracks[pkt->stream_index]; + trk = s->streams[pkt->stream_index]->priv_data; + + if (trk->iamf) { + ret = mov_build_iamf_packet(s, trk, pkt); + if (ret < 0) { + if (ret == AVERROR(EAGAIN)) + return 0; + av_log(s, AV_LOG_ERROR, "Error assembling an IAMF packet " + "for stream #%d\n", trk->st->index); + return ret; + } + } if (is_cover_image(trk->st)) { int ret; @@ -6817,12 +6928,12 @@ static int mov_create_chapter_track(AVFormatContext *s, int tracknum) } -static int mov_check_timecode_track(AVFormatContext *s, AVTimecode *tc, int src_index, const char *tcstr) +static int mov_check_timecode_track(AVFormatContext *s, AVTimecode *tc, AVStream *src_st, const char *tcstr) { int ret; /* compute the frame number */ - ret = av_timecode_init_from_string(tc, s->streams[src_index]->avg_frame_rate, tcstr, s); + ret = av_timecode_init_from_string(tc, src_st->avg_frame_rate, tcstr, s); return ret; } @@ -6830,7 +6941,7 @@ static int mov_create_timecode_track(AVFormatContext *s, int index, int src_inde { MOVMuxContext *mov = s->priv_data; MOVTrack *track = &mov->tracks[index]; - AVStream *src_st = s->streams[src_index]; + AVStream *src_st = mov->tracks[src_index].st; uint8_t data[4]; AVPacket *pkt = mov->pkt; AVRational rate = src_st->avg_frame_rate; @@ -6890,8 +7001,8 @@ static void enable_tracks(AVFormatContext *s) first[i] = -1; } - for (i = 0; i < s->nb_streams; i++) { - AVStream *st = s->streams[i]; + for (i = 0; i < mov->nb_streams; i++) { + AVStream *st = mov->tracks[i].st; if (st->codecpar->codec_type <= AVMEDIA_TYPE_UNKNOWN || st->codecpar->codec_type >= AVMEDIA_TYPE_NB || @@ -6925,6 +7036,9 @@ static void mov_free(AVFormatContext *s) MOVMuxContext *mov = s->priv_data; int i; + for (i = 0; i < s->nb_streams; i++) + s->streams[i]->priv_data = NULL; + if (!mov->tracks) return; @@ -6954,6 +7068,11 @@ static void mov_free(AVFormatContext *s) ff_mov_cenc_free(&track->cenc); ffio_free_dyn_buf(&track->mdat_buf); + ffio_free_dyn_buf(&track->iamf_buf); + if (track->iamf) + ff_iamf_write_deinit(track->iamf); + av_freep(&track->iamf); + avpriv_packet_list_free(&track->squashed_packet_queue); } @@ -7027,6 +7146,66 @@ static int mov_create_dvd_sub_decoder_specific_info(MOVTrack *track, return 0; } +static int mov_init_iamf_track(AVFormatContext *s) +{ + MOVMuxContext *mov = s->priv_data; + MOVTrack *track = &mov->tracks[0]; // IAMF if present is always the first track + int nb_audio_elements = 0, nb_mix_presentations = 0; + int ret; + + for (int i = 0; i < s->nb_stream_groups; i++) { + const AVStreamGroup *stg = s->stream_groups[i]; + + if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT) + nb_audio_elements++; + if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION) + nb_mix_presentations++; + } + + if (!nb_audio_elements && !nb_mix_presentations) + return 0; + + if ((nb_audio_elements < 1 && nb_audio_elements > 2) || nb_mix_presentations < 1) { + av_log(s, AV_LOG_ERROR, "There must be >= 1 and <= 2 IAMF_AUDIO_ELEMENT and at least " + "one IAMF_MIX_PRESENTATION stream groups to write a IMAF track\n"); + return AVERROR(EINVAL); + } + + track->iamf = av_mallocz(sizeof(*track->iamf)); + if (!track->iamf) + return AVERROR(ENOMEM); + + for (int i = 0; i < s->nb_stream_groups; i++) { + const AVStreamGroup *stg = s->stream_groups[i]; + switch(stg->type) { + case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT: + for (int j = 0; j < stg->nb_streams; j++) { + track->first_iamf_idx = FFMIN(stg->streams[j]->index, track->first_iamf_idx); + track->last_iamf_idx = FFMAX(stg->streams[j]->index, track->last_iamf_idx); + stg->streams[j]->priv_data = track; + } + + ret = ff_iamf_add_audio_element(track->iamf, stg, s); + break; + case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION: + ret = ff_iamf_add_mix_presentation(track->iamf, stg, s); + break; + default: + av_assert0(0); + } + if (ret < 0) + return ret; + } + + track->tag = MKTAG('i','a','m','f'); + + ret = avio_open_dyn_buf(&track->iamf_buf); + if (ret < 0) + return ret; + + return 0; +} + static int mov_init(AVFormatContext *s) { MOVMuxContext *mov = s->priv_data; @@ -7164,7 +7343,37 @@ static int mov_init(AVFormatContext *s) s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT; } - mov->nb_tracks = s->nb_streams; + for (i = 0; i < s->nb_stream_groups; i++) { + AVStreamGroup *stg = s->stream_groups[i]; + + if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT) + continue; + + for (int j = 0; j < stg->nb_streams; j++) { + AVStream *st = stg->streams[j]; + + if (st->priv_data) { + av_log(s, AV_LOG_ERROR, "Stream %d is present in more than one Stream Group of type " + "IAMF Audio Element\n", j); + return AVERROR(EINVAL); + } + st->priv_data = st; + } + + if (!mov->nb_tracks) // We support one track for the entire IAMF structure + mov->nb_tracks++; + } + + for (i = 0; i < s->nb_streams; i++) { + AVStream *st = s->streams[i]; + if (st->priv_data) + continue; + st->priv_data = st; + mov->nb_tracks++; + } + + mov->nb_streams = mov->nb_tracks; + if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters) mov->chapter_track = mov->nb_tracks++; @@ -7190,7 +7399,7 @@ static int mov_init(AVFormatContext *s) if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && (t || (t=av_dict_get(st->metadata, "timecode", NULL, 0)))) { AVTimecode tc; - ret = mov_check_timecode_track(s, &tc, i, t->value); + ret = mov_check_timecode_track(s, &tc, st, t->value); if (ret >= 0) mov->nb_meta_tmcd++; } @@ -7239,18 +7448,33 @@ static int mov_init(AVFormatContext *s) } } + ret = mov_init_iamf_track(s); + if (ret < 0) + return ret; + + for (int j = 0, i = 0; j < s->nb_streams; j++) { + AVStream *st = s->streams[j]; + + if (st != st->priv_data) + continue; + st->priv_data = &mov->tracks[i++]; + } + for (i = 0; i < s->nb_streams; i++) { AVStream *st= s->streams[i]; - MOVTrack *track= &mov->tracks[i]; + MOVTrack *track = st->priv_data; AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0); - track->st = st; - track->par = st->codecpar; + if (!track->st) { + track->st = st; + track->par = st->codecpar; + } track->language = ff_mov_iso639_to_lang(lang?lang->value:"und", mov->mode!=MODE_MOV); if (track->language < 0) track->language = 32767; // Unspecified Macintosh language code track->mode = mov->mode; - track->tag = mov_find_codec_tag(s, track); + if (!track->tag) + track->tag = mov_find_codec_tag(s, track); if (!track->tag) { av_log(s, AV_LOG_ERROR, "Could not find tag for codec %s in stream #%d, " "codec not currently supported in container\n", @@ -7442,25 +7666,26 @@ static int mov_write_header(AVFormatContext *s) { AVIOContext *pb = s->pb; MOVMuxContext *mov = s->priv_data; - int i, ret, hint_track = 0, tmcd_track = 0, nb_tracks = s->nb_streams; + int i, ret, hint_track = 0, tmcd_track = 0, nb_tracks = mov->nb_streams; if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters) nb_tracks++; if (mov->flags & FF_MOV_FLAG_RTP_HINT) { hint_track = nb_tracks; - for (i = 0; i < s->nb_streams; i++) - if (rtp_hinting_needed(s->streams[i])) + for (i = 0; i < mov->nb_streams; i++) { + if (rtp_hinting_needed(mov->tracks[i].st)) nb_tracks++; + } } if (mov->nb_meta_tmcd) tmcd_track = nb_tracks; - for (i = 0; i < s->nb_streams; i++) { + for (i = 0; i < mov->nb_streams; i++) { int j; - AVStream *st= s->streams[i]; - MOVTrack *track= &mov->tracks[i]; + MOVTrack *track = &mov->tracks[i]; + AVStream *st = track->st; /* copy extradata if it exists */ if (st->codecpar->extradata_size) { @@ -7482,8 +7707,8 @@ static int mov_write_header(AVFormatContext *s) &(AVChannelLayout)AV_CHANNEL_LAYOUT_MONO)) continue; - for (j = 0; j < s->nb_streams; j++) { - AVStream *stj= s->streams[j]; + for (j = 0; j < mov->nb_streams; j++) { + AVStream *stj= mov->tracks[j].st; MOVTrack *trackj= &mov->tracks[j]; if (j == i) continue; @@ -7546,8 +7771,8 @@ static int mov_write_header(AVFormatContext *s) return ret; if (mov->flags & FF_MOV_FLAG_RTP_HINT) { - for (i = 0; i < s->nb_streams; i++) { - if (rtp_hinting_needed(s->streams[i])) { + for (i = 0; i < mov->nb_streams; i++) { + if (rtp_hinting_needed(mov->tracks[i].st)) { if ((ret = ff_mov_init_hinting(s, hint_track, i)) < 0) return ret; hint_track++; @@ -7559,8 +7784,8 @@ static int mov_write_header(AVFormatContext *s) const AVDictionaryEntry *t, *global_tcr = av_dict_get(s->metadata, "timecode", NULL, 0); /* Initialize the tmcd tracks */ - for (i = 0; i < s->nb_streams; i++) { - AVStream *st = s->streams[i]; + for (i = 0; i < mov->nb_streams; i++) { + AVStream *st = mov->tracks[i].st; t = global_tcr; if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { @@ -7569,7 +7794,7 @@ static int mov_write_header(AVFormatContext *s) t = av_dict_get(st->metadata, "timecode", NULL, 0); if (!t) continue; - if (mov_check_timecode_track(s, &tc, i, t->value) < 0) + if (mov_check_timecode_track(s, &tc, st, t->value) < 0) continue; if ((ret = mov_create_timecode_track(s, tmcd_track, i, tc)) < 0) return ret; @@ -7690,7 +7915,7 @@ static int mov_write_trailer(AVFormatContext *s) int64_t moov_pos; if (mov->need_rewrite_extradata) { - for (i = 0; i < s->nb_streams; i++) { + for (i = 0; i < mov->nb_streams; i++) { MOVTrack *track = &mov->tracks[i]; AVCodecParameters *par = track->par; @@ -7830,7 +8055,7 @@ static int avif_write_trailer(AVFormatContext *s) if (mov->moov_written) return 0; mov->is_animated_avif = s->streams[0]->nb_frames > 1; - if (mov->is_animated_avif && s->nb_streams > 1) { + if (mov->is_animated_avif && mov->nb_streams > 1) { // For animated avif with alpha channel, we need to write a tref tag // with type "auxl". mov->tracks[1].tref_tag = MKTAG('a', 'u', 'x', 'l'); @@ -7840,7 +8065,7 @@ static int avif_write_trailer(AVFormatContext *s) mov_write_meta_tag(pb, mov, s); moov_size = get_moov_size(s); - for (i = 0; i < s->nb_streams; i++) + for (i = 0; i < mov->nb_tracks; i++) mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8; if (mov->is_animated_avif) { @@ -7862,7 +8087,7 @@ static int avif_write_trailer(AVFormatContext *s) // write extent offsets. pos_backup = avio_tell(pb); - for (i = 0; i < s->nb_streams; i++) { + for (i = 0; i < mov->nb_streams; i++) { if (extent_offsets[i] != (uint32_t)extent_offsets[i]) { av_log(s, AV_LOG_ERROR, "extent offset does not fit in 32 bits\n"); return AVERROR_INVALIDDATA; diff --git a/libavformat/movenc.h b/libavformat/movenc.h index 60363198c9..08d580594d 100644 --- a/libavformat/movenc.h +++ b/libavformat/movenc.h @@ -170,6 +170,11 @@ typedef struct MOVTrack { unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed PacketList squashed_packet_queue; + + struct IAMFContext *iamf; + int first_iamf_idx; + int last_iamf_idx; + AVIOContext *iamf_buf; } MOVTrack; typedef enum { @@ -188,6 +193,7 @@ typedef struct MOVMuxContext { const AVClass *av_class; int mode; int64_t time; + int nb_streams; int nb_tracks; int nb_meta_tmcd; ///< number of new created tmcd track based on metadata (aka not data copy) int chapter_track; ///< qt chapter track number -- 2.43.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".