From: James Almer <jamrial@gmail.com> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH 4/6 v3] avformat/mov: add support for Immersive Audio Model and Formats in ISOBMFF Date: Wed, 31 Jan 2024 14:26:52 -0300 Message-ID: <20240131172654.15869-4-jamrial@gmail.com> (raw) In-Reply-To: <20240131172654.15869-1-jamrial@gmail.com> Signed-off-by: James Almer <jamrial@gmail.com> --- configure | 2 +- libavformat/Makefile | 3 +- libavformat/isom.h | 6 + libavformat/mov.c | 283 ++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 276 insertions(+), 18 deletions(-) diff --git a/configure b/configure index 68f675a4bc..42ba5ec502 100755 --- a/configure +++ b/configure @@ -3545,7 +3545,7 @@ matroska_demuxer_suggest="bzlib zlib" matroska_muxer_select="mpeg4audio riffenc aac_adtstoasc_bsf pgs_frame_merge_bsf vp9_superframe_bsf" mlp_demuxer_select="mlp_parser" mmf_muxer_select="riffenc" -mov_demuxer_select="iso_media riffdec" +mov_demuxer_select="iso_media riffdec iamf_frame_split_bsf" mov_demuxer_suggest="zlib" mov_muxer_select="iso_media riffenc rtpenc_chain vp9_superframe_bsf aac_adtstoasc_bsf ac3_parser" mp3_demuxer_select="mpegaudio_parser" diff --git a/libavformat/Makefile b/libavformat/Makefile index 05b9b8a115..4131279e69 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -364,7 +364,8 @@ OBJS-$(CONFIG_MMF_MUXER) += mmf.o rawenc.o OBJS-$(CONFIG_MODS_DEMUXER) += mods.o OBJS-$(CONFIG_MOFLEX_DEMUXER) += moflex.o OBJS-$(CONFIG_MOV_DEMUXER) += mov.o mov_chan.o mov_esds.o \ - qtpalette.o replaygain.o dovi_isom.o + qtpalette.o replaygain.o dovi_isom.o \ + iamf.o OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o vvc.o vpcc.o \ movenchint.o mov_chan.o rtp.o \ movenccenc.o movenc_ttml.o rawutils.o \ diff --git a/libavformat/isom.h b/libavformat/isom.h index a4925b3b08..9f202b3d73 100644 --- a/libavformat/isom.h +++ b/libavformat/isom.h @@ -33,6 +33,7 @@ #include "libavutil/stereo3d.h" #include "avio.h" +#include "iamf.h" #include "internal.h" #include "dv.h" @@ -167,6 +168,7 @@ typedef struct MOVStreamContext { AVIOContext *pb; int refcount; int pb_is_copied; + int id; ///< AVStream id int ffindex; ///< AVStream index int next_chunk; unsigned int chunk_count; @@ -261,6 +263,10 @@ typedef struct MOVStreamContext { AVEncryptionInfo *default_encrypted_sample; MOVEncryptionIndex *encryption_index; } cenc; + + IAMFContext *iamf; + uint8_t *iamf_descriptors; + int iamf_descriptors_size; } MOVStreamContext; typedef struct HEIFItem { diff --git a/libavformat/mov.c b/libavformat/mov.c index 555c72dc79..981a48e074 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -58,6 +58,7 @@ #include "internal.h" #include "avio_internal.h" #include "demux.h" +#include "iamf_parse.h" #include "dovi_isom.h" #include "riff.h" #include "isom.h" @@ -211,6 +212,7 @@ static int mov_read_covr(MOVContext *c, AVIOContext *pb, int type, int len) } st = c->fc->streams[c->fc->nb_streams - 1]; st->priv_data = sc; + sc->id = st->id; sc->refcount = 1; if (st->attached_pic.size >= 8 && id != AV_CODEC_ID_BMP) { @@ -835,6 +837,178 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; } +static int mov_read_iacb(MOVContext *c, AVIOContext *pb, MOVAtom atom) +{ + AVStream *st; + MOVStreamContext *sc; + FFIOContext b; + AVIOContext *descriptor_pb; + AVDictionary *metadata; + IAMFContext *iamf; + char args[32]; + int64_t start_time, duration; + unsigned size; + int nb_frames, disposition; + int version, ret; + + if (atom.size < 5) + return AVERROR_INVALIDDATA; + + if (c->fc->nb_streams < 1) + return 0; + + version = avio_r8(pb); + if (version != 1) { + av_log(c->fc, AV_LOG_ERROR, "%s configurationVersion %d", + version < 1 ? "invalid" : "unsupported", version); + return AVERROR_INVALIDDATA; + } + + size = ffio_read_leb(pb); + if (!size) + return AVERROR_INVALIDDATA; + + st = c->fc->streams[c->fc->nb_streams - 1]; + sc = st->priv_data; + + iamf = sc->iamf = av_mallocz(sizeof(*iamf)); + if (!iamf) + return AVERROR(ENOMEM); + + sc->iamf_descriptors = av_malloc(size); + if (!sc->iamf_descriptors) + return AVERROR(ENOMEM); + + sc->iamf_descriptors_size = size; + ret = avio_read(pb, sc->iamf_descriptors, size); + if (ret != size) + return ret < 0 ? ret : AVERROR_INVALIDDATA; + + ffio_init_read_context(&b, sc->iamf_descriptors, size); + descriptor_pb = &b.pub; + + ret = ff_iamfdec_read_descriptors(iamf, descriptor_pb, size, c->fc); + if (ret < 0) + return ret; + + metadata = st->metadata; + st->metadata = NULL; + start_time = st->start_time; + nb_frames = st->nb_frames; + duration = st->duration; + disposition = st->disposition; + + for (int i = 0; i < iamf->nb_audio_elements; i++) { + IAMFAudioElement *audio_element = iamf->audio_elements[i]; + AVStreamGroup *stg = + avformat_stream_group_create(c->fc, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT, NULL); + + if (!stg) { + ret = AVERROR(ENOMEM); + goto fail; + } + + stg->id = audio_element->audio_element_id; + stg->params.iamf_audio_element = audio_element->element; + audio_element->element = NULL; + + for (int j = 0; j < audio_element->nb_substreams; j++) { + IAMFSubStream *substream = &audio_element->substreams[j]; + AVStream *stream; + + if (!i && !j) + stream = st; + else + stream = avformat_new_stream(c->fc, NULL); + if (!stream) { + ret = AVERROR(ENOMEM); + goto fail; + } + + stream->start_time = start_time; + stream->nb_frames = nb_frames; + stream->duration = duration; + stream->disposition = disposition; + if (stream != st) { + stream->priv_data = sc; + sc->refcount++; + } + + ret = avcodec_parameters_copy(stream->codecpar, substream->codecpar); + if (ret < 0) + goto fail; + + stream->id = substream->audio_substream_id; + + avpriv_set_pts_info(st, 64, 1, sc->time_scale); + + ret = avformat_stream_group_add_stream(stg, stream); + if (ret < 0) + goto fail; + } + + ret = av_dict_copy(&stg->metadata, metadata, 0); + if (ret < 0) + goto fail; + } + + for (int i = 0; i < iamf->nb_mix_presentations; i++) { + IAMFMixPresentation *mix_presentation = iamf->mix_presentations[i]; + const AVIAMFMixPresentation *mix = mix_presentation->mix; + AVStreamGroup *stg = + avformat_stream_group_create(c->fc, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION, NULL); + + if (!stg) { + ret = AVERROR(ENOMEM); + goto fail; + } + + stg->id = mix_presentation->mix_presentation_id; + stg->params.iamf_mix_presentation = mix_presentation->mix; + mix_presentation->mix = NULL; + + for (int j = 0; j < mix->nb_submixes; j++) { + const AVIAMFSubmix *submix = mix->submixes[j]; + + for (int k = 0; k < submix->nb_elements; k++) { + const AVIAMFSubmixElement *submix_element = submix->elements[k]; + const AVStreamGroup *audio_element = NULL; + + for (int l = 0; l < c->fc->nb_stream_groups; l++) + if (c->fc->stream_groups[l]->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT && + c->fc->stream_groups[l]->id == submix_element->audio_element_id) { + audio_element = c->fc->stream_groups[l]; + break; + } + av_assert0(audio_element); + + for (int l = 0; l < audio_element->nb_streams; l++) { + ret = avformat_stream_group_add_stream(stg, audio_element->streams[l]); + if (ret < 0 && ret != AVERROR(EEXIST)) + goto fail; + } + } + } + + ret = av_dict_copy(&stg->metadata, metadata, 0); + if (ret < 0) + goto fail; + } + + snprintf(args, sizeof(args), "first_index=%d", st->index); + + ret = ff_stream_add_bitstream_filter(st, "iamf_frame_split", args); + if (ret == AVERROR_BSF_NOT_FOUND) { + av_log(c->fc, AV_LOG_ERROR, "iamf_frame_split bitstream filter " + "not found. This is a bug, please report it.\n"); + ret = AVERROR_BUG; + } +fail: + av_dict_free(&metadata); + + return ret; +} + static int mov_read_dec3(MOVContext *c, AVIOContext *pb, MOVAtom atom) { AVStream *st; @@ -1380,7 +1554,7 @@ static int64_t get_frag_time(AVFormatContext *s, AVStream *dst_st, // If the stream is referenced by any sidx, limit the search // to fragments that referenced this stream in the sidx if (sc->has_sidx) { - frag_stream_info = get_frag_stream_info(frag_index, index, dst_st->id); + frag_stream_info = get_frag_stream_info(frag_index, index, sc->id); if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE) return frag_stream_info->sidx_pts; if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE) @@ -1391,9 +1565,11 @@ static int64_t get_frag_time(AVFormatContext *s, AVStream *dst_st, for (i = 0; i < frag_index->item[index].nb_stream_info; i++) { AVStream *frag_stream = NULL; frag_stream_info = &frag_index->item[index].stream_info[i]; - for (j = 0; j < s->nb_streams; j++) - if (s->streams[j]->id == frag_stream_info->id) + for (j = 0; j < s->nb_streams; j++) { + MOVStreamContext *sc2 = s->streams[j]->priv_data; + if (sc2->id == frag_stream_info->id) frag_stream = s->streams[j]; + } if (!frag_stream) { av_log(s, AV_LOG_WARNING, "No stream matching sidx ID found.\n"); continue; @@ -1459,12 +1635,13 @@ static int update_frag_index(MOVContext *c, int64_t offset) for (i = 0; i < c->fc->nb_streams; i++) { // Avoid building frag index if streams lack track id. - if (c->fc->streams[i]->id < 0) { + MOVStreamContext *sc = c->fc->streams[i]->priv_data; + if (sc->id < 0) { av_free(frag_stream_info); return AVERROR_INVALIDDATA; } - frag_stream_info[i].id = c->fc->streams[i]->id; + frag_stream_info[i].id = sc->id; frag_stream_info[i].sidx_pts = AV_NOPTS_VALUE; frag_stream_info[i].tfdt_dts = AV_NOPTS_VALUE; frag_stream_info[i].next_trun_dts = AV_NOPTS_VALUE; @@ -3259,7 +3436,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom) "All samples in data stream index:id [%d:%d] have zero " "duration, stream set to be discarded by default. Override " "using AVStream->discard or -discard for ffmpeg command.\n", - st->index, st->id); + st->index, sc->id); st->discard = AVDISCARD_ALL; } sc->track_end = duration; @@ -4639,6 +4816,50 @@ static void fix_timescale(MOVContext *c, MOVStreamContext *sc) } } +static int mov_update_iamf_streams(MOVContext *c, const AVStream *st) +{ + const MOVStreamContext *sc = st->priv_data; + + for (int i = 0; i < sc->iamf->nb_audio_elements; i++) { + const AVStreamGroup *stg = NULL; + + for (int j = 0; j < c->fc->nb_stream_groups; j++) + if (c->fc->stream_groups[j]->id == sc->iamf->audio_elements[i]->audio_element_id) + stg = c->fc->stream_groups[j]; + av_assert0(stg); + + for (int j = 0; j < stg->nb_streams; j++) { + const FFStream *sti = cffstream(st); + AVStream *out = stg->streams[j]; + FFStream *out_sti = ffstream(stg->streams[j]); + + out->codecpar->bit_rate = 0; + + if (out == st) + continue; + + out->time_base = st->time_base; + out->start_time = st->start_time; + out->duration = st->duration; + out->nb_frames = st->nb_frames; + out->disposition = st->disposition; + out->discard = st->discard; + + av_assert0(!out_sti->index_entries); + out_sti->index_entries = av_malloc(sti->index_entries_allocated_size); + if (!out_sti->index_entries) + return AVERROR(ENOMEM); + + out_sti->index_entries_allocated_size = sti->index_entries_allocated_size; + out_sti->nb_index_entries = sti->nb_index_entries; + out_sti->skip_samples = sti->skip_samples; + memcpy(out_sti->index_entries, sti->index_entries, sti->index_entries_allocated_size); + } + } + + return 0; +} + static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom) { AVStream *st; @@ -4702,6 +4923,12 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom) mov_build_index(c, st); + if (sc->iamf) { + ret = mov_update_iamf_streams(c, st); + if (ret < 0) + return ret; + } + if (sc->dref_id-1 < sc->drefs_count && sc->drefs[sc->dref_id-1].path) { MOVDref *dref = &sc->drefs[sc->dref_id - 1]; if (c->enable_drefs) { @@ -4934,6 +5161,7 @@ static int heif_add_stream(MOVContext *c, HEIFItem *item) st->priv_data = sc; st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; st->codecpar->codec_id = mov_codec_id(st, item->type); + sc->id = st->id; sc->ffindex = st->index; c->trak_index = st->index; st->avg_frame_rate.num = st->avg_frame_rate.den = 1; @@ -5032,6 +5260,7 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom) avio_rb32(pb); /* modification time */ } st->id = (int)avio_rb32(pb); /* track id (NOT 0 !)*/ + sc->id = st->id; avio_rb32(pb); /* reserved */ /* highlevel (considering edits) duration in movie timebase */ @@ -5206,7 +5435,8 @@ static int mov_read_tfdt(MOVContext *c, AVIOContext *pb, MOVAtom atom) int64_t base_media_decode_time; for (i = 0; i < c->fc->nb_streams; i++) { - if (c->fc->streams[i]->id == frag->track_id) { + sc = c->fc->streams[i]->priv_data; + if (sc->id == frag->track_id) { st = c->fc->streams[i]; break; } @@ -5259,7 +5489,8 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) } for (i = 0; i < c->fc->nb_streams; i++) { - if (c->fc->streams[i]->id == frag->track_id) { + sc = c->fc->streams[i]->priv_data; + if (sc->id == frag->track_id) { st = c->fc->streams[i]; sti = ffstream(st); break; @@ -5562,7 +5793,8 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom) track_id = avio_rb32(pb); // Reference ID for (i = 0; i < c->fc->nb_streams; i++) { - if (c->fc->streams[i]->id == track_id) { + sc = c->fc->streams[i]->priv_data; + if (sc->id == track_id) { st = c->fc->streams[i]; break; } @@ -6454,7 +6686,8 @@ static int get_current_encryption_info(MOVContext *c, MOVEncryptionIndex **encry frag_stream_info = get_current_frag_stream_info(&c->frag_index); if (frag_stream_info) { for (i = 0; i < c->fc->nb_streams; i++) { - if (c->fc->streams[i]->id == frag_stream_info->id) { + *sc = c->fc->streams[i]->priv_data; + if ((*sc)->id == frag_stream_info->id) { st = c->fc->streams[i]; break; } @@ -7398,7 +7631,7 @@ static int cenc_filter(MOVContext *mov, AVStream* st, MOVStreamContext *sc, AVPa AVEncryptionInfo *encrypted_sample; int encrypted_index, ret; - frag_stream_info = get_frag_stream_info_from_pkt(&mov->frag_index, pkt, st->id); + frag_stream_info = get_frag_stream_info_from_pkt(&mov->frag_index, pkt, sc->id); encrypted_index = current_index; encryption_index = NULL; if (frag_stream_info) { @@ -8175,6 +8408,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = { { MKTAG('i','s','p','e'), mov_read_ispe }, { MKTAG('i','p','r','p'), mov_read_iprp }, { MKTAG('i','i','n','f'), mov_read_iinf }, +{ MKTAG('i','a','c','b'), mov_read_iacb }, { 0, NULL } }; @@ -8406,11 +8640,13 @@ static void mov_read_chapters(AVFormatContext *s) AVStream *st = NULL; FFStream *sti = NULL; chapter_track = mov->chapter_tracks[j]; - for (i = 0; i < s->nb_streams; i++) - if (s->streams[i]->id == chapter_track) { + for (i = 0; i < s->nb_streams; i++) { + sc = mov->fc->streams[i]->priv_data; + if (sc->id == chapter_track) { st = s->streams[i]; break; } + } if (!st) { av_log(s, AV_LOG_ERROR, "Referenced QT chapter track not found\n"); continue; @@ -8642,6 +8878,11 @@ static void mov_free_stream_context(AVFormatContext *s, AVStream *st) av_freep(&sc->spherical); av_freep(&sc->mastering); av_freep(&sc->coll); + + ff_iamf_uninit_context(sc->iamf); + av_freep(&sc->iamf); + av_freep(&sc->iamf_descriptors); + sc->iamf_descriptors_size = 0; } static int mov_read_close(AVFormatContext *s) @@ -8896,9 +9137,11 @@ static int mov_read_header(AVFormatContext *s) AVDictionaryEntry *tcr; int tmcd_st_id = -1; - for (j = 0; j < s->nb_streams; j++) - if (s->streams[j]->id == sc->timecode_track) + for (j = 0; j < s->nb_streams; j++) { + MOVStreamContext *sc2 = s->streams[j]->priv_data; + if (sc2->id == sc->timecode_track) tmcd_st_id = j; + } if (tmcd_st_id < 0 || tmcd_st_id == i) continue; @@ -9211,7 +9454,15 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt) if (st->codecpar->codec_id == AV_CODEC_ID_EIA_608 && sample->size > 8) ret = get_eia608_packet(sc->pb, pkt, sample->size); - else + else if (sc->iamf_descriptors_size) { + ret = av_new_packet(pkt, sc->iamf_descriptors_size); + if (ret < 0) + return ret; + pkt->pos = avio_tell(sc->pb); + memcpy(pkt->data, sc->iamf_descriptors, sc->iamf_descriptors_size); + sc->iamf_descriptors_size = 0; + ret = av_append_packet(sc->pb, pkt, sample->size); + } else ret = av_get_packet(sc->pb, pkt, sample->size); if (ret < 0) { if (should_retry(sc->pb, ret)) { -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-01-31 17:27 UTC|newest] Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-01-31 17:26 [FFmpeg-devel] [PATCH 1/6 v3] avcodec: add an Immersive Audio Model and Formats frame split bsf James Almer 2024-01-31 17:26 ` [FFmpeg-devel] [PATCH 2/6 v3] avformat/demux: support inserting bitstream filters in demuxing scenarios James Almer 2024-01-31 17:26 ` [FFmpeg-devel] [PATCH 3/6 v3] avformat/mov: make MOVStreamContext refcounted James Almer 2024-01-31 17:26 ` James Almer [this message] 2024-01-31 17:26 ` [FFmpeg-devel] [PATCH 5/6 v2] avcodec: add an Immersive Audio Model and Formats frame merge bsf James Almer 2024-01-31 17:26 ` [FFmpeg-devel] [PATCH 6/6 v2] avformat/movenc: add support for Immersive Audio Model and Formats in ISOBMFF James Almer 2024-02-03 14:50 ` Andreas Rheinhardt 2024-02-05 15:08 ` James Almer 2024-02-05 15:12 ` Andreas Rheinhardt 2024-02-05 15:12 ` James Almer 2024-02-05 15:28 ` Andreas Rheinhardt 2024-02-05 15:28 ` James Almer 2024-02-05 16:18 ` Andreas Rheinhardt 2024-02-05 16:51 ` James Almer 2024-02-05 17:06 ` Andreas Rheinhardt 2024-02-05 17:25 ` James Almer
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240131172654.15869-4-jamrial@gmail.com \ --to=jamrial@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git