[FFmpeg-devel] [PATCH] [RFC]avformat: introduce AVStreamGroup

From: James Almer <jamrial@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH] [RFC]avformat: introduce AVStreamGroup
Date: Wed,  6 Sep 2023 11:38:32 -0300
Message-ID: <20230906143832.54604-1-jamrial@gmail.com> (raw)

Signed-off-by: James Almer <jamrial@gmail.com>
---
This is an initial proof of concept for AVStream groups, something that's
needed for quite a few existing and upcoming formats that lavf has no way to
currently export. Said formats define a single video or audio stream composed
by merging several individualy multiplexed streams within a media file.
This is the case of HEIF, a format defining a tiled image where each tile is a
separate image (either hevc, av1, etc) all of which need to be decoded
individualy and then stitched together for presentation using container level
information; IAMF, a new audio format where several individual streams (mono or
stereo) need to be decoded individually and then combined to form audio streams
with different channel layouts; and MPEG-TS programs, currently exported as
AVProgram, which this new general purpose API would replace.
There may be others too, like something ISOBMFF specific and not HEIF related,
I'm told.

A new struct, AVStreamGroup, would cover all these cases by grouping the
relevant streams and propagating format specific metadata for the purpose of
combining them into what's expected for presentation (Something a filter for
example would have to take care of).

Missing from this first version is something like a type field, which could be
an enum listing the different currently known formats the user would then use
to interpret the attached metadata, defined perhaps in codecpar.extradata

I'd like to hear opinions and suggestions to improve and properly handle this.

 libavformat/avformat.c |  26 ++++++++
 libavformat/avformat.h | 143 ++++++++++++++++++++++++++++++++++++++++-
 libavformat/dump.c     |  65 +++++++++++++++++--
 libavformat/internal.h |  28 ++++++++
 libavformat/options.c  |  77 ++++++++++++++++++++++
 5 files changed, 332 insertions(+), 7 deletions(-)

diff --git a/libavformat/avformat.c b/libavformat/avformat.c
index 356b4de931..109b73ca43 100644
--- a/libavformat/avformat.c
+++ b/libavformat/avformat.c
@@ -74,6 +74,20 @@ void ff_free_stream(AVStream **pst)
     av_freep(pst);
 }
 
+void ff_free_stream_group(AVStreamGroup **pstg)
+{
+    AVStreamGroup *stg = *pstg;
+
+    if (!stg)
+        return;
+
+    av_freep(&stg->stream_index);
+    avcodec_parameters_free(&stg->codecpar);
+    av_freep(&stg->priv_data);
+
+    av_freep(pstg);
+}
+
 void ff_remove_stream(AVFormatContext *s, AVStream *st)
 {
     av_assert0(s->nb_streams>0);
@@ -82,6 +96,14 @@ void ff_remove_stream(AVFormatContext *s, AVStream *st)
     ff_free_stream(&s->streams[ --s->nb_streams ]);
 }
 
+void ff_remove_stream_group(AVFormatContext *s, AVStreamGroup *stg)
+{
+    av_assert0(s->nb_stream_groups > 0);
+    av_assert0(s->stream_groups[ s->nb_stream_groups - 1 ] == stg);
+
+    ff_free_stream_group(&s->stream_groups[ --s->nb_stream_groups ]);
+}
+
 /* XXX: suppress the packet queue */
 void ff_flush_packet_queue(AVFormatContext *s)
 {
@@ -112,6 +134,9 @@ void avformat_free_context(AVFormatContext *s)
 
     for (unsigned i = 0; i < s->nb_streams; i++)
         ff_free_stream(&s->streams[i]);
+    for (unsigned i = 0; i < s->nb_stream_groups; i++)
+        ff_free_stream_group(&s->stream_groups[i]);
+    s->nb_stream_groups = 0;
     s->nb_streams = 0;
 
     for (unsigned i = 0; i < s->nb_programs; i++) {
@@ -132,6 +157,7 @@ void avformat_free_context(AVFormatContext *s)
     av_dict_free(&si->id3v2_meta);
     av_packet_free(&si->pkt);
     av_packet_free(&si->parse_pkt);
+    av_freep(&s->stream_groups);
     av_freep(&s->streams);
     ff_flush_packet_queue(s);
     av_freep(&s->url);
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 1916aa2dc5..d18eafb933 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1007,6 +1007,59 @@ typedef struct AVStream {
     int pts_wrap_bits;
 } AVStream;
 
+typedef struct AVStreamGroup {
+    /**
+     * A class for @ref avoptions. Set on stream creation.
+     */
+    const AVClass *av_class;
+
+    /**
+     * Group index in AVFormatContext.
+     */
+    int index;
+
+    /**
+     * Format-specific group ID.
+     * decoding: set by libavformat
+     * encoding: set by the user, replaced by libavformat if left unset
+     */
+    int id;
+
+    /**
+     * Codec parameters associated with this stream group. Allocated and freed
+     * by libavformat in avformat_new_stream_group() and avformat_free_context()
+     * respectively.
+     *
+     * - demuxing: filled by libavformat on stream group creation or in
+     *             avformat_find_stream_info()
+     * - muxing: filled by the caller before avformat_write_header()
+     */
+    AVCodecParameters *codecpar;
+
+    void *priv_data;
+
+    /**
+     * Number of elements in AVStreamGroup.stream_index.
+     *
+     * Set by av_stream_group_add_stream() and av_stream_group_new_stream(), must not
+     * be modified by any other code.
+     */
+    int nb_stream_indexes;
+
+    /**
+     * A list of indexes of streams in the group. New entries are created with
+     * av_stream_group_add_stream() and av_stream_group_new_stream().
+     *
+     * - demuxing: entries are created by libavformat in avformat_open_input().
+     *             If AVFMTCTX_NOHEADER is set in ctx_flags, then new entries may also
+     *             appear in av_read_frame().
+     * - muxing: entries are created by the user before avformat_write_header().
+     *
+     * Freed by libavformat in avformat_free_context().
+     */
+    int *stream_index;
+} AVStreamGroup;
+
 struct AVCodecParserContext *av_stream_get_parser(const AVStream *s);
 
 #if FF_API_GET_END_PTS
@@ -1162,7 +1215,7 @@ typedef struct AVFormatContext {
     unsigned int nb_streams;
     /**
      * A list of all streams in the file. New streams are created with
-     * avformat_new_stream().
+     * avformat_new_stream() or av_stream_group_new_stream().
      *
      * - demuxing: streams are created by libavformat in avformat_open_input().
      *             If AVFMTCTX_NOHEADER is set in ctx_flags, then new streams may also
@@ -1713,6 +1766,27 @@ typedef struct AVFormatContext {
      * @return 0 on success, a negative AVERROR code on failure
      */
     int (*io_close2)(struct AVFormatContext *s, AVIOContext *pb);
+
+    /**
+     * Number of elements in AVFormatContext.stream_groups.
+     *
+     * Set by avformat_new_stream_group(), must not be modified by any other code.
+     */
+    unsigned int nb_stream_groups;
+
+    /**
+     * A list of all stream groups in the file. New groups are created with
+     * avformat_new_stream_group(), and filled with av_stream_group_add_stream() or
+     * av_stream_group_new_stream().
+     *
+     * - demuxing: groups may be created by libavformat in avformat_open_input().
+     *             If AVFMTCTX_NOHEADER is set in ctx_flags, then new groups may also
+     *             appear in av_read_frame().
+     * - muxing: groups may be created by the user before avformat_write_header().
+     *
+     * Freed by libavformat in avformat_free_context().
+     */
+    AVStreamGroup **stream_groups;
 } AVFormatContext;
 
 /**
@@ -1825,6 +1899,28 @@ const AVClass *avformat_get_class(void);
  */
 const AVClass *av_stream_get_class(void);
 
+/**
+ * Add a new empty stream group to a media file.
+ *
+ * When demuxing, it may be called by the demuxer in read_header(). If the
+ * flag AVFMTCTX_NOHEADER is set in s.ctx_flags, then it may also
+ * be called in read_packet().
+ *
+ * When muxing, may be called by the user before avformat_write_header().
+ *
+ * User is required to call avformat_free_context() to clean up the allocation
+ * by avformat_new_stream_group().
+ *
+ * New streams can be added to the group with av_stream_group_new_stream() and
+ * av_stream_group_add_stream().
+ *
+ * @param s media file handle
+ *
+ * @return newly created group or NULL on error.
+ * @see avformat_new_stream, av_stream_group_new_stream, av_stream_group_add_stream.
+ */
+AVStreamGroup *avformat_new_stream_group(AVFormatContext *s);
+
 /**
  * Add a new stream to a media file.
  *
@@ -1844,6 +1940,51 @@ const AVClass *av_stream_get_class(void);
  */
 AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c);
 
+/**
+ * Add a new stream to a stream group.
+ *
+ * When demuxing, it may be called by the demuxer in read_header(). If the
+ * flag AVFMTCTX_NOHEADER is set in s.ctx_flags, then it may also
+ * be called in read_packet().
+ *
+ * When muxing, may be called by the user before avformat_write_header() after
+ * having allocated a new group with avformat_new_stream_group().
+ *
+ * User is required to call avformat_free_context() to clean up the allocation
+ * by av_stream_group_new_stream().
+ *
+ * This is functionally the same as avformat_new_stream() while also adding the
+ * newly allocated stream to the group belonging to the media file.
+ *
+ * @param stg stream group belonging to a media file.
+ *
+ * @return newly created stream or NULL on error.
+ * @see av_stream_group_add_stream, avformat_new_stream_group.
+ */
+AVStream *av_stream_group_new_stream(AVStreamGroup *stg);
+
+/**
+ * Add an already allocated stream to a stream group.
+ *
+ * When demuxing, it may be called by the demuxer in read_header(). If the
+ * flag AVFMTCTX_NOHEADER is set in s.ctx_flags, then it may also
+ * be called in read_packet().
+ *
+ * When muxing, may be called by the user before avformat_write_header() after
+ * having allocated a new group with avformat_new_stream_group() and stream with
+ * avformat_new_stream().
+ *
+ * User is required to call avformat_free_context() to clean up the allocation
+ * by av_stream_group_add_stream().
+ *
+ * @param stg stream group belonging to a media file.
+ * @param st  stream in the media file to add to the group.
+ *
+ * @return 0 on success, or a negative AVERROR otherwise.
+ * @see avformat_new_stream, av_stream_group_new_stream, avformat_new_stream_group.
+ */
+int av_stream_group_add_stream(AVStreamGroup *stg, const AVStream *st);
+
 /**
  * Wrap an existing array as stream side data.
  *
diff --git a/libavformat/dump.c b/libavformat/dump.c
index d31e4c2ec6..b3b61e6b30 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c
@@ -509,7 +509,7 @@ static void dump_sidedata(void *ctx, const AVStream *st, const char *indent)
 
 /* "user interface" functions */
 static void dump_stream_format(const AVFormatContext *ic, int i,
-                               int index, int is_output)
+                               int group_index, int index, int is_output)
 {
     char buf[256];
     int flags = (is_output ? ic->oformat->flags : ic->iformat->flags);
@@ -517,6 +517,8 @@ static void dump_stream_format(const AVFormatContext *ic, int i,
     const FFStream *const sti = cffstream(st);
     const AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0);
     const char *separator = ic->dump_separator;
+    const char *group_indent = group_index >= 0 ? "  " : "";
+    const char *extra_indent = group_index >= 0 ? "      " : "    ";
     AVCodecContext *avctx;
     int ret;
 
@@ -543,7 +545,10 @@ static void dump_stream_format(const AVFormatContext *ic, int i,
     avcodec_string(buf, sizeof(buf), avctx, is_output);
     avcodec_free_context(&avctx);
 
-    av_log(NULL, AV_LOG_INFO, "  Stream #%d:%d", index, i);
+    av_log(NULL, AV_LOG_INFO, "%s  Stream #%d", group_indent, index);
+    if (group_index >= 0)
+        av_log(NULL, AV_LOG_INFO, ":%d", group_index);
+    av_log(NULL, AV_LOG_INFO, ":%d", i);
 
     /* the pid is an important information, so we display it */
     /* XXX: add a generic system */
@@ -621,9 +626,54 @@ static void dump_stream_format(const AVFormatContext *ic, int i,
         av_log(NULL, AV_LOG_INFO, " (non-diegetic)");
     av_log(NULL, AV_LOG_INFO, "\n");
 
-    dump_metadata(NULL, st->metadata, "    ");
+    dump_metadata(NULL, st->metadata, extra_indent);
 
-    dump_sidedata(NULL, st, "    ");
+    dump_sidedata(NULL, st, extra_indent);
+}
+
+static void dump_stream_group(const AVFormatContext *ic, uint8_t *printed,
+                              int i, int index, int is_output)
+{
+    const AVStreamGroup *stg = ic->stream_groups[i];
+    const char *separator = ic->dump_separator;
+    const AVCodecParameters *par = stg->codecpar;
+    char buf[512];
+    int ret;
+
+    av_log(NULL, AV_LOG_INFO, "  Stream group #%d:%d:", index, i);
+
+    switch (par->codec_type) {
+    case AVMEDIA_TYPE_VIDEO:
+        av_log(NULL, AV_LOG_INFO, " Video: %s%s", avcodec_get_name(par->codec_id),
+               separator);
+        av_log(NULL, AV_LOG_INFO, "%dx%d", par->width, par->height);
+        if (par->sample_aspect_ratio.num) {
+            AVRational display_aspect_ratio;
+            av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den,
+                      par->width  * (int64_t)par->sample_aspect_ratio.num,
+                      par->height * (int64_t)par->sample_aspect_ratio.den,
+                      1024 * 1024);
+            av_log(NULL, AV_LOG_INFO, ", SAR %d:%d DAR %d:%d",
+                   par->sample_aspect_ratio.num, par->sample_aspect_ratio.den,
+                   display_aspect_ratio.num, display_aspect_ratio.den);
+        }
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        av_log(NULL, AV_LOG_INFO, " Audio: %s%s", avcodec_get_name(par->codec_id),
+               separator);
+        if (par->sample_rate)
+            av_log(NULL, AV_LOG_INFO, "%d Hz, ", par->sample_rate);
+        ret = av_channel_layout_describe(&par->ch_layout, buf, sizeof(buf));
+        if (ret >= 0)
+            av_log(NULL, AV_LOG_INFO, "%s", buf);
+        break;
+    }
+    av_log(NULL, AV_LOG_INFO, "\n");
+
+    for (int j = 0; j < stg->nb_stream_indexes; j++) {
+        dump_stream_format(ic, j, i, index, is_output);
+        printed[stg->stream_index[j]] = 1;
+    }
 }
 
 void av_dump_format(AVFormatContext *ic, int index,
@@ -699,7 +749,7 @@ void av_dump_format(AVFormatContext *ic, int index,
             dump_metadata(NULL, program->metadata, "    ");
             for (k = 0; k < program->nb_stream_indexes; k++) {
                 dump_stream_format(ic, program->stream_index[k],
-                                   index, is_output);
+                                    -1, index, is_output);
                 printed[program->stream_index[k]] = 1;
             }
             total += program->nb_stream_indexes;
@@ -708,9 +758,12 @@ void av_dump_format(AVFormatContext *ic, int index,
             av_log(NULL, AV_LOG_INFO, "  No Program\n");
     }
 
+    for (i = 0; i < ic->nb_stream_groups; i++)
+         dump_stream_group(ic, printed, i, index, is_output);
+
     for (i = 0; i < ic->nb_streams; i++)
         if (!printed[i])
-            dump_stream_format(ic, i, index, is_output);
+            dump_stream_format(ic, i, -1, index, is_output);
 
     av_free(printed);
 }
diff --git a/libavformat/internal.h b/libavformat/internal.h
index 594afd731d..116a41ce31 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -199,6 +199,7 @@ typedef struct FFStream {
      */
     AVStream pub;
 
+    AVFormatContext *fmtctx;
     /**
      * Set to 1 if the codec allows reordering, so pts can be different
      * from dts.
@@ -422,6 +423,21 @@ static av_always_inline const FFStream *cffstream(const AVStream *st)
     return (FFStream*)st;
 }
 
+typedef struct FFStreamGroup {
+    /**
+     * The public context.
+     */
+    AVStreamGroup pub;
+
+    AVFormatContext *fmtctx;
+} FFStreamGroup;
+
+
+static av_always_inline FFStreamGroup *ffstreamgroup(AVStreamGroup *stg)
+{
+    return (FFStreamGroup*)stg;
+}
+
 #ifdef __GNUC__
 #define dynarray_add(tab, nb_ptr, elem)\
 do {\
@@ -603,6 +619,18 @@ void ff_free_stream(AVStream **st);
  */
 void ff_remove_stream(AVFormatContext *s, AVStream *st);
 
+/**
+ * Frees a stream group without modifying the corresponding AVFormatContext.
+ * Must only be called if the latter doesn't matter or if the stream
+ * is not yet attached to an AVFormatContext.
+ */
+void ff_free_stream_group(AVStreamGroup **pstg);
+/**
+ * Remove a stream group from its AVFormatContext and free it.
+ * The group must be the last stream of the AVFormatContext.
+ */
+void ff_remove_stream_group(AVFormatContext *s, AVStreamGroup *stg);
+
 unsigned int ff_codec_get_tag(const AVCodecTag *tags, enum AVCodecID id);
 
 enum AVCodecID ff_codec_get_id(const AVCodecTag *tags, unsigned int tag);
diff --git a/libavformat/options.c b/libavformat/options.c
index e4a3aceed0..e0ac179c71 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -268,6 +268,7 @@ AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c)
     if (!st->codecpar)
         goto fail;
 
+    sti->fmtctx = s;
     sti->avctx = avcodec_alloc_context3(NULL);
     if (!sti->avctx)
         goto fail;
@@ -320,6 +321,82 @@ fail:
     return NULL;
 }
 
+AVStreamGroup *avformat_new_stream_group(AVFormatContext *s)
+{
+    AVStreamGroup **stream_groups;
+    AVStreamGroup *stg;
+    FFStreamGroup *stgi;
+
+    stream_groups = av_realloc_array(s->stream_groups, s->nb_stream_groups + 1,
+                                     sizeof(*stream_groups));
+    if (!stream_groups)
+        return NULL;
+    s->stream_groups = stream_groups;
+
+    stgi = av_mallocz(sizeof(*stgi));
+    if (!stgi)
+        return NULL;
+    stg = &stgi->pub;
+
+    stg->codecpar = avcodec_parameters_alloc();
+    if (!stg->codecpar)
+        goto fail;
+
+    stgi->fmtctx = s;
+    stg->index   = s->nb_stream_groups;
+
+    s->stream_groups[s->nb_stream_groups++] = stg;
+
+    return stg;
+fail:
+    ff_free_stream_group(&stg);
+    return NULL;
+}
+
+static int stream_group_add_stream(AVStreamGroup *stg, int index)
+{
+    int *stream_index = av_realloc_array(stg->stream_index, stg->nb_stream_indexes + 1,
+                                         sizeof(*stg->stream_index));
+    if (!stream_index)
+        return AVERROR(ENOMEM);
+
+    stg->stream_index = stream_index;
+    stg->stream_index[stg->nb_stream_indexes++] = index;
+
+    return 0;
+}
+
+AVStream *av_stream_group_new_stream(AVStreamGroup *stg)
+{
+    FFStreamGroup *stgi = ffstreamgroup(stg);
+    AVStream *st = avformat_new_stream(stgi->fmtctx, NULL);
+
+    if (!st)
+        return NULL;
+
+    if (stream_group_add_stream(stg, st->index) < 0) {
+        ff_remove_stream(stgi->fmtctx, st);
+        return NULL;
+    }
+
+    return st;
+}
+
+int av_stream_group_add_stream(AVStreamGroup *stg, const AVStream *st)
+{
+    FFStreamGroup *stgi = ffstreamgroup(stg);
+    const FFStream *sti = cffstream(st);
+
+    if (stgi->fmtctx != sti->fmtctx)
+        return AVERROR(EINVAL);
+
+    for (int i = 0; i < stg->nb_stream_indexes; i++)
+        if (stg->stream_index[i] == st->index)
+            return AVERROR(EINVAL);
+
+    return stream_group_add_stream(stg, st->index);
+}
+
 static int option_is_disposition(const AVOption *opt)
 {
     return opt->type == AV_OPT_TYPE_CONST &&
-- 
2.42.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".