* [FFmpeg-devel] [PATCH 1/9] avutil/mem: add av_dynarray2_add_nofree
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-30 10:39 ` Anton Khirnov
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb() James Almer
` (11 subsequent siblings)
12 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavutil/mem.c | 17 +++++++++++++++++
libavutil/mem.h | 32 +++++++++++++++++++++++++++++---
2 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 36b8940a0c..bd37710968 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -356,6 +356,23 @@ void *av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size,
return tab_elem_data;
}
+void *av_dynarray2_add_nofree(void **tab_ptr, int *nb_ptr, size_t elem_size,
+ const uint8_t *elem_data)
+{
+ uint8_t *tab_elem_data = NULL;
+
+ FF_DYNARRAY_ADD(INT_MAX, elem_size, *tab_ptr, *nb_ptr, {
+ tab_elem_data = (uint8_t *)*tab_ptr + (*nb_ptr) * elem_size;
+ if (elem_data)
+ memcpy(tab_elem_data, elem_data, elem_size);
+ else if (CONFIG_MEMORY_POISONING)
+ memset(tab_elem_data, FF_MEMORY_POISON, elem_size);
+ }, {
+ return NULL;
+ });
+ return tab_elem_data;
+}
+
static void fill16(uint8_t *dst, int len)
{
uint32_t v = AV_RN16(dst - 2);
diff --git a/libavutil/mem.h b/libavutil/mem.h
index ab7648ac57..c0161be243 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -519,7 +519,7 @@ void av_memcpy_backptr(uint8_t *dst, int back, int cnt);
* @param[in,out] tab_ptr Pointer to the array to grow
* @param[in,out] nb_ptr Pointer to the number of elements in the array
* @param[in] elem Element to add
- * @see av_dynarray_add_nofree(), av_dynarray2_add()
+ * @see av_dynarray_add_nofree(), av_dynarray2_add(), av_dynarray2_add_nofree()
*/
void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem);
@@ -531,7 +531,7 @@ void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem);
* instead and leave current buffer untouched.
*
* @return >=0 on success, negative otherwise
- * @see av_dynarray_add(), av_dynarray2_add()
+ * @see av_dynarray_add(), av_dynarray2_add(), av_dynarray2_add_nofree()
*/
av_warn_unused_result
int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem);
@@ -557,11 +557,37 @@ int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem);
*
* @return Pointer to the data of the element to copy in the newly allocated
* space
- * @see av_dynarray_add(), av_dynarray_add_nofree()
+ * @see av_dynarray2_add_nofree(), av_dynarray_add(), av_dynarray_add_nofree()
*/
void *av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size,
const uint8_t *elem_data);
+/**
+ * Add an element of size `elem_size` to a dynamic array.
+ *
+ * The array is reallocated when its number of elements reaches powers of 2.
+ * Therefore, the amortized cost of adding an element is constant.
+ *
+ * In case of success, the pointer to the array is updated in order to
+ * point to the new grown array, and the number pointed to by `nb_ptr`
+ * is incremented.
+ * In case of failure, the array and `nb_ptr` are left untouched, and NULL
+ * is returned.
+ *
+ * @param[in,out] tab_ptr Pointer to the array to grow
+ * @param[in,out] nb_ptr Pointer to the number of elements in the array
+ * @param[in] elem_size Size in bytes of an element in the array
+ * @param[in] elem_data Pointer to the data of the element to add. If
+ * `NULL`, the space of the newly added element is
+ * allocated but left uninitialized.
+ *
+ * @return Pointer to the data of the element to copy in the newly allocated
+ * space on success, NULL otherwise.
+ * @see av_dynarray2_add(), av_dynarray_add(), av_dynarray_add_nofree()
+ */
+void *av_dynarray2_add_nofree(void **tab_ptr, int *nb_ptr, size_t elem_size,
+ const uint8_t *elem_data);
+
/**
* @}
*/
--
2.42.1
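For reference, a minimal usage sketch of the new helper (not part of the patch; the Point type and the loop are purely illustrative):

#include "libavutil/error.h"
#include "libavutil/mem.h"

typedef struct Point { int x, y; } Point;

static int append_points(void)
{
    Point *points    = NULL;  // grown by av_dynarray2_add_nofree()
    int    nb_points = 0;

    for (int i = 0; i < 16; i++) {
        Point p = { i, i * i };
        // Copies elem_size bytes into the newly grown slot; on failure the
        // existing array and counter are left untouched and NULL is returned.
        if (!av_dynarray2_add_nofree((void **)&points, &nb_points,
                                     sizeof(p), (const uint8_t *)&p)) {
            av_freep(&points);
            return AVERROR(ENOMEM);
        }
    }

    av_freep(&points);
    return 0;
}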
* [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb()
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 1/9] avutil/mem: add av_dynarray2_add_nofree James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-26 13:40 ` Leo Izen
2023-11-30 10:40 ` Anton Khirnov
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 3/9] avformat/aviobuf: add ffio_read_leb() and ffio_write_leb() James Almer
` (10 subsequent siblings)
12 siblings, 2 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/bitstream.h | 2 ++
libavcodec/bitstream_template.h | 22 ++++++++++++++++++++++
libavcodec/get_bits.h | 23 +++++++++++++++++++++++
3 files changed, 47 insertions(+)
diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
index 35b7873b9c..17f8a5da83 100644
--- a/libavcodec/bitstream.h
+++ b/libavcodec/bitstream.h
@@ -103,6 +103,7 @@
# define bits_apply_sign bits_apply_sign_le
# define bits_read_vlc bits_read_vlc_le
# define bits_read_vlc_multi bits_read_vlc_multi_le
+# define bits_read_leb bits_read_leb_le
#elif defined(BITS_DEFAULT_BE)
@@ -132,6 +133,7 @@
# define bits_apply_sign bits_apply_sign_be
# define bits_read_vlc bits_read_vlc_be
# define bits_read_vlc_multi bits_read_vlc_multi_be
+# define bits_read_leb bits_read_leb_be
#endif
diff --git a/libavcodec/bitstream_template.h b/libavcodec/bitstream_template.h
index 4f3d07275f..86cbab288e 100644
--- a/libavcodec/bitstream_template.h
+++ b/libavcodec/bitstream_template.h
@@ -562,6 +562,28 @@ static inline int BS_FUNC(read_vlc_multi)(BSCTX *bc, uint8_t dst[8],
return ret;
}
+static inline unsigned BS_FUNC(read_leb)(BSCTX *bc) {
+ int more, i = 0;
+ unsigned leb = 0;
+
+ do {
+ unsigned bits;
+ int byte = BS_FUNC(read)(bc, 8);
+ more = byte & 0x80;
+ bits = byte & 0x7f;
+ if (i <= 3 || (i == 4 && bits < (1 << 4))) {
+ leb |= bits << (i * 7);
+ } else if (bits) { // leb > UINT_MAX
+ leb |= (bits & 0xF) << (i * 7);
+ break;
+ }
+ if (++i == 8 && more)
+ break; // invalid leb
+ } while (more);
+
+ return leb;
+}
+
#undef BSCTX
#undef BS_FUNC
#undef BS_JOIN3
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index cfcf97c021..cf9d5129b5 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -94,6 +94,7 @@ typedef BitstreamContext GetBitContext;
#define align_get_bits bits_align
#define get_vlc2 bits_read_vlc
#define get_vlc_multi bits_read_vlc_multi
+#define get_leb bits_read_leb
#define init_get_bits8_le(s, buffer, byte_size) bits_init8_le((BitstreamContextLE*)s, buffer, byte_size)
#define get_bits_le(s, n) bits_read_le((BitstreamContextLE*)s, n)
@@ -710,6 +711,28 @@ static inline int skip_1stop_8data_bits(GetBitContext *gb)
return 0;
}
+static inline unsigned get_leb(GetBitContext *gb) {
+ int more, i = 0;
+ unsigned leb = 0;
+
+ do {
+ unsigned bits;
+ int byte = get_bits(gb, 8);
+ more = byte & 0x80;
+ bits = byte & 0x7f;
+ if (i <= 3 || (i == 4 && bits < (1 << 4))) {
+ leb |= bits << (i * 7);
+ } else if (bits) { // leb > UINT_MAX
+ leb |= (bits & 0xF) << (i * 7);
+ break;
+ }
+ if (++i == 8 && more)
+ break; // invalid leb
+ } while (more);
+
+ return leb;
+}
+
#endif // CACHED_BITSTREAM_READER
#endif /* AVCODEC_GET_BITS_H */
--
2.42.1
* Re: [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb()
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb() James Almer
@ 2023-11-26 13:40 ` Leo Izen
2023-11-26 13:43 ` James Almer
2023-11-30 10:40 ` Anton Khirnov
1 sibling, 1 reply; 29+ messages in thread
From: Leo Izen @ 2023-11-26 13:40 UTC (permalink / raw)
To: ffmpeg-devel
On 11/25/23 20:28, James Almer wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> libavcodec/bitstream.h | 2 ++
> libavcodec/bitstream_template.h | 22 ++++++++++++++++++++++
> libavcodec/get_bits.h | 23 +++++++++++++++++++++++
> 3 files changed, 47 insertions(+)
>
Out of curiosity, why do this (and the avio version) have to be global?
What are they used for other than the iamf demuxer? Would it make sense
to have a static inline read_leb function defined in iamf.c?
- Leo Izen (Traneptora)
* Re: [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb()
2023-11-26 13:40 ` Leo Izen
@ 2023-11-26 13:43 ` James Almer
0 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 13:43 UTC (permalink / raw)
To: ffmpeg-devel
On 11/26/2023 10:40 AM, Leo Izen wrote:
> On 11/25/23 20:28, James Almer wrote:
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>> libavcodec/bitstream.h | 2 ++
>> libavcodec/bitstream_template.h | 22 ++++++++++++++++++++++
>> libavcodec/get_bits.h | 23 +++++++++++++++++++++++
>> 3 files changed, 47 insertions(+)
>>
>
> Out of curiosity, why do this (and the avio version) have to be global?
> What are they used for other than the iamf demuxer? Would it make sense
> to have a static inline read_leb function defined in iamf.c?
They can be used by the av1 modules too, and potentially other formats
in the future using the OBU encapsulation.
* Re: [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb()
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb() James Almer
2023-11-26 13:40 ` Leo Izen
@ 2023-11-30 10:40 ` Anton Khirnov
2023-11-30 12:49 ` Paul B Mahol
2023-11-30 13:08 ` James Almer
1 sibling, 2 replies; 29+ messages in thread
From: Anton Khirnov @ 2023-11-30 10:40 UTC (permalink / raw)
To: FFmpeg development discussions and patches
> add get_leb()
Do you expect people to understand what this means?
--
Anton Khirnov
* Re: [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb()
2023-11-30 10:40 ` Anton Khirnov
@ 2023-11-30 12:49 ` Paul B Mahol
2023-11-30 13:08 ` James Almer
1 sibling, 0 replies; 29+ messages in thread
From: Paul B Mahol @ 2023-11-30 12:49 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Thu, Nov 30, 2023 at 11:40 AM Anton Khirnov <anton@khirnov.net> wrote:
> > add get_leb()
>
> Do you expect people to understand what this means?
>
get_leb() : get little-endian bits.
> --
> Anton Khirnov
* Re: [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb()
2023-11-30 10:40 ` Anton Khirnov
2023-11-30 12:49 ` Paul B Mahol
@ 2023-11-30 13:08 ` James Almer
1 sibling, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-30 13:08 UTC (permalink / raw)
To: ffmpeg-devel
On 11/30/2023 7:40 AM, Anton Khirnov wrote:
>> add get_leb()
>
> Do you expect people to understand what this means?
Will add "Read an unsigned integer coded as a variable number of
little-endian bytes".
* [FFmpeg-devel] [PATCH 3/9] avformat/aviobuf: add ffio_read_leb() and ffio_write_leb()
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 1/9] avutil/mem: add av_dynarray2_add_nofree James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 2/9] avcodec/get_bits: add get_leb() James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API James Almer
` (9 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavformat/avio_internal.h | 4 ++++
libavformat/aviobuf.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)
diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h
index bd58499b64..6b6cd6e8b3 100644
--- a/libavformat/avio_internal.h
+++ b/libavformat/avio_internal.h
@@ -146,6 +146,10 @@ int ffio_rewind_with_probe_data(AVIOContext *s, unsigned char **buf, int buf_siz
uint64_t ffio_read_varlen(AVIOContext *bc);
+unsigned int ffio_read_leb(AVIOContext *s);
+
+void ffio_write_leb(AVIOContext *s, unsigned val);
+
/**
* Read size bytes from AVIOContext into buf.
* Check that exactly size bytes have been read.
diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 2899c75521..cdd1528155 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -971,6 +971,43 @@ uint64_t ffio_read_varlen(AVIOContext *bc){
return val;
}
+unsigned int ffio_read_leb(AVIOContext *s) {
+ int more, i = 0;
+ unsigned leb = 0;
+
+ do {
+ int byte = avio_r8(s);
+ unsigned bits = byte & 0x7f;
+ more = byte & 0x80;
+ if (i <= 3 || (i == 4 && bits < (1 << 4)))
+ leb |= bits << (i * 7);
+ else if (bits) { // leb > UINT_MAX
+ leb |= (bits & 0xF) << (i * 7);
+ break;
+ }
+ if (++i == 8 && more)
+ break; // invalid leb
+ } while (more);
+
+ return leb;
+}
+
+void ffio_write_leb(AVIOContext *s, unsigned val)
+{
+ int len;
+ uint8_t byte;
+
+ len = (av_log2(val) + 7) / 7;
+
+ for (int i = 0; i < len; i++) {
+ byte = val >> (7 * i) & 0x7f;
+ if (i < len - 1)
+ byte |= 0x80;
+
+ avio_w8(s, byte);
+ }
+}
+
int ffio_fdopen(AVIOContext **s, URLContext *h)
{
uint8_t *buffer = NULL;
--
2.42.1
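For reference, a round-trip sketch of the two helpers (not part of the patch). The ffio_*() functions are libavformat-internal, so this only applies to code that can include avio_internal.h; the dynamic buffer comes from the public avio API:

#include "libavformat/avio.h"
#include "libavformat/avio_internal.h"
#include "libavutil/error.h"
#include "libavutil/mem.h"

static int leb_roundtrip(void)
{
    AVIOContext *out = NULL, *in = NULL;
    uint8_t *buf = NULL;
    unsigned val;
    int size, ret;

    ret = avio_open_dyn_buf(&out);
    if (ret < 0)
        return ret;
    ffio_write_leb(out, 300);                  // emits 0xAC 0x02
    size = avio_close_dyn_buf(out, &buf);

    // Wrap the written bytes in a read-only context and read them back.
    in = avio_alloc_context(buf, size, 0, NULL, NULL, NULL, NULL);
    if (!in) {
        av_free(buf);
        return AVERROR(ENOMEM);
    }
    val = ffio_read_leb(in);                   // val == 300

    avio_context_free(&in);
    av_free(buf);
    return val == 300 ? 0 : AVERROR_BUG;
}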
* [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (2 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 3/9] avformat/aviobuf: add ffio_read_leb() and ffio_write_leb() James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-30 11:01 ` Anton Khirnov
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 5/9] avformat: introduce AVStreamGroup James Almer
` (8 subsequent siblings)
12 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavutil/Makefile | 2 +
libavutil/iamf.c | 582 +++++++++++++++++++++++++++++++++++++++++++++
libavutil/iamf.h | 377 +++++++++++++++++++++++++++++
3 files changed, 961 insertions(+)
create mode 100644 libavutil/iamf.c
create mode 100644 libavutil/iamf.h
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 4711f8cde8..62cc1a1831 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -51,6 +51,7 @@ HEADERS = adler32.h \
hwcontext_videotoolbox.h \
hwcontext_vdpau.h \
hwcontext_vulkan.h \
+ iamf.h \
imgutils.h \
intfloat.h \
intreadwrite.h \
@@ -140,6 +141,7 @@ OBJS = adler32.o \
hdr_dynamic_vivid_metadata.o \
hmac.o \
hwcontext.o \
+ iamf.o \
imgutils.o \
integer.o \
intmath.o \
diff --git a/libavutil/iamf.c b/libavutil/iamf.c
new file mode 100644
index 0000000000..fffb9fab20
--- /dev/null
+++ b/libavutil/iamf.c
@@ -0,0 +1,582 @@
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "avassert.h"
+#include "error.h"
+#include "iamf.h"
+#include "log.h"
+#include "mem.h"
+#include "opt.h"
+
+#define IAMF_ADD_FUNC_TEMPLATE(parent_type, parent_name, child_type, child_name, suffix) \
+int av_iamf_ ## parent_name ## _add_ ## child_name(parent_type *parent_name, AVDictionary **options) \
+{ \
+ child_type **child_name ## suffix, *child_name; \
+ \
+ if (parent_name->num_## child_name ## suffix == UINT_MAX) \
+ return AVERROR(EINVAL); \
+ \
+ child_name ## suffix = av_realloc_array(parent_name->child_name ## suffix, \
+ parent_name->num_## child_name ## suffix + 1, \
+ sizeof(*parent_name->child_name ## suffix)); \
+ if (!child_name ## suffix) \
+ return AVERROR(ENOMEM); \
+ \
+ parent_name->child_name ## suffix = child_name ## suffix; \
+ \
+ child_name = parent_name->child_name ## suffix[parent_name->num_## child_name ## suffix] \
+ = av_mallocz(sizeof(*child_name)); \
+ if (!child_name) \
+ return AVERROR(ENOMEM); \
+ \
+ child_name->av_class = &child_name ## _class; \
+ av_opt_set_defaults(child_name); \
+ if (options) { \
+ int ret = av_opt_set_dict2(child_name, options, AV_OPT_SEARCH_CHILDREN); \
+ if (ret < 0) { \
+ av_freep(&parent_name->child_name ## suffix[parent_name->num_## child_name ## suffix]); \
+ return ret; \
+ } \
+ } \
+ parent_name->num_## child_name ## suffix++; \
+ \
+ return 0; \
+}
+
+#define FLAGS AV_OPT_FLAG_ENCODING_PARAM
+
+//
+// Param Definition
+//
+#define OFFSET(x) offsetof(AVIAMFMixGainParameterData, x)
+static const AVOption mix_gain_options[] = {
+ { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+ { "animation_type", "set animation_type", OFFSET(animation_type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FLAGS },
+ { "start_point_value", "set start_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "end_point_value", "set end_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "control_point_value", "set control_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "control_point_relative_time", "set control_point_relative_time", OFFSET(animation_type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT8_MAX, FLAGS },
+ { NULL },
+};
+
+static const AVClass mix_gain_class = {
+ .class_name = "AVIAMFSubmixElement",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = mix_gain_options,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFDemixingInfoParameterData, x)
+static const AVOption demixing_info_options[] = {
+ { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+ { "dmixp_mode", "set dmixp_mode", OFFSET(dmixp_mode), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 6, FLAGS },
+ { NULL },
+};
+
+static const AVClass demixing_info_class = {
+ .class_name = "AVIAMFDemixingInfoParameterData",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = demixing_info_options,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFReconGainParameterData, x)
+static const AVOption recon_gain_options[] = {
+ { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+ { NULL },
+};
+
+static const AVClass recon_gain_class = {
+ .class_name = "AVIAMFReconGainParameterData",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = recon_gain_options,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFParamDefinition, x)
+static const AVOption param_definition_options[] = {
+ { "parameter_id", "set parameter_id", OFFSET(parameter_id), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "parameter_rate", "set parameter_rate", OFFSET(parameter_rate), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "param_definition_mode", "set param_definition_mode", OFFSET(param_definition_mode), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 1, FLAGS },
+ { "duration", "set duration", OFFSET(duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "constant_subblock_duration", "set constant_subblock_duration", OFFSET(constant_subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { NULL },
+};
+
+static const AVClass *param_definition_child_iterate(void **opaque)
+{
+ uintptr_t i = (uintptr_t)*opaque;
+ const AVClass *ret = NULL;
+
+ switch(i) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ ret = &mix_gain_class;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ ret = &demixing_info_class;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ ret = &recon_gain_class;
+ break;
+ default:
+ break;
+ }
+
+ if (ret)
+ *opaque = (void*)(i + 1);
+ return ret;
+}
+
+static const AVClass param_definition_class = {
+ .class_name = "AVIAMFParamDefinition",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = param_definition_options,
+ .child_class_iterate = param_definition_child_iterate,
+};
+
+const AVClass *av_iamf_param_definition_get_class(void)
+{
+ return &param_definition_class;
+}
+
+AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType type, AVDictionary **options,
+ unsigned int num_subblocks, AVDictionary **subblock_options,
+ size_t *out_size)
+{
+
+ struct MixGainStruct {
+ AVIAMFParamDefinition p;
+ AVIAMFMixGainParameterData m;
+ };
+ struct DemixStruct {
+ AVIAMFParamDefinition p;
+ AVIAMFDemixingInfoParameterData d;
+ };
+ struct ReconGainStruct {
+ AVIAMFParamDefinition p;
+ AVIAMFReconGainParameterData r;
+ };
+ size_t subblocks_offset, subblock_size;
+ size_t size;
+ AVIAMFParamDefinition *par;
+
+ switch (type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ subblocks_offset = offsetof(struct MixGainStruct, m);
+ subblock_size = sizeof(AVIAMFMixGainParameterData);
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ subblocks_offset = offsetof(struct DemixStruct, d);
+ subblock_size = sizeof(AVIAMFDemixingInfoParameterData);
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ subblocks_offset = offsetof(struct ReconGainStruct, r);
+ subblock_size = sizeof(AVIAMFReconGainParameterData);
+ break;
+ default:
+ return NULL;
+ }
+
+ size = subblocks_offset;
+ if (num_subblocks > (SIZE_MAX - size) / subblock_size)
+ return NULL;
+ size += subblock_size * num_subblocks;
+
+ par = av_mallocz(size);
+ if (!par)
+ return NULL;
+
+ par->av_class = &param_definition_class;
+ av_opt_set_defaults(par);
+ if (options) {
+ int ret = av_opt_set_dict(par, options);
+ if (ret < 0) {
+ av_free(par);
+ return NULL;
+ }
+ }
+ par->param_definition_type = type;
+ par->num_subblocks = num_subblocks;
+ par->subblock_size = subblock_size;
+ par->subblocks_offset = subblocks_offset;
+
+ for (int i = 0; i < num_subblocks; i++) {
+ void *subblock = av_iamf_param_definition_get_subblock(par, i);
+
+ switch (type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ ((AVIAMFMixGainParameterData *)subblock)->av_class = &mix_gain_class;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ ((AVIAMFDemixingInfoParameterData *)subblock)->av_class = &demixing_info_class;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ ((AVIAMFReconGainParameterData *)subblock)->av_class = &recon_gain_class;
+ break;
+ default:
+ av_assert0(0);
+ }
+
+ av_opt_set_defaults(subblock);
+ if (subblock_options && subblock_options[i]) {
+ int ret = av_opt_set_dict(subblock, &subblock_options[i]);
+ if (ret < 0) {
+ av_free(par);
+ return NULL;
+ }
+ }
+ }
+
+ if (out_size)
+ *out_size = size;
+
+ return par;
+}
+
+//
+// Audio Element
+//
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFLayer, x)
+static const AVOption layer_options[] = {
+ { "ch_layout", "set ch_layout", OFFSET(ch_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL }, 0, 0, FLAGS },
+ { "recon_gain_is_present", "set recon_gain_is_present", OFFSET(recon_gain_is_present), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS },
+ { "output_gain_flags", "set output_gain_flags", OFFSET(output_gain_flags), AV_OPT_TYPE_FLAGS,
+ {.i64 = 0 }, 0, (1 << 6) - 1, FLAGS, "output_gain_flags" },
+ {"FL", "Left channel", 0, AV_OPT_TYPE_CONST,
+ {.i64 = 1 << 5 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"},
+ {"FR", "Right channel", 0, AV_OPT_TYPE_CONST,
+ {.i64 = 1 << 4 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"},
+ {"BL", "Left surround channel", 0, AV_OPT_TYPE_CONST,
+ {.i64 = 1 << 3 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"},
+ {"BR", "Right surround channel", 0, AV_OPT_TYPE_CONST,
+ {.i64 = 1 << 2 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"},
+ {"TFL", "Left top front channel", 0, AV_OPT_TYPE_CONST,
+ {.i64 = 1 << 1 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"},
+ {"TFR", "Right top front channel", 0, AV_OPT_TYPE_CONST,
+ {.i64 = 1 << 0 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"},
+ { "output_gain", "set output_gain", OFFSET(output_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "ambisonics_mode", "set ambisonics_mode", OFFSET(ambisonics_mode), AV_OPT_TYPE_INT,
+ { .i64 = AV_IAMF_AMBISONICS_MODE_MONO },
+ AV_IAMF_AMBISONICS_MODE_MONO, AV_IAMF_AMBISONICS_MODE_PROJECTION, FLAGS, "ambisonics_mode" },
+ { "mono", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_AMBISONICS_MODE_MONO }, .unit = "ambisonics_mode" },
+ { "projection", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_AMBISONICS_MODE_PROJECTION }, .unit = "ambisonics_mode" },
+ { NULL },
+};
+
+static const AVClass layer_class = {
+ .class_name = "AVIAMFLayer",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = layer_options,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFAudioElement, x)
+static const AVOption audio_element_options[] = {
+ { "audio_element_type", "set audio_element_type", OFFSET(audio_element_type), AV_OPT_TYPE_INT,
+ {.i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL },
+ AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, FLAGS, "audio_element_type" },
+ { "channel", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL }, .unit = "audio_element_type" },
+ { "scene", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE }, .unit = "audio_element_type" },
+ { "default_w", "set default_w", OFFSET(default_w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 10, FLAGS },
+ { NULL },
+};
+
+static const AVClass *audio_element_child_iterate(void **opaque)
+{
+ uintptr_t i = (uintptr_t)*opaque;
+ const AVClass *ret = NULL;
+
+ if (i)
+ ret = &layer_class;
+
+ if (ret)
+ *opaque = (void*)(i + 1);
+ return ret;
+}
+
+static const AVClass audio_element_class = {
+ .class_name = "AVIAMFAudioElement",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = audio_element_options,
+ .child_class_iterate = audio_element_child_iterate,
+};
+
+const AVClass *av_iamf_audio_element_get_class(void)
+{
+ return &audio_element_class;
+}
+
+AVIAMFAudioElement *av_iamf_audio_element_alloc(void)
+{
+ AVIAMFAudioElement *audio_element = av_mallocz(sizeof(*audio_element));
+
+ if (audio_element) {
+ audio_element->av_class = &audio_element_class;
+ av_opt_set_defaults(audio_element);
+ }
+
+ return audio_element;
+}
+
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFAudioElement, audio_element, AVIAMFLayer, layer, s)
+
+void av_iamf_audio_element_free(AVIAMFAudioElement **paudio_element)
+{
+ AVIAMFAudioElement *audio_element = *paudio_element;
+
+ if (!audio_element)
+ return;
+
+ for (int i = 0; i < audio_element->num_layers; i++) {
+ AVIAMFLayer *layer = audio_element->layers[i];
+ av_opt_free(layer);
+ av_free(layer->demixing_matrix);
+ av_free(layer);
+ }
+ av_free(audio_element->layers);
+
+ av_free(audio_element->demixing_info);
+ av_free(audio_element->recon_gain_info);
+ av_freep(paudio_element);
+}
+
+//
+// Mix Presentation
+//
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFSubmixElement, x)
+static const AVOption submix_element_options[] = {
+ { "headphones_rendering_mode", "Headphones rendering mode", OFFSET(headphones_rendering_mode), AV_OPT_TYPE_INT,
+ { .i64 = AV_IAMF_HEADPHONES_MODE_STEREO },
+ AV_IAMF_HEADPHONES_MODE_STEREO, AV_IAMF_HEADPHONES_MODE_BINAURAL, FLAGS, "headphones_rendering_mode" },
+ { "stereo", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_HEADPHONES_MODE_STEREO }, .unit = "headphones_rendering_mode" },
+ { "binaural", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_HEADPHONES_MODE_BINAURAL }, .unit = "headphones_rendering_mode" },
+ { "default_mix_gain", "Default mix gain", OFFSET(default_mix_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "annotations", "Annotations", OFFSET(annotations), AV_OPT_TYPE_DICT, { .str = NULL }, 0, 0, FLAGS },
+ { NULL },
+};
+
+static void *submix_element_child_next(void *obj, void *prev)
+{
+ AVIAMFSubmixElement *submix_element = obj;
+ if (!prev)
+ return submix_element->element_mix_config;
+
+ return NULL;
+}
+
+static const AVClass *submix_element_child_iterate(void **opaque)
+{
+ uintptr_t i = (uintptr_t)*opaque;
+ const AVClass *ret = NULL;
+
+ if (i)
+ ret = &param_definition_class;
+
+ if (ret)
+ *opaque = (void*)(i + 1);
+ return ret;
+}
+
+static const AVClass element_class = {
+ .class_name = "AVIAMFSubmixElement",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = submix_element_options,
+ .child_next = submix_element_child_next,
+ .child_class_iterate = submix_element_child_iterate,
+};
+
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFSubmix, submix, AVIAMFSubmixElement, element, s)
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFSubmixLayout, x)
+static const AVOption submix_layout_options[] = {
+ { "layout_type", "Layout type", OFFSET(layout_type), AV_OPT_TYPE_INT,
+ { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS },
+ AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS, AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL, FLAGS, "layout_type" },
+ { "loudspeakers", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS }, .unit = "layout_type" },
+ { "binaural", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL }, .unit = "layout_type" },
+ { "sound_system", "Sound System", OFFSET(sound_system), AV_OPT_TYPE_CHLAYOUT, { .str = NULL }, 0, 0, FLAGS },
+ { "integrated_loudness", "Integrated loudness", OFFSET(integrated_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "digital_peak", "Digital peak", OFFSET(digital_peak), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "true_peak", "True peak", OFFSET(true_peak), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "dialog_anchored_loudness", "Anchored loudness (Dialog)", OFFSET(dialogue_anchored_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "album_anchored_loudness", "Anchored loudness (Album)", OFFSET(album_anchored_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { NULL },
+};
+
+static const AVClass layout_class = {
+ .class_name = "AVIAMFSubmixLayout",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = submix_layout_options,
+};
+
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFSubmix, submix, AVIAMFSubmixLayout, layout, s)
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFSubmix, x)
+static const AVOption submix_presentation_options[] = {
+ { "default_mix_gain", "Default mix gain", OFFSET(default_mix_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+ { NULL },
+};
+
+static void *submix_presentation_child_next(void *obj, void *prev)
+{
+ AVIAMFSubmix *sub_mix = obj;
+ if (!prev)
+ return sub_mix->output_mix_config;
+
+ return NULL;
+}
+
+static const AVClass *submix_presentation_child_iterate(void **opaque)
+{
+ uintptr_t i = (uintptr_t)*opaque;
+ const AVClass *ret = NULL;
+
+ switch(i) {
+ case 0:
+ ret = &element_class;
+ break;
+ case 1:
+ ret = &layout_class;
+ break;
+ case 2:
+ ret = &param_definition_class;
+ break;
+ default:
+ break;
+ }
+
+ if (ret)
+ *opaque = (void*)(i + 1);
+ return ret;
+}
+
+static const AVClass submix_class = {
+ .class_name = "AVIAMFSubmix",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = submix_presentation_options,
+ .child_next = submix_presentation_child_next,
+ .child_class_iterate = submix_presentation_child_iterate,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFMixPresentation, x)
+static const AVOption mix_presentation_options[] = {
+ { "annotations", "set annotations", OFFSET(annotations), AV_OPT_TYPE_DICT, {.str = NULL }, 0, 0, FLAGS },
+ { NULL },
+};
+
+#undef OFFSET
+#undef FLAGS
+
+static const AVClass *mix_presentation_child_iterate(void **opaque)
+{
+ uintptr_t i = (uintptr_t)*opaque;
+ const AVClass *ret = NULL;
+
+ if (i)
+ ret = &submix_class;
+
+ if (ret)
+ *opaque = (void*)(i + 1);
+ return ret;
+}
+
+static const AVClass mix_presentation_class = {
+ .class_name = "AVIAMFMixPresentation",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = mix_presentation_options,
+ .child_class_iterate = mix_presentation_child_iterate,
+};
+
+const AVClass *av_iamf_mix_presentation_get_class(void)
+{
+ return &mix_presentation_class;
+}
+
+AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void)
+{
+ AVIAMFMixPresentation *mix_presentation = av_mallocz(sizeof(*mix_presentation));
+
+ if (mix_presentation) {
+ mix_presentation->av_class = &mix_presentation_class;
+ av_opt_set_defaults(mix_presentation);
+ }
+
+ return mix_presentation;
+}
+
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFMixPresentation, mix_presentation, AVIAMFSubmix, submix, es)
+
+void av_iamf_mix_presentation_free(AVIAMFMixPresentation **pmix_presentation)
+{
+ AVIAMFMixPresentation *mix_presentation = *pmix_presentation;
+
+ if (!mix_presentation)
+ return;
+
+ for (int i = 0; i < mix_presentation->num_submixes; i++) {
+ AVIAMFSubmix *sub_mix = mix_presentation->submixes[i];
+ for (int j = 0; j < sub_mix->num_elements; j++) {
+ AVIAMFSubmixElement *submix_element = sub_mix->elements[j];
+ av_opt_free(submix_element);
+ av_free(submix_element->element_mix_config);
+ av_free(submix_element);
+ }
+ av_free(sub_mix->elements);
+ for (int j = 0; j < sub_mix->num_layouts; j++) {
+ AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[j];
+ av_opt_free(submix_layout);
+ av_free(submix_layout);
+ }
+ av_free(sub_mix->layouts);
+ av_free(sub_mix->output_mix_config);
+ av_free(sub_mix);
+ }
+ av_opt_free(mix_presentation);
+ av_free(mix_presentation->submixes);
+
+ av_freep(pmix_presentation);
+}
diff --git a/libavutil/iamf.h b/libavutil/iamf.h
new file mode 100644
index 0000000000..1f4919efdb
--- /dev/null
+++ b/libavutil/iamf.h
@@ -0,0 +1,377 @@
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_IAMF_H
+#define AVUTIL_IAMF_H
+
+/**
+ * @file
+ * Immersive Audio Model and Formats API header
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "attributes.h"
+#include "avassert.h"
+#include "channel_layout.h"
+#include "dict.h"
+#include "rational.h"
+
+enum AVIAMFAudioElementType {
+ AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
+ AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,
+};
+
+/**
+ * @defgroup lavf_iamf_params Parameter Definition
+ * @{
+ * Parameters as defined in section 3.6.1 and 3.8
+ * @}
+ * @defgroup lavf_iamf_audio Audio Element
+ * @{
+ * Audio Elements as defined in section 3.6
+ * @}
+ * @defgroup lavf_iamf_mix Mix Presentation
+ * @{
+ * Mix Presentations as defined in section 3.7
+ * @}
+ *
+ * @}
+ * @addtogroup lavf_iamf_params
+ * @{
+ */
+enum AVIAMFAnimationType {
+ AV_IAMF_ANIMATION_TYPE_STEP,
+ AV_IAMF_ANIMATION_TYPE_LINEAR,
+ AV_IAMF_ANIMATION_TYPE_BEZIER,
+};
+
+/**
+ * Mix Gain Parameter Data as defined in section 3.8.1
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
+ */
+typedef struct AVIAMFMixGainParameterData {
+ const AVClass *av_class;
+
+ // AVOption enabled fields
+ unsigned int subblock_duration;
+ enum AVIAMFAnimationType animation_type;
+ AVRational start_point_value;
+ AVRational end_point_value;
+ AVRational control_point_value;
+ unsigned int control_point_relative_time;
+} AVIAMFMixGainParameterData;
+
+/**
+ * Demixing Info Parameter Data as defined in section 3.8.2
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_DEMIXING.
+ */
+typedef struct AVIAMFDemixingInfoParameterData {
+ const AVClass *av_class;
+
+ // AVOption enabled fields
+ unsigned int subblock_duration;
+ unsigned int dmixp_mode;
+} AVIAMFDemixingInfoParameterData;
+
+/**
+ * Recon Gain Info Parameter Data as defined in section 3.8.3
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN.
+ */
+typedef struct AVIAMFReconGainParameterData {
+ const AVClass *av_class;
+
+ // AVOption enabled fields
+ unsigned int subblock_duration;
+ // End of AVOption enabled fields
+ uint8_t recon_gain[6][12];
+} AVIAMFReconGainParameterData;
+
+enum AVIAMFParamDefinitionType {
+ AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
+ AV_IAMF_PARAMETER_DEFINITION_DEMIXING,
+ AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN,
+};
+
+/**
+ * Parameters as defined in section 3.6.1
+ */
+typedef struct AVIAMFParamDefinition {
+ const AVClass *av_class;
+
+ size_t subblocks_offset;
+ size_t subblock_size;
+
+ enum AVIAMFParamDefinitionType param_definition_type;
+ unsigned int num_subblocks;
+
+ // AVOption enabled fields
+ unsigned int parameter_id;
+ unsigned int parameter_rate;
+ unsigned int param_definition_mode;
+ unsigned int duration;
+ unsigned int constant_subblock_duration;
+} AVIAMFParamDefinition;
+
+const AVClass *av_iamf_param_definition_get_class(void);
+
+AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
+ AVDictionary **options,
+ unsigned int num_subblocks, AVDictionary **subblock_options,
+ size_t *size);
+
+/**
+ * Get the subblock at the specified {@code idx}. Must be between 0 and num_subblocks - 1.
+ *
+ * The @ref AVIAMFParamDefinition.param_definition_type "param definition type" defines
+ * the struct type of the returned pointer.
+ */
+static av_always_inline void*
+av_iamf_param_definition_get_subblock(AVIAMFParamDefinition *par, unsigned int idx)
+{
+ av_assert0(idx < par->num_subblocks);
+ return (void *)((uint8_t *)par + par->subblocks_offset + idx * par->subblock_size);
+}
+
+/**
+ * @}
+ * @addtogroup lavf_iamf_audio
+ * @{
+ */
+
+enum AVIAMFAmbisonicsMode {
+ AV_IAMF_AMBISONICS_MODE_MONO,
+ AV_IAMF_AMBISONICS_MODE_PROJECTION,
+};
+
+/**
+ * A layer defining a Channel Layout in the Audio Element.
+ *
+ * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
+ * corresponds to a Scalable Channel Layout layer as defined in section 3.6.2.
+ * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
+ * layout as defined in section 3.6.3
+ */
+typedef struct AVIAMFLayer {
+ const AVClass *av_class;
+
+ // AVOption enabled fields
+ AVChannelLayout ch_layout;
+
+ unsigned int recon_gain_is_present;
+ /**
+ * Output gain flags as defined in section 3.6.2
+ *
+ * This field is defined only if audio_element_type is
+ * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
+ */
+ unsigned int output_gain_flags;
+ /**
+ * Output gain as defined in section 3.6.2
+ *
+ * Must be 0 if @ref output_gain_flags is 0.
+ */
+ AVRational output_gain;
+ /**
+ * Ambisonics mode as defined in section 3.6.3
+ *
+ * This field is defined only if audio_element_type is
+ * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
+ *
+ * If 0, channel_mapping is defined implicitly (Ambisonic Order)
+ * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
+ * If 1, @ref demixing_matrix must be set.
+ */
+ enum AVIAMFAmbisonicsMode ambisonics_mode;
+
+ // End of AVOption enabled fields
+ /**
+ * Demixing matrix as defined in section 3.6.3
+ *
+ * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
+ */
+ AVRational *demixing_matrix;
+} AVIAMFLayer;
+
+typedef struct AVIAMFAudioElement {
+ const AVClass *av_class;
+
+ AVIAMFLayer **layers;
+ /**
+ * Number of layers, or channel groups, in the Audio Element.
+ * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there
+ * may be exactly 1.
+ *
+ * Set by av_iamf_audio_element_add_layer(), must not be
+ * modified by any other code.
+ */
+ unsigned int num_layers;
+
+ unsigned int codec_config_id;
+
+ AVIAMFParamDefinition *demixing_info;
+ AVIAMFParamDefinition *recon_gain_info;
+
+ // AVOption enabled fields
+ /**
+ * Audio element type as defined in section 3.6
+ */
+ enum AVIAMFAudioElementType audio_element_type;
+
+ /**
+ * Default weight value as defined in section 3.6
+ */
+ unsigned int default_w;
+} AVIAMFAudioElement;
+
+const AVClass *av_iamf_audio_element_get_class(void);
+
+AVIAMFAudioElement *av_iamf_audio_element_alloc(void);
+
+int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options);
+
+void av_iamf_audio_element_free(AVIAMFAudioElement **audio_element);
+
+/**
+ * @}
+ * @addtogroup lavf_iamf_mix
+ * @{
+ */
+
+enum AVIAMFHeadphonesMode {
+ AV_IAMF_HEADPHONES_MODE_STEREO,
+ AV_IAMF_HEADPHONES_MODE_BINAURAL,
+};
+
+typedef struct AVIAMFSubmixElement {
+ const AVClass *av_class;
+
+ unsigned int audio_element_id;
+
+ AVIAMFParamDefinition *element_mix_config;
+
+ // AVOption enabled fields
+ enum AVIAMFHeadphonesMode headphones_rendering_mode;
+
+ AVRational default_mix_gain;
+
+ /**
+ * A dictionary of strings describing the submix. Must have the same
+ * number of entries as @ref AVIAMFMixPresentation.annotations "the
+ * mix's annotations".
+ *
+ * decoding: set by libavformat
+ * encoding: set by the user
+ */
+ AVDictionary *annotations;
+} AVIAMFSubmixElement;
+
+enum AVIAMFSubmixLayoutType {
+ AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS = 2,
+ AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL = 3,
+};
+
+typedef struct AVIAMFSubmixLayout {
+ const AVClass *av_class;
+
+ // AVOption enabled fields
+ enum AVIAMFSubmixLayoutType layout_type;
+ AVChannelLayout sound_system;
+ AVRational integrated_loudness;
+ AVRational digital_peak;
+ AVRational true_peak;
+ AVRational dialogue_anchored_loudness;
+ AVRational album_anchored_loudness;
+} AVIAMFSubmixLayout;
+
+typedef struct AVIAMFSubmix {
+ const AVClass *av_class;
+
+ AVIAMFSubmixElement **elements;
+ /**
+ * Set by av_iamf_mix_presentation_add_submix(), must not be
+ * modified by any other code.
+ */
+ unsigned int num_elements;
+
+ AVIAMFSubmixLayout **layouts;
+ /**
+ * Set by av_iamf_mix_presentation_add_submix(), must not be
+ * modified by any other code.
+ */
+ unsigned int num_layouts;
+
+ AVIAMFParamDefinition *output_mix_config;
+
+ // AVOption enabled fields
+ AVRational default_mix_gain;
+} AVIAMFSubmix;
+
+typedef struct AVIAMFMixPresentation {
+ const AVClass *av_class;
+
+ AVIAMFSubmix **submixes;
+ /**
+ * Number of submixes in the presentation.
+ *
+ * Set by av_iamf_mix_presentation_add_submix(), must not be
+ * modified by any other code.
+ */
+ unsigned int num_submixes;
+
+ // AVOption enabled fields
+ /**
+ * A dictionary of strings describing the mix. Must have the same
+ * number of entries as every @ref AVIAMFSubmixElement.annotations
+ * "Submix element annotations".
+ *
+ * decoding: set by libavformat
+ * encoding: set by the user
+ */
+ AVDictionary *annotations;
+} AVIAMFMixPresentation;
+
+const AVClass *av_iamf_mix_presentation_get_class(void);
+
+AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void);
+
+int av_iamf_mix_presentation_add_submix(AVIAMFMixPresentation *mix_presentation,
+ AVDictionary **options);
+
+int av_iamf_submix_add_element(AVIAMFSubmix *submix, AVDictionary **options);
+
+int av_iamf_submix_add_layout(AVIAMFSubmix *submix, AVDictionary **options);
+
+void av_iamf_mix_presentation_free(AVIAMFMixPresentation **mix_presentation);
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_IAMF_H */
--
2.42.1
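For reference, a rough sketch of how a caller might build an Audio Element with the API proposed above (not part of the patch; error paths shortened, and the option name follows the AVOption table in iamf.c, which may still change during review):

#include "libavutil/channel_layout.h"
#include "libavutil/dict.h"
#include "libavutil/error.h"
#include "libavutil/iamf.h"

static int build_stereo_element(AVIAMFAudioElement **out)
{
    AVIAMFAudioElement *element = av_iamf_audio_element_alloc();
    AVDictionary *opts = NULL;
    int ret;

    if (!element)
        return AVERROR(ENOMEM);

    // Append one channel layer, configured through the AVDictionary argument.
    av_dict_set(&opts, "recon_gain_is_present", "0", 0);
    ret = av_iamf_audio_element_add_layer(element, &opts);
    av_dict_free(&opts);
    if (ret < 0) {
        av_iamf_audio_element_free(&element);
        return ret;
    }

    av_channel_layout_from_string(&element->layers[0]->ch_layout, "stereo");

    *out = element;
    return 0;
}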
* Re: [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API James Almer
@ 2023-11-30 11:01 ` Anton Khirnov
2023-11-30 13:01 ` James Almer
0 siblings, 1 reply; 29+ messages in thread
From: Anton Khirnov @ 2023-11-30 11:01 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting James Almer (2023-11-26 02:28:53)
> diff --git a/libavutil/iamf.h b/libavutil/iamf.h
> new file mode 100644
> index 0000000000..1f4919efdb
> --- /dev/null
> +++ b/libavutil/iamf.h
> +enum AVIAMFAudioElementType {
> + AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
> + AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,
'audio' in the names is redundant and makes already long identifiers
unnecessarily longer
> +};
> +
> +/**
> + * @defgroup lavf_iamf_params Parameter Definition
> + * @{
> + * Parameters as defined in section 3.6.1 and 3.8
of what?
> +/**
> + * Mix Gain Parameter Data as defined in section 3.8.1
> + *
> + * Subblocks in AVIAMFParamDefinition use this struct when the value or
> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
> + */
> +typedef struct AVIAMFMixGainParameterData {
Does 'ParameterData' at the end really serve any purpose?
> + const AVClass *av_class;
> +
> + // AVOption enabled fields
> + unsigned int subblock_duration;
> + enum AVIAMFAnimationType animation_type;
> + AVRational start_point_value;
> + AVRational end_point_value;
> + AVRational control_point_value;
> + unsigned int control_point_relative_time;
All these should really be documented. Also, some vertical alignment
would improve readability.
> +/**
> + * Parameters as defined in section 3.6.1
This really REALLY needs more documentation.
> + */
> +typedef struct AVIAMFParamDefinition {
> + const AVClass *av_class;
> +
> + size_t subblocks_offset;
> + size_t subblock_size;
> +
> + enum AVIAMFParamDefinitionType param_definition_type;
> + unsigned int num_subblocks;
We use nb_foo generally.
> +AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
> + AVDictionary **options,
> + unsigned int num_subblocks, AVDictionary **subblock_options,
What are the dicts for?
> + *
> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
> + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2.
> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
> + * layout as defined in section 3.6.3
> + */
> +typedef struct AVIAMFLayer {
> + const AVClass *av_class;
> +
> + // AVOption enabled fields
> + AVChannelLayout ch_layout;
> +
> + unsigned int recon_gain_is_present;
Every time you dedicate 4 bytes to storing one bit, God kills a kitten.
> + /**
> + * Output gain flags as defined in section 3.6.2
It would be really really nice if people could understand the struct
contents without some external document.
> + * This field is defined only if audio_element_type is
presumably the parent's audio_element_type
> + * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
> + */
> + unsigned int output_gain_flags;
> + /**
> + * Output gain as defined in section 3.6.2
> + *
> + * Must be 0 if @ref output_gain_flags is 0.
> + */
> + AVRational output_gain;
> + /**
> + * Ambisonics mode as defined in section 3.6.3
> + *
> + * This field is defined only if audio_element_type is
> + * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
> + *
> + * If 0, channel_mapping is defined implicitly (Ambisonic Order)
> + * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
> + * If 1, @ref demixing_matrix must be set.
> + */
> + enum AVIAMFAmbisonicsMode ambisonics_mode;
> +
> + // End of AVOption enabled fields
What purpose does this comment serve?
> + /**
> + * Demixing matrix as defined in section 3.6.3
> + *
> + * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
> + */
> + AVRational *demixing_matrix;
Who sets this?
> +typedef struct AVIAMFAudioElement {
> + const AVClass *av_class;
> +
> + AVIAMFLayer **layers;
> + /**
> + * Number of layers, or channel groups, in the Audio Element.
> + * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there
> + * may be exactly 1.
> + *
> + * Set by av_iamf_audio_element_add_layer(), must not be
> + * modified by any other code.
> + */
> + unsigned int num_layers;
> +
> + unsigned int codec_config_id;
???
> +int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options);
I would much prefer to have the caller call av_opt_set* manually rather
than sprinkle AVDictionary function arguments everywhere.
Do note that their usage in lavc and lavf APIs is out of necessity, not
because it's very pretty.
--
Anton Khirnov
* Re: [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API
2023-11-30 11:01 ` Anton Khirnov
@ 2023-11-30 13:01 ` James Almer
2023-11-30 13:47 ` Anton Khirnov
0 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2023-11-30 13:01 UTC (permalink / raw)
To: ffmpeg-devel
On 11/30/2023 8:01 AM, Anton Khirnov wrote:
> Quoting James Almer (2023-11-26 02:28:53)
>> diff --git a/libavutil/iamf.h b/libavutil/iamf.h
>> new file mode 100644
>> index 0000000000..1f4919efdb
>> --- /dev/null
>> +++ b/libavutil/iamf.h
>> +enum AVIAMFAudioElementType {
>> + AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
>> + AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,
>
> 'audio' in the names is redundant and makes already long identifiers
> unnecessarily longer
I'm trying to keep everything namespaced. Audio Elements are not the
only part of the spec to use "element". See Submixes.
>
>> +};
>> +
>> +/**
>> + * @defgroup lavf_iamf_params Parameter Definition
>> + * @{
>> + * Parameters as defined in section 3.6.1 and 3.8
>
> of what?
Should i link https://aomediacodec.github.io/iamf/ somewhere?
>
>> +/**
>> + * Mix Gain Parameter Data as defined in section 3.8.1
>> + *
>> + * Subblocks in AVIAMFParamDefinition use this struct when the value or
>> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
>> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
>> + */
>> +typedef struct AVIAMFMixGainParameterData {
>
> Does 'ParameterData' at the end really serve any purpose?
Follow the names as in
https://aomediacodec.github.io/iamf/#obu-parameterblock
I can change it to Parameters or Params, or just remove it.
>
>> + const AVClass *av_class;
>> +
>> + // AVOption enabled fields
>> + unsigned int subblock_duration;
>> + enum AVIAMFAnimationType animation_type;
>> + AVRational start_point_value;
>> + AVRational end_point_value;
>> + AVRational control_point_value;
>> + unsigned int control_point_relative_time;
>
> All these should really be documented. Also, some vertical alignment
> would improve readability.
>
>> +/**
>> + * Parameters as defined in section 3.6.1
>
> This really REALLY needs more documentation.
Yes, was keeping better documentation for last.
>
>> + */
>> +typedef struct AVIAMFParamDefinition {
>> + const AVClass *av_class;
>> +
>> + size_t subblocks_offset;
>> + size_t subblock_size;
>> +
>> + enum AVIAMFParamDefinitionType param_definition_type;
>> + unsigned int num_subblocks;
>
> We use nb_foo generally.
For these public fields i'm keeping the same name as they are in the
spec. I use nb_foo for arrays of structs in the demuxer/muxer patches.
But i can change it if you prefer.
>
>> +AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
>> + AVDictionary **options,
>> + unsigned int num_subblocks, AVDictionary **subblock_options,
>
> What are the dicts for?
Setting AVOptions for the AVIAMFParamDefinition and each subblock,
respectively.
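For illustration (not in the patch), the intended call pattern as the API currently stands would be roughly this fragment, error checks omitted:

AVDictionary *opts = NULL, *subblock_opts[1] = { NULL };
AVIAMFParamDefinition *par;
size_t size;

av_dict_set(&opts, "parameter_rate", "48000", 0);
av_dict_set(&subblock_opts[0], "subblock_duration", "960", 0);
av_dict_set(&subblock_opts[0], "dmixp_mode", "2", 0);

par = av_iamf_param_definition_alloc(AV_IAMF_PARAMETER_DEFINITION_DEMIXING,
                                     &opts, 1, subblock_opts, &size);
av_dict_free(&opts);
av_dict_free(&subblock_opts[0]);

if (par) {
    // Subblocks follow the struct in memory; param_definition_type tells
    // the caller which struct each one actually is.
    AVIAMFDemixingInfoParameterData *dmix =
        av_iamf_param_definition_get_subblock(par, 0);
    // dmix->subblock_duration is now 960 and dmix->dmixp_mode is 2
}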
>
>> + *
>> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
>> + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2.
>> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
>> + * layout as defined in section 3.6.3
>> + */
>> +typedef struct AVIAMFLayer {
>> + const AVClass *av_class;
>> +
>> + // AVOption enabled fields
>> + AVChannelLayout ch_layout;
>> +
>> + unsigned int recon_gain_is_present;
>
> Every time you dedicate 4 bytes to storing one bit, God kills a kitten.
I'll shave a few bytes.
>
>> + /**
>> + * Output gain flags as defined in section 3.6.2
>
> It would be really really nice if people could understand the struct
> contents without some external document.
>
>> + * This field is defined only if audio_element_type is
>
> presumably the parent's audio_element_type
Yes, forgot the @ref. Good catch.
>
>> + * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
>> + */
>> + unsigned int output_gain_flags;
>> + /**
>> + * Output gain as defined in section 3.6.2
>> + *
>> + * Must be 0 if @ref output_gain_flags is 0.
>> + */
>> + AVRational output_gain;
>> + /**
>> + * Ambisonics mode as defined in section 3.6.3
>> + *
>> + * This field is defined only if audio_element_type is
>> + * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
>> + *
>> + * If 0, channel_mapping is defined implicitly (Ambisonic Order)
>> + * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
>> + * If 1, @ref demixing_matrix must be set.
>> + */
>> + enum AVIAMFAmbisonicsMode ambisonics_mode;
>> +
>> + // End of AVOption enabled fields
>
> What purpose does this comment serve?
It was a reminder for me of what could be set through AVOptions. I'll
remove it.
>
>> + /**
>> + * Demixing matrix as defined in section 3.6.3
>> + *
>> + * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
>> + */
>> + AVRational *demixing_matrix;
>
> Who sets this?
lavf for demuxing and the user for muxing, as usual. Will mention it.
>
>> +typedef struct AVIAMFAudioElement {
>> + const AVClass *av_class;
>> +
>> + AVIAMFLayer **layers;
>> + /**
>> + * Number of layers, or channel groups, in the Audio Element.
>> + * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there
>> + * may be exactly 1.
>> + *
>> + * Set by av_iamf_audio_element_add_layer(), must not be
>> + * modified by any other code.
>> + */
>> + unsigned int num_layers;
>> +
>> + unsigned int codec_config_id;
>
> ???
Ah, good catch. Need to remove this and adapt the muxer.
>
>> +int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options);
>
> I would much prefer to have the caller call av_opt_set* manually rather
> than sprinkle AVDictionary function arguments everywhere.
> Do note that their usage in lavc and lavf APIs is out of necessity, not
> because it's very pretty.
Alright, will try to remove it from most of these.
* Re: [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API
2023-11-30 13:01 ` James Almer
@ 2023-11-30 13:47 ` Anton Khirnov
2023-11-30 14:27 ` James Almer
0 siblings, 1 reply; 29+ messages in thread
From: Anton Khirnov @ 2023-11-30 13:47 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting James Almer (2023-11-30 14:01:16)
>
> Should i link https://aomediacodec.github.io/iamf/ somewhere?
Most definitely.
> >
> >> +/**
> >> + * Mix Gain Parameter Data as defined in section 3.8.1
> >> + *
> >> + * Subblocks in AVIAMFParamDefinition use this struct when the value of
> >> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
> >> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
> >> + */
> >> +typedef struct AVIAMFMixGainParameterData {
> >
> > Does 'ParameterData' at the end really serve any purpose?
>
> Follow the names as in
> https://aomediacodec.github.io/iamf/#obu-parameterblock
> I can change it to Parameters or Params, or just remove it.
I'd drop it. The names are long enough as they are.
> >> + const AVClass *av_class;
> >> +
> >> + // AVOption enabled fields
> >> + unsigned int subblock_duration;
> >> + enum AVIAMFAnimationType animation_type;
> >> + AVRational start_point_value;
> >> + AVRational end_point_value;
> >> + AVRational control_point_value;
> >> + unsigned int control_point_relative_time;
> >
> > All these should really be documented. Also, some vertical alignment
> > would improve readability.
> >
> >> +/**
> >> + * Parameters as defined in section 3.6.1
> >
> > This really REALLY needs more documentation.
>
> Yes, was keeping better documentation for last.
>
> >
> >> + */
> >> +typedef struct AVIAMFParamDefinition {
> >> + const AVClass *av_class;
> >> +
> >> + size_t subblocks_offset;
> >> + size_t subblock_size;
> >> +
> >> + enum AVIAMFParamDefinitionType param_definition_type;
> >> + unsigned int num_subblocks;
> >
> > We use nb_foo generally.
>
> For these public fields i'm keeping the same name as they are in the
> spec. I use nb_foo for arrays of structs in the demuxer/muxer patches.
> But i can change it if you prefer.
I prefer to be consistent with ourselves in this rather than a spec.
Specs come and go.
> >
> >> + *
> >> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
> >> + * corresponds to a Scalable Channel Layout layer as defined in section 3.6.2.
> >> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
> >> + * layout as defined in section 3.6.3
> >> + */
> >> +typedef struct AVIAMFLayer {
> >> + const AVClass *av_class;
> >> +
> >> + // AVOption enabled fields
> >> + AVChannelLayout ch_layout;
> >> +
> >> + unsigned int recon_gain_is_present;
> >
> > Every time you dedicate 4 bytes to storing one bit, God kills a kitten.
>
> I'll shave a few bytes.
I don't see how that can be done easily due to struct alignment. I was
thinking you could make it into a flags field instead.
--
Anton Khirnov
* Re: [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API
2023-11-30 13:47 ` Anton Khirnov
@ 2023-11-30 14:27 ` James Almer
2023-11-30 14:30 ` Anton Khirnov
0 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2023-11-30 14:27 UTC (permalink / raw)
To: ffmpeg-devel
On 11/30/2023 10:47 AM, Anton Khirnov wrote:
> Quoting James Almer (2023-11-30 14:01:16)
>>
>> Should i link https://aomediacodec.github.io/iamf/ somewhere?
>
> Most definitely.
>
>>>
>>>> +/**
>>>> + * Mix Gain Parameter Data as defined in section 3.8.1
>>>> + *
>>>> + * Subblocks in AVIAMFParamDefinition use this struct when the value of
>>>> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
>>>> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
>>>> + */
>>>> +typedef struct AVIAMFMixGainParameterData {
>>>
>>> Does 'ParameterData' at the end really serve any purpose?
>>
>> Follow the names as in
>> https://aomediacodec.github.io/iamf/#obu-parameterblock
>> I can change it to Parameters or Params, or just remove it.
>
> I'd drop it. The names are long enough as they are.
>
>>>> + const AVClass *av_class;
>>>> +
>>>> + // AVOption enabled fields
>>>> + unsigned int subblock_duration;
>>>> + enum AVIAMFAnimationType animation_type;
>>>> + AVRational start_point_value;
>>>> + AVRational end_point_value;
>>>> + AVRational control_point_value;
>>>> + unsigned int control_point_relative_time;
>>>
>>> All these should really be documented. Also, some vertical alignment
>>> would improve readability.
>>>
>>>> +/**
>>>> + * Parameters as defined in section 3.6.1
>>>
>>> This really REALLY needs more documentation.
>>
>> Yes, was keeping better documentation for last.
>>
>>>
>>>> + */
>>>> +typedef struct AVIAMFParamDefinition {
>>>> + const AVClass *av_class;
>>>> +
>>>> + size_t subblocks_offset;
>>>> + size_t subblock_size;
>>>> +
>>>> + enum AVIAMFParamDefinitionType param_definition_type;
>>>> + unsigned int num_subblocks;
>>>
>>> We use nb_foo generally.
>>
>> For these public fields i'm keeping the same name as they are in the
>> spec. I use nb_foo for arrays of structs in the demuxer/muxer patches.
>> But i can change it if you prefer.
>
> I prefer to be consistent with ourselves in this rather than a spec.
> Specs come and go.
>
>>>
>>>> + *
>>>> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
>>>> + * corresponds to a Scalable Channel Layout layer as defined in section 3.6.2.
>>>> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
>>>> + * layout as defined in section 3.6.3
>>>> + */
>>>> +typedef struct AVIAMFLayer {
>>>> + const AVClass *av_class;
>>>> +
>>>> + // AVOption enabled fields
>>>> + AVChannelLayout ch_layout;
>>>> +
>>>> + unsigned int recon_gain_is_present;
>>>
>>> Every time you dedicate 4 bytes to storing one bit, God kills a kitten.
>>
>> I'll shave a few bytes.
>
> I don't see how that can be done easily due to struct alignment. I was
> thinking you could make it into a flags field instead.
But this is the only boolean field. Also, there can be at most six
layers, so it's not exactly a huge waste either way.
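For reference, the suggested flags field could look something like this (a sketch only; the names are placeholders, not part of the patch):

#define AV_IAMF_LAYER_FLAG_RECON_GAIN (1 << 0)

typedef struct AVIAMFLayer {
    const AVClass *av_class;
    AVChannelLayout ch_layout;
    /* combination of AV_IAMF_LAYER_FLAG_*, replacing the boolean */
    unsigned int flags;
    /* ... remaining fields unchanged ... */
} AVIAMFLayer;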
* [FFmpeg-devel] [PATCH 5/9] avformat: introduce AVStreamGroup
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (3 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 6/9] ffmpeg: add support for muxing AVStreamGroups James Almer
` (7 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
doc/fftools-common-opts.texi | 17 +++-
libavformat/avformat.c | 185 ++++++++++++++++++++++++++++++++++-
libavformat/avformat.h | 169 ++++++++++++++++++++++++++++++++
libavformat/dump.c | 147 +++++++++++++++++++++++-----
libavformat/internal.h | 33 +++++++
libavformat/options.c | 139 ++++++++++++++++++++++++++
6 files changed, 656 insertions(+), 34 deletions(-)
diff --git a/doc/fftools-common-opts.texi b/doc/fftools-common-opts.texi
index d9145704d6..f459bfdc1d 100644
--- a/doc/fftools-common-opts.texi
+++ b/doc/fftools-common-opts.texi
@@ -37,9 +37,9 @@ Matches the stream with this index. E.g. @code{-threads:1 4} would set the
thread count for the second stream to 4. If @var{stream_index} is used as an
additional stream specifier (see below), then it selects stream number
@var{stream_index} from the matching streams. Stream numbering is based on the
-order of the streams as detected by libavformat except when a program ID is
-also specified. In this case it is based on the ordering of the streams in the
-program.
+order of the streams as detected by libavformat except when a stream group
+specifier or program ID is also specified. In this case it is based on the
+ordering of the streams in the group or program.
@item @var{stream_type}[:@var{additional_stream_specifier}]
@var{stream_type} is one of following: 'v' or 'V' for video, 'a' for audio, 's'
for subtitle, 'd' for data, and 't' for attachments. 'v' matches all video
@@ -48,6 +48,17 @@ thumbnails or cover arts. If @var{additional_stream_specifier} is used, then
it matches streams which both have this type and match the
@var{additional_stream_specifier}. Otherwise, it matches all streams of the
specified type.
+@item g:@var{group_specifier}[:@var{additional_stream_specifier}]
+Matches streams which are in the group with the specifier @var{group_specifier}.
+If @var{additional_stream_specifier} is used, then it matches streams which both
+are part of the group and match the @var{additional_stream_specifier}.
+@var{group_specifier} may be one of the following:
+@table @option
+@item @var{group_index}
+Match the stream with this group index.
+@item #@var{group_id} or i:@var{group_id}
+Match the stream with this group id.
+@end table
@item p:@var{program_id}[:@var{additional_stream_specifier}]
Matches streams which are in the program with the id @var{program_id}. If
@var{additional_stream_specifier} is used, then it matches streams which both
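As an aside, the new "g:" specifier documented above can also be exercised programmatically through the existing matching API. A minimal sketch, assuming "ic" is an already opened AVFormatContext and with error handling trimmed:

for (unsigned i = 0; i < ic->nb_streams; i++) {
    int ret = avformat_match_stream_specifier(ic, ic->streams[i], "g:0");
    if (ret < 0)
        return ret; /* invalid specifier */
    if (ret > 0)
        av_log(NULL, AV_LOG_INFO, "Stream %u belongs to stream group 0\n", i);
}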
diff --git a/libavformat/avformat.c b/libavformat/avformat.c
index 5b8bb7879e..863fbdd7b8 100644
--- a/libavformat/avformat.c
+++ b/libavformat/avformat.c
@@ -24,6 +24,7 @@
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
+#include "libavutil/iamf.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
@@ -80,6 +81,32 @@ FF_ENABLE_DEPRECATION_WARNINGS
av_freep(pst);
}
+void ff_free_stream_group(AVStreamGroup **pstg)
+{
+ AVStreamGroup *stg = *pstg;
+
+ if (!stg)
+ return;
+
+ av_freep(&stg->streams);
+ av_dict_free(&stg->metadata);
+ av_freep(&stg->priv_data);
+ switch (stg->type) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT: {
+ av_iamf_audio_element_free(&stg->params.iamf_audio_element);
+ break;
+ }
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION: {
+ av_iamf_mix_presentation_free(&stg->params.iamf_mix_presentation);
+ break;
+ }
+ default:
+ break;
+ }
+
+ av_freep(pstg);
+}
+
void ff_remove_stream(AVFormatContext *s, AVStream *st)
{
av_assert0(s->nb_streams>0);
@@ -88,6 +115,14 @@ void ff_remove_stream(AVFormatContext *s, AVStream *st)
ff_free_stream(&s->streams[ --s->nb_streams ]);
}
+void ff_remove_stream_group(AVFormatContext *s, AVStreamGroup *stg)
+{
+ av_assert0(s->nb_stream_groups > 0);
+ av_assert0(s->stream_groups[ s->nb_stream_groups - 1 ] == stg);
+
+ ff_free_stream_group(&s->stream_groups[ --s->nb_stream_groups ]);
+}
+
/* XXX: suppress the packet queue */
void ff_flush_packet_queue(AVFormatContext *s)
{
@@ -118,6 +153,9 @@ void avformat_free_context(AVFormatContext *s)
for (unsigned i = 0; i < s->nb_streams; i++)
ff_free_stream(&s->streams[i]);
+ for (unsigned i = 0; i < s->nb_stream_groups; i++)
+ ff_free_stream_group(&s->stream_groups[i]);
+ s->nb_stream_groups = 0;
s->nb_streams = 0;
for (unsigned i = 0; i < s->nb_programs; i++) {
@@ -139,6 +177,7 @@ void avformat_free_context(AVFormatContext *s)
av_packet_free(&si->pkt);
av_packet_free(&si->parse_pkt);
av_freep(&s->streams);
+ av_freep(&s->stream_groups);
ff_flush_packet_queue(s);
av_freep(&s->url);
av_free(s);
@@ -464,7 +503,7 @@ int av_find_best_stream(AVFormatContext *ic, enum AVMediaType type,
*/
static int match_stream_specifier(const AVFormatContext *s, const AVStream *st,
const char *spec, const char **indexptr,
- const AVProgram **p)
+ const AVStreamGroup **g, const AVProgram **p)
{
int match = 1; /* Stores if the specifier matches so far. */
while (*spec) {
@@ -493,6 +532,46 @@ static int match_stream_specifier(const AVFormatContext *s, const AVStream *st,
match = 0;
if (nopic && (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
match = 0;
+ } else if (*spec == 'g' && *(spec + 1) == ':') {
+ int64_t group_idx = -1, group_id = -1;
+ int found = 0;
+ char *endptr;
+ spec += 2;
+ if (*spec == '#' || (*spec == 'i' && *(spec + 1) == ':')) {
+ spec += 1 + (*spec == 'i');
+ group_id = strtol(spec, &endptr, 0);
+ if (spec == endptr || (*endptr && *endptr++ != ':'))
+ return AVERROR(EINVAL);
+ spec = endptr;
+ } else {
+ group_idx = strtol(spec, &endptr, 0);
+ /* Disallow empty id and make sure that if we are not at the end, then another specifier must follow. */
+ if (spec == endptr || (*endptr && *endptr++ != ':'))
+ return AVERROR(EINVAL);
+ spec = endptr;
+ }
+ if (match) {
+ if (group_id > 0) {
+ for (unsigned i = 0; i < s->nb_stream_groups; i++) {
+ if (group_id == s->stream_groups[i]->id) {
+ group_idx = i;
+ break;
+ }
+ }
+ }
+ if (group_idx < 0 || group_idx >= s->nb_stream_groups)
+ return AVERROR(EINVAL);
+ for (unsigned j = 0; j < s->stream_groups[group_idx]->nb_streams; j++) {
+ if (st->index == s->stream_groups[group_idx]->streams[j]->index) {
+ found = 1;
+ if (g)
+ *g = s->stream_groups[group_idx];
+ break;
+ }
+ }
+ }
+ if (!found)
+ match = 0;
} else if (*spec == 'p' && *(spec + 1) == ':') {
int prog_id;
int found = 0;
@@ -591,10 +670,11 @@ int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
int ret, index;
char *endptr;
const char *indexptr = NULL;
+ const AVStreamGroup *g = NULL;
const AVProgram *p = NULL;
int nb_streams;
- ret = match_stream_specifier(s, st, spec, &indexptr, &p);
+ ret = match_stream_specifier(s, st, spec, &indexptr, &g, &p);
if (ret < 0)
goto error;
@@ -612,10 +692,11 @@ int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
return (index == st->index);
/* If we requested a matching stream index, we have to ensure st is that. */
- nb_streams = p ? p->nb_stream_indexes : s->nb_streams;
+ nb_streams = g ? g->nb_streams : (p ? p->nb_stream_indexes : s->nb_streams);
for (int i = 0; i < nb_streams && index >= 0; i++) {
- const AVStream *candidate = s->streams[p ? p->stream_index[i] : i];
- ret = match_stream_specifier(s, candidate, spec, NULL, NULL);
+ unsigned idx = g ? g->streams[i]->index : (p ? p->stream_index[i] : i);
+ const AVStream *candidate = s->streams[idx];
+ ret = match_stream_specifier(s, candidate, spec, NULL, NULL, NULL);
if (ret < 0)
goto error;
if (ret > 0 && index-- == 0 && st == candidate)
@@ -629,6 +710,100 @@ error:
return ret;
}
+/**
+ * Matches a stream group specifier (but ignores requested index).
+ *
+ * @param indexptr set to point to the requested group index if there is one
+ *
+ * @return <0 on error
+ * 0 if stg is NOT a matching stream group
+ * >0 if stg is a matching stream group
+ */
+static int match_stream_group_specifier(const AVFormatContext *s, const AVStreamGroup *stg,
+ const char *spec, const char **indexptr)
+{
+ int match = 1; /* Stores if the specifier matches so far. */
+ while (*spec) {
+ if (*spec <= '9' && *spec >= '0') { /* opt:index */
+ if (indexptr)
+ *indexptr = spec;
+ return match;
+ } else if (*spec == 't' && *(spec + 1) == ':') {
+ int64_t group_type = -1;
+ int found = 0;
+ char *endptr;
+ spec += 2;
+ group_type = strtol(spec, &endptr, 0);
+ /* Disallow empty type and make sure that if we are not at the end, then another specifier must follow. */
+ if (spec == endptr || (*endptr && *endptr++ != ':'))
+ return AVERROR(EINVAL);
+ spec = endptr;
+ if (match && group_type > 0) {
+ for (unsigned i = 0; i < s->nb_stream_groups; i++) {
+ if (group_type == s->stream_groups[i]->type) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ if (!found)
+ match = 0;
+ } else if (*spec == '#' ||
+ (*spec == 'i' && *(spec + 1) == ':')) {
+ int group_id;
+ char *endptr;
+ spec += 1 + (*spec == 'i');
+ group_id = strtol(spec, &endptr, 0);
+ if (spec == endptr || *endptr) /* Disallow empty id and make sure we are at the end. */
+ return AVERROR(EINVAL);
+ return match && (group_id == stg->id);
+ }
+ }
+
+ return match;
+}
+
+int avformat_match_stream_group_specifier(AVFormatContext *s, AVStreamGroup *stg,
+ const char *spec)
+{
+ int ret, index;
+ char *endptr;
+ const char *indexptr = NULL;
+
+ ret = match_stream_group_specifier(s, stg, spec, &indexptr);
+ if (ret < 0)
+ goto error;
+
+ if (!indexptr)
+ return ret;
+
+ index = strtol(indexptr, &endptr, 0);
+ if (*endptr) { /* We can't have anything after the requested index. */
+ ret = AVERROR(EINVAL);
+ goto error;
+ }
+
+ /* This is not really needed but saves us a loop for simple stream index specifiers. */
+ if (spec == indexptr)
+ return (index == stg->index);
+
+ /* If we requested a matching stream index, we have to ensure stg is that. */
+ for (int i = 0; i < s->nb_stream_groups && index >= 0; i++) {
+ const AVStreamGroup *candidate = s->stream_groups[i];
+ ret = match_stream_group_specifier(s, candidate, spec, NULL);
+ if (ret < 0)
+ goto error;
+ if (ret > 0 && index-- == 0 && stg == candidate)
+ return 1;
+ }
+ return 0;
+
+error:
+ if (ret == AVERROR(EINVAL))
+ av_log(s, AV_LOG_ERROR, "Invalid stream group specifier: %s.\n", spec);
+ return ret;
+}
+
AVRational av_guess_sample_aspect_ratio(AVFormatContext *format, AVStream *stream, AVFrame *frame)
{
AVRational undef = {0, 1};
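A rough usage sketch for the matching function added above, assuming "s" is an AVFormatContext that contains a stream group with id 1:

for (unsigned i = 0; i < s->nb_stream_groups; i++) {
    int ret = avformat_match_stream_group_specifier(s, s->stream_groups[i], "#1");
    if (ret < 0)
        return ret; /* invalid group specifier */
    if (ret > 0)
        av_log(NULL, AV_LOG_INFO, "Stream group %u has id 1\n", i);
}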
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 9e7eca007e..9e428ee843 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1018,6 +1018,83 @@ typedef struct AVStream {
int pts_wrap_bits;
} AVStream;
+enum AVStreamGroupParamsType {
+ AV_STREAM_GROUP_PARAMS_NONE,
+ AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT,
+ AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION,
+};
+
+struct AVIAMFAudioElement;
+struct AVIAMFMixPresentation;
+
+typedef struct AVStreamGroup {
+ /**
+ * A class for @ref avoptions. Set by avformat_stream_group_create().
+ */
+ const AVClass *av_class;
+
+ void *priv_data;
+
+ /**
+ * Group index in AVFormatContext.
+ */
+ unsigned int index;
+
+ /**
+ * Group type-specific group ID.
+ *
+ * decoding: set by libavformat
+ * encoding: may set by the user
+ */
+ int64_t id;
+
+ /**
+ * Group type
+ *
+ * decoding: set by libavformat on group creation
+ * encoding: set by avformat_stream_group_create()
+ */
+ enum AVStreamGroupParamsType type;
+
+ /**
+ * Group type-specific parameters
+ */
+ union {
+ struct AVIAMFAudioElement *iamf_audio_element;
+ struct AVIAMFMixPresentation *iamf_mix_presentation;
+ } params;
+
+ /**
+ * Metadata that applies to the whole group.
+ *
+ * - demuxing: set by libavformat on group creation
+ * - muxing: may be set by the caller before avformat_write_header()
+ *
+ * Freed by libavformat in avformat_free_context().
+ */
+ AVDictionary *metadata;
+
+ /**
+ * Number of elements in AVStreamGroup.streams.
+ *
+ * Set by avformat_stream_group_add_stream(), must not be modified by any other code.
+ */
+ unsigned int nb_streams;
+
+ /**
+ * A list of streams in the group. New entries are created with
+ * avformat_stream_group_add_stream().
+ *
+ * - demuxing: entries are created by libavformat on group creation.
+ * If AVFMTCTX_NOHEADER is set in ctx_flags, then new entries may also
+ * appear in av_read_frame().
+ * - muxing: entries are created by the user before avformat_write_header().
+ *
+ * Freed by libavformat in avformat_free_context().
+ */
+ AVStream **streams;
+} AVStreamGroup;
+
struct AVCodecParserContext *av_stream_get_parser(const AVStream *s);
#if FF_API_GET_END_PTS
@@ -1726,6 +1803,26 @@ typedef struct AVFormatContext {
* @return 0 on success, a negative AVERROR code on failure
*/
int (*io_close2)(struct AVFormatContext *s, AVIOContext *pb);
+
+ /**
+ * Number of elements in AVFormatContext.stream_groups.
+ *
+ * Set by avformat_stream_group_create(), must not be modified by any other code.
+ */
+ unsigned int nb_stream_groups;
+
+ /**
+ * A list of all stream groups in the file. New groups are created with
+ * avformat_stream_group_create(), and filled with avformat_stream_group_add_stream().
+ *
+ * - demuxing: groups may be created by libavformat in avformat_open_input().
+ * If AVFMTCTX_NOHEADER is set in ctx_flags, then new groups may also
+ * appear in av_read_frame().
+ * - muxing: groups may be created by the user before avformat_write_header().
+ *
+ * Freed by libavformat in avformat_free_context().
+ */
+ AVStreamGroup **stream_groups;
} AVFormatContext;
/**
@@ -1844,6 +1941,37 @@ const AVClass *avformat_get_class(void);
*/
const AVClass *av_stream_get_class(void);
+/**
+ * Get the AVClass for AVStreamGroup. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *av_stream_group_get_class(void);
+
+/**
+ * Add a new empty stream group to a media file.
+ *
+ * When demuxing, it may be called by the demuxer in read_header(). If the
+ * flag AVFMTCTX_NOHEADER is set in s.ctx_flags, then it may also
+ * be called in read_packet().
+ *
+ * When muxing, may be called by the user before avformat_write_header().
+ *
+ * User is required to call avformat_free_context() to clean up the allocation
+ * by avformat_stream_group_create().
+ *
+ * New streams can be added to the group with avformat_stream_group_add_stream().
+ *
+ * @param s media file handle
+ *
+ * @return newly created group or NULL on error.
+ * @see avformat_new_stream, avformat_stream_group_add_stream.
+ */
+AVStreamGroup *avformat_stream_group_create(AVFormatContext *s,
+ enum AVStreamGroupParamsType type,
+ AVDictionary **options);
+
/**
* Add a new stream to a media file.
*
@@ -1863,6 +1991,31 @@ const AVClass *av_stream_get_class(void);
*/
AVStream *avformat_new_stream(AVFormatContext *s, const struct AVCodec *c);
+/**
+ * Add an already allocated stream to a stream group.
+ *
+ * When demuxing, it may be called by the demuxer in read_header(). If the
+ * flag AVFMTCTX_NOHEADER is set in s.ctx_flags, then it may also
+ * be called in read_packet().
+ *
+ * When muxing, may be called by the user before avformat_write_header() after
+ * having allocated a new group with avformat_stream_group_create() and stream with
+ * avformat_new_stream().
+ *
+ * User is required to call avformat_free_context() to clean up the allocation
+ * by avformat_stream_group_add_stream().
+ *
+ * @param stg stream group belonging to a media file.
+ * @param st stream in the media file to add to the group.
+ *
+ * @retval 0 success
+ * @retval AVERROR(EEXIST) the stream was already in the group
+ * @retval "another negative error code" legitimate errors
+ *
+ * @see avformat_new_stream, avformat_stream_group_create.
+ */
+int avformat_stream_group_add_stream(AVStreamGroup *stg, AVStream *st);
+
#if FF_API_AVSTREAM_SIDE_DATA
/**
* Wrap an existing array as stream side data.
@@ -2819,6 +2972,22 @@ AVRational av_guess_frame_rate(AVFormatContext *ctx, AVStream *stream,
int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
const char *spec);
+/**
+ * Check if the group stg contained in s is matched by the stream group
+ * specifier spec.
+ *
+ * See the "stream group specifiers" chapter in the documentation for the
+ * syntax of spec.
+ *
+ * @return >0 if stg is matched by spec;
+ * 0 if stg is not matched by spec;
+ * AVERROR code if spec is invalid
+ *
+ * @note A stream group specifier can match several groups in the format.
+ */
+int avformat_match_stream_group_specifier(AVFormatContext *s, AVStreamGroup *stg,
+ const char *spec);
+
int avformat_queue_attached_pictures(AVFormatContext *s);
enum AVTimebaseSource {
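A minimal muxing-side sketch of the API documented above, assuming "oc" is an output AVFormatContext and with error handling trimmed:

AVStreamGroup *stg = avformat_stream_group_create(oc, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT, NULL);
AVStream *st = avformat_new_stream(oc, NULL);
int ret;

if (!stg || !st)
    return AVERROR(ENOMEM);

stg->id = 1; /* group type-specific id, optional when muxing */

ret = avformat_stream_group_add_stream(stg, st);
if (ret < 0)
    return ret; /* e.g. AVERROR(EEXIST) if the stream was already in the group */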
diff --git a/libavformat/dump.c b/libavformat/dump.c
index c0868a1bb3..81e5f13004 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c
@@ -24,6 +24,7 @@
#include "libavutil/channel_layout.h"
#include "libavutil/display.h"
+#include "libavutil/iamf.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/mastering_display_metadata.h"
@@ -134,28 +135,36 @@ static void print_fps(double d, const char *postfix)
av_log(NULL, AV_LOG_INFO, "%1.0fk %s", d / 1000, postfix);
}
-static void dump_metadata(void *ctx, const AVDictionary *m, const char *indent)
+static void dump_dictionary(void *ctx, const AVDictionary *m,
+ const char *name, const char *indent)
{
- if (m && !(av_dict_count(m) == 1 && av_dict_get(m, "language", NULL, 0))) {
- const AVDictionaryEntry *tag = NULL;
-
- av_log(ctx, AV_LOG_INFO, "%sMetadata:\n", indent);
- while ((tag = av_dict_iterate(m, tag)))
- if (strcmp("language", tag->key)) {
- const char *p = tag->value;
- av_log(ctx, AV_LOG_INFO,
- "%s %-16s: ", indent, tag->key);
- while (*p) {
- size_t len = strcspn(p, "\x8\xa\xb\xc\xd");
- av_log(ctx, AV_LOG_INFO, "%.*s", (int)(FFMIN(255, len)), p);
- p += len;
- if (*p == 0xd) av_log(ctx, AV_LOG_INFO, " ");
- if (*p == 0xa) av_log(ctx, AV_LOG_INFO, "\n%s %-16s: ", indent, "");
- if (*p) p++;
- }
- av_log(ctx, AV_LOG_INFO, "\n");
+ const AVDictionaryEntry *tag = NULL;
+
+ if (!m)
+ return;
+
+ av_log(ctx, AV_LOG_INFO, "%s%s:\n", indent, name);
+ while ((tag = av_dict_iterate(m, tag)))
+ if (strcmp("language", tag->key)) {
+ const char *p = tag->value;
+ av_log(ctx, AV_LOG_INFO,
+ "%s %-16s: ", indent, tag->key);
+ while (*p) {
+ size_t len = strcspn(p, "\x8\xa\xb\xc\xd");
+ av_log(ctx, AV_LOG_INFO, "%.*s", (int)(FFMIN(255, len)), p);
+ p += len;
+ if (*p == 0xd) av_log(ctx, AV_LOG_INFO, " ");
+ if (*p == 0xa) av_log(ctx, AV_LOG_INFO, "\n%s %-16s: ", indent, "");
+ if (*p) p++;
}
- }
+ av_log(ctx, AV_LOG_INFO, "\n");
+ }
+}
+
+static void dump_metadata(void *ctx, const AVDictionary *m, const char *indent)
+{
+ if (m && !(av_dict_count(m) == 1 && av_dict_get(m, "language", NULL, 0)))
+ dump_dictionary(ctx, m, "Metadata", indent);
}
/* param change side data*/
@@ -509,7 +518,7 @@ static void dump_sidedata(void *ctx, const AVStream *st, const char *indent)
/* "user interface" functions */
static void dump_stream_format(const AVFormatContext *ic, int i,
- int index, int is_output)
+ int group_index, int index, int is_output)
{
char buf[256];
int flags = (is_output ? ic->oformat->flags : ic->iformat->flags);
@@ -517,6 +526,8 @@ static void dump_stream_format(const AVFormatContext *ic, int i,
const FFStream *const sti = cffstream(st);
const AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0);
const char *separator = ic->dump_separator;
+ const char *group_indent = group_index >= 0 ? " " : "";
+ const char *extra_indent = group_index >= 0 ? " " : " ";
AVCodecContext *avctx;
int ret;
@@ -543,7 +554,8 @@ static void dump_stream_format(const AVFormatContext *ic, int i,
avcodec_string(buf, sizeof(buf), avctx, is_output);
avcodec_free_context(&avctx);
- av_log(NULL, AV_LOG_INFO, " Stream #%d:%d", index, i);
+ av_log(NULL, AV_LOG_INFO, "%s Stream #%d", group_indent, index);
+ av_log(NULL, AV_LOG_INFO, ":%d", i);
/* the pid is an important information, so we display it */
/* XXX: add a generic system */
@@ -621,9 +633,89 @@ static void dump_stream_format(const AVFormatContext *ic, int i,
av_log(NULL, AV_LOG_INFO, " (non-diegetic)");
av_log(NULL, AV_LOG_INFO, "\n");
- dump_metadata(NULL, st->metadata, " ");
+ dump_metadata(NULL, st->metadata, extra_indent);
+
+ dump_sidedata(NULL, st, extra_indent);
+}
+
+static void dump_stream_group(const AVFormatContext *ic, uint8_t *printed,
+ int i, int index, int is_output)
+{
+ const AVStreamGroup *stg = ic->stream_groups[i];
+ int flags = (is_output ? ic->oformat->flags : ic->iformat->flags);
+ char buf[512];
+ int ret;
- dump_sidedata(NULL, st, " ");
+ av_log(NULL, AV_LOG_INFO, " Stream group #%d:%d", index, i);
+ if (flags & AVFMT_SHOW_IDS)
+ av_log(NULL, AV_LOG_INFO, "[0x%"PRIx64"]", stg->id);
+ av_log(NULL, AV_LOG_INFO, ":");
+
+ switch (stg->type) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT: {
+ const AVIAMFAudioElement *audio_element = stg->params.iamf_audio_element;
+ av_log(NULL, AV_LOG_INFO, " IAMF Audio Element\n");
+ dump_metadata(NULL, stg->metadata, " ");
+ for (int j = 0; j < audio_element->num_layers; j++) {
+ const AVIAMFLayer *layer = audio_element->layers[j];
+ int channel_count = layer->ch_layout.nb_channels;
+ av_log(NULL, AV_LOG_INFO, " Layer %d:", j);
+ ret = av_channel_layout_describe(&layer->ch_layout, buf, sizeof(buf));
+ if (ret >= 0)
+ av_log(NULL, AV_LOG_INFO, " %s", buf);
+ av_log(NULL, AV_LOG_INFO, "\n");
+ for (int k = 0; channel_count > 0 && k < stg->nb_streams; k++) {
+ AVStream *st = stg->streams[k];
+ dump_stream_format(ic, st->index, i, index, is_output);
+ printed[st->index] = 1;
+ channel_count -= st->codecpar->ch_layout.nb_channels;
+ }
+ }
+ break;
+ }
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION: {
+ const AVIAMFMixPresentation *mix_presentation = stg->params.iamf_mix_presentation;
+ av_log(NULL, AV_LOG_INFO, " IAMF Mix Presentation\n");
+ dump_metadata(NULL, stg->metadata, " ");
+ dump_dictionary(NULL, mix_presentation->annotations, "Annotations", " ");
+ for (int j = 0; j < mix_presentation->num_submixes; j++) {
+ AVIAMFSubmix *sub_mix = mix_presentation->submixes[j];
+ av_log(NULL, AV_LOG_INFO, " Submix %d:\n", j);
+ for (int k = 0; k < sub_mix->num_elements; k++) {
+ const AVIAMFSubmixElement *submix_element = sub_mix->elements[k];
+ const AVStreamGroup *audio_element = NULL;
+ for (int l = 0; l < ic->nb_stream_groups; l++)
+ if (ic->stream_groups[l]->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT &&
+ ic->stream_groups[l]->id == submix_element->audio_element_id) {
+ audio_element = ic->stream_groups[l];
+ break;
+ }
+ if (audio_element) {
+ av_log(NULL, AV_LOG_INFO, " IAMF Audio Element #%d:%d",
+ index, audio_element->index);
+ if (flags & AVFMT_SHOW_IDS)
+ av_log(NULL, AV_LOG_INFO, "[0x%"PRIx64"]", audio_element->id);
+ av_log(NULL, AV_LOG_INFO, "\n");
+ dump_dictionary(NULL, submix_element->annotations, "Annotations", " ");
+ }
+ }
+ for (int k = 0; k < sub_mix->num_layouts; k++) {
+ const AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[k];
+ av_log(NULL, AV_LOG_INFO, " Layout #%d:", k);
+ if (submix_layout->layout_type == 2) {
+ ret = av_channel_layout_describe(&submix_layout->sound_system, buf, sizeof(buf));
+ if (ret >= 0)
+ av_log(NULL, AV_LOG_INFO, " %s", buf);
+ } else if (submix_layout->layout_type == 3)
+ av_log(NULL, AV_LOG_INFO, " Binaural");
+ av_log(NULL, AV_LOG_INFO, "\n");
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
}
void av_dump_format(AVFormatContext *ic, int index,
@@ -699,7 +791,7 @@ void av_dump_format(AVFormatContext *ic, int index,
dump_metadata(NULL, program->metadata, " ");
for (k = 0; k < program->nb_stream_indexes; k++) {
dump_stream_format(ic, program->stream_index[k],
- index, is_output);
+ -1, index, is_output);
printed[program->stream_index[k]] = 1;
}
total += program->nb_stream_indexes;
@@ -708,9 +800,12 @@ void av_dump_format(AVFormatContext *ic, int index,
av_log(NULL, AV_LOG_INFO, " No Program\n");
}
+ for (i = 0; i < ic->nb_stream_groups; i++)
+ dump_stream_group(ic, printed, i, index, is_output);
+
for (i = 0; i < ic->nb_streams; i++)
if (!printed[i])
- dump_stream_format(ic, i, index, is_output);
+ dump_stream_format(ic, i, -1, index, is_output);
av_free(printed);
}
diff --git a/libavformat/internal.h b/libavformat/internal.h
index 7702986c9c..c6181683ef 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -202,6 +202,7 @@ typedef struct FFStream {
*/
AVStream pub;
+ AVFormatContext *fmtctx;
/**
* Set to 1 if the codec allows reordering, so pts can be different
* from dts.
@@ -427,6 +428,26 @@ static av_always_inline const FFStream *cffstream(const AVStream *st)
return (const FFStream*)st;
}
+typedef struct FFStreamGroup {
+ /**
+ * The public context.
+ */
+ AVStreamGroup pub;
+
+ AVFormatContext *fmtctx;
+} FFStreamGroup;
+
+
+static av_always_inline FFStreamGroup *ffstreamgroup(AVStreamGroup *stg)
+{
+ return (FFStreamGroup*)stg;
+}
+
+static av_always_inline const FFStreamGroup *cffstreamgroup(const AVStreamGroup *stg)
+{
+ return (const FFStreamGroup*)stg;
+}
+
#ifdef __GNUC__
#define dynarray_add(tab, nb_ptr, elem)\
do {\
@@ -608,6 +629,18 @@ void ff_free_stream(AVStream **st);
*/
void ff_remove_stream(AVFormatContext *s, AVStream *st);
+/**
+ * Frees a stream group without modifying the corresponding AVFormatContext.
+ * Must only be called if the latter doesn't matter or if the stream group
+ * is not yet attached to an AVFormatContext.
+ */
+void ff_free_stream_group(AVStreamGroup **pstg);
+/**
+ * Remove a stream group from its AVFormatContext and free it.
+ * The group must be the last stream group of the AVFormatContext.
+ */
+void ff_remove_stream_group(AVFormatContext *s, AVStreamGroup *stg);
+
unsigned int ff_codec_get_tag(const AVCodecTag *tags, enum AVCodecID id);
enum AVCodecID ff_codec_get_id(const AVCodecTag *tags, unsigned int tag);
diff --git a/libavformat/options.c b/libavformat/options.c
index 1d8c52246b..bf6113ca95 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -26,6 +26,7 @@
#include "libavcodec/codec_par.h"
#include "libavutil/avassert.h"
+#include "libavutil/iamf.h"
#include "libavutil/internal.h"
#include "libavutil/intmath.h"
#include "libavutil/opt.h"
@@ -271,6 +272,7 @@ AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c)
if (!st->codecpar)
goto fail;
+ sti->fmtctx = s;
sti->avctx = avcodec_alloc_context3(NULL);
if (!sti->avctx)
goto fail;
@@ -325,6 +327,143 @@ fail:
return NULL;
}
+static void *stream_group_child_next(void *obj, void *prev)
+{
+ AVStreamGroup *stg = obj;
+ if (!prev) {
+ switch(stg->type) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
+ return stg->params.iamf_audio_element;
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
+ return stg->params.iamf_mix_presentation;
+ default:
+ break;
+ }
+ }
+ return NULL;
+}
+
+static const AVClass *stream_group_child_iterate(void **opaque)
+{
+ uintptr_t i = (uintptr_t)*opaque;
+ const AVClass *ret = NULL;
+
+ switch(i) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
+ ret = av_iamf_audio_element_get_class();
+ break;
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
+ ret = av_iamf_mix_presentation_get_class();
+ break;
+ default:
+ break;
+ }
+
+ if (ret)
+ *opaque = (void*)(i + 1);
+ return ret;
+}
+
+static const AVOption stream_group_options[] = {
+ {"id", "Set group id", offsetof(AVStreamGroup, id), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+ { NULL }
+};
+
+static const AVClass stream_group_class = {
+ .class_name = "AVStreamGroup",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+ .option = stream_group_options,
+ .child_next = stream_group_child_next,
+ .child_class_iterate = stream_group_child_iterate,
+};
+
+const AVClass *av_stream_group_get_class(void)
+{
+ return &stream_group_class;
+}
+
+AVStreamGroup *avformat_stream_group_create(AVFormatContext *s,
+ enum AVStreamGroupParamsType type,
+ AVDictionary **options)
+{
+ AVStreamGroup **stream_groups;
+ AVStreamGroup *stg;
+ FFStreamGroup *stgi;
+
+ stream_groups = av_realloc_array(s->stream_groups, s->nb_stream_groups + 1,
+ sizeof(*stream_groups));
+ if (!stream_groups)
+ return NULL;
+ s->stream_groups = stream_groups;
+
+ stgi = av_mallocz(sizeof(*stgi));
+ if (!stgi)
+ return NULL;
+ stg = &stgi->pub;
+
+ stg->av_class = &stream_group_class;
+ av_opt_set_defaults(stg);
+ stg->type = type;
+ switch (type) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
+ stg->params.iamf_audio_element = av_iamf_audio_element_alloc();
+ if (!stg->params.iamf_audio_element)
+ goto fail;
+ break;
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
+ stg->params.iamf_mix_presentation = av_iamf_mix_presentation_alloc();
+ if (!stg->params.iamf_mix_presentation)
+ goto fail;
+ break;
+ default:
+ goto fail;
+ }
+
+ if (options) {
+ if (av_opt_set_dict2(stg, options, AV_OPT_SEARCH_CHILDREN))
+ goto fail;
+ }
+
+ stgi->fmtctx = s;
+ stg->index = s->nb_stream_groups;
+
+ s->stream_groups[s->nb_stream_groups++] = stg;
+
+ return stg;
+fail:
+ ff_free_stream_group(&stg);
+ return NULL;
+}
+
+static int stream_group_add_stream(AVStreamGroup *stg, AVStream *st)
+{
+ AVStream **streams = av_realloc_array(stg->streams, stg->nb_streams + 1,
+ sizeof(*stg->streams));
+ if (!streams)
+ return AVERROR(ENOMEM);
+
+ stg->streams = streams;
+ stg->streams[stg->nb_streams++] = st;
+
+ return 0;
+}
+
+int avformat_stream_group_add_stream(AVStreamGroup *stg, AVStream *st)
+{
+ const FFStreamGroup *stgi = cffstreamgroup(stg);
+ const FFStream *sti = cffstream(st);
+
+ if (stgi->fmtctx != sti->fmtctx)
+ return AVERROR(EINVAL);
+
+ for (int i = 0; i < stg->nb_streams; i++)
+ if (stg->streams[i]->index == st->index)
+ return AVERROR(EEXIST);
+
+ return stream_group_add_stream(stg, st);
+}
+
static int option_is_disposition(const AVOption *opt)
{
return opt->type == AV_OPT_TYPE_CONST &&
--
2.42.1
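For illustration, a small sketch of how the AVClass wiring in options.c above is meant to be used from calling code (assuming "oc" is an output AVFormatContext; the IAMF-specific option name is deliberately left as a placeholder):

AVStreamGroup *stg = avformat_stream_group_create(oc, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION, NULL);
if (!stg)
    return AVERROR(ENOMEM);

/* "id" is declared in stream_group_options */
av_opt_set_int(stg, "id", 1, 0);

/* Options of the type-specific child (here the AVIAMFMixPresentation) are
 * reachable through the group via child_next/child_class_iterate, e.g.:
 * av_opt_set(stg, "<iamf option>", "<value>", AV_OPT_SEARCH_CHILDREN); */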
* [FFmpeg-devel] [PATCH 6/9] ffmpeg: add support for muxing AVStreamGroups
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (4 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 5/9] avformat: introduce AVStreamGroup James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 7/9] avcodec/packet: add IAMF Parameters side data types James Almer
` (6 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Starting with IAMF support.
Signed-off-by: James Almer <jamrial@gmail.com>
---
fftools/ffmpeg.h | 2 +
fftools/ffmpeg_mux_init.c | 327 ++++++++++++++++++++++++++++++++++++++
fftools/ffmpeg_opt.c | 2 +
3 files changed, 331 insertions(+)
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 41935d39d5..057535adbb 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -262,6 +262,8 @@ typedef struct OptionsContext {
int nb_disposition;
SpecifierOpt *program;
int nb_program;
+ SpecifierOpt *stream_groups;
+ int nb_stream_groups;
SpecifierOpt *time_bases;
int nb_time_bases;
SpecifierOpt *enc_time_bases;
diff --git a/fftools/ffmpeg_mux_init.c b/fftools/ffmpeg_mux_init.c
index 63a25a350f..a4c564e5ec 100644
--- a/fftools/ffmpeg_mux_init.c
+++ b/fftools/ffmpeg_mux_init.c
@@ -39,6 +39,7 @@
#include "libavutil/dict.h"
#include "libavutil/display.h"
#include "libavutil/getenv_utf8.h"
+#include "libavutil/iamf.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/mem.h"
@@ -1943,6 +1944,328 @@ static int setup_sync_queues(Muxer *mux, AVFormatContext *oc, int64_t buf_size_u
return 0;
}
+static int of_parse_iamf_audio_element_layers(Muxer *mux, AVStreamGroup *stg, char **ptr)
+{
+ AVIAMFAudioElement *audio_element = stg->params.iamf_audio_element;
+ AVDictionary *dict = NULL;
+ const char *token;
+ int ret = 0;
+
+ audio_element->demixing_info =
+ av_iamf_param_definition_alloc(AV_IAMF_PARAMETER_DEFINITION_DEMIXING, NULL, 1, NULL, NULL);
+ audio_element->recon_gain_info =
+ av_iamf_param_definition_alloc(AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN, NULL, 1, NULL, NULL);
+
+ if (!audio_element->demixing_info ||
+ !audio_element->recon_gain_info)
+ return AVERROR(ENOMEM);
+
+ /* process manually set layers and parameters */
+ token = av_strtok(NULL, ",", ptr);
+ while (token) {
+ const AVDictionaryEntry *e;
+ int demixing = 0, recon_gain = 0;
+ int layer = 0;
+
+ if (av_strstart(token, "layer=", &token))
+ layer = 1;
+ else if (av_strstart(token, "demixing=", &token))
+ demixing = 1;
+ else if (av_strstart(token, "recon_gain=", &token))
+ recon_gain = 1;
+
+ av_dict_free(&dict);
+ ret = av_dict_parse_string(&dict, token, "=", ":", 0);
+ if (ret < 0) {
+ av_log(mux, AV_LOG_ERROR, "Error parsing audio element specification %s\n", token);
+ goto fail;
+ }
+
+ if (layer) {
+ ret = av_iamf_audio_element_add_layer(audio_element, &dict);
+ if (ret < 0) {
+ av_log(mux, AV_LOG_ERROR, "Error adding layer to stream group %d\n", stg->index);
+ goto fail;
+ }
+ } else if (demixing || recon_gain) {
+ AVIAMFParamDefinition *param = demixing ? audio_element->demixing_info
+ : audio_element->recon_gain_info;
+ void *subblock = av_iamf_param_definition_get_subblock(param, 0);
+
+ av_opt_set_dict(param, &dict);
+ av_opt_set_dict(subblock, &dict);
+
+ /* Hardcode spec parameters */
+ param->param_definition_mode = 0;
+ param->parameter_rate = stg->streams[0]->codecpar->sample_rate;
+ param->duration =
+ param->constant_subblock_duration = stg->streams[0]->codecpar->frame_size;
+ }
+
+ // make sure that no entries are left in the dict
+ e = NULL;
+ if (e = av_dict_iterate(dict, e)) {
+ av_log(mux, AV_LOG_FATAL, "Unknown layer key %s.\n", e->key);
+ ret = AVERROR(EINVAL);
+ goto fail;
+ }
+ token = av_strtok(NULL, ",", ptr);
+ }
+
+fail:
+ av_dict_free(&dict);
+ if (!ret && !audio_element->num_layers) {
+ av_log(mux, AV_LOG_ERROR, "No layer in audio element specification\n");
+ ret = AVERROR(EINVAL);
+ }
+
+ return ret;
+}
+
+static int of_parse_iamf_submixes(Muxer *mux, AVStreamGroup *stg, char **ptr)
+{
+ AVFormatContext *oc = mux->fc;
+ AVIAMFMixPresentation *mix = stg->params.iamf_mix_presentation;
+ AVDictionary *dict = NULL;
+ const char *token;
+ char *submix_str = NULL;
+ int ret = 0;
+
+ /* process manually set submixes */
+ token = av_strtok(NULL, ",", ptr);
+ while (token) {
+ AVIAMFSubmix *submix = NULL;
+ const char *subtoken;
+ char *subptr = NULL;
+
+ if (!av_strstart(token, "submix=", &token)) {
+ av_log(mux, AV_LOG_ERROR, "No submix in mix presentation specification \"%s\"\n", token);
+ goto fail;
+ }
+
+ submix_str = av_strdup(token);
+ if (!submix_str)
+ goto fail;
+
+ ret = av_iamf_mix_presentation_add_submix(mix, NULL);
+ if (!ret) {
+ submix = mix->submixes[mix->num_submixes - 1];
+ submix->output_mix_config =
+ av_iamf_param_definition_alloc(AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, 0, NULL, NULL);
+ if (!submix->output_mix_config)
+ ret = AVERROR(ENOMEM);
+ }
+ if (ret < 0) {
+ av_log(mux, AV_LOG_ERROR, "Error adding submix to stream group %d\n", stg->index);
+ goto fail;
+ }
+
+ submix->output_mix_config->parameter_rate = stg->streams[0]->codecpar->sample_rate;
+
+ subptr = NULL;
+ subtoken = av_strtok(submix_str, "|", &subptr);
+ while (subtoken) {
+ const AVDictionaryEntry *e;
+ int element = 0, layout = 0;
+
+ if (av_strstart(subtoken, "element=", &subtoken))
+ element = 1;
+ else if (av_strstart(subtoken, "layout=", &subtoken))
+ layout = 1;
+
+ av_dict_free(&dict);
+ ret = av_dict_parse_string(&dict, subtoken, "=", ":", 0);
+ if (ret < 0) {
+ av_log(mux, AV_LOG_ERROR, "Error parsing submix specification \"%s\"\n", subtoken);
+ goto fail;
+ }
+
+ if (element) {
+ AVIAMFSubmixElement *submix_element;
+ int idx = -1;
+
+ if (e = av_dict_get(dict, "stg", NULL, 0))
+ idx = strtol(e->value, NULL, 0);
+ av_dict_set(&dict, "stg", NULL, 0);
+ if (idx < 0 || idx >= oc->nb_stream_groups) {
+ av_log(mux, AV_LOG_ERROR, "Invalid or missing stream group index in "
+ "submix element specification \"%s\"\n", subtoken);
+ ret = AVERROR(EINVAL);
+ goto fail;
+ }
+ ret = av_iamf_submix_add_element(submix, NULL);
+ if (ret < 0)
+ av_log(mux, AV_LOG_ERROR, "Error adding element to submix\n");
+
+ submix_element = submix->elements[submix->num_elements - 1];
+ submix_element->audio_element_id = oc->stream_groups[idx]->id;
+
+ submix_element->element_mix_config =
+ av_iamf_param_definition_alloc(AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, 0, NULL, NULL);
+ if (!submix_element->element_mix_config)
+ ret = AVERROR(ENOMEM);
+ av_opt_set_dict2(submix_element, &dict, AV_OPT_SEARCH_CHILDREN);
+ submix_element->element_mix_config->parameter_rate = stg->streams[0]->codecpar->sample_rate;
+ } else if (layout) {
+ ret = av_iamf_submix_add_layout(submix, &dict);
+ if (ret < 0)
+ av_log(mux, AV_LOG_ERROR, "Error adding layout to submix\n");
+ } else
+ av_opt_set_dict2(submix, &dict, AV_OPT_SEARCH_CHILDREN);
+
+ if (ret < 0) {
+ goto fail;
+ }
+
+ // make sure that no entries are left in the dict
+ e = NULL;
+ while (e = av_dict_iterate(dict, e)) {
+ av_log(mux, AV_LOG_FATAL, "Unknown submix key %s.\n", e->key);
+ ret = AVERROR(EINVAL);
+ goto fail;
+ }
+ subtoken = av_strtok(NULL, "|", &subptr);
+ }
+ av_freep(&submix_str);
+
+ if (!submix->num_elements) {
+ av_log(mux, AV_LOG_ERROR, "No audio elements in submix specification \"%s\"\n", token);
+ ret = AVERROR(EINVAL);
+ }
+ token = av_strtok(NULL, ",", ptr);
+ }
+
+fail:
+ av_dict_free(&dict);
+ av_free(submix_str);
+
+ return ret;
+}
+
+static int of_add_groups(Muxer *mux, const OptionsContext *o)
+{
+ AVFormatContext *oc = mux->fc;
+ int ret;
+
+ /* process manually set groups */
+ for (int i = 0; i < o->nb_stream_groups; i++) {
+ AVDictionary *dict = NULL, *tmp = NULL;
+ const AVDictionaryEntry *e;
+ AVStreamGroup *stg = NULL;
+ int type;
+ const char *token;
+ char *str, *ptr = NULL;
+ const AVOption opts[] = {
+ { "type", "Set group type", offsetof(AVStreamGroup, type), AV_OPT_TYPE_INT,
+ { .i64 = 0 }, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "type" },
+ { "iamf_audio_element", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT }, .unit = "type" },
+ { "iamf_mix_presentation", NULL, 0, AV_OPT_TYPE_CONST,
+ { .i64 = AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION }, .unit = "type" },
+ { NULL },
+ };
+ const AVClass class = {
+ .class_name = "StreamGroupType",
+ .item_name = av_default_item_name,
+ .option = opts,
+ .version = LIBAVUTIL_VERSION_INT,
+ };
+ const AVClass *pclass = &class;
+
+ str = av_strdup(o->stream_groups[i].u.str);
+ if (!str)
+ goto end;
+
+ token = av_strtok(str, ",", &ptr);
+ if (token) {
+ ret = av_dict_parse_string(&dict, token, "=", ":", AV_DICT_MULTIKEY);
+ if (ret < 0) {
+ av_log(mux, AV_LOG_ERROR, "Error parsing group specification %s\n", token);
+ goto end;
+ }
+
+ // "type" is not a user settable option in AVStreamGroup
+ e = av_dict_get(dict, "type", NULL, 0);
+ if (!e) {
+ av_log(mux, AV_LOG_ERROR, "No type define for Steam Group %d\n", i);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+
+ ret = av_opt_eval_int(&pclass, opts, e->value, &type);
+ if (ret < 0 || type == AV_STREAM_GROUP_PARAMS_NONE) {
+ av_log(mux, AV_LOG_ERROR, "Invalid group type \"%s\"\n", e->value);
+ goto end;
+ }
+
+ av_dict_copy(&tmp, dict, 0);
+ stg = avformat_stream_group_create(oc, type, &tmp);
+ if (!stg) {
+ ret = AVERROR(ENOMEM);
+ goto end;
+ }
+ av_dict_set(&tmp, "type", NULL, 0);
+
+ e = NULL;
+ while (e = av_dict_get(dict, "st", e, 0)) {
+ unsigned int idx = strtol(e->value, NULL, 0);
+ if (idx >= oc->nb_streams) {
+ av_log(mux, AV_LOG_ERROR, "Invalid stream index %d\n", idx);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ avformat_stream_group_add_stream(stg, oc->streams[idx]);
+ }
+ while (e = av_dict_get(dict, "stg", e, 0)) {
+ unsigned int idx = strtol(e->value, NULL, 0);
+ if (idx >= oc->nb_stream_groups || idx == stg->index) {
+ av_log(mux, AV_LOG_ERROR, "Invalid stream group index %d\n", idx);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ for (int j = 0; j < oc->stream_groups[idx]->nb_streams; j++)
+ avformat_stream_group_add_stream(stg, oc->stream_groups[idx]->streams[j]);
+ }
+
+ switch(type) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
+ ret = of_parse_iamf_audio_element_layers(mux, stg, &ptr);
+ break;
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
+ ret = of_parse_iamf_submixes(mux, stg, &ptr);
+ break;
+ default:
+ av_log(mux, AV_LOG_FATAL, "Unknown group type %d.\n", type);
+ ret = AVERROR(EINVAL);
+ break;
+ }
+
+ if (ret < 0)
+ goto end;
+
+ // make sure that nothing but "st" and "stg" entries are left in the dict
+ e = NULL;
+ while (e = av_dict_iterate(tmp, e)) {
+ if (!strcmp(e->key, "st") || !strcmp(e->key, "stg"))
+ continue;
+
+ av_log(mux, AV_LOG_FATAL, "Unknown group key %s.\n", e->key);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ }
+
+end:
+ av_dict_free(&dict);
+ av_dict_free(&tmp);
+ av_free(str);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
static int of_add_programs(Muxer *mux, const OptionsContext *o)
{
AVFormatContext *oc = mux->fc;
@@ -2740,6 +3063,10 @@ int of_open(const OptionsContext *o, const char *filename)
if (err < 0)
return err;
+ err = of_add_groups(mux, o);
+ if (err < 0)
+ return err;
+
err = of_add_programs(mux, o);
if (err < 0)
return err;
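For illustration, a stand-alone sketch of how of_add_groups() above splits a -stream_group value: the string is cut at ',' with av_strtok() and each token is parsed into key/value pairs ('=' separated, ':' delimited). The option string here is made up for the example and error handling is trimmed:

char *str = av_strdup("type=iamf_audio_element:id=1:st=0:st=1");
char *ptr = NULL;
const char *token = av_strtok(str, ",", &ptr);

if (token) {
    AVDictionary *dict = NULL;
    if (av_dict_parse_string(&dict, token, "=", ":", AV_DICT_MULTIKEY) >= 0) {
        const AVDictionaryEntry *e = NULL;
        while ((e = av_dict_iterate(dict, e)))
            av_log(NULL, AV_LOG_INFO, "%s -> %s\n", e->key, e->value);
    }
    av_dict_free(&dict);
}
av_free(str);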
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 304471dd03..1144f64f89 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -1491,6 +1491,8 @@ const OptionDef options[] = {
"add metadata", "string=string" },
{ "program", HAS_ARG | OPT_STRING | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(program) },
"add program with specified streams", "title=string:st=number..." },
+ { "stream_group", HAS_ARG | OPT_STRING | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(stream_groups) },
+ "add stream group with specified streams and group type-specific arguments", "id=number:st=number..." },
{ "dframes", HAS_ARG | OPT_PERFILE | OPT_EXPERT |
OPT_OUTPUT, { .func_arg = opt_data_frames },
"set the number of data frames to output", "number" },
--
2.42.1
* [FFmpeg-devel] [PATCH 7/9] avcodec/packet: add IAMF Parameters side data types
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (5 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 6/9] ffmpeg: add support for muxing AVStreamGroups James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 8/9] avformat: Immersive Audio Model and Formats demuxer James Almer
` (5 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/avpacket.c | 3 +++
libavcodec/packet.h | 24 ++++++++++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index e29725c2d2..0f8c9b77ae 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -301,6 +301,9 @@ const char *av_packet_side_data_name(enum AVPacketSideDataType type)
case AV_PKT_DATA_DOVI_CONF: return "DOVI configuration record";
case AV_PKT_DATA_S12M_TIMECODE: return "SMPTE ST 12-1:2014 timecode";
case AV_PKT_DATA_DYNAMIC_HDR10_PLUS: return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
+ case AV_PKT_DATA_IAMF_MIX_GAIN_PARAM: return "IAMF Mix Gain Parameter Data";
+ case AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM: return "IAMF Demixing Info Parameter Data";
+ case AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM: return "IAMF Recon Gain Info Parameter Data";
}
return NULL;
}
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index b19409b719..2c57d262c6 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -299,6 +299,30 @@ enum AVPacketSideDataType {
*/
AV_PKT_DATA_DYNAMIC_HDR10_PLUS,
+ /**
+ * IAMF Mix Gain Parameter Data associated with the audio frame. This metadata
+ * is in the form of the AVIAMFParamDefinition struct and contains information
+ * defined in sections 3.6.1 and 3.8.1 of the Immersive Audio Model and
+ * Formats standard.
+ */
+ AV_PKT_DATA_IAMF_MIX_GAIN_PARAM,
+
+ /**
+ * IAMF Demixing Info Parameter Data associated with the audio frame. This
+ * metadata is in the form of the AVIAMFParamDefinition struct and contains
+ * information defined in sections 3.6.1 and 3.8.2 of the Immersive Audio Model
+ * and Formats standard.
+ */
+ AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM,
+
+ /**
+ * IAMF Recon Gain Info Parameter Data associated with the audio frame. This
+ * metadata is in the form of the AVIAMFParamDefinition struct and contains
+ * information defined in sections 3.6.1 and 3.8.3 of the Immersive Audio Model
+ * and Formats standard.
+ */
+ AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM,
+
/**
* The number of side data types.
* This is not part of the public API/ABI in the sense that it may
--
2.42.1
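For illustration, a minimal sketch of how a caller could retrieve one of these entries from a demuxed packet, assuming the side data buffer holds an AVIAMFParamDefinition as described in the comments above:

size_t size;
uint8_t *sd = av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, &size);
if (sd) {
    const AVIAMFParamDefinition *param = (const AVIAMFParamDefinition *)sd;
    /* inspect param->num_subblocks, the subblocks, etc. */
}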
* [FFmpeg-devel] [PATCH 8/9] avformat: Immersive Audio Model and Formats demuxer
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (6 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 7/9] avcodec/packet: add IAMF Parameters side data types James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 9/9] avformat: Immersive Audio Model and Formats muxer James Almer
` (4 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavformat/Makefile | 1 +
libavformat/allformats.c | 1 +
libavformat/iamf.c | 1149 ++++++++++++++++++++++++++++++++++++++
libavformat/iamf.h | 167 ++++++
libavformat/iamfdec.c | 495 ++++++++++++++++
5 files changed, 1813 insertions(+)
create mode 100644 libavformat/iamf.c
create mode 100644 libavformat/iamf.h
create mode 100644 libavformat/iamfdec.c
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 329055ccfd..752833f5a8 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -258,6 +258,7 @@ OBJS-$(CONFIG_EVC_MUXER) += rawenc.o
OBJS-$(CONFIG_HLS_DEMUXER) += hls.o hls_sample_encryption.o
OBJS-$(CONFIG_HLS_MUXER) += hlsenc.o hlsplaylist.o avc.o
OBJS-$(CONFIG_HNM_DEMUXER) += hnm.o
+OBJS-$(CONFIG_IAMF_DEMUXER) += iamfdec.o iamf.o
OBJS-$(CONFIG_ICO_DEMUXER) += icodec.o
OBJS-$(CONFIG_ICO_MUXER) += icoenc.o
OBJS-$(CONFIG_IDCIN_DEMUXER) += idcin.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index d4b505a5a3..63ca44bacd 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -212,6 +212,7 @@ extern const FFOutputFormat ff_hevc_muxer;
extern const AVInputFormat ff_hls_demuxer;
extern const FFOutputFormat ff_hls_muxer;
extern const AVInputFormat ff_hnm_demuxer;
+extern const AVInputFormat ff_iamf_demuxer;
extern const AVInputFormat ff_ico_demuxer;
extern const FFOutputFormat ff_ico_muxer;
extern const AVInputFormat ff_idcin_demuxer;
diff --git a/libavformat/iamf.c b/libavformat/iamf.c
new file mode 100644
index 0000000000..be63db663c
--- /dev/null
+++ b/libavformat/iamf.c
@@ -0,0 +1,1149 @@
+/*
+ * Immersive Audio Model and Formats parsing
+ * Copyright (c) 2023 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/iamf.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/flac.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/put_bits.h"
+#include "avio_internal.h"
+#include "iamf.h"
+#include "isom.h"
+
+const AVChannelLayout ff_iamf_scalable_ch_layouts[10] = {
+ AV_CHANNEL_LAYOUT_MONO,
+ AV_CHANNEL_LAYOUT_STEREO,
+ // "Loudspeaker configuration for Sound System B"
+ AV_CHANNEL_LAYOUT_5POINT1_BACK,
+ // "Loudspeaker configuration for Sound System C"
+ AV_CHANNEL_LAYOUT_5POINT1POINT2_BACK,
+ // "Loudspeaker configuration for Sound System D"
+ AV_CHANNEL_LAYOUT_5POINT1POINT4_BACK,
+ // "Loudspeaker configuration for Sound System I"
+ AV_CHANNEL_LAYOUT_7POINT1,
+ // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
+ AV_CHANNEL_LAYOUT_7POINT1POINT2,
+ // "Loudspeaker configuration for Sound System J"
+ AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK,
+ // Front subset of "Loudspeaker configuration for Sound System J"
+ AV_CHANNEL_LAYOUT_3POINT1POINT2,
+ // Binaural
+ AV_CHANNEL_LAYOUT_STEREO,
+};
+
+const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13] = {
+ { SOUND_SYSTEM_A_0_2_0, AV_CHANNEL_LAYOUT_STEREO },
+ { SOUND_SYSTEM_B_0_5_0, AV_CHANNEL_LAYOUT_5POINT1_BACK },
+ { SOUND_SYSTEM_C_2_5_0, AV_CHANNEL_LAYOUT_5POINT1POINT2_BACK },
+ { SOUND_SYSTEM_D_4_5_0, AV_CHANNEL_LAYOUT_5POINT1POINT4_BACK },
+ { SOUND_SYSTEM_E_4_5_1,
+ {
+ .nb_channels = 11,
+ .order = AV_CHANNEL_ORDER_NATIVE,
+ .u.mask = AV_CH_LAYOUT_5POINT1POINT4_BACK | AV_CH_BOTTOM_FRONT_CENTER,
+ },
+ },
+ { SOUND_SYSTEM_F_3_7_0, AV_CHANNEL_LAYOUT_7POINT2POINT3 },
+ { SOUND_SYSTEM_G_4_9_0, AV_CHANNEL_LAYOUT_9POINT1POINT4_BACK },
+ { SOUND_SYSTEM_H_9_10_3, AV_CHANNEL_LAYOUT_22POINT2 },
+ { SOUND_SYSTEM_I_0_7_0, AV_CHANNEL_LAYOUT_7POINT1 },
+ { SOUND_SYSTEM_J_4_7_0, AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK },
+ { SOUND_SYSTEM_10_2_7_0, AV_CHANNEL_LAYOUT_7POINT1POINT2 },
+ { SOUND_SYSTEM_11_2_3_0, AV_CHANNEL_LAYOUT_3POINT1POINT2 },
+ { SOUND_SYSTEM_12_0_1_0, AV_CHANNEL_LAYOUT_MONO },
+};
+
+static int opus_decoder_config(IAMFCodecConfig *codec_config,
+ AVIOContext *pb, int len)
+{
+ int left = len - avio_tell(pb);
+
+ if (left < 11)
+ return AVERROR_INVALIDDATA;
+
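+ // The config data read here lacks the 8-byte "OpusHead" magic, so
+ // prepend it to rebuild the extradata the Opus decoder expects.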
+ codec_config->extradata = av_malloc(left + 8);
+ if (!codec_config->extradata)
+ return AVERROR(ENOMEM);
+
+ AV_WB32(codec_config->extradata, MKBETAG('O','p','u','s'));
+ AV_WB32(codec_config->extradata + 4, MKBETAG('H','e','a','d'));
+ codec_config->extradata_size = avio_read(pb, codec_config->extradata + 8, left);
+ if (codec_config->extradata_size < left)
+ return AVERROR_INVALIDDATA;
+
+ codec_config->extradata_size += 8;
+ codec_config->sample_rate = 48000;
+
+ return 0;
+}
+
+static int aac_decoder_config(IAMFCodecConfig *codec_config,
+ AVIOContext *pb, int len, void *logctx)
+{
+ MPEG4AudioConfig cfg = { 0 };
+ int object_type_id, codec_id, stream_type;
+ int ret, tag, left;
+
+ tag = avio_r8(pb);
+ if (tag != MP4DecConfigDescrTag)
+ return AVERROR_INVALIDDATA;
+
+ object_type_id = avio_r8(pb);
+ if (object_type_id != 0x40)
+ return AVERROR_INVALIDDATA;
+
+ stream_type = avio_r8(pb);
+ if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
+ return AVERROR_INVALIDDATA;
+
+ avio_skip(pb, 3); // buffer size db
+ avio_skip(pb, 4); // rc_max_rate
+ avio_skip(pb, 4); // avg bitrate
+
+ codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
+ if (codec_id && codec_id != codec_config->codec_id)
+ return AVERROR_INVALIDDATA;
+
+ tag = avio_r8(pb);
+ if (tag != MP4DecSpecificDescrTag)
+ return AVERROR_INVALIDDATA;
+
+ left = len - avio_tell(pb);
+ if (left <= 0)
+ return AVERROR_INVALIDDATA;
+
+ codec_config->extradata = av_malloc(left);
+ if (!codec_config->extradata)
+ return AVERROR(ENOMEM);
+
+ codec_config->extradata_size = avio_read(pb, codec_config->extradata, left);
+ if (codec_config->extradata_size < left)
+ return AVERROR_INVALIDDATA;
+
+ ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
+ codec_config->extradata_size, 1, logctx);
+ if (ret < 0)
+ return ret;
+
+ codec_config->sample_rate = cfg.sample_rate;
+
+ return 0;
+}
+
+static int flac_decoder_config(IAMFCodecConfig *codec_config,
+ AVIOContext *pb, int len)
+{
+ int left;
+
+ avio_skip(pb, 4); // METADATA_BLOCK_HEADER
+
+ left = len - avio_tell(pb);
+ if (left < FLAC_STREAMINFO_SIZE)
+ return AVERROR_INVALIDDATA;
+
+ codec_config->extradata = av_malloc(left);
+ if (!codec_config->extradata)
+ return AVERROR(ENOMEM);
+
+ codec_config->extradata_size = avio_read(pb, codec_config->extradata, left);
+ if (codec_config->extradata_size < left)
+ return AVERROR_INVALIDDATA;
+
+ codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4;
+
+ return 0;
+}
+
+static int ipcm_decoder_config(IAMFCodecConfig *codec_config,
+ AVIOContext *pb, int len)
+{
+ static const enum AVCodecID sample_fmt[2][3] = {
+ { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
+ { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
+ };
+ int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
+ int sample_size = (avio_r8(pb) / 8 - 2); // 16, 24, 32
+ if (sample_format > 1 || sample_size > 2)
+ return AVERROR_INVALIDDATA;
+
+ codec_config->codec_id = sample_fmt[sample_format][sample_size];
+ codec_config->sample_rate = avio_rb32(pb);
+
+ if (len - avio_tell(pb))
+ return AVERROR_INVALIDDATA;
+
+ return 0;
+}
+
+static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
+{
+ IAMFCodecConfig *codec_config = NULL;
+ FFIOContext b;
+ AVIOContext *pbc;
+ uint8_t *buf;
+ enum AVCodecID avcodec_id;
+ unsigned codec_config_id, nb_samples, codec_id;
+ int16_t seek_preroll;
+ int ret;
+
+ buf = av_malloc(len);
+ if (!buf)
+ return AVERROR(ENOMEM);
+
+ ret = avio_read(pb, buf, len);
+ if (ret != len) {
+ if (ret >= 0)
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+ pbc = &b.pub;
+
+ codec_config_id = ffio_read_leb(pbc);
+ codec_id = avio_rb32(pbc);
+ nb_samples = ffio_read_leb(pbc);
+ seek_preroll = avio_rb16(pbc);
+
+ switch(codec_id) {
+ case MKBETAG('O','p','u','s'):
+ avcodec_id = AV_CODEC_ID_OPUS;
+ break;
+ case MKBETAG('m','p','4','a'):
+ avcodec_id = AV_CODEC_ID_AAC;
+ break;
+ case MKBETAG('f','L','a','C'):
+ avcodec_id = AV_CODEC_ID_FLAC;
+ break;
+ default:
+ avcodec_id = AV_CODEC_ID_NONE;
+ break;
+ }
+
+ for (int i = 0; i < c->nb_codec_configs; i++)
+ if (c->codec_configs[i].codec_config_id == codec_config_id) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ codec_config = av_dynarray2_add_nofree((void **)&c->codec_configs, &c->nb_codec_configs,
+ sizeof(*c->codec_configs), NULL);
+ if (!codec_config) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ memset(codec_config, 0, sizeof(*codec_config));
+
+ codec_config->codec_config_id = codec_config_id;
+ codec_config->codec_id = avcodec_id;
+ codec_config->nb_samples = nb_samples;
+ codec_config->seek_preroll = seek_preroll;
+
+ switch(codec_id) {
+ case MKBETAG('O','p','u','s'):
+ ret = opus_decoder_config(codec_config, pbc, len);
+ break;
+ case MKBETAG('m','p','4','a'):
+ ret = aac_decoder_config(codec_config, pbc, len, s);
+ break;
+ case MKBETAG('f','L','a','C'):
+ ret = flac_decoder_config(codec_config, pbc, len);
+ break;
+ case MKBETAG('i','p','c','m'):
+ ret = ipcm_decoder_config(codec_config, pbc, len);
+ break;
+ default:
+ break;
+ }
+ if (ret < 0)
+ goto fail;
+
+ len -= avio_tell(pbc);
+ if (len)
+ av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len);
+
+ ret = 0;
+fail:
+ av_free(buf);
+ return ret;
+}
+
+static int update_extradata(AVCodecParameters *codecpar)
+{
+ GetBitContext gb;
+ PutBitContext pb;
+ int ret;
+
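+ // The Codec Config extradata describes the element as a whole; substreams
+ // can be mono or stereo, so patch the channel count fields in each
+ // stream's copy of the extradata.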
+ switch(codecpar->codec_id) {
+ case AV_CODEC_ID_OPUS:
+ AV_WB8(codecpar->extradata + 9, codecpar->ch_layout.nb_channels);
+ break;
+ case AV_CODEC_ID_AAC: {
+ uint8_t buf[5];
+
+ init_put_bits(&pb, buf, sizeof(buf));
+ ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
+ if (ret < 0)
+ return ret;
+
+ ret = get_bits(&gb, 5);
+ put_bits(&pb, 5, ret);
+ if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
+ put_bits(&pb, 6, get_bits(&gb, 6));
+ ret = get_bits(&gb, 4);
+ put_bits(&pb, 4, ret);
+ if (ret == 0x0f)
+ put_bits(&pb, 24, get_bits(&gb, 24));
+
+ skip_bits(&gb, 4);
+ put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config
+ ret = put_bits_left(&pb);
+ put_bits(&pb, ret, get_bits(&gb, ret));
+ flush_put_bits(&pb);
+
+ memcpy(codecpar->extradata, buf, sizeof(buf));
+ break;
+ }
+ case AV_CODEC_ID_FLAC: {
+ uint8_t buf[13];
+
+ init_put_bits(&pb, buf, sizeof(buf));
+ ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size);
+ if (ret < 0)
+ return ret;
+
+ put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
+ put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
+ put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
+ skip_bits(&gb, 3);
+ put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1);
+ ret = put_bits_left(&pb);
+ put_bits(&pb, ret, get_bits(&gb, ret));
+ flush_put_bits(&pb);
+
+ memcpy(codecpar->extradata, buf, sizeof(buf));
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int scalable_channel_layout_config(void *s, AVIOContext *pb,
+ IAMFAudioElement *audio_element,
+ const IAMFCodecConfig *codec_config)
+{
+ int num_layers, k = 0;
+
+ num_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
+ // skip_bits(&gb, 5); //reserved
+
+ if (num_layers > 6)
+ return AVERROR_INVALIDDATA;
+
+ for (int i = 0; i < num_layers; i++) {
+ AVIAMFLayer *layer;
+ int loudspeaker_layout, output_gain_is_present_flag;
+ int substream_count, coupled_substream_count;
+ int ret, byte = avio_r8(pb);
+
+ ret = av_iamf_audio_element_add_layer(audio_element->element, NULL);
+ if (ret < 0)
+ return ret;
+
+ loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
+ output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
+ layer = audio_element->element->layers[i];
+ layer->recon_gain_is_present = (byte >> 2) & 1;
+ substream_count = avio_r8(pb);
+ coupled_substream_count = avio_r8(pb);
+
+ if (output_gain_is_present_flag) {
+ layer->output_gain_flags = avio_r8(pb) >> 2; // get_bits(&gb, 6);
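+ // output_gain is a signed Q7.8 fixed-point value; store it as an AVRational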
+ layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+ }
+
+ if (loudspeaker_layout < 10)
+ av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
+ else
+ layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
+ .nb_channels = substream_count +
+ coupled_substream_count };
+
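+ // The first coupled_substream_count substreams carry stereo pairs; the
+ // remaining ones are mono.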
+ for (int j = 0; j < substream_count; j++) {
+ IAMFSubStream *substream = &audio_element->substreams[k++];
+
+ substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
+ (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+ ret = update_extradata(substream->codecpar);
+ if (ret < 0)
+ return ret;
+ }
+
+ }
+
+ return 0;
+}
+
+static int ambisonics_config(void *s, AVIOContext *pb,
+ IAMFAudioElement *audio_element,
+ const IAMFCodecConfig *codec_config)
+{
+ AVIAMFLayer *layer;
+ unsigned ambisonics_mode;
+ int output_channel_count, substream_count, order;
+ int ret;
+
+ ambisonics_mode = ffio_read_leb(pb);
+ if (ambisonics_mode > 1)
+ return 0;
+
+ output_channel_count = avio_r8(pb); // C
+ substream_count = avio_r8(pb); // N
+ if (audio_element->nb_substreams != substream_count)
+ return AVERROR_INVALIDDATA;
+
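+ // A full ambisonics set of order N has (N + 1)^2 channels; derive the
+ // order from the channel count so incomplete sets can be rejected.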
+ order = floor(sqrt(output_channel_count - 1));
+ /* incomplete order - some harmonics are missing */
+ if ((order + 1) * (order + 1) != output_channel_count)
+ return AVERROR_INVALIDDATA;
+
+ ret = av_iamf_audio_element_add_layer(audio_element->element, NULL);
+ if (ret < 0)
+ return ret;
+
+ layer = audio_element->element->layers[0];
+ layer->ambisonics_mode = ambisonics_mode;
+ if (ambisonics_mode == 0) {
+ for (int i = 0; i < substream_count; i++) {
+ IAMFSubStream *substream = &audio_element->substreams[i];
+
+ substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+ ret = update_extradata(substream->codecpar);
+ if (ret < 0)
+ return ret;
+ }
+
+ layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM;
+ layer->ch_layout.nb_channels = output_channel_count;
+ layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map));
+ if (!layer->ch_layout.u.map)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < output_channel_count; i++)
+ layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
+ } else {
+ int coupled_substream_count = avio_r8(pb); // M
+ int nb_demixing_matrix = substream_count + coupled_substream_count;
+ int demixing_matrix_size = nb_demixing_matrix * output_channel_count;
+
+ layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
+ layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
+ if (!layer->demixing_matrix)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < demixing_matrix_size; i++)
+ layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+
+ for (int i = 0; i < substream_count; i++) {
+ IAMFSubStream *substream = &audio_element->substreams[i];
+
+ substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
+ (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+ ret = update_extradata(substream->codecpar);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int param_parse(void *s, IAMFContext *c, AVIOContext *pb,
+ unsigned int param_definition_type,
+ const AVIAMFAudioElement *audio_element,
+ AVIAMFParamDefinition **out_param_definition)
+{
+ IAMFParamDefinition *param_definition = NULL;
+ AVIAMFParamDefinition *param;
+ unsigned int parameter_id, parameter_rate, param_definition_mode;
+ unsigned int duration = 0, constant_subblock_duration = 0, num_subblocks = 0;
+ size_t param_size;
+
+ parameter_id = ffio_read_leb(pb);
+
+ for (int i = 0; i < c->nb_param_definitions; i++)
+ if (c->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &c->param_definitions[i];
+ break;
+ }
+
+ parameter_rate = ffio_read_leb(pb);
+ param_definition_mode = avio_r8(pb) >> 7;
+
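+ // In mode 0, duration and subblock layout are fixed here in the Parameter
+ // Definition; in mode 1, they are signalled in each Parameter Block OBU.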
+ if (param_definition_mode == 0) {
+ duration = ffio_read_leb(pb);
+ constant_subblock_duration = ffio_read_leb(pb);
+ if (constant_subblock_duration == 0)
+ num_subblocks = ffio_read_leb(pb);
+ else
+ num_subblocks = duration / constant_subblock_duration;
+ }
+
+ param = av_iamf_param_definition_alloc(param_definition_type, NULL, num_subblocks, NULL, ¶m_size);
+ if (!param)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < num_subblocks; i++) {
+ void *subblock = av_iamf_param_definition_get_subblock(param, i);
+ unsigned int subblock_duration = constant_subblock_duration;
+
+ if (constant_subblock_duration == 0)
+ subblock_duration = ffio_read_leb(pb);
+
+ switch (param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+ AVIAMFMixGainParameterData *mix = subblock;
+ mix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+ AVIAMFDemixingInfoParameterData *demix = subblock;
+ demix->subblock_duration = subblock_duration;
+ // DemixingInfoParameterData
+ demix->dmixp_mode = avio_r8(pb) >> 5;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+ AVIAMFReconGainParameterData *recon = subblock;
+ recon->subblock_duration = subblock_duration;
+ break;
+ }
+ default:
+ av_free(param);
+ return AVERROR_INVALIDDATA;
+ }
+ }
+
+ param->parameter_id = parameter_id;
+ param->parameter_rate = parameter_rate;
+ param->param_definition_mode = param_definition_mode;
+ param->duration = duration;
+ param->constant_subblock_duration = constant_subblock_duration;
+ param->num_subblocks = num_subblocks;
+
+ if (param_definition) {
+ if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
+ av_log(s, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id);
+ av_free(param);
+ return AVERROR_INVALIDDATA;
+ }
+ } else {
+ param_definition = av_dynarray2_add_nofree((void **)&c->param_definitions, &c->nb_param_definitions,
+ sizeof(*c->param_definitions), NULL);
+ if (!param_definition) {
+ av_free(param);
+ return AVERROR(ENOMEM);
+ }
+ param_definition->param = param;
+ param_definition->param_size = param_size;
+ param_definition->audio_element = audio_element;
+ }
+
+ av_assert0(out_param_definition);
+ *out_param_definition = param;
+
+ return 0;
+}
+
+static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
+{
+ const IAMFCodecConfig *codec_config = NULL;
+ AVIAMFAudioElement *element;
+ IAMFAudioElement *audio_element;
+ FFIOContext b;
+ AVIOContext *pbc;
+ uint8_t *buf;
+ unsigned audio_element_id, codec_config_id, num_substreams, num_parameters;
+ int audio_element_type, ret;
+
+ buf = av_malloc(len);
+ if (!buf)
+ return AVERROR(ENOMEM);
+
+ ret = avio_read(pb, buf, len);
+ if (ret != len) {
+ if (ret >= 0)
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+ pbc = &b.pub;
+
+ audio_element_id = ffio_read_leb(pbc);
+
+ for (int i = 0; i < c->nb_audio_elements; i++)
+ if (c->audio_elements[i].audio_element_id == audio_element_id) {
+ av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ audio_element_type = avio_r8(pbc) >> 5;
+ codec_config_id = ffio_read_leb(pbc);
+
+ for (int i = 0; i < c->nb_codec_configs; i++) {
+ if (c->codec_configs[i].codec_config_id == codec_config_id) {
+ codec_config = &c->codec_configs[i];
+ break;
+ }
+ }
+
+ if (!codec_config) {
+ av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id);
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ if (codec_config->codec_id == AV_CODEC_ID_NONE) {
+ av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
+ ret = 0;
+ goto fail;
+ }
+
+ num_substreams = ffio_read_leb(pbc);
+
+ audio_element = av_dynarray2_add_nofree((void **)&c->audio_elements, &c->nb_audio_elements,
+ sizeof(*c->audio_elements), NULL);
+ if (!audio_element) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ memset(audio_element, 0, sizeof(*audio_element));
+
+ audio_element->codec_config = codec_config;
+ audio_element->audio_element_id = audio_element_id;
+ element = audio_element->element = av_iamf_audio_element_alloc();
+ if (!element) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ element->codec_config_id = codec_config_id;
+ element->audio_element_type = audio_element_type;
+
+ for (int i = 0; i < num_substreams; i++) {
+ IAMFSubStream *substream;
+
+ substream = av_dynarray2_add_nofree((void **)&audio_element->substreams, &audio_element->nb_substreams,
+ sizeof(*audio_element->substreams), NULL);
+ if (!substream) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ substream->codecpar = avcodec_parameters_alloc();
+ if (!substream->codecpar) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ substream->audio_substream_id = ffio_read_leb(pbc);
+
+ substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+ substream->codecpar->codec_id = codec_config->codec_id;
+ substream->codecpar->frame_size = codec_config->nb_samples;
+ substream->codecpar->sample_rate = codec_config->sample_rate;
+ substream->codecpar->seek_preroll = codec_config->seek_preroll;
+
+ switch(substream->codecpar->codec_id) {
+ case AV_CODEC_ID_AAC:
+ case AV_CODEC_ID_FLAC:
+ case AV_CODEC_ID_OPUS:
+ substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (!substream->codecpar->extradata) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+ memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
+ memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+ substream->codecpar->extradata_size = codec_config->extradata_size;
+ break;
+ }
+ }
+
+ num_parameters = ffio_read_leb(pbc);
+ if (num_parameters && audio_element_type != 0) {
+ av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
+ " for Scene representations\n", num_parameters);
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ for (int i = 0; i < num_parameters; i++) {
+ unsigned param_definition_type;
+
+ param_definition_type = ffio_read_leb(pbc);
+ if (param_definition_type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ } else if (param_definition_type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
+ ret = param_parse(s, c, pbc, param_definition_type, element, &element->demixing_info);
+ if (ret < 0)
+ goto fail;
+
+ element->default_w = avio_r8(pbc) >> 4;
+ } else if (param_definition_type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
+ ret = param_parse(s, c, pbc, param_definition_type, element, &element->recon_gain_info);
+ if (ret < 0)
+ goto fail;
+ } else {
+ unsigned param_definition_size = ffio_read_leb(pbc);
+ avio_skip(pbc, param_definition_size);
+ }
+ }
+
+ if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+ ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config);
+ if (ret < 0)
+ goto fail;
+ } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
+ ret = ambisonics_config(s, pbc, audio_element, codec_config);
+ if (ret < 0)
+ goto fail;
+ } else {
+ unsigned audio_element_config_size = ffio_read_leb(pbc);
+ avio_skip(pbc, audio_element_config_size);
+ }
+
+ len -= avio_tell(pbc);
+ if (len)
+ av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len);
+
+ ret = 0;
+fail:
+ av_free(buf);
+
+ return ret;
+}
+
+static int label_string(AVIOContext *pb, char **label)
+{
+ uint8_t buf[128];
+
+ avio_get_str(pb, sizeof(buf), buf, sizeof(buf));
+
+ if (pb->error)
+ return pb->error;
+ if (pb->eof_reached)
+ return AVERROR_INVALIDDATA;
+ *label = av_strdup(buf);
+ if (!*label)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len)
+{
+ AVIAMFMixPresentation *mix;
+ IAMFMixPresentation *mix_presentation;
+ FFIOContext b;
+ AVIOContext *pbc;
+ uint8_t *buf;
+ unsigned mix_presentation_id;
+ int ret;
+
+ buf = av_malloc(len);
+ if (!buf)
+ return AVERROR(ENOMEM);
+
+ ret = avio_read(pb, buf, len);
+ if (ret != len) {
+ if (ret >= 0)
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+ pbc = &b.pub;
+
+ mix_presentation_id = ffio_read_leb(pbc);
+
+ for (int i = 0; i < c->nb_mix_presentations; i++)
+ if (c->mix_presentations[i].mix_presentation_id == mix_presentation_id) {
+ av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ mix_presentation = av_dynarray2_add_nofree((void **)&c->mix_presentations, &c->nb_mix_presentations,
+ sizeof(*c->mix_presentations), NULL);
+ if (!mix_presentation) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ memset(mix_presentation, 0, sizeof(*mix_presentation));
+
+ mix_presentation->mix_presentation_id = mix_presentation_id;
+ mix = mix_presentation->mix = av_iamf_mix_presentation_alloc();
+ if (!mix) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ mix_presentation->count_label = ffio_read_leb(pbc);
+
+ mix_presentation->language_label = av_calloc(mix_presentation->count_label,
+ sizeof(*mix_presentation->language_label));
+ if (!mix_presentation->language_label) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ for (int i = 0; i < mix_presentation->count_label; i++) {
+ ret = label_string(pbc, &mix_presentation->language_label[i]);
+ if (ret < 0)
+ goto fail;
+ }
+
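+ // Annotations are stored keyed by the language label at the same index.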
+ for (int i = 0; i < mix_presentation->count_label; i++) {
+ char *annotation = NULL;
+ ret = label_string(pbc, &annotation);
+ if (ret < 0)
+ goto fail;
+ ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation,
+ AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
+ if (ret < 0)
+ goto fail;
+ }
+
+ mix->num_submixes = ffio_read_leb(pbc);
+ mix->submixes = av_calloc(mix->num_submixes, sizeof(*mix->submixes));
+ if (!mix->submixes) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ for (int i = 0; i < mix->num_submixes; i++) {
+ AVIAMFSubmix *sub_mix;
+
+ sub_mix = mix->submixes[i] = av_mallocz(sizeof(*sub_mix));
+ if (!sub_mix) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ sub_mix->num_elements = ffio_read_leb(pbc);
+ sub_mix->elements = av_calloc(sub_mix->num_elements, sizeof(*sub_mix->elements));
+ if (!sub_mix->elements) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ for (int j = 0; j < sub_mix->num_elements; j++) {
+ AVIAMFSubmixElement *submix_element;
+ IAMFAudioElement *audio_element = NULL;
+ unsigned int rendering_config_extension_size;
+
+ submix_element = sub_mix->elements[j] = av_mallocz(sizeof(*submix_element));
+ if (!submix_element) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ submix_element->audio_element_id = ffio_read_leb(pbc);
+
+ for (int k = 0; k < c->nb_audio_elements; k++)
+ if (c->audio_elements[k].audio_element_id == submix_element->audio_element_id) {
+ audio_element = &c->audio_elements[k];
+ break;
+ }
+
+ if (!audio_element) {
+ av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n",
+ submix_element->audio_element_id, mix_presentation_id);
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ for (int k = 0; k < mix_presentation->count_label; k++) {
+ char *annotation = NULL;
+ ret = label_string(pbc, &annotation);
+ if (ret < 0)
+ goto fail;
+ ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation,
+ AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE);
+ if (ret < 0)
+ goto fail;
+ }
+
+ submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6;
+
+ rendering_config_extension_size = ffio_read_leb(pbc);
+ avio_skip(pbc, rendering_config_extension_size);
+
+ ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
+ audio_element->element,
+ &submix_element->element_mix_config);
+ if (ret < 0)
+ goto fail;
+ submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+ }
+
+ ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
+ if (ret < 0)
+ goto fail;
+ sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+
+ sub_mix->num_layouts = ffio_read_leb(pbc);
+ sub_mix->layouts = av_calloc(sub_mix->num_layouts, sizeof(*sub_mix->layouts));
+ if (!sub_mix->layouts) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ for (int j = 0; j < sub_mix->num_layouts; j++) {
+ AVIAMFSubmixLayout *submix_layout;
+ int info_type;
+ int byte = avio_r8(pbc);
+
+ submix_layout = sub_mix->layouts[j] = av_mallocz(sizeof(*submix_layout));
+ if (!submix_layout) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ submix_layout->layout_type = byte >> 6;
+ if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
+ submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
+ av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
+ submix_layout->layout_type, mix_presentation_id);
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+ if (submix_layout->layout_type == 2) {
+ int sound_system;
+ sound_system = (byte >> 2) & 0xF;
+ if (sound_system >= FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) { // values above 12 are reserved
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+ av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
+ }
+
+ info_type = avio_r8(pbc);
+ submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+ submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+
+ if (info_type & 1)
+ submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+ if (info_type & 2) {
+ unsigned int num_anchored_loudness = avio_r8(pbc);
+
+ for (int k = 0; k < num_anchored_loudness; k++) {
+ unsigned int anchor_element = avio_r8(pbc);
+ AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8);
+ if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE)
+ submix_layout->dialogue_anchored_loudness = anchored_loudness;
+ else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM)
+ submix_layout->album_anchored_loudness = anchored_loudness;
+ else
+ av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n");
+ }
+ }
+
+ if (info_type & 0xFC) {
+ unsigned int info_type_size = ffio_read_leb(pbc);
+ avio_skip(pbc, info_type_size);
+ }
+ }
+ }
+
+ len -= avio_tell(pbc);
+ if (len)
+ av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len);
+
+ ret = 0;
+fail:
+ av_free(buf);
+
+ return ret;
+}
+
+int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
+ unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
+ unsigned *skip_samples, unsigned *discard_padding)
+{
+ GetBitContext gb;
+ int ret, extension_flag, trimming, start;
+ unsigned skip = 0, discard = 0;
+ unsigned size;
+
+ ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
+ if (ret < 0)
+ return ret;
+
+ *type = get_bits(&gb, 5);
+ /*redundant =*/ get_bits1(&gb);
+ trimming = get_bits1(&gb);
+ extension_flag = get_bits1(&gb);
+
+ *obu_size = get_leb(&gb);
+ if (*obu_size > INT_MAX)
+ return AVERROR_INVALIDDATA;
+
+ start = get_bits_count(&gb) / 8;
+
+ if (trimming) {
+ discard = get_leb(&gb); // num_samples_to_trim_at_end
+ skip = get_leb(&gb); // num_samples_to_trim_at_start
+ }
+
+ if (skip_samples)
+ *skip_samples = skip;
+ if (discard_padding)
+ *discard_padding = discard;
+
+ if (extension_flag) {
+ unsigned int extension_bytes;
+ extension_bytes = get_leb(&gb);
+ if (extension_bytes > INT_MAX / 8)
+ return AVERROR_INVALIDDATA;
+ skip_bits_long(&gb, extension_bytes * 8);
+ }
+
+ if (get_bits_left(&gb) < 0)
+ return AVERROR_INVALIDDATA;
+
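+ // Return the full OBU size (header included); obu_size is reduced to the
+ // payload bytes remaining after the trimming and extension fields, and
+ // start_pos is the offset at which that payload begins.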
+ size = *obu_size + start;
+ if (size > INT_MAX)
+ return AVERROR_INVALIDDATA;
+
+ *obu_size -= get_bits_count(&gb) / 8 - start;
+ *start_pos = size - *obu_size;
+
+ return size;
+}
+
+int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
+ int max_size, void *log_ctx)
+{
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
+ int ret;
+
+ while (1) {
+ unsigned obu_size;
+ enum IAMF_OBU_Type type;
+ int start_pos, len, size;
+
+ if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
+ return ret;
+ size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
+ if (size < 0)
+ return size;
+
+ len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL);
+ if (len < 0 || obu_size > max_size) {
+ av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu\n");
+ avio_seek(pb, -size, SEEK_CUR);
+ return len < 0 ? len : AVERROR_INVALIDDATA;
+ }
+
+ if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) {
+ avio_seek(pb, -size, SEEK_CUR);
+ break;
+ }
+
+ avio_seek(pb, -(size - start_pos), SEEK_CUR);
+ switch (type) {
+ case IAMF_OBU_IA_CODEC_CONFIG:
+ ret = codec_config_obu(log_ctx, c, pb, obu_size);
+ break;
+ case IAMF_OBU_IA_AUDIO_ELEMENT:
+ ret = audio_element_obu(log_ctx, c, pb, obu_size);
+ break;
+ case IAMF_OBU_IA_MIX_PRESENTATION:
+ ret = mix_presentation_obu(log_ctx, c, pb, obu_size);
+ break;
+ case IAMF_OBU_IA_TEMPORAL_DELIMITER:
+ break;
+ default: {
+ int64_t offset = avio_skip(pb, obu_size);
+ if (offset < 0)
+ ret = offset;
+ break;
+ }
+ }
+ if (ret < 0)
+ return ret;
+ max_size -= obu_size + start_pos;
+ if (max_size < 0)
+ return AVERROR_INVALIDDATA;
+ if (!max_size)
+ break;
+ }
+
+ return 0;
+}
+
+void ff_iamf_uninit_context(IAMFContext *c)
+{
+ if (!c)
+ return;
+
+ for (int i = 0; i < c->nb_codec_configs; i++)
+ av_free(c->codec_configs[i].extradata);
+ av_freep(&c->codec_configs);
+ c->nb_codec_configs = 0;
+
+ for (int i = 0; i < c->nb_audio_elements; i++) {
+ IAMFAudioElement *audio_element = &c->audio_elements[i];
+ for (int j = 0; j < audio_element->nb_substreams; j++)
+ avcodec_parameters_free(&audio_element->substreams[j].codecpar);
+ av_free(audio_element->substreams);
+ av_free(audio_element->layers);
+ av_iamf_audio_element_free(&audio_element->element);
+ }
+ av_freep(&c->audio_elements);
+ c->nb_audio_elements = 0;
+
+ for (int i = 0; i < c->nb_mix_presentations; i++) {
+ IAMFMixPresentation *mix_presentation = &c->mix_presentations[i];
+ for (int j = 0; j < mix_presentation->count_label; j++)
+ av_free(mix_presentation->language_label[j]);
+ av_free(mix_presentation->language_label);
+ av_iamf_mix_presentation_free(&mix_presentation->mix);
+ }
+ av_freep(&c->mix_presentations);
+ c->nb_mix_presentations = 0;
+
+ av_freep(&c->param_definitions);
+ c->nb_param_definitions = 0;
+}
diff --git a/libavformat/iamf.h b/libavformat/iamf.h
new file mode 100644
index 0000000000..7e3239d500
--- /dev/null
+++ b/libavformat/iamf.h
@@ -0,0 +1,167 @@
+/*
+ * Immersive Audio Model and Formats parsing
+ * Copyright (c) 2023 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IAMF_H
+#define AVFORMAT_IAMF_H
+
+#include <stdint.h>
+
+#include "libavutil/iamf.h"
+#include "libavcodec/codec_id.h"
+#include "libavcodec/codec_par.h"
+#include "avformat.h"
+#include "avio.h"
+
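+// Enough for the 1 byte of type and flags plus three leb128 fields of at most 8 bytes each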
+#define MAX_IAMF_OBU_HEADER_SIZE (1 + 8 * 3)
+
+// OBU types (section 3.2).
+enum IAMF_OBU_Type {
+ IAMF_OBU_IA_CODEC_CONFIG = 0,
+ IAMF_OBU_IA_AUDIO_ELEMENT = 1,
+ IAMF_OBU_IA_MIX_PRESENTATION = 2,
+ IAMF_OBU_IA_PARAMETER_BLOCK = 3,
+ IAMF_OBU_IA_TEMPORAL_DELIMITER = 4,
+ IAMF_OBU_IA_AUDIO_FRAME = 5,
+ IAMF_OBU_IA_AUDIO_FRAME_ID0 = 6,
+ IAMF_OBU_IA_AUDIO_FRAME_ID1 = 7,
+ IAMF_OBU_IA_AUDIO_FRAME_ID2 = 8,
+ IAMF_OBU_IA_AUDIO_FRAME_ID3 = 9,
+ IAMF_OBU_IA_AUDIO_FRAME_ID4 = 10,
+ IAMF_OBU_IA_AUDIO_FRAME_ID5 = 11,
+ IAMF_OBU_IA_AUDIO_FRAME_ID6 = 12,
+ IAMF_OBU_IA_AUDIO_FRAME_ID7 = 13,
+ IAMF_OBU_IA_AUDIO_FRAME_ID8 = 14,
+ IAMF_OBU_IA_AUDIO_FRAME_ID9 = 15,
+ IAMF_OBU_IA_AUDIO_FRAME_ID10 = 16,
+ IAMF_OBU_IA_AUDIO_FRAME_ID11 = 17,
+ IAMF_OBU_IA_AUDIO_FRAME_ID12 = 18,
+ IAMF_OBU_IA_AUDIO_FRAME_ID13 = 19,
+ IAMF_OBU_IA_AUDIO_FRAME_ID14 = 20,
+ IAMF_OBU_IA_AUDIO_FRAME_ID15 = 21,
+ IAMF_OBU_IA_AUDIO_FRAME_ID16 = 22,
+ IAMF_OBU_IA_AUDIO_FRAME_ID17 = 23,
+ // 24~30 reserved.
+ IAMF_OBU_IA_SEQUENCE_HEADER = 31,
+};
+
+typedef struct IAMFCodecConfig {
+ unsigned codec_config_id;
+ enum AVCodecID codec_id;
+ uint32_t codec_tag;
+ unsigned nb_samples;
+ int seek_preroll;
+ uint8_t *extradata;
+ int extradata_size;
+ int sample_rate;
+} IAMFCodecConfig;
+
+typedef struct IAMFLayer {
+ unsigned int substream_count;
+ unsigned int coupled_substream_count;
+} IAMFLayer;
+
+typedef struct IAMFSubStream {
+ unsigned int audio_substream_id;
+
+ // demux
+ AVCodecParameters *codecpar;
+} IAMFSubStream;
+
+typedef struct IAMFAudioElement {
+ AVIAMFAudioElement *element;
+ unsigned int audio_element_id;
+
+ IAMFSubStream *substreams;
+ unsigned int nb_substreams;
+
+ const IAMFCodecConfig *codec_config;
+
+ // mux
+ IAMFLayer *layers;
+ unsigned int nb_layers;
+} IAMFAudioElement;
+
+typedef struct IAMFMixPresentation {
+ AVIAMFMixPresentation *mix;
+ unsigned int mix_presentation_id;
+
+ // demux
+ unsigned int count_label;
+ char **language_label;
+} IAMFMixPresentation;
+
+typedef struct IAMFParamDefinition {
+ const AVIAMFAudioElement *audio_element;
+ AVIAMFParamDefinition *param;
+ size_t param_size;
+} IAMFParamDefinition;
+
+typedef struct IAMFContext {
+ IAMFCodecConfig *codec_configs;
+ int nb_codec_configs;
+ IAMFAudioElement *audio_elements;
+ int nb_audio_elements;
+ IAMFMixPresentation *mix_presentations;
+ int nb_mix_presentations;
+ IAMFParamDefinition *param_definitions;
+ int nb_param_definitions;
+} IAMFContext;
+
+enum IAMF_Anchor_Element {
+ IAMF_ANCHOR_ELEMENT_UNKNOWN,
+ IAMF_ANCHOR_ELEMENT_DIALOGUE,
+ IAMF_ANCHOR_ELEMENT_ALBUM,
+};
+
+enum IAMF_Sound_System {
+ SOUND_SYSTEM_A_0_2_0 = 0, // "Loudspeaker configuration for Sound System A"
+ SOUND_SYSTEM_B_0_5_0 = 1, // "Loudspeaker configuration for Sound System B"
+ SOUND_SYSTEM_C_2_5_0 = 2, // "Loudspeaker configuration for Sound System C"
+ SOUND_SYSTEM_D_4_5_0 = 3, // "Loudspeaker configuration for Sound System D"
+ SOUND_SYSTEM_E_4_5_1 = 4, // "Loudspeaker configuration for Sound System E"
+ SOUND_SYSTEM_F_3_7_0 = 5, // "Loudspeaker configuration for Sound System F"
+ SOUND_SYSTEM_G_4_9_0 = 6, // "Loudspeaker configuration for Sound System G"
+ SOUND_SYSTEM_H_9_10_3 = 7, // "Loudspeaker configuration for Sound System H"
+ SOUND_SYSTEM_I_0_7_0 = 8, // "Loudspeaker configuration for Sound System I"
+ SOUND_SYSTEM_J_4_7_0 = 9, // "Loudspeaker configuration for Sound System J"
+ SOUND_SYSTEM_10_2_7_0 = 10, // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
+ SOUND_SYSTEM_11_2_3_0 = 11, // Front subset of "Loudspeaker configuration for Sound System J"
+ SOUND_SYSTEM_12_0_1_0 = 12, // Mono
+};
+
+struct IAMFSoundSystemMap {
+ enum IAMF_Sound_System id;
+ AVChannelLayout layout;
+};
+
+extern const AVChannelLayout ff_iamf_scalable_ch_layouts[10];
+extern const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13];
+
+int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
+ unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
+ unsigned *skip_samples, unsigned *discard_padding);
+
+int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
+ int max_size, void *log_ctx);
+
+void ff_iamf_uninit_context(IAMFContext *c);
+
+#endif /* AVFORMAT_IAMF_H */
diff --git a/libavformat/iamfdec.c b/libavformat/iamfdec.c
new file mode 100644
index 0000000000..2011cba566
--- /dev/null
+++ b/libavformat/iamfdec.c
@@ -0,0 +1,495 @@
+/*
+ * Immersive Audio Model and Formats demuxer
+ * Copyright (c) 2023 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/iamf.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavcodec/mathops.h"
+#include "avformat.h"
+#include "avio_internal.h"
+#include "demux.h"
+#include "iamf.h"
+#include "internal.h"
+
+typedef struct IAMFDemuxContext {
+ IAMFContext iamf;
+
+ // Packet side data
+ AVIAMFParamDefinition *mix;
+ size_t mix_size;
+ AVIAMFParamDefinition *demix;
+ size_t demix_size;
+ AVIAMFParamDefinition *recon;
+ size_t recon_size;
+} IAMFDemuxContext;
+
+static AVStream *find_stream_by_id(AVFormatContext *s, int id)
+{
+ for (int i = 0; i < s->nb_streams; i++)
+ if (s->streams[i]->id == id)
+ return s->streams[i];
+
+ av_log(s, AV_LOG_ERROR, "Invalid stream id %d\n", id);
+ return NULL;
+}
+
+static int audio_frame_obu(AVFormatContext *s, AVPacket *pkt, int len,
+ enum IAMF_OBU_Type type,
+ unsigned skip_samples, unsigned discard_padding,
+ int id_in_bitstream)
+{
+ const IAMFDemuxContext *const c = s->priv_data;
+ AVStream *st;
+ int ret, audio_substream_id;
+
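+ // Plain Audio Frame OBUs carry an explicit leb128 substream id; the
+ // ID0..ID17 variants encode it implicitly in the OBU type.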
+ if (id_in_bitstream) {
+ unsigned explicit_audio_substream_id;
+ int64_t pos = avio_tell(s->pb);
+ explicit_audio_substream_id = ffio_read_leb(s->pb);
+ len -= avio_tell(s->pb) - pos;
+ audio_substream_id = explicit_audio_substream_id;
+ } else
+ audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;
+
+ st = find_stream_by_id(s, audio_substream_id);
+ if (!st)
+ return AVERROR_INVALIDDATA;
+
+ ret = av_get_packet(s->pb, pkt, len);
+ if (ret < 0)
+ return ret;
+ if (ret != len)
+ return AVERROR_INVALIDDATA;
+
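+ // Map the OBU trimming fields to AV_PKT_DATA_SKIP_SAMPLES side data
+ // (32-bit little-endian skip count followed by the discard count).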
+ if (skip_samples || discard_padding) {
+ uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ AV_WL32(side_data, skip_samples);
+ AV_WL32(side_data + 4, discard_padding);
+ }
+ if (c->mix) {
+ uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->mix, c->mix_size);
+ }
+ if (c->demix) {
+ uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->demix, c->demix_size);
+ }
+ if (c->recon) {
+ uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->recon, c->recon_size);
+ }
+
+ pkt->stream_index = st->index;
+ return 0;
+}
+
+static const IAMFParamDefinition *get_param_definition(AVFormatContext *s, unsigned int parameter_id)
+{
+ const IAMFDemuxContext *const c = s->priv_data;
+ const IAMFContext *const iamf = &c->iamf;
+ const IAMFParamDefinition *param_definition = NULL;
+
+ for (int i = 0; i < iamf->nb_param_definitions; i++)
+ if (iamf->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &iamf->param_definitions[i];
+ break;
+ }
+
+ return param_definition;
+}
+
+static int parameter_block_obu(AVFormatContext *s, int len)
+{
+ IAMFDemuxContext *const c = s->priv_data;
+ const IAMFParamDefinition *param_definition;
+ const AVIAMFParamDefinition *param;
+ AVIAMFParamDefinition *out_param = NULL;
+ FFIOContext b;
+ AVIOContext *pb;
+ uint8_t *buf;
+ unsigned int duration, constant_subblock_duration;
+ unsigned int num_subblocks;
+ unsigned int parameter_id;
+ size_t out_param_size;
+ int ret;
+
+ buf = av_malloc(len);
+ if (!buf)
+ return AVERROR(ENOMEM);
+
+ ret = avio_read(s->pb, buf, len);
+ if (ret != len) {
+ if (ret >= 0)
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+ pb = &b.pub;
+
+ parameter_id = ffio_read_leb(pb);
+ param_definition = get_param_definition(s, parameter_id);
+ if (!param_definition) {
+ av_log(s, AV_LOG_VERBOSE, "Non existant parameter_id %d referenced in a parameter block. Ignoring\n",
+ parameter_id);
+ ret = 0;
+ goto fail;
+ }
+
+ param = param_definition->param;
+ if (param->param_definition_mode) {
+ duration = ffio_read_leb(pb);
+ constant_subblock_duration = ffio_read_leb(pb);
+ if (constant_subblock_duration == 0)
+ num_subblocks = ffio_read_leb(pb);
+ else
+ num_subblocks = duration / constant_subblock_duration;
+ } else {
+ duration = param->duration;
+ constant_subblock_duration = param->constant_subblock_duration;
+ num_subblocks = param->num_subblocks;
+ if (!num_subblocks)
+ num_subblocks = duration / constant_subblock_duration;
+ }
+
+ out_param = av_iamf_param_definition_alloc(param->param_definition_type, NULL, num_subblocks,
+ NULL, &out_param_size);
+ if (!out_param) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ out_param->parameter_id = param->parameter_id;
+ out_param->param_definition_type = param->param_definition_type;
+ out_param->parameter_rate = param->parameter_rate;
+ out_param->param_definition_mode = param->param_definition_mode;
+ out_param->duration = duration;
+ out_param->constant_subblock_duration = constant_subblock_duration;
+ out_param->num_subblocks = num_subblocks;
+
+ for (int i = 0; i < num_subblocks; i++) {
+ void *subblock = av_iamf_param_definition_get_subblock(out_param, i);
+ unsigned int subblock_duration = constant_subblock_duration;
+
+ if (param->param_definition_mode && !constant_subblock_duration)
+ subblock_duration = ffio_read_leb(pb);
+
+ switch (param->param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+ AVIAMFMixGainParameterData *mix = subblock;
+
+ mix->animation_type = ffio_read_leb(pb);
+ if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ ret = 0;
+ av_free(out_param);
+ goto fail;
+ }
+
+ mix->start_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+ if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+ mix->end_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+ if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ mix->control_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
+ mix->control_point_relative_time = avio_r8(pb);
+ }
+ mix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+ AVIAMFDemixingInfoParameterData *demix = subblock;
+
+ demix->dmixp_mode = avio_r8(pb) >> 5;
+ demix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+ AVIAMFReconGainParameterData *recon = subblock;
+ const AVIAMFAudioElement *audio_element = param_definition->audio_element;
+
+ av_assert0(audio_element);
+ for (int i = 0; i < audio_element->num_layers; i++) {
+ const AVIAMFLayer *layer = audio_element->layers[i];
+ if (layer->recon_gain_is_present) {
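+ // recon_gain_flags is a leb128 carrying up to two groups of flags; fold
+ // them into one contiguous bitmask and read one gain byte per set bit.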
+ unsigned int recon_gain_flags = ffio_read_leb(pb);
+ unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);
+ recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
+ for (int j = 0; j < bitcount; j++) {
+ if (recon_gain_flags & (1 << j))
+ recon->recon_gain[i][j] = avio_r8(pb);
+ }
+ }
+ }
+ recon->subblock_duration = subblock_duration;
+ break;
+ }
+ default:
+ av_assert0(0);
+ }
+ }
+
+ len -= avio_tell(pb);
+ if (len) {
+ int level = (s->error_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
+ av_log(s, level, "Underread in parameter_block_obu. %d bytes left at the end\n", len);
+ }
+
+ switch (param->param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ av_free(c->mix);
+ c->mix = out_param;
+ c->mix_size = out_param_size;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ av_free(c->demix);
+ c->demix = out_param;
+ c->demix_size = out_param_size;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ av_free(c->recon);
+ c->recon = out_param;
+ c->recon_size = out_param_size;
+ break;
+ default:
+ av_assert0(0);
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0)
+ av_free(out_param);
+ av_free(buf);
+
+ return ret;
+}
+
+static int iamf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ IAMFDemuxContext *const c = s->priv_data;
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
+ unsigned obu_size;
+ int ret;
+
+ while (1) {
+ enum IAMF_OBU_Type type;
+ unsigned skip_samples, discard_padding;
+ int len, size, start_pos;
+
+ if ((ret = ffio_ensure_seekback(s->pb, MAX_IAMF_OBU_HEADER_SIZE)) < 0)
+ return ret;
+ size = avio_read(s->pb, header, MAX_IAMF_OBU_HEADER_SIZE);
+ if (size < 0)
+ return size;
+
+ len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type,
+ &skip_samples, &discard_padding);
+ if (len < 0) {
+ av_log(s, AV_LOG_ERROR, "Failed to read obu\n");
+ return len;
+ }
+ avio_seek(s->pb, -(size - start_pos), SEEK_CUR);
+
+ if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
+ return audio_frame_obu(s, pkt, obu_size, type,
+ skip_samples, discard_padding,
+ type == IAMF_OBU_IA_AUDIO_FRAME);
+ else if (type == IAMF_OBU_IA_PARAMETER_BLOCK) {
+ ret = parameter_block_obu(s, obu_size);
+ if (ret < 0)
+ return ret;
+ } else if (type == IAMF_OBU_IA_TEMPORAL_DELIMITER) {
+ av_freep(&c->mix);
+ c->mix_size = 0;
+ av_freep(&c->demix);
+ c->demix_size = 0;
+ av_freep(&c->recon);
+ c->recon_size = 0;
+ } else {
+ int64_t offset = avio_skip(s->pb, obu_size);
+ if (offset < 0) {
+ ret = offset;
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+// Returns < 0 when more data is needed, 0 when the buffer does not look like
+// IAMF, and a probe score once an audio frame follows the descriptor OBUs.
+static int get_score(const uint8_t *buf, int buf_size, enum IAMF_OBU_Type type, int *seq)
+{
+ if (type == IAMF_OBU_IA_SEQUENCE_HEADER) {
+ if (buf_size < 4 || AV_RB32(buf) != MKBETAG('i','a','m','f'))
+ return 0;
+ *seq = 1;
+ return -1;
+ }
+ if (type >= IAMF_OBU_IA_CODEC_CONFIG && type <= IAMF_OBU_IA_TEMPORAL_DELIMITER)
+ return *seq ? -1 : 0;
+ if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
+ return *seq ? AVPROBE_SCORE_EXTENSION + 1 : 0;
+ return 0;
+}
+
+static int iamf_probe(const AVProbeData *p)
+{
+ unsigned obu_size;
+ enum IAMF_OBU_Type type;
+ int seq = 0, cnt = 0, start_pos;
+ int ret;
+
+ while (1) {
+ int size = ff_iamf_parse_obu_header(p->buf + cnt, p->buf_size - cnt,
+ &obu_size, &start_pos, &type,
+ NULL, NULL);
+ if (size < 0)
+ return 0;
+
+ ret = get_score(p->buf + cnt + start_pos,
+ p->buf_size - cnt - start_pos,
+ type, &seq);
+ if (ret >= 0)
+ return ret;
+
+ cnt += FFMIN(size, p->buf_size - cnt);
+ }
+ return 0;
+}
+
+static int iamf_read_header(AVFormatContext *s)
+{
+ IAMFDemuxContext *const c = s->priv_data;
+ IAMFContext *const iamf = &c->iamf;
+ int ret;
+
+ ret = ff_iamfdec_read_descriptors(iamf, s->pb, INT_MAX, s);
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < iamf->nb_audio_elements; i++) {
+ IAMFAudioElement *audio_element = &iamf->audio_elements[i];
+ AVStreamGroup *stg = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT, NULL);
+
+ if (!stg)
+ return AVERROR(ENOMEM);
+
+ stg->id = audio_element->audio_element_id;
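+ // Hand ownership of the AVIAMFAudioElement over to the stream group.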
+ stg->params.iamf_audio_element = audio_element->element;
+ audio_element->element = NULL;
+
+ for (int j = 0; j < audio_element->nb_substreams; j++) {
+ IAMFSubStream *substream = &audio_element->substreams[j];
+ AVStream *st = avformat_new_stream(s, NULL);
+
+ if (!st)
+ return AVERROR(ENOMEM);
+
+ ret = avformat_stream_group_add_stream(stg, st);
+ if (ret < 0)
+ return ret;
+
+ ret = avcodec_parameters_copy(st->codecpar, substream->codecpar);
+ if (ret < 0)
+ return ret;
+
+ st->id = substream->audio_substream_id;
+ avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+ }
+ }
+
+ for (int i = 0; i < iamf->nb_mix_presentations; i++) {
+ IAMFMixPresentation *mix_presentation = &iamf->mix_presentations[i];
+ AVStreamGroup *stg = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION, NULL);
+ const AVIAMFMixPresentation *mix = mix_presentation->mix;
+
+ if (!stg)
+ return AVERROR(ENOMEM);
+
+ stg->id = mix_presentation->mix_presentation_id;
+ stg->params.iamf_mix_presentation = mix_presentation->mix;
+ mix_presentation->mix = NULL;
+
+ for (int j = 0; j < mix->num_submixes; j++) {
+ AVIAMFSubmix *sub_mix = mix->submixes[j];
+
+ for (int k = 0; k < sub_mix->num_elements; k++) {
+ AVIAMFSubmixElement *submix_element = sub_mix->elements[k];
+ AVStreamGroup *audio_element = NULL;
+
+ for (int l = 0; l < s->nb_stream_groups; l++)
+ if (s->stream_groups[l]->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT &&
+ s->stream_groups[l]->id == submix_element->audio_element_id) {
+ audio_element = s->stream_groups[l];
+ break;
+ }
+ av_assert0(audio_element);
+
+ for (int l = 0; l < audio_element->nb_streams; l++) {
+ ret = avformat_stream_group_add_stream(stg, audio_element->streams[l]);
+ if (ret < 0 && ret != AVERROR(EEXIST))
+ return ret;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int iamf_read_close(AVFormatContext *s)
+{
+ IAMFDemuxContext *const c = s->priv_data;
+
+ ff_iamf_uninit_context(&c->iamf);
+
+ av_freep(&c->mix);
+ c->mix_size = 0;
+ av_freep(&c->demix);
+ c->demix_size = 0;
+ av_freep(&c->recon);
+ c->recon_size = 0;
+
+ return 0;
+}
+
+const AVInputFormat ff_iamf_demuxer = {
+ .name = "iamf",
+ .long_name = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
+ .priv_data_size = sizeof(IAMFDemuxContext),
+ .flags_internal = FF_FMT_INIT_CLEANUP,
+ .read_probe = iamf_probe,
+ .read_header = iamf_read_header,
+ .read_packet = iamf_read_packet,
+ .read_close = iamf_read_close,
+ .extensions = "iamf",
+ .flags = AVFMT_GENERIC_INDEX | AVFMT_NO_BYTE_SEEK | AVFMT_NOTIMESTAMPS | AVFMT_SHOW_IDS,
+};
--
2.42.1
* [FFmpeg-devel] [PATCH 9/9] avformat: Immersive Audio Model and Formats muxer
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (7 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 8/9] avformat: Immersive Audio Model and Formats demuxer James Almer
@ 2023-11-26 1:28 ` James Almer
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 10/13] avcodec: add an Immersive Audio Model and Formats frame split bsf James Almer
` (3 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-26 1:28 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavformat/Makefile | 1 +
libavformat/allformats.c | 1 +
libavformat/iamfenc.c | 1091 ++++++++++++++++++++++++++++++++++++++
3 files changed, 1093 insertions(+)
create mode 100644 libavformat/iamfenc.c
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 752833f5a8..a90ced6dd2 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -259,6 +259,7 @@ OBJS-$(CONFIG_HLS_DEMUXER) += hls.o hls_sample_encryption.o
OBJS-$(CONFIG_HLS_MUXER) += hlsenc.o hlsplaylist.o avc.o
OBJS-$(CONFIG_HNM_DEMUXER) += hnm.o
OBJS-$(CONFIG_IAMF_DEMUXER) += iamfdec.o iamf.o
+OBJS-$(CONFIG_IAMF_MUXER) += iamfenc.o iamf.o
OBJS-$(CONFIG_ICO_DEMUXER) += icodec.o
OBJS-$(CONFIG_ICO_MUXER) += icoenc.o
OBJS-$(CONFIG_IDCIN_DEMUXER) += idcin.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 63ca44bacd..7529aed4a4 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -213,6 +213,7 @@ extern const AVInputFormat ff_hls_demuxer;
extern const FFOutputFormat ff_hls_muxer;
extern const AVInputFormat ff_hnm_demuxer;
extern const AVInputFormat ff_iamf_demuxer;
+extern const FFOutputFormat ff_iamf_muxer;
extern const AVInputFormat ff_ico_demuxer;
extern const FFOutputFormat ff_ico_muxer;
extern const AVInputFormat ff_idcin_demuxer;
diff --git a/libavformat/iamfenc.c b/libavformat/iamfenc.c
new file mode 100644
index 0000000000..a53396a34d
--- /dev/null
+++ b/libavformat/iamfenc.c
@@ -0,0 +1,1091 @@
+/*
+ * IAMF muxer
+ * Copyright (c) 2023 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/iamf.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/flac.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/put_bits.h"
+#include "avformat.h"
+#include "avio_internal.h"
+#include "iamf.h"
+#include "internal.h"
+#include "mux.h"
+
+typedef struct IAMFMuxContext {
+ IAMFContext iamf;
+
+ int first_stream_id;
+} IAMFMuxContext;
+
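+ /* Rewrite codec extradata so each substream's header signals stereo: the
+ * 8-byte OpusHead prefix is dropped and the Opus channel count patched, and
+ * the FLAC STREAMINFO channel count is set to 2. IAMF substreams carry at
+ * most two channels. */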
+static int update_extradata(IAMFCodecConfig *codec_config)
+{
+ GetBitContext gb;
+ PutBitContext pb;
+ int ret;
+
+ switch(codec_config->codec_id) {
+ case AV_CODEC_ID_OPUS:
+ if (codec_config->extradata_size < 19)
+ return AVERROR_INVALIDDATA;
+ codec_config->extradata_size -= 8;
+ memmove(codec_config->extradata, codec_config->extradata + 8, codec_config->extradata_size);
+ AV_WB8(codec_config->extradata + 1, 2); // set channels to stereo
+ break;
+ case AV_CODEC_ID_FLAC: {
+ uint8_t buf[13];
+
+ init_put_bits(&pb, buf, sizeof(buf));
+ ret = init_get_bits8(&gb, codec_config->extradata, codec_config->extradata_size);
+ if (ret < 0)
+ return ret;
+
+ put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
+ put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
+ put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
+ skip_bits(&gb, 3);
+ put_bits(&pb, 3, 1); // set channels to stereo
+ ret = put_bits_left(&pb);
+ put_bits(&pb, ret, get_bits(&gb, ret));
+ flush_put_bits(&pb);
+
+ memcpy(codec_config->extradata, buf, sizeof(buf));
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
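+ /* Fill an IAMFCodecConfig from the first stream of an audio element stream
+ * group, duplicating and rewriting its extradata as needed. */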
+static int fill_codec_config(const AVStreamGroup *stg, IAMFCodecConfig *codec_config)
+{
+ const AVIAMFAudioElement *iamf = stg->params.iamf_audio_element;
+ const AVStream *st = stg->streams[0];
+ int ret;
+
+ av_freep(&codec_config->extradata);
+ codec_config->extradata_size = 0;
+
+ codec_config->codec_config_id = iamf->codec_config_id;
+ codec_config->codec_id = st->codecpar->codec_id;
+ codec_config->sample_rate = st->codecpar->sample_rate;
+ codec_config->codec_tag = st->codecpar->codec_tag;
+ codec_config->nb_samples = st->codecpar->frame_size;
+ codec_config->seek_preroll = st->codecpar->seek_preroll;
+ if (st->codecpar->extradata_size) {
+ codec_config->extradata = av_memdup(st->codecpar->extradata, st->codecpar->extradata_size);
+ if (!codec_config->extradata)
+ return AVERROR(ENOMEM);
+ codec_config->extradata_size = st->codecpar->extradata_size;
+ ret = update_extradata(codec_config);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static IAMFParamDefinition *get_param_definition(AVFormatContext *s, unsigned int parameter_id)
+{
+ const IAMFMuxContext *const c = s->priv_data;
+ const IAMFContext *const iamf = &c->iamf;
+ IAMFParamDefinition *param_definition = NULL;
+
+ for (int i = 0; i < iamf->nb_param_definitions; i++)
+ if (iamf->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &iamf->param_definitions[i];
+ break;
+ }
+
+ return param_definition;
+}
+
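+ /* Append a new parameter definition to the context; the audio element it
+ * belongs to, if any, is set later by the caller. */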
+static IAMFParamDefinition *add_param_definition(AVFormatContext *s, AVIAMFParamDefinition *param)
+{
+ IAMFMuxContext *const c = s->priv_data;
+ IAMFContext *const iamf = &c->iamf;
+ IAMFParamDefinition *param_definition = av_dynarray2_add_nofree((void **)&iamf->param_definitions,
+ &iamf->nb_param_definitions,
+ sizeof(*iamf->param_definitions), NULL);
+ if (!param_definition)
+ return NULL;
+ param_definition->param = param;
+ param_definition->audio_element = NULL;
+
+ return param_definition;
+}
+
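+ /* Validate the streams and stream groups and translate the AVStreamGroup
+ * parameters into the internal IAMFContext representation used when
+ * writing the descriptor OBUs. */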
+static int iamf_init(AVFormatContext *s)
+{
+ IAMFMuxContext *const c = s->priv_data;
+ IAMFContext *const iamf = &c->iamf;
+ int ret;
+
+ if (!s->nb_streams) {
+ av_log(s, AV_LOG_ERROR, "There must be at least one stream\n");
+ return AVERROR(EINVAL);
+ }
+
+ for (int i = 0; i < s->nb_streams; i++) {
+ if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO ||
+ (s->streams[i]->codecpar->codec_tag != MKTAG('m','p','4','a') &&
+ s->streams[i]->codecpar->codec_tag != MKTAG('O','p','u','s') &&
+ s->streams[i]->codecpar->codec_tag != MKTAG('f','L','a','C') &&
+ s->streams[i]->codecpar->codec_tag != MKTAG('i','p','c','m'))) {
+ av_log(s, AV_LOG_ERROR, "Unsupported codec id %s\n",
+ avcodec_get_name(s->streams[i]->codecpar->codec_id));
+ return AVERROR(EINVAL);
+ }
+
+ if (s->streams[i]->codecpar->ch_layout.nb_channels > 2) {
+ av_log(s, AV_LOG_ERROR, "Unsupported channel layout on stream #%d\n", i);
+ return AVERROR(EINVAL);
+ }
+
+ for (int j = 0; j < i; j++) {
+ if (s->streams[i]->id == s->streams[j]->id) {
+ av_log(s, AV_LOG_ERROR, "Duplicated stream id %d\n", s->streams[j]->id);
+ return AVERROR(EINVAL);
+ }
+ }
+ }
+
+ if (!s->nb_stream_groups) {
+ av_log(s, AV_LOG_ERROR, "There must be at least two stream groups\n");
+ return AVERROR(EINVAL);
+ }
+
+ for (int i = 0; i < s->nb_stream_groups; i++) {
+ const AVStreamGroup *stg = s->stream_groups[i];
+
+ if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+ iamf->nb_audio_elements++;
+ if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
+ iamf->nb_mix_presentations++;
+ }
+ if (iamf->nb_audio_elements < 1 || iamf->nb_audio_elements > 2 || iamf->nb_mix_presentations < 1) {
+ av_log(s, AV_LOG_ERROR, "There must be >= 1 and <= 2 IAMF_AUDIO_ELEMENT and at least "
+ "one IAMF_MIX_PRESENTATION stream groups\n");
+ return AVERROR(EINVAL);
+ }
+
+ iamf->audio_elements = av_calloc(iamf->nb_audio_elements, sizeof(*iamf->audio_elements));
+ iamf->mix_presentations = av_calloc(iamf->nb_mix_presentations, sizeof(*iamf->mix_presentations));
+
+ if (!iamf->audio_elements || !iamf->mix_presentations) {
+ iamf->nb_audio_elements = iamf->nb_mix_presentations = 0;
+ return AVERROR(ENOMEM);
+ }
+
+ for (int i = 0, idx = 0; i < s->nb_stream_groups; i++) {
+ const AVStreamGroup *stg = s->stream_groups[i];
+ const AVIAMFAudioElement *iamf_audio_element;
+ IAMFAudioElement *audio_element;
+ IAMFCodecConfig *codec_config = NULL;
+
+ if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT)
+ continue;
+
+ iamf_audio_element = stg->params.iamf_audio_element;
+ if (iamf_audio_element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
+ const AVIAMFLayer *layer = iamf_audio_element->layers[0];
+ if (iamf_audio_element->num_layers != 1) {
+ av_log(s, AV_LOG_ERROR, "Invalid number of layers for SCENE_BASED audio element. Must be 1\n");
+ return AVERROR(EINVAL);
+ }
+ if (layer->ch_layout.order != AV_CHANNEL_ORDER_CUSTOM &&
+ layer->ch_layout.order != AV_CHANNEL_ORDER_AMBISONIC) {
+ av_log(s, AV_LOG_ERROR, "Invalid channel layout for SCENE_BASED audio element\n");
+ return AVERROR(EINVAL);
+ }
+ if (layer->ambisonics_mode >= AV_IAMF_AMBISONICS_MODE_PROJECTION) {
+ av_log(s, AV_LOG_ERROR, "Unsupported ambisonics mode %d\n", layer->ambisonics_mode);
+ return AVERROR_PATCHWELCOME;
+ }
+ for (int j = 0; j < stg->nb_streams; j++) {
+ if (stg->streams[j]->codecpar->ch_layout.nb_channels > 1) {
+ av_log(s, AV_LOG_ERROR, "Invalid number of channels in a stream for MONO mode ambisonics\n");
+ return AVERROR(EINVAL);
+ }
+ }
+ } else
+ for (int k, j = 0; j < iamf_audio_element->num_layers; j++) {
+ const AVIAMFLayer *layer = iamf_audio_element->layers[j];
+ for (k = 0; k < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts); k++)
+ if (!av_channel_layout_compare(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[k]))
+ break;
+
+ if (k >= FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts)) {
+ av_log(s, AV_LOG_ERROR, "Unsupported channel layout in stream group #%d\n", i);
+ return AVERROR(EINVAL);
+ }
+ }
+
+ for (int j = 0; j < iamf->nb_codec_configs; j++) {
+ if (iamf->codec_configs[j].codec_config_id == iamf_audio_element->codec_config_id) {
+ codec_config = &iamf->codec_configs[j];
+ break;
+ }
+ }
+
+ if (!codec_config) {
+ codec_config = av_dynarray2_add_nofree((void **)&iamf->codec_configs, &iamf->nb_codec_configs,
+ sizeof(*iamf->codec_configs), NULL);
+ if (!codec_config)
+ return AVERROR(ENOMEM);
+ memset(codec_config, 0, sizeof(*codec_config));
+
+ }
+
+ ret = fill_codec_config(stg, codec_config);
+ if (ret < 0)
+ return ret;
+
+ for (int j = 0; j < idx; j++) {
+ if (stg->id == iamf->audio_elements[j].audio_element_id) {
+ av_log(s, AV_LOG_ERROR, "Duplicated Audio Element id %"PRId64"\n", stg->id);
+ return AVERROR(EINVAL);
+ }
+ }
+
+ audio_element = &iamf->audio_elements[idx++];
+ audio_element->element = stg->params.iamf_audio_element;
+ audio_element->audio_element_id = stg->id;
+ audio_element->codec_config = codec_config;
+
+ audio_element->substreams = av_calloc(stg->nb_streams, sizeof(*audio_element->substreams));
+ if (!audio_element->substreams)
+ return AVERROR(ENOMEM);
+ audio_element->nb_substreams = stg->nb_streams;
+
+ for (int j = 0, k = 0; j < iamf_audio_element->num_layers; j++) {
+ IAMFLayer *layer = av_dynarray2_add_nofree((void **)&audio_element->layers, &audio_element->nb_layers,
+ sizeof(*audio_element->layers), NULL);
+ int nb_channels = iamf_audio_element->layers[j]->ch_layout.nb_channels;
+
+ if (!layer)
+ return AVERROR(ENOMEM);
+ memset(layer, 0, sizeof(*layer));
+
+ if (j)
+ nb_channels -= iamf_audio_element->layers[j - 1]->ch_layout.nb_channels;
+ for (; nb_channels > 0 && k < stg->nb_streams; k++) {
+ const AVStream *st = stg->streams[k];
+ IAMFSubStream *substream = &audio_element->substreams[k];
+
+ substream->audio_substream_id = st->id;
+ layer->substream_count++;
+ layer->coupled_substream_count += st->codecpar->ch_layout.nb_channels == 2;
+ nb_channels -= st->codecpar->ch_layout.nb_channels;
+ }
+ if (nb_channels) {
+ av_log(s, AV_LOG_ERROR, "Invalid channel count across substreams in layer %u from stream group %u\n",
+ j, stg->index);
+ return AVERROR(EINVAL);
+ }
+ }
+
+ if (iamf_audio_element->demixing_info) {
+ AVIAMFParamDefinition *param = iamf_audio_element->demixing_info;
+ IAMFParamDefinition *param_definition = get_param_definition(s, param->parameter_id);
+
+ if (param->num_subblocks != 1) {
+ av_log(s, AV_LOG_ERROR, "num_subblocks in demixing_info for stream group %u is not 1\n", stg->index);
+ return AVERROR(EINVAL);
+ }
+ if (!param_definition) {
+ param_definition = add_param_definition(s, param);
+ if (!param_definition)
+ return AVERROR(ENOMEM);
+ }
+ param_definition->audio_element = iamf_audio_element;
+ }
+ if (iamf_audio_element->recon_gain_info) {
+ AVIAMFParamDefinition *param = iamf_audio_element->recon_gain_info;
+ IAMFParamDefinition *param_definition = get_param_definition(s, param->parameter_id);
+
+ if (param->num_subblocks != 1) {
+ av_log(s, AV_LOG_ERROR, "num_subblocks in recon_gain_info for stream group %u is not 1\n", stg->index);
+ return AVERROR(EINVAL);
+ }
+
+ if (!param_definition) {
+ param_definition = add_param_definition(s, param);
+ if (!param_definition)
+ return AVERROR(ENOMEM);
+ }
+ param_definition->audio_element = iamf_audio_element;
+ }
+ }
+
+ for (int i = 0, idx = 0; i < s->nb_stream_groups; i++) {
+ const AVStreamGroup *stg = s->stream_groups[i];
+ IAMFMixPresentation *mix_presentation;
+
+ if (stg->type != AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION)
+ continue;
+
+ for (int j = 0; j < idx; j++) {
+ if (stg->id == iamf->mix_presentations[j].mix_presentation_id) {
+ av_log(s, AV_LOG_ERROR, "Duplicate Mix Presentation id %"PRId64"\n", stg->id);
+ return AVERROR(EINVAL);
+ }
+ }
+
+ mix_presentation = &iamf->mix_presentations[idx++];
+ mix_presentation->mix = stg->params.iamf_mix_presentation;
+ mix_presentation->mix_presentation_id = stg->id;
+
+ for (int i = 0; i < mix_presentation->mix->num_submixes; i++) {
+ const AVIAMFSubmix *submix = mix_presentation->mix->submixes[i];
+ AVIAMFParamDefinition *param = submix->output_mix_config;
+ IAMFParamDefinition *param_definition;
+
+ if (!param) {
+ av_log(s, AV_LOG_ERROR, "output_mix_config is not present in submix %u from Mix Presentation ID %"PRId64"\n", i, stg->id);
+ return AVERROR(EINVAL);
+ }
+
+ param_definition = get_param_definition(s, param->parameter_id);
+ if (!param_definition) {
+ param_definition = add_param_definition(s, param);
+ if (!param_definition)
+ return AVERROR(ENOMEM);
+ }
+
+ for (int j = 0; j < submix->num_elements; j++) {
+ const AVIAMFAudioElement *iamf_audio_element = NULL;
+ const AVIAMFSubmixElement *element = submix->elements[j];
+ param = element->element_mix_config;
+
+ if (!param) {
+ av_log(s, AV_LOG_ERROR, "element_mix_config is not present for element %u in submix %u from Mix Presentation ID %"PRId64"\n", j, i, stg->id);
+ return AVERROR(EINVAL);
+ }
+ param_definition = get_param_definition(s, param->parameter_id);
+ if (!param_definition) {
+ param_definition = add_param_definition(s, param);
+ if (!param_definition)
+ return AVERROR(ENOMEM);
+ }
+ for (int k = 0; k < iamf->nb_audio_elements; k++)
+ if (iamf->audio_elements[k].audio_element_id == element->audio_element_id) {
+ iamf_audio_element = iamf->audio_elements[k].element;
+ break;
+ }
+ param_definition->audio_element = iamf_audio_element;
+ }
+ }
+ }
+
+ c->first_stream_id = s->streams[0]->id;
+
+ return 0;
+}
+
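+ /* Write a Codec Config OBU. The payload is assembled in a dynamic buffer
+ * first so its size can be emitted as a leb128 after the OBU header. */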
+static int iamf_write_codec_config(AVFormatContext *s, const IAMFCodecConfig *codec_config)
+{
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ AVIOContext *dyn_bc;
+ uint8_t *dyn_buf = NULL;
+ PutBitContext pb;
+ int dyn_size;
+
+ int ret = avio_open_dyn_buf(&dyn_bc);
+ if (ret < 0)
+ return ret;
+
+ ffio_write_leb(dyn_bc, codec_config->codec_config_id);
+ avio_wl32(dyn_bc, codec_config->codec_tag);
+
+ ffio_write_leb(dyn_bc, codec_config->nb_samples);
+ avio_wb16(dyn_bc, codec_config->seek_preroll);
+
+ switch(codec_config->codec_id) {
+ case AV_CODEC_ID_OPUS:
+ avio_write(dyn_bc, codec_config->extradata, codec_config->extradata_size);
+ break;
+ case AV_CODEC_ID_AAC:
+ return AVERROR_PATCHWELCOME;
+ case AV_CODEC_ID_FLAC:
+ avio_w8(dyn_bc, 0x80);
+ avio_wb24(dyn_bc, codec_config->extradata_size);
+ avio_write(dyn_bc, codec_config->extradata, codec_config->extradata_size);
+ break;
+ case AV_CODEC_ID_PCM_S16LE:
+ avio_w8(dyn_bc, 0);
+ avio_w8(dyn_bc, 16);
+ avio_wb32(dyn_bc, codec_config->sample_rate);
+ break;
+ case AV_CODEC_ID_PCM_S24LE:
+ avio_w8(dyn_bc, 0);
+ avio_w8(dyn_bc, 24);
+ avio_wb32(dyn_bc, codec_config->sample_rate);
+ break;
+ case AV_CODEC_ID_PCM_S32LE:
+ avio_w8(dyn_bc, 0);
+ avio_w8(dyn_bc, 32);
+ avio_wb32(dyn_bc, codec_config->sample_rate);
+ break;
+ case AV_CODEC_ID_PCM_S16BE:
+ avio_w8(dyn_bc, 1);
+ avio_w8(dyn_bc, 16);
+ avio_wb32(dyn_bc, codec_config->sample_rate);
+ break;
+ case AV_CODEC_ID_PCM_S24BE:
+ avio_w8(dyn_bc, 1);
+ avio_w8(dyn_bc, 24);
+ avio_wb32(dyn_bc, codec_config->sample_rate);
+ break;
+ case AV_CODEC_ID_PCM_S32BE:
+ avio_w8(dyn_bc, 1);
+ avio_w8(dyn_bc, 32);
+ avio_wb32(dyn_bc, codec_config->sample_rate);
+ break;
+ default:
+ break;
+ }
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 5, IAMF_OBU_IA_CODEC_CONFIG);
+ put_bits(&pb, 3, 0);
+ flush_put_bits(&pb);
+
+ dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+ avio_write(s->pb, header, put_bytes_count(&pb, 1));
+ ffio_write_leb(s->pb, dyn_size);
+ avio_write(s->pb, dyn_buf, dyn_size);
+ av_free(dyn_buf);
+
+ return 0;
+}
+
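+ /* Rescale a rational value by b and clip it to the int16 range, as used for
+ * the fixed-point gain and loudness fields in the bitstream. */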
+static inline int rescale_rational(AVRational q, int b)
+{
+ return av_clip_int16(av_rescale(q.num, b, q.den));
+}
+
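+ /* Write the scalable channel layout config for a channel-based audio
+ * element: one entry per layer with its loudspeaker layout, substream counts
+ * and optional output gain. */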
+static int scalable_channel_layout_config(AVFormatContext *s, AVIOContext *dyn_bc,
+ const IAMFAudioElement *audio_element)
+{
+ const AVIAMFAudioElement *element = audio_element->element;
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ PutBitContext pb;
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 3, element->num_layers);
+ put_bits(&pb, 5, 0);
+ flush_put_bits(&pb);
+ avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
+ for (int i = 0; i < element->num_layers; i++) {
+ AVIAMFLayer *layer = element->layers[i];
+ int layout;
+ for (layout = 0; layout < FF_ARRAY_ELEMS(ff_iamf_scalable_ch_layouts); layout++) {
+ if (!av_channel_layout_compare(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[layout]))
+ break;
+ }
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 4, layout);
+ put_bits(&pb, 1, !!layer->output_gain_flags);
+ put_bits(&pb, 1, layer->recon_gain_is_present);
+ put_bits(&pb, 2, 0); // reserved
+ put_bits(&pb, 8, audio_element->layers[i].substream_count);
+ put_bits(&pb, 8, audio_element->layers[i].coupled_substream_count);
+ if (layer->output_gain_flags) {
+ put_bits(&pb, 6, layer->output_gain_flags);
+ put_bits(&pb, 2, 0);
+ put_bits(&pb, 16, rescale_rational(layer->output_gain, 1 << 8));
+ }
+ flush_put_bits(&pb);
+ avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
+ }
+
+ return 0;
+}
+
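+ /* Write the ambisonics config for a scene-based audio element. Only the
+ * MONO ambisonics mode is emitted; the channel mapping is taken from the
+ * layer's channel layout. */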
+static int ambisonics_config(AVFormatContext *s, AVIOContext *dyn_bc,
+ const IAMFAudioElement *audio_element)
+{
+ const AVIAMFAudioElement *element = audio_element->element;
+ AVIAMFLayer *layer = element->layers[0];
+
+ ffio_write_leb(dyn_bc, 0); // ambisonics_mode
+ ffio_write_leb(dyn_bc, layer->ch_layout.nb_channels); // output_channel_count
+ ffio_write_leb(dyn_bc, audio_element->nb_substreams); // substream_count
+
+ if (layer->ch_layout.order == AV_CHANNEL_ORDER_AMBISONIC)
+ for (int i = 0; i < layer->ch_layout.nb_channels; i++)
+ avio_w8(dyn_bc, i);
+ else
+ for (int i = 0; i < layer->ch_layout.nb_channels; i++)
+ avio_w8(dyn_bc, layer->ch_layout.u.map[i].id);
+
+ return 0;
+}
+
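+ /* Write the common parameter definition fields shared by mix gain, demixing
+ * and recon gain parameters, including per-subblock durations when the
+ * subblock duration is not constant. */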
+static int param_definition(AVFormatContext *s, AVIOContext *dyn_bc,
+ AVIAMFParamDefinition *param)
+{
+ ffio_write_leb(dyn_bc, param->parameter_id);
+ ffio_write_leb(dyn_bc, param->parameter_rate);
+ avio_w8(dyn_bc, !!param->param_definition_mode << 7);
+ if (!param->param_definition_mode) {
+ ffio_write_leb(dyn_bc, param->duration);
+ ffio_write_leb(dyn_bc, param->constant_subblock_duration);
+ if (param->constant_subblock_duration == 0) {
+ ffio_write_leb(dyn_bc, param->num_subblocks);
+ for (int i = 0; i < param->num_subblocks; i++) {
+ const void *subblock = av_iamf_param_definition_get_subblock(param, i);
+
+ switch (param->param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+ const AVIAMFMixGainParameterData *mix = subblock;
+ ffio_write_leb(dyn_bc, mix->subblock_duration);
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+ const AVIAMFDemixingInfoParameterData *demix = subblock;
+ ffio_write_leb(dyn_bc, demix->subblock_duration);
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+ const AVIAMFReconGainParameterData *recon = subblock;
+ ffio_write_leb(dyn_bc, recon->subblock_duration);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
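+ /* Write an Audio Element OBU: substream ids, the parameter definitions the
+ * element needs, and either the scalable channel layout or the ambisonics
+ * config depending on the element type. */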
+static int iamf_write_audio_element(AVFormatContext *s, const IAMFAudioElement *audio_element)
+{
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ const AVIAMFAudioElement *element = audio_element->element;
+ AVIOContext *dyn_bc;
+ uint8_t *dyn_buf = NULL;
+ PutBitContext pb;
+ int param_definition_types = AV_IAMF_PARAMETER_DEFINITION_DEMIXING, dyn_size;
+
+ int ret = avio_open_dyn_buf(&dyn_bc);
+ if (ret < 0)
+ return ret;
+
+ ffio_write_leb(dyn_bc, audio_element->audio_element_id);
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 3, element->audio_element_type);
+ put_bits(&pb, 5, 0);
+ flush_put_bits(&pb);
+ avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
+
+ ffio_write_leb(dyn_bc, audio_element->codec_config->codec_config_id);
+ ffio_write_leb(dyn_bc, audio_element->nb_substreams);
+
+ for (int i = 0; i < audio_element->nb_substreams; i++)
+ ffio_write_leb(dyn_bc, audio_element->substreams[i].audio_substream_id);
+
+ if (audio_element->nb_layers == 1)
+ param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_DEMIXING;
+ if (audio_element->nb_layers > 1)
+ param_definition_types |= AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN;
+ if (audio_element->codec_config->codec_tag == MKTAG('f','L','a','C') ||
+ audio_element->codec_config->codec_tag == MKTAG('i','p','c','m'))
+ param_definition_types &= ~AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN;
+
+ ffio_write_leb(dyn_bc, av_popcount(param_definition_types)); // num_parameters
+
+ if (param_definition_types & 1) {
+ AVIAMFParamDefinition *param = element->demixing_info;
+ const AVIAMFDemixingInfoParameterData *demix;
+
+ if (!param) {
+ av_log(s, AV_LOG_ERROR, "demixing_info needed but not set in Stream Group #%u\n",
+ audio_element->audio_element_id);
+ return AVERROR(EINVAL);
+ }
+
+ demix = av_iamf_param_definition_get_subblock(param, 0);
+ ffio_write_leb(dyn_bc, AV_IAMF_PARAMETER_DEFINITION_DEMIXING); // param_definition_type
+ param_definition(s, dyn_bc, param);
+
+ avio_w8(dyn_bc, demix->dmixp_mode << 5); // dmixp_mode
+ avio_w8(dyn_bc, element->default_w << 4); // default_w
+ }
+ if (param_definition_types & 2) {
+ AVIAMFParamDefinition *param = element->recon_gain_info;
+
+ if (!param) {
+ av_log(s, AV_LOG_ERROR, "recon_gain_info needed but not set in Stream Group #%u\n",
+ audio_element->audio_element_id);
+ return AVERROR(EINVAL);
+ }
+ ffio_write_leb(dyn_bc, AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN); // param_definition_type
+ param_definition(s, dyn_bc, param);
+ }
+
+ if (element->audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+ ret = scalable_channel_layout_config(s, dyn_bc, audio_element);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = ambisonics_config(s, dyn_bc, audio_element);
+ if (ret < 0)
+ return ret;
+ }
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 5, IAMF_OBU_IA_AUDIO_ELEMENT);
+ put_bits(&pb, 3, 0);
+ flush_put_bits(&pb);
+
+ dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+ avio_write(s->pb, header, put_bytes_count(&pb, 1));
+ ffio_write_leb(s->pb, dyn_size);
+ avio_write(s->pb, dyn_buf, dyn_size);
+ av_free(dyn_buf);
+
+ return 0;
+}
+
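+ /* Write a Mix Presentation OBU, including annotations, submix elements with
+ * their mix gains, and per-layout loudness information. */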
+static int iamf_write_mixing_presentation(AVFormatContext *s, const IAMFMixPresentation *mix_presentation)
+{
+ IAMFMuxContext *const c = s->priv_data;
+ IAMFContext *const iamf = &c->iamf;
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ const AVIAMFMixPresentation *mix = mix_presentation->mix;
+ const AVDictionaryEntry *tag = NULL;
+ PutBitContext pb;
+ AVIOContext *dyn_bc;
+ uint8_t *dyn_buf = NULL;
+ int dyn_size;
+
+ int ret = avio_open_dyn_buf(&dyn_bc);
+ if (ret < 0)
+ return ret;
+
+ ffio_write_leb(dyn_bc, mix_presentation->mix_presentation_id); // mix_presentation_id
+ ffio_write_leb(dyn_bc, av_dict_count(mix->annotations)); // count_label
+
+ while ((tag = av_dict_iterate(mix->annotations, tag)))
+ avio_put_str(dyn_bc, tag->key);
+ while ((tag = av_dict_iterate(mix->annotations, tag)))
+ avio_put_str(dyn_bc, tag->value);
+
+ ffio_write_leb(dyn_bc, mix->num_submixes);
+ for (int i = 0; i < mix->num_submixes; i++) {
+ const AVIAMFSubmix *sub_mix = mix->submixes[i];
+
+ ffio_write_leb(dyn_bc, sub_mix->num_elements);
+ for (int j = 0; j < sub_mix->num_elements; j++) {
+ const IAMFAudioElement *audio_element = NULL;
+ const AVIAMFSubmixElement *submix_element = sub_mix->elements[j];
+
+ for (int k = 0; k < iamf->nb_audio_elements; k++)
+ if (iamf->audio_elements[k].audio_element_id == submix_element->audio_element_id) {
+ audio_element = &iamf->audio_elements[k];
+ break;
+ }
+
+ av_assert0(audio_element);
+ ffio_write_leb(dyn_bc, submix_element->audio_element_id);
+
+ if (av_dict_count(submix_element->annotations) != av_dict_count(mix->annotations)) {
+ av_log(s, AV_LOG_ERROR, "Inconsistent number of labels in submix %d for Audio Element id %u\n",
+ j, audio_element->audio_element_id);
+ return AVERROR(EINVAL);
+ }
+ while ((tag = av_dict_iterate(submix_element->annotations, tag)))
+ avio_put_str(dyn_bc, tag->value);
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 2, submix_element->headphones_rendering_mode);
+ put_bits(&pb, 6, 0); // reserved
+ flush_put_bits(&pb);
+ avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
+ ffio_write_leb(dyn_bc, 0); // rendering_config_extension_size
+ param_definition(s, dyn_bc, submix_element->element_mix_config);
+ avio_wb16(dyn_bc, rescale_rational(submix_element->default_mix_gain, 1 << 8));
+ }
+ param_definition(s, dyn_bc, sub_mix->output_mix_config);
+ avio_wb16(dyn_bc, rescale_rational(sub_mix->default_mix_gain, 1 << 8));
+
+ ffio_write_leb(dyn_bc, sub_mix->num_layouts); // num_layouts
+ for (int i = 0; i < sub_mix->num_layouts; i++) {
+ const AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[i];
+ int layout, info_type;
+ int dialogue = submix_layout->dialogue_anchored_loudness.num &&
+ submix_layout->dialogue_anchored_loudness.den;
+ int album = submix_layout->album_anchored_loudness.num &&
+ submix_layout->album_anchored_loudness.den;
+
+ if (submix_layout->layout_type == AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS) {
+ for (layout = 0; layout < FF_ARRAY_ELEMS(ff_iamf_sound_system_map); layout++) {
+ if (!av_channel_layout_compare(&submix_layout->sound_system, &ff_iamf_sound_system_map[layout].layout))
+ break;
+ }
+ if (layout == FF_ARRAY_ELEMS(ff_iamf_sound_system_map)) {
+ av_log(s, AV_LOG_ERROR, "Invalid Sound System value in a submix\n");
+ return AVERROR(EINVAL);
+ }
+ }
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 2, submix_layout->layout_type); // layout_type
+ if (submix_layout->layout_type == AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS) {
+ put_bits(&pb, 4, ff_iamf_sound_system_map[layout].id); // sound_system
+ put_bits(&pb, 2, 0); // reserved
+ } else
+ put_bits(&pb, 6, 0); // reserved
+ flush_put_bits(&pb);
+ avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
+
+ info_type = (submix_layout->true_peak.num && submix_layout->true_peak.den);
+ info_type |= (dialogue || album) << 1;
+ avio_w8(dyn_bc, info_type);
+ avio_wb16(dyn_bc, rescale_rational(submix_layout->integrated_loudness, 1 << 8));
+ avio_wb16(dyn_bc, rescale_rational(submix_layout->digital_peak, 1 << 8));
+ if (info_type & 1)
+ avio_wb16(dyn_bc, rescale_rational(submix_layout->true_peak, 1 << 8));
+ if (info_type & 2) {
+ avio_w8(dyn_bc, dialogue + album); // num_anchored_loudness
+ if (dialogue) {
+ avio_w8(dyn_bc, IAMF_ANCHOR_ELEMENT_DIALOGUE);
+ avio_wb16(dyn_bc, rescale_rational(submix_layout->dialogue_anchored_loudness, 1 << 8));
+ }
+ if (album) {
+ avio_w8(dyn_bc, IAMF_ANCHOR_ELEMENT_ALBUM);
+ avio_wb16(dyn_bc, rescale_rational(submix_layout->album_anchored_loudness, 1 << 8));
+ }
+ }
+ }
+ }
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 5, IAMF_OBU_IA_MIX_PRESENTATION);
+ put_bits(&pb, 3, 0);
+ flush_put_bits(&pb);
+
+ dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+ avio_write(s->pb, header, put_bytes_count(&pb, 1));
+ ffio_write_leb(s->pb, dyn_size);
+ avio_write(s->pb, dyn_buf, dyn_size);
+ av_free(dyn_buf);
+
+ return 0;
+}
+
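+ /* Write the IA Sequence Header followed by all descriptor OBUs: codec
+ * configs, audio elements and mix presentations. */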
+static int iamf_write_header(AVFormatContext *s)
+{
+ IAMFMuxContext *const c = s->priv_data;
+ IAMFContext *const iamf = &c->iamf;
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ PutBitContext pb;
+ AVIOContext *dyn_bc;
+ uint8_t *dyn_buf = NULL;
+ int dyn_size;
+
+ int ret = avio_open_dyn_buf(&dyn_bc);
+ if (ret < 0)
+ return ret;
+
+ // Sequence Header
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 5, IAMF_OBU_IA_SEQUENCE_HEADER);
+ put_bits(&pb, 3, 0);
+ flush_put_bits(&pb);
+
+ avio_write(dyn_bc, header, put_bytes_count(&pb, 1));
+ ffio_write_leb(dyn_bc, 6);
+ avio_wb32(dyn_bc, MKBETAG('i','a','m','f'));
+ avio_w8(dyn_bc, iamf->nb_audio_elements > 1); // primary_profile
+ avio_w8(dyn_bc, iamf->nb_audio_elements > 1); // additional_profile
+
+ dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+ avio_write(s->pb, dyn_buf, dyn_size);
+ av_free(dyn_buf);
+
+ for (int i = 0; i < iamf->nb_codec_configs; i++) {
+ ret = iamf_write_codec_config(s, &iamf->codec_configs[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < iamf->nb_audio_elements; i++) {
+ ret = iamf_write_audio_element(s, &iamf->audio_elements[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < iamf->nb_mix_presentations; i++) {
+ ret = iamf_write_mixing_presentation(s, &iamf->mix_presentations[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
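+ /* Write a Parameter Block OBU from packet side data, after checking that it
+ * matches a parameter definition declared in the descriptors. */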
+static int write_parameter_block(AVFormatContext *s, AVIAMFParamDefinition *param)
+{
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ IAMFParamDefinition *param_definition = get_param_definition(s, param->parameter_id);
+ PutBitContext pb;
+ AVIOContext *dyn_bc;
+ uint8_t *dyn_buf = NULL;
+ int dyn_size, ret;
+
+ if (param->param_definition_type > AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
+ av_log(s, AV_LOG_DEBUG, "Ignoring side data with unknown param_definition_type %u\n",
+ param->param_definition_type);
+ return 0;
+ }
+
+ if (!param_definition) {
+ av_log(s, AV_LOG_ERROR, "Non-existent Parameter Definition with ID %u referenced by a packet\n",
+ param->parameter_id);
+ return AVERROR(EINVAL);
+ }
+
+ if (param->param_definition_type != param_definition->param->param_definition_type ||
+ param->param_definition_mode != param_definition->param->param_definition_mode) {
+ av_log(s, AV_LOG_ERROR, "Inconsistent param_definition_mode or param_definition_type values "
+ "for Parameter Definition with ID %u in a packet\n",
+ param->parameter_id);
+ return AVERROR(EINVAL);
+ }
+
+ ret = avio_open_dyn_buf(&dyn_bc);
+ if (ret < 0)
+ return ret;
+
+ // Sequence Header
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 5, IAMF_OBU_IA_PARAMETER_BLOCK);
+ put_bits(&pb, 3, 0);
+ flush_put_bits(&pb);
+ avio_write(s->pb, header, put_bytes_count(&pb, 1));
+
+ ffio_write_leb(dyn_bc, param->parameter_id);
+ if (param->param_definition_mode) {
+ ffio_write_leb(dyn_bc, param->duration);
+ ffio_write_leb(dyn_bc, param->constant_subblock_duration);
+ if (param->constant_subblock_duration == 0)
+ ffio_write_leb(dyn_bc, param->num_subblocks);
+ }
+
+ for (int i = 0; i < param->num_subblocks; i++) {
+ const void *subblock = av_iamf_param_definition_get_subblock(param, i);
+
+ switch (param->param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+ const AVIAMFMixGainParameterData *mix = subblock;
+ if (param->param_definition_mode && param->constant_subblock_duration == 0)
+ ffio_write_leb(dyn_bc, mix->subblock_duration);
+
+ ffio_write_leb(dyn_bc, mix->animation_type);
+
+ avio_wb16(dyn_bc, rescale_rational(mix->start_point_value, 1 << 8));
+ if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+ avio_wb16(dyn_bc, rescale_rational(mix->end_point_value, 1 << 8));
+ if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ avio_wb16(dyn_bc, rescale_rational(mix->control_point_value, 1 << 8));
+ avio_w8(dyn_bc, mix->control_point_relative_time);
+ }
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+ const AVIAMFDemixingInfoParameterData *demix = subblock;
+ if (param->param_definition_mode && param->constant_subblock_duration == 0)
+ ffio_write_leb(dyn_bc, demix->subblock_duration);
+
+ avio_w8(dyn_bc, demix->dmixp_mode << 5);
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+ const AVIAMFReconGainParameterData *recon = subblock;
+ const AVIAMFAudioElement *audio_element = param_definition->audio_element;
+
+ if (param->param_definition_mode && param->constant_subblock_duration == 0)
+ ffio_write_leb(dyn_bc, recon->subblock_duration);
+
+ if (!audio_element) {
+ av_log(s, AV_LOG_ERROR, "Invalid Parameter Definition with ID %u referenced by a packet\n", param->parameter_id);
+ return AVERROR(EINVAL);
+ }
+
+ for (int j = 0; j < audio_element->num_layers; j++) {
+ const AVIAMFLayer *layer = audio_element->layers[j];
+
+ if (layer->recon_gain_is_present) {
+ unsigned int recon_gain_flags = 0;
+ int k = 0;
+
+ for (; k < 7; k++)
+ recon_gain_flags |= (1 << k) * !!recon->recon_gain[j][k];
+ for (; k < 12; k++)
+ recon_gain_flags |= (2 << k) * !!recon->recon_gain[j][k];
+ if (recon_gain_flags >> 8)
+ recon_gain_flags |= (1 << k);
+
+ ffio_write_leb(dyn_bc, recon_gain_flags);
+ for (k = 0; k < 12; k++) {
+ if (recon->recon_gain[j][k])
+ avio_w8(dyn_bc, recon->recon_gain[j][k]);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ av_assert0(0);
+ }
+ }
+
+ dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+ ffio_write_leb(s->pb, dyn_size);
+ avio_write(s->pb, dyn_buf, dyn_size);
+ av_free(dyn_buf);
+
+ return 0;
+}
+
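+ /* Wrap each packet in an Audio Frame OBU. Parameter blocks attached as side
+ * data to packets of the first stream are written before the frame. */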
+static int iamf_write_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ const IAMFMuxContext *const c = s->priv_data;
+ AVStream *st = s->streams[pkt->stream_index];
+ uint8_t header[MAX_IAMF_OBU_HEADER_SIZE];
+ PutBitContext pb;
+ AVIOContext *dyn_bc;
+ uint8_t *dyn_buf = NULL;
+ int dyn_size;
+ int ret, type = st->id <= 17 ? st->id + IAMF_OBU_IA_AUDIO_FRAME_ID0 : IAMF_OBU_IA_AUDIO_FRAME;
+
+ if (s->nb_stream_groups && st->id == c->first_stream_id) {
+ AVIAMFParamDefinition *mix =
+ (AVIAMFParamDefinition *)av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, NULL);
+ AVIAMFParamDefinition *demix =
+ (AVIAMFParamDefinition *)av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, NULL);
+ AVIAMFParamDefinition *recon =
+ (AVIAMFParamDefinition *)av_packet_get_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, NULL);
+
+ if (mix) {
+ ret = write_parameter_block(s, mix);
+ if (ret < 0)
+ return ret;
+ }
+ if (demix) {
+ ret = write_parameter_block(s, demix);
+ if (ret < 0)
+ return ret;
+ }
+ if (recon) {
+ ret = write_parameter_block(s, recon);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ ret = avio_open_dyn_buf(&dyn_bc);
+ if (ret < 0)
+ return ret;
+
+ init_put_bits(&pb, header, sizeof(header));
+ put_bits(&pb, 5, type);
+ put_bits(&pb, 3, 0);
+ flush_put_bits(&pb);
+ avio_write(s->pb, header, put_bytes_count(&pb, 1));
+
+ if (st->id > 17)
+ ffio_write_leb(dyn_bc, st->id);
+
+ dyn_size = avio_close_dyn_buf(dyn_bc, &dyn_buf);
+ ffio_write_leb(s->pb, dyn_size + pkt->size);
+ avio_write(s->pb, dyn_buf, dyn_size);
+ av_free(dyn_buf);
+ avio_write(s->pb, pkt->data, pkt->size);
+
+ return 0;
+}
+
+static void iamf_deinit(AVFormatContext *s)
+{
+ IAMFMuxContext *const c = s->priv_data;
+ IAMFContext *const iamf = &c->iamf;
+
+ for (int i = 0; i < iamf->nb_audio_elements; i++) {
+ IAMFAudioElement *audio_element = &iamf->audio_elements[i];
+ audio_element->element = NULL;
+ }
+
+ for (int i = 0; i < iamf->nb_mix_presentations; i++) {
+ IAMFMixPresentation *mix_presentation = &iamf->mix_presentations[i];
+ mix_presentation->mix = NULL;
+ }
+
+ ff_iamf_uninit_context(iamf);
+
+ return;
+}
+
+static const AVCodecTag iamf_codec_tags[] = {
+ { AV_CODEC_ID_AAC, MKTAG('m','p','4','a') },
+ { AV_CODEC_ID_FLAC, MKTAG('f','L','a','C') },
+ { AV_CODEC_ID_OPUS, MKTAG('O','p','u','s') },
+ { AV_CODEC_ID_PCM_S16LE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S16BE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S24LE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S24BE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S32LE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_PCM_S32BE, MKTAG('i','p','c','m') },
+ { AV_CODEC_ID_NONE, MKTAG('i','p','c','m') }
+};
+
+const FFOutputFormat ff_iamf_muxer = {
+ .p.name = "iamf",
+ .p.long_name = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
+ .p.extensions = "iamf",
+ .priv_data_size = sizeof(IAMFMuxContext),
+ .p.audio_codec = AV_CODEC_ID_OPUS,
+ .init = iamf_init,
+ .deinit = iamf_deinit,
+ .write_header = iamf_write_header,
+ .write_packet = iamf_write_packet,
+ .p.codec_tag = (const AVCodecTag* const []){ iamf_codec_tags, NULL },
+ .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOTIMESTAMPS,
+};
--
2.42.1
* [FFmpeg-devel] [PATCH 10/13] avcodec: add an Immersive Audio Model and Formats frame split bsf
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (8 preceding siblings ...)
2023-11-26 1:28 ` [FFmpeg-devel] [PATCH 9/9] avformat: Immersive Audio Model and Formats muxer James Almer
@ 2023-11-27 18:43 ` James Almer
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios James Almer
` (2 subsequent siblings)
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-27 18:43 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/Makefile | 1 +
libavcodec/bitstream_filters.c | 1 +
libavcodec/iamf_stream_split_bsf.c | 807 +++++++++++++++++++++++++++++
3 files changed, 809 insertions(+)
create mode 100644 libavcodec/iamf_stream_split_bsf.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 748806e702..a2c345570e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1245,6 +1245,7 @@ OBJS-$(CONFIG_HAPQA_EXTRACT_BSF) += hapqa_extract_bsf.o hap.o
OBJS-$(CONFIG_HEVC_METADATA_BSF) += h265_metadata_bsf.o h265_profile_level.o \
h2645data.o
OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF) += hevc_mp4toannexb_bsf.o
+OBJS-$(CONFIG_IAMF_STREAM_SPLIT_BSF) += iamf_stream_split_bsf.o
OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF) += imx_dump_header_bsf.o
OBJS-$(CONFIG_MEDIA100_TO_MJPEGB_BSF) += media100_to_mjpegb_bsf.o
OBJS-$(CONFIG_MJPEG2JPEG_BSF) += mjpeg2jpeg_bsf.o
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 1e9a676a3d..640b821413 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -42,6 +42,7 @@ extern const FFBitStreamFilter ff_h264_redundant_pps_bsf;
extern const FFBitStreamFilter ff_hapqa_extract_bsf;
extern const FFBitStreamFilter ff_hevc_metadata_bsf;
extern const FFBitStreamFilter ff_hevc_mp4toannexb_bsf;
+extern const FFBitStreamFilter ff_iamf_stream_split_bsf;
extern const FFBitStreamFilter ff_imx_dump_header_bsf;
extern const FFBitStreamFilter ff_media100_to_mjpegb_bsf;
extern const FFBitStreamFilter ff_mjpeg2jpeg_bsf;
diff --git a/libavcodec/iamf_stream_split_bsf.c b/libavcodec/iamf_stream_split_bsf.c
new file mode 100644
index 0000000000..28c8101719
--- /dev/null
+++ b/libavcodec/iamf_stream_split_bsf.c
@@ -0,0 +1,807 @@
+/*
+ * Copyright (c) 2023 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/dict.h"
+#include "libavutil/opt.h"
+#include "libavformat/iamf.h"
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "get_bits.h"
+
+typedef struct ParamDefinition {
+ AVIAMFParamDefinition *param;
+ size_t param_size;
+ int recon_gain_present_bitmask;
+} ParamDefinition;
+
+typedef struct IAMFSplitContext {
+ AVClass *class;
+ AVPacket *buffer_pkt;
+
+ ParamDefinition *param_definitions;
+ unsigned int nb_param_definitions;
+
+ unsigned int *ids;
+ int nb_ids;
+
+ // AVOptions
+ int first_index;
+
+ // Packet side data
+ AVIAMFParamDefinition *mix;
+ size_t mix_size;
+ AVIAMFParamDefinition *demix;
+ size_t demix_size;
+ AVIAMFParamDefinition *recon;
+ size_t recon_size;
+} IAMFSplitContext;
+
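+ /* Parse a parameter definition from a descriptor OBU and cache it, so that
+ * later Parameter Block OBUs referencing the same parameter_id can be
+ * turned into packet side data. */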
+static int param_parse(AVBSFContext *ctx, GetBitContext *gb,
+ unsigned int param_definition_type,
+ ParamDefinition **out)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ ParamDefinition *param_definition = NULL;
+ AVIAMFParamDefinition *param;
+ unsigned int parameter_id, parameter_rate, param_definition_mode;
+ unsigned int duration = 0, constant_subblock_duration = 0, num_subblocks = 0;
+ size_t param_size;
+
+ parameter_id = get_leb(gb);
+
+ for (int i = 0; i < c->nb_param_definitions; i++)
+ if (c->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &c->param_definitions[i];
+ break;
+ }
+
+ parameter_rate = get_leb(gb);
+ param_definition_mode = get_bits(gb, 8) >> 7;
+
+ if (param_definition_mode == 0) {
+ duration = get_leb(gb);
+ constant_subblock_duration = get_leb(gb);
+ if (constant_subblock_duration == 0) {
+ num_subblocks = get_leb(gb);
+ } else
+ num_subblocks = duration / constant_subblock_duration;
+ }
+
+ param = av_iamf_param_definition_alloc(param_definition_type, NULL, num_subblocks, NULL, ¶m_size);
+ if (!param)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < num_subblocks; i++) {
+ if (constant_subblock_duration == 0)
+ get_leb(gb); // subblock_duration
+
+ switch (param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ skip_bits(gb, 8); // dmixp_mode
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ break;
+ default:
+ av_free(param);
+ return AVERROR_INVALIDDATA;
+ }
+ }
+
+ param->parameter_id = parameter_id;
+ param->parameter_rate = parameter_rate;
+ param->param_definition_mode = param_definition_mode;
+ param->duration = duration;
+ param->constant_subblock_duration = constant_subblock_duration;
+ param->num_subblocks = num_subblocks;
+
+ if (param_definition) {
+ if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
+ av_log(ctx, AV_LOG_ERROR, "Inconsistent parameters for parameter_id %u\n", parameter_id);
+ av_free(param);
+ return AVERROR_INVALIDDATA;
+ }
+ av_freep(¶m);
+ } else {
+ param_definition = av_dynarray2_add_nofree((void **)&c->param_definitions, &c->nb_param_definitions,
+ sizeof(*c->param_definitions), NULL);
+ if (!param_definition) {
+ av_free(param);
+ return AVERROR(ENOMEM);
+ }
+ param_definition->param = param;
+ param_definition->param_size = param_size;
+ }
+ if (out)
+ *out = param_definition;
+
+ return 0;
+}
+
+static int scalable_channel_layout_config(AVBSFContext *ctx, GetBitContext *gb,
+ ParamDefinition *recon_gain)
+{
+ int num_layers;
+
+ num_layers = get_bits(gb, 3);
+ skip_bits(gb, 5); //reserved
+
+ if (num_layers > 6)
+ return AVERROR_INVALIDDATA;
+
+ for (int i = 0; i < num_layers; i++) {
+ int output_gain_is_present_flag, recon_gain_is_present;
+
+ skip_bits(gb, 4); // loudspeaker_layout
+ output_gain_is_present_flag = get_bits1(gb);
+ recon_gain_is_present = get_bits1(gb);
+ if (recon_gain)
+ recon_gain->recon_gain_present_bitmask |= recon_gain_is_present << i;
+ skip_bits(gb, 2); // reserved
+ skip_bits(gb, 8); // substream_count
+ skip_bits(gb, 8); // coupled_substream_count
+ if (output_gain_is_present_flag) {
+ skip_bits(gb, 8); // output_gain_flags & reserved
+ skip_bits(gb, 16); // output_gain
+ }
+ }
+
+ return 0;
+}
+
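+ /* Parse an Audio Element OBU to collect its audio_substream_ids, used to
+ * map audio frames to output stream indexes, and any demixing or recon gain
+ * parameter definitions. */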
+static int audio_element_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ GetBitContext gb;
+ ParamDefinition *recon_gain = NULL;
+ unsigned audio_element_type;
+ unsigned num_substreams, num_parameters;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
+
+ get_leb(&gb); // audio_element_id
+ audio_element_type = get_bits(&gb, 3);
+ skip_bits(&gb, 5); // reserved
+
+ get_leb(&gb); // codec_config_id
+ num_substreams = get_leb(&gb);
+ for (unsigned i = 0; i < num_substreams; i++) {
+ unsigned *audio_substream_id = av_dynarray2_add_nofree((void **)&c->ids, &c->nb_ids,
+ sizeof(*c->ids), NULL);
+ if (!audio_substream_id) {
+ return AVERROR(ENOMEM);
+ }
+
+ *audio_substream_id = get_leb(&gb);
+ }
+
+ num_parameters = get_leb(&gb);
+ if (num_parameters && audio_element_type != 0) {
+ av_log(ctx, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
+ " for Scene representations\n", num_parameters);
+ return AVERROR_INVALIDDATA;
+ }
+
+ for (int i = 0; i < num_parameters; i++) {
+ unsigned param_definition_type = get_leb(&gb);
+
+ if (param_definition_type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
+ return AVERROR_INVALIDDATA;
+ else if (param_definition_type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
+ ret = param_parse(ctx, &gb, param_definition_type, NULL);
+ if (ret < 0)
+ return ret;
+ skip_bits(&gb, 8); // default_w
+ } else if (param_definition_type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
+ ret = param_parse(ctx, &gb, param_definition_type, &recon_gain);
+ if (ret < 0)
+ return ret;
+ } else {
+ unsigned param_definition_size = get_leb(&gb);
+ skip_bits_long(&gb, param_definition_size * 8);
+ }
+ }
+
+ if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+ ret = scalable_channel_layout_config(ctx, &gb, recon_gain);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int label_string(GetBitContext *gb)
+{
+ int byte;
+
+ do {
+ byte = get_bits(gb, 8);
+ } while (byte);
+
+ return 0;
+}
+
+static int mix_presentation_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
+{
+ GetBitContext gb;
+ unsigned mix_presentation_id, count_label;
+ unsigned num_submixes, num_elements;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
+
+ mix_presentation_id = get_leb(&gb);
+ count_label = get_leb(&gb);
+
+ for (int i = 0; i < count_label; i++) {
+ ret = label_string(&gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < count_label; i++) {
+ ret = label_string(&gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ num_submixes = get_leb(&gb);
+ for (int i = 0; i < num_submixes; i++) {
+ unsigned num_layouts;
+
+ num_elements = get_leb(&gb);
+
+ for (int j = 0; j < num_elements; j++) {
+ unsigned rendering_config_extension_size;
+
+ get_leb(&gb); // audio_element_id
+ for (int k = 0; k < count_label; k++) {
+ ret = label_string(&gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ skip_bits(&gb, 8); // headphones_rendering_mode & reserved
+ rendering_config_extension_size = get_leb(&gb);
+ skip_bits_long(&gb, rendering_config_extension_size * 8);
+
+ ret = param_parse(ctx, &gb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL);
+ if (ret < 0)
+ return ret;
+ skip_bits(&gb, 16); // default_mix_gain
+ }
+
+ ret = param_parse(ctx, &gb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL);
+ if (ret < 0)
+ return ret;
+ get_bits(&gb, 16); // default_mix_gain
+
+ num_layouts = get_leb(&gb);
+ for (int j = 0; j < num_layouts; j++) {
+ int info_type, layout_type;
+ int byte = get_bits(&gb, 8);
+
+ layout_type = byte >> 6;
+ if (layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS ||
+ layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
+ layout_type, mix_presentation_id);
+ return AVERROR_INVALIDDATA;
+ }
+
+ info_type = get_bits(&gb, 8);
+ get_bits(&gb, 16); // integrated_loudness
+ get_bits(&gb, 16); // digital_peak
+
+ if (info_type & 1)
+ get_bits(&gb, 16); // true_peak
+
+ if (info_type & 2) {
+ unsigned int num_anchored_loudness = get_bits(&gb, 8);
+
+ for (int k = 0; k < num_anchored_loudness; k++) {
+ get_bits(&gb, 8); // anchor_element
+ get_bits(&gb, 16); // anchored_loudness
+ }
+ }
+
+ if (info_type & 0xFC) {
+ unsigned int info_type_size = get_leb(&gb);
+ skip_bits_long(&gb, info_type_size * 8);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int find_idx_by_id(AVBSFContext *ctx, unsigned id)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ for (int i = 0; i < c->nb_ids; i++) {
+ unsigned audio_substream_id = c->ids[i];
+
+ if (audio_substream_id == id)
+ return i;
+ }
+
+ av_log(ctx, AV_LOG_ERROR, "Invalid id %u\n", id);
+ return AVERROR_INVALIDDATA;
+}
+
+static int audio_frame_obu(AVBSFContext *ctx, enum IAMF_OBU_Type type, int *idx,
+ uint8_t *buf, int *start_pos, unsigned *size,
+ int id_in_bitstream)
+{
+ GetBitContext gb;
+ unsigned audio_substream_id;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf + *start_pos, *size);
+ if (ret < 0)
+ return ret;
+
+ if (id_in_bitstream) {
+ int pos;
+ audio_substream_id = get_leb(&gb);
+ pos = get_bits_count(&gb) / 8;
+ *start_pos += pos;
+ *size -= pos;
+ } else
+ audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;
+
+ ret = find_idx_by_id(ctx, audio_substream_id);
+ if (ret < 0)
+ return ret;
+
+ *idx = ret;
+
+ return 0;
+}
+
+static const ParamDefinition *get_param_definition(AVBSFContext *ctx, unsigned int parameter_id)
+{
+ const IAMFSplitContext *const c = ctx->priv_data;
+ const ParamDefinition *param_definition = NULL;
+
+ for (int i = 0; i < c->nb_param_definitions; i++)
+ if (c->param_definitions[i].param->parameter_id == parameter_id) {
+ param_definition = &c->param_definitions[i];
+ break;
+ }
+
+ return param_definition;
+}
+
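+ /* Parse a Parameter Block OBU into an AVIAMFParamDefinition, to be attached
+ * as side data to the next audio frame packet. Blocks referencing unknown
+ * parameter ids are ignored. */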
+static int parameter_block_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ GetBitContext gb;
+ const ParamDefinition *param_definition;
+ const AVIAMFParamDefinition *param;
+ AVIAMFParamDefinition *out_param = NULL;
+ unsigned int duration, constant_subblock_duration;
+ unsigned int num_subblocks;
+ unsigned int parameter_id;
+ size_t out_param_size;
+ int ret;
+
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
+
+ parameter_id = get_leb(&gb);
+
+ param_definition = get_param_definition(ctx, parameter_id);
+ if (!param_definition) {
+ ret = 0;
+ goto fail;
+ }
+
+ param = param_definition->param;
+ if (param->param_definition_mode) {
+ duration = get_leb(&gb);
+ constant_subblock_duration = get_leb(&gb);
+ if (constant_subblock_duration == 0)
+ num_subblocks = get_leb(&gb);
+ else
+ num_subblocks = duration / constant_subblock_duration;
+ } else {
+ duration = param->duration;
+ constant_subblock_duration = param->constant_subblock_duration;
+ num_subblocks = param->num_subblocks;
+ if (!num_subblocks)
+ num_subblocks = duration / constant_subblock_duration;
+ }
+
+ out_param = av_iamf_param_definition_alloc(param->param_definition_type, NULL, num_subblocks,
+ NULL, &out_param_size);
+ if (!out_param) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ out_param->parameter_id = param->parameter_id;
+ out_param->param_definition_type = param->param_definition_type;
+ out_param->parameter_rate = param->parameter_rate;
+ out_param->param_definition_mode = param->param_definition_mode;
+ out_param->duration = duration;
+ out_param->constant_subblock_duration = constant_subblock_duration;
+ out_param->num_subblocks = num_subblocks;
+
+ for (int i = 0; i < num_subblocks; i++) {
+ void *subblock = av_iamf_param_definition_get_subblock(out_param, i);
+ unsigned int subblock_duration = constant_subblock_duration;
+
+ if (param->param_definition_mode && !constant_subblock_duration)
+ subblock_duration = get_leb(&gb);
+
+ switch (param->param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+ AVIAMFMixGainParameterData *mix = subblock;
+
+ mix->animation_type = get_leb(&gb);
+ if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ ret = 0;
+ av_free(out_param);
+ goto fail;
+ }
+
+ mix->start_point_value = av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
+ if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+ mix->end_point_value = av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
+ if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+ mix->control_point_value = av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
+ mix->control_point_relative_time = get_bits(&gb, 8);
+ }
+ mix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+ AVIAMFDemixingInfoParameterData *demix = subblock;
+
+ demix->dmixp_mode = get_bits(&gb, 3);
+ skip_bits(&gb, 5); // reserved
+ demix->subblock_duration = subblock_duration;
+ break;
+ }
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+ AVIAMFReconGainParameterData *recon = subblock;
+
+ for (int i = 0; i < 6; i++) {
+ if (param_definition->recon_gain_present_bitmask & (1 << i)) {
+ unsigned int recon_gain_flags = get_leb(&gb);
+ unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);
+ recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
+ for (int j = 0; j < bitcount; j++) {
+ if (recon_gain_flags & (1 << j))
+ recon->recon_gain[i][j] = get_bits(&gb, 8);
+ }
+ }
+ }
+ recon->subblock_duration = subblock_duration;
+ break;
+ }
+ default:
+ av_assert0(0);
+ }
+ }
+
+ switch (param->param_definition_type) {
+ case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+ av_free(c->mix);
+ c->mix = out_param;
+ c->mix_size = out_param_size;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+ av_free(c->demix);
+ c->demix = out_param;
+ c->demix_size = out_param_size;
+ break;
+ case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+ av_free(c->recon);
+ c->recon = out_param;
+ c->recon_size = out_param_size;
+ break;
+ default:
+ av_assert0(0);
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0)
+ av_free(out_param);
+
+ return ret;
+}
+
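+ /* Parse an OBU header and return its total size in bytes, setting obu_size
+ * and start_pos to the payload size and offset past any trimming and
+ * extension fields. */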
+static int iamf_parse_obu_header(const uint8_t *buf, int buf_size,
+ unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
+ unsigned *skip_samples, unsigned *discard_padding)
+{
+ GetBitContext gb;
+ int ret, extension_flag, trimming, start;
+ unsigned size;
+
+ ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
+ if (ret < 0)
+ return ret;
+
+ *type = get_bits(&gb, 5);
+ /*redundant =*/ get_bits1(&gb);
+ trimming = get_bits1(&gb);
+ extension_flag = get_bits1(&gb);
+
+ *obu_size = get_leb(&gb);
+ if (*obu_size > INT_MAX)
+ return AVERROR_INVALIDDATA;
+
+ start = get_bits_count(&gb) / 8;
+
+ if (trimming) {
+ *skip_samples = get_leb(&gb); // num_samples_to_trim_at_end
+ *discard_padding = get_leb(&gb); // num_samples_to_trim_at_start
+ }
+
+ if (extension_flag) {
+ unsigned extension_bytes = get_leb(&gb);
+ if (extension_bytes > INT_MAX / 8)
+ return AVERROR_INVALIDDATA;
+ skip_bits_long(&gb, extension_bytes * 8);
+ }
+
+ if (get_bits_left(&gb) < 0)
+ return AVERROR_INVALIDDATA;
+
+ size = *obu_size + start;
+ if (size > INT_MAX)
+ return AVERROR_INVALIDDATA;
+
+ *obu_size -= get_bits_count(&gb) / 8 - start;
+ *start_pos = size - *obu_size;
+
+ return size;
+}
+
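+ /* Consume OBUs from the buffered packet: descriptor and parameter OBUs are
+ * parsed and dropped, while each Audio Frame OBU is output as a packet on
+ * the stream index derived from its audio_substream_id. */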
+static int iamf_stream_split_filter(AVBSFContext *ctx, AVPacket *out)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+ int ret = 0;
+
+ if (!c->buffer_pkt->data) {
+ ret = ff_bsf_get_packet_ref(ctx, c->buffer_pkt);
+ if (ret < 0)
+ return ret;
+ }
+
+ while (1) {
+ enum IAMF_OBU_Type type;
+ unsigned skip_samples = 0, discard_padding = 0, obu_size;
+ int len, start_pos, idx;
+
+ len = iamf_parse_obu_header(c->buffer_pkt->data,
+ c->buffer_pkt->size,
+ &obu_size, &start_pos, &type,
+ &skip_samples, &discard_padding);
+ if (len < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to read obu\n");
+ ret = len;
+ goto fail;
+ }
+
+ if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17) {
+ ret = audio_frame_obu(ctx, type, &idx,
+ c->buffer_pkt->data, &start_pos,
+ &obu_size,
+ type == IAMF_OBU_IA_AUDIO_FRAME);
+ if (ret < 0)
+ goto fail;
+ } else {
+ switch (type) {
+ case IAMF_OBU_IA_AUDIO_ELEMENT:
+ ret = audio_element_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
+ if (ret < 0)
+ goto fail;
+ break;
+ case IAMF_OBU_IA_MIX_PRESENTATION:
+ ret = mix_presentation_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
+ if (ret < 0)
+ goto fail;
+ break;
+ case IAMF_OBU_IA_PARAMETER_BLOCK:
+ ret = parameter_block_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
+ if (ret < 0)
+ goto fail;
+ break;
+ case IAMF_OBU_IA_SEQUENCE_HEADER:
+ for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
+ av_free(c->param_definitions[i].param);
+ av_freep(&c->param_definitions);
+ av_freep(&c->ids);
+ c->nb_param_definitions = 0;
+ c->nb_ids = 0;
+ // fall-through
+ case IAMF_OBU_IA_TEMPORAL_DELIMITER:
+ av_freep(&c->mix);
+ av_freep(&c->demix);
+ av_freep(&c->recon);
+ c->mix_size = 0;
+ c->demix_size = 0;
+ c->recon_size = 0;
+ break;
+ }
+
+ c->buffer_pkt->data += len;
+ c->buffer_pkt->size -= len;
+
+ if (!c->buffer_pkt->size) {
+ av_packet_unref(c->buffer_pkt);
+ ret = ff_bsf_get_packet_ref(ctx, c->buffer_pkt);
+ if (ret < 0)
+ return ret;
+ } else if (c->buffer_pkt->size < 0) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+ continue;
+ }
+
+ if (c->buffer_pkt->size > INT_MAX - len) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ ret = av_packet_ref(out, c->buffer_pkt);
+ if (ret < 0)
+ goto fail;
+
+ if (skip_samples || discard_padding) {
+ uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_SKIP_SAMPLES, 10);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ AV_WL32(side_data, skip_samples);
+ AV_WL32(side_data + 4, discard_padding);
+ }
+ if (c->mix) {
+ uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->mix, c->mix_size);
+ }
+ if (c->demix) {
+ uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->demix, c->demix_size);
+ }
+ if (c->recon) {
+ uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon_size);
+ if (!side_data)
+ return AVERROR(ENOMEM);
+ memcpy(side_data, c->recon, c->recon_size);
+ }
+
+ out->data += start_pos;
+ out->size = obu_size;
+ out->stream_index = idx + c->first_index;
+
+ c->buffer_pkt->data += len;
+ c->buffer_pkt->size -= len;
+
+ if (!c->buffer_pkt->size)
+ av_packet_unref(c->buffer_pkt);
+ else if (c->buffer_pkt->size < 0) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ return 0;
+ }
+
+fail:
+ if (ret < 0) {
+ av_packet_unref(out);
+ av_packet_unref(c->buffer_pkt);
+ }
+
+ return ret;
+}
+
+static int iamf_stream_split_init(AVBSFContext *ctx)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ c->buffer_pkt = av_packet_alloc();
+ if (!c->buffer_pkt)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static void iamf_stream_split_flush(AVBSFContext *ctx)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ av_packet_unref(c->buffer_pkt);
+
+ av_freep(&c->mix);
+ av_freep(&c->demix);
+ av_freep(&c->recon);
+ c->mix_size = 0;
+ c->demix_size = 0;
+ c->recon_size = 0;
+}
+
+static void iamf_stream_split_close(AVBSFContext *ctx)
+{
+ IAMFSplitContext *const c = ctx->priv_data;
+
+ iamf_stream_split_flush(ctx);
+
+ for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
+ av_free(c->param_definitions[i].param);
+ av_freep(&c->param_definitions);
+ c->nb_param_definitions = 0;
+
+ av_freep(&c->ids);
+ c->nb_ids = 0;
+}
+
+#define OFFSET(x) offsetof(IAMFSplitContext, x)
+#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_BSF_PARAM)
+static const AVOption iamf_stream_split_options[] = {
+ { "first_index", "First index to set stream index in output packets",
+ OFFSET(first_index), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, FLAGS },
+ { NULL }
+};
+
+static const AVClass iamf_stream_split_class = {
+ .class_name = "iamf_stream_split_bsf",
+ .item_name = av_default_item_name,
+ .option = iamf_stream_split_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID iamf_stream_split_codec_ids[] = {
+ AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE,
+ AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S24BE,
+ AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE,
+ AV_CODEC_ID_OPUS, AV_CODEC_ID_AAC,
+ AV_CODEC_ID_FLAC, AV_CODEC_ID_NONE,
+};
+
+const FFBitStreamFilter ff_iamf_stream_split_bsf = {
+ .p.name = "iamf_stream_split",
+ .p.codec_ids = iamf_stream_split_codec_ids,
+ .p.priv_class = &iamf_stream_split_class,
+ .priv_data_size = sizeof(IAMFSplitContext),
+ .init = iamf_stream_split_init,
+ .flush = iamf_stream_split_flush,
+ .close = iamf_stream_split_close,
+ .filter = iamf_stream_split_filter,
+};
--
2.42.1
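A note for readers of iamf_parse_obu_header() above: the fixed part of an IAMF OBU header is a 5-bit type, three flag bits (redundant copy, trimming status, extension) and a leb128-coded obu_size, with any trimming/extension fields counted inside obu_size (hence the subtraction before exporting the payload). The following standalone sketch restates that layout with plain byte access instead of GetBitContext/get_leb(); the helper names are made up and it is not part of the patch.

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative only: AV1-style leb128, low 7 bits per byte, LSB first. */
    static int leb128_read(const uint8_t *buf, size_t size, size_t *pos, uint64_t *out)
    {
        uint64_t value = 0;

        for (int i = 0; i < 8 && *pos < size; i++) {
            uint8_t byte = buf[(*pos)++];
            value |= (uint64_t)(byte & 0x7f) << (7 * i);
            if (!(byte & 0x80)) {
                *out = value;
                return 0;
            }
        }
        return -1; /* truncated or overlong */
    }

    /* Decode the first header byte and obu_size; returns bytes consumed. */
    static int obu_header_sketch(const uint8_t *buf, size_t size, uint64_t *obu_size)
    {
        size_t pos = 1;
        int type, redundant, trimming, extension;

        if (size < 2)
            return -1;

        type      =  buf[0] >> 3;
        redundant = (buf[0] >> 2) & 1;
        trimming  = (buf[0] >> 1) & 1;
        extension =  buf[0]       & 1;

        if (leb128_read(buf, size, &pos, obu_size) < 0)
            return -1;

        (void)type; (void)redundant; (void)trimming; (void)extension;
        return (int)pos;
    }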
* [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (9 preceding siblings ...)
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 10/13] avcodec: add an Immersive Audio Model and Formats frame split bsf James Almer
@ 2023-11-27 18:43 ` James Almer
2023-11-28 4:00 ` Gyan Doshi
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 12/13] avformat/mov: make MOVStreamContext refcounted James Almer
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 13/13] avformat/mov: add support for Immersive Audio Model and Formats in ISOBMFF James Almer
12 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2023-11-27 18:43 UTC (permalink / raw)
To: ffmpeg-devel
Packets will be passed to the bsf immediately after being generated by a
demuxer, and no further data will be read from the input until all packets
have been returned by the bsf.
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavformat/avformat.c | 47 ++++++++++++
libavformat/demux.c | 162 ++++++++++++++++++++++++++++++-----------
libavformat/internal.h | 13 +++-
libavformat/mux.c | 43 -----------
libavformat/mux.h | 11 ---
libavformat/rawenc.c | 1 +
6 files changed, 181 insertions(+), 96 deletions(-)
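As a rough sketch of the intended use (hypothetical demuxer code, not part of this patch; the filter name is a placeholder), a demuxer requests the insertion from its read_header callback and ff_read_packet() then routes every packet of that stream through the filter:

    #include "avformat.h"
    #include "internal.h"

    static int example_read_header(AVFormatContext *s)
    {
        AVStream *st = avformat_new_stream(s, NULL);
        int ret;

        if (!st)
            return AVERROR(ENOMEM);
        /* ... fill st->codecpar and st->time_base from the container ... */

        /* Ask lavf to run a bsf on this stream's packets; passing NULL as
         * the name inserts the pass-through "null" filter instead. */
        ret = ff_stream_add_bitstream_filter(st, "aac_adtstoasc", NULL);
        if (ret < 0)
            return ret;

        return 0;
    }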
diff --git a/libavformat/avformat.c b/libavformat/avformat.c
index a02ec965dd..a41c0b391c 100644
--- a/libavformat/avformat.c
+++ b/libavformat/avformat.c
@@ -1033,3 +1033,50 @@ FF_ENABLE_DEPRECATION_WARNINGS
*pb = NULL;
return ret;
}
+
+int ff_stream_add_bitstream_filter(AVStream *st, const char *name, const char *args)
+{
+ int ret;
+ const AVBitStreamFilter *bsf;
+ FFStream *const sti = ffstream(st);
+ AVBSFContext *bsfc;
+
+ av_assert0(!sti->bsfc);
+
+ if (name) {
+ bsf = av_bsf_get_by_name(name);
+ if (!bsf) {
+ av_log(NULL, AV_LOG_ERROR, "Unknown bitstream filter '%s'\n", name);
+ return AVERROR_BSF_NOT_FOUND;
+ }
+ ret = av_bsf_alloc(bsf, &bsfc);
+ } else
+ ret = av_bsf_get_null_filter(&bsfc);
+ if (ret < 0)
+ return ret;
+
+ bsfc->time_base_in = st->time_base;
+ if ((ret = avcodec_parameters_copy(bsfc->par_in, st->codecpar)) < 0) {
+ av_bsf_free(&bsfc);
+ return ret;
+ }
+
+ if (args && bsfc->filter->priv_class) {
+ if ((ret = av_set_options_string(bsfc->priv_data, args, "=", ":")) < 0) {
+ av_bsf_free(&bsfc);
+ return ret;
+ }
+ }
+
+ if ((ret = av_bsf_init(bsfc)) < 0) {
+ av_bsf_free(&bsfc);
+ return ret;
+ }
+
+ sti->bsfc = bsfc;
+
+ av_log(NULL, AV_LOG_VERBOSE,
+ "Automatically inserted bitstream filter '%s'; args='%s'\n",
+ name, args ? args : "");
+ return 1;
+}
diff --git a/libavformat/demux.c b/libavformat/demux.c
index 6f640b92b1..fb9bf9e4ac 100644
--- a/libavformat/demux.c
+++ b/libavformat/demux.c
@@ -540,6 +540,109 @@ static int update_wrap_reference(AVFormatContext *s, AVStream *st, int stream_in
return 1;
}
+static void update_timestamps(AVFormatContext *s, AVStream *st, AVPacket *pkt)
+{
+ FFStream *const sti = ffstream(st);
+
+ if (update_wrap_reference(s, st, pkt->stream_index, pkt) && sti->pts_wrap_behavior == AV_PTS_WRAP_SUB_OFFSET) {
+ // correct first time stamps to negative values
+ if (!is_relative(sti->first_dts))
+ sti->first_dts = wrap_timestamp(st, sti->first_dts);
+ if (!is_relative(st->start_time))
+ st->start_time = wrap_timestamp(st, st->start_time);
+ if (!is_relative(sti->cur_dts))
+ sti->cur_dts = wrap_timestamp(st, sti->cur_dts);
+ }
+
+ pkt->dts = wrap_timestamp(st, pkt->dts);
+ pkt->pts = wrap_timestamp(st, pkt->pts);
+
+ force_codec_ids(s, st);
+
+ /* TODO: audio: time filter; video: frame reordering (pts != dts) */
+ if (s->use_wallclock_as_timestamps)
+ pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base);
+}
+
+static int filter_packet(AVFormatContext *s, AVStream *st, AVPacket *pkt)
+{
+ FFFormatContext *const si = ffformatcontext(s);
+ FFStream *const sti = ffstream(st);
+ const AVPacket *pkt1;
+ int err;
+
+ if (!sti->bsfc) {
+ const PacketListEntry *pktl = si->raw_packet_buffer.head;
+ if (AVPACKET_IS_EMPTY(pkt))
+ return 0;
+
+ update_timestamps(s, st, pkt);
+
+ if (!pktl && sti->request_probe <= 0)
+ return 0;
+
+ err = avpriv_packet_list_put(&si->raw_packet_buffer, pkt, NULL, 0);
+ if (err < 0) {
+ av_packet_unref(pkt);
+ return err;
+ }
+
+ pkt1 = &si->raw_packet_buffer.tail->pkt;
+ si->raw_packet_buffer_size += pkt1->size;
+
+ if (sti->request_probe <= 0)
+ return 0;
+
+ return probe_codec(s, s->streams[pkt1->stream_index], pkt1);
+ }
+
+ err = av_bsf_send_packet(sti->bsfc, pkt);
+ if (err < 0) {
+ av_log(s, AV_LOG_ERROR,
+ "Failed to send packet to filter %s for stream %d\n",
+ sti->bsfc->filter->name, st->index);
+ return err;
+ }
+
+ do {
+ AVStream *out_st;
+ FFStream *out_sti;
+
+ err = av_bsf_receive_packet(sti->bsfc, pkt);
+ if (err < 0) {
+ if (err == AVERROR(EAGAIN) || err == AVERROR_EOF)
+ return 0;
+ av_log(s, AV_LOG_ERROR, "Error applying bitstream filters to an output "
+ "packet for stream #%d: %s\n", st->index, av_err2str(err));
+ if (!(s->error_recognition & AV_EF_EXPLODE) && err != AVERROR(ENOMEM))
+ continue;
+ return err;
+ }
+ out_st = s->streams[pkt->stream_index];
+ out_sti = ffstream(out_st);
+
+ update_timestamps(s, out_st, pkt);
+
+ err = avpriv_packet_list_put(&si->raw_packet_buffer, pkt, NULL, 0);
+ if (err < 0) {
+ av_packet_unref(pkt);
+ return err;
+ }
+
+ pkt1 = &si->raw_packet_buffer.tail->pkt;
+ si->raw_packet_buffer_size += pkt1->size;
+
+ if (out_sti->request_probe <= 0)
+ continue;
+
+ err = probe_codec(s, out_st, pkt1);
+ if (err < 0)
+ return err;
+ } while (1);
+
+ return 0;
+}
+
int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
{
FFFormatContext *const si = ffformatcontext(s);
@@ -557,9 +660,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
for (;;) {
PacketListEntry *pktl = si->raw_packet_buffer.head;
- AVStream *st;
- FFStream *sti;
- const AVPacket *pkt1;
if (pktl) {
AVStream *const st = s->streams[pktl->pkt.stream_index];
@@ -582,16 +682,27 @@ FF_ENABLE_DEPRECATION_WARNINGS
We must re-call the demuxer to get the real packet. */
if (err == FFERROR_REDO)
continue;
- if (!pktl || err == AVERROR(EAGAIN))
+ if (err == AVERROR(EAGAIN))
return err;
for (unsigned i = 0; i < s->nb_streams; i++) {
AVStream *const st = s->streams[i];
FFStream *const sti = ffstream(st);
+ int ret;
+
+ // Drain buffered packets in the bsf context on eof
+ if (err == AVERROR_EOF)
+ if ((ret = filter_packet(s, st, pkt)) < 0)
+ return ret;
+ pktl = si->raw_packet_buffer.head;
+ if (!pktl)
+ continue;
if (sti->probe_packets || sti->request_probe > 0)
- if ((err = probe_codec(s, st, NULL)) < 0)
- return err;
+ if ((ret = probe_codec(s, st, NULL)) < 0)
+ return ret;
av_assert0(sti->request_probe <= 0);
}
+ if (!pktl)
+ return err;
continue;
}
@@ -616,42 +727,11 @@ FF_ENABLE_DEPRECATION_WARNINGS
av_assert0(pkt->stream_index < (unsigned)s->nb_streams &&
"Invalid stream index.\n");
- st = s->streams[pkt->stream_index];
- sti = ffstream(st);
-
- if (update_wrap_reference(s, st, pkt->stream_index, pkt) && sti->pts_wrap_behavior == AV_PTS_WRAP_SUB_OFFSET) {
- // correct first time stamps to negative values
- if (!is_relative(sti->first_dts))
- sti->first_dts = wrap_timestamp(st, sti->first_dts);
- if (!is_relative(st->start_time))
- st->start_time = wrap_timestamp(st, st->start_time);
- if (!is_relative(sti->cur_dts))
- sti->cur_dts = wrap_timestamp(st, sti->cur_dts);
- }
-
- pkt->dts = wrap_timestamp(st, pkt->dts);
- pkt->pts = wrap_timestamp(st, pkt->pts);
-
- force_codec_ids(s, st);
-
- /* TODO: audio: time filter; video: frame reordering (pts != dts) */
- if (s->use_wallclock_as_timestamps)
- pkt->dts = pkt->pts = av_rescale_q(av_gettime(), AV_TIME_BASE_Q, st->time_base);
-
- if (!pktl && sti->request_probe <= 0)
- return 0;
-
- err = avpriv_packet_list_put(&si->raw_packet_buffer,
- pkt, NULL, 0);
- if (err < 0) {
- av_packet_unref(pkt);
- return err;
- }
- pkt1 = &si->raw_packet_buffer.tail->pkt;
- si->raw_packet_buffer_size += pkt1->size;
-
- if ((err = probe_codec(s, st, pkt1)) < 0)
+ err = filter_packet(s, s->streams[pkt->stream_index], pkt);
+ if (err < 0)
return err;
+ if (!AVPACKET_IS_EMPTY(pkt))
+ return 0;
}
}
diff --git a/libavformat/internal.h b/libavformat/internal.h
index c6181683ef..0a5d512697 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -212,7 +212,7 @@ typedef struct FFStream {
/**
* bitstream filter to run on stream
* - encoding: Set by muxer using ff_stream_add_bitstream_filter
- * - decoding: unused
+ * - decoding: Set by demuxer using ff_stream_add_bitstream_filter
*/
struct AVBSFContext *bsfc;
@@ -757,4 +757,15 @@ int ff_match_url_ext(const char *url, const char *extensions);
struct FFOutputFormat;
void avpriv_register_devices(const struct FFOutputFormat * const o[], const AVInputFormat * const i[]);
+/**
+ * Add a bitstream filter to a stream.
+ *
+ * @param st output stream to add a filter to
+ * @param name the name of the filter to add
+ * @param args filter-specific argument string
+ * @return >0 on success;
+ * AVERROR code on failure
+ */
+int ff_stream_add_bitstream_filter(AVStream *st, const char *name, const char *args);
+
#endif /* AVFORMAT_INTERNAL_H */
diff --git a/libavformat/mux.c b/libavformat/mux.c
index de10d2c008..4bc8627617 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c
@@ -1344,49 +1344,6 @@ int av_get_output_timestamp(struct AVFormatContext *s, int stream,
return 0;
}
-int ff_stream_add_bitstream_filter(AVStream *st, const char *name, const char *args)
-{
- int ret;
- const AVBitStreamFilter *bsf;
- FFStream *const sti = ffstream(st);
- AVBSFContext *bsfc;
-
- av_assert0(!sti->bsfc);
-
- if (!(bsf = av_bsf_get_by_name(name))) {
- av_log(NULL, AV_LOG_ERROR, "Unknown bitstream filter '%s'\n", name);
- return AVERROR_BSF_NOT_FOUND;
- }
-
- if ((ret = av_bsf_alloc(bsf, &bsfc)) < 0)
- return ret;
-
- bsfc->time_base_in = st->time_base;
- if ((ret = avcodec_parameters_copy(bsfc->par_in, st->codecpar)) < 0) {
- av_bsf_free(&bsfc);
- return ret;
- }
-
- if (args && bsfc->filter->priv_class) {
- if ((ret = av_set_options_string(bsfc->priv_data, args, "=", ":")) < 0) {
- av_bsf_free(&bsfc);
- return ret;
- }
- }
-
- if ((ret = av_bsf_init(bsfc)) < 0) {
- av_bsf_free(&bsfc);
- return ret;
- }
-
- sti->bsfc = bsfc;
-
- av_log(NULL, AV_LOG_VERBOSE,
- "Automatically inserted bitstream filter '%s'; args='%s'\n",
- name, args ? args : "");
- return 1;
-}
-
int ff_write_chained(AVFormatContext *dst, int dst_stream, AVPacket *pkt,
AVFormatContext *src, int interleave)
{
diff --git a/libavformat/mux.h b/libavformat/mux.h
index b9ec75641d..ab3e8edd60 100644
--- a/libavformat/mux.h
+++ b/libavformat/mux.h
@@ -171,17 +171,6 @@ const AVPacket *ff_interleaved_peek(AVFormatContext *s, int stream);
int ff_get_muxer_ts_offset(AVFormatContext *s, int stream_index, int64_t *offset);
-/**
- * Add a bitstream filter to a stream.
- *
- * @param st output stream to add a filter to
- * @param name the name of the filter to add
- * @param args filter-specific argument string
- * @return >0 on success;
- * AVERROR code on failure
- */
-int ff_stream_add_bitstream_filter(AVStream *st, const char *name, const char *args);
-
/**
* Write a packet to another muxer than the one the user originally
* intended. Useful when chaining muxers, where one muxer internally
diff --git a/libavformat/rawenc.c b/libavformat/rawenc.c
index f916db13a2..ec31d76d88 100644
--- a/libavformat/rawenc.c
+++ b/libavformat/rawenc.c
@@ -25,6 +25,7 @@
#include "libavutil/intreadwrite.h"
#include "avformat.h"
+#include "internal.h"
#include "rawenc.h"
#include "mux.h"
--
2.42.1
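For context on the av_bsf_*() calls that drive filter_packet() above, this is the generic public-API send/drain pattern it follows; a standalone sketch with minimal error handling, not lavf-internal code:

    #include "libavcodec/bsf.h"

    /* Feed one input packet to a bsf and hand every output packet to a
     * callback. av_bsf_send_packet() takes ownership of in's payload;
     * passing in == NULL drains the filter at EOF. */
    static int bsf_filter_one(AVBSFContext *bsfc, AVPacket *in, AVPacket *out,
                              int (*deliver)(void *opaque, AVPacket *pkt),
                              void *opaque)
    {
        int ret = av_bsf_send_packet(bsfc, in);
        if (ret < 0)
            return ret;

        while ((ret = av_bsf_receive_packet(bsfc, out)) >= 0) {
            ret = deliver(opaque, out);
            av_packet_unref(out);
            if (ret < 0)
                return ret;
        }

        /* EAGAIN: the filter wants more input; EOF: it is fully drained. */
        return (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) ? 0 : ret;
    }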
* Re: [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios James Almer
@ 2023-11-28 4:00 ` Gyan Doshi
2023-11-29 21:55 ` James Almer
0 siblings, 1 reply; 29+ messages in thread
From: Gyan Doshi @ 2023-11-28 4:00 UTC (permalink / raw)
To: ffmpeg-devel
On 2023-11-28 12:13 am, James Almer wrote:
> Packets will be passed to the bsf immediately after being generated by a
> demuxer, and no further data will be read from the input until all packets
> have been returned by the bsf.
Do you plan to add a lib/cli option for user-specified insertions?
Will be useful for something like dts2pts in some cases.
Regards,
Gyan
* Re: [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios
2023-11-28 4:00 ` Gyan Doshi
@ 2023-11-29 21:55 ` James Almer
2023-11-30 4:01 ` Gyan Doshi
0 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2023-11-29 21:55 UTC (permalink / raw)
To: ffmpeg-devel
On 11/28/2023 1:00 AM, Gyan Doshi wrote:
>
>
> On 2023-11-28 12:13 am, James Almer wrote:
>> Packets will be passed to the bsf immediately after being generated by a
>> demuxer, and no further data will be read from the input until all
>> packets
>> have been returned by the bsf.
>
> Do you plan to add a lib/cli option for user-specified insertions?
No. This is internal to lavf, same as the muxing implementation.
> Will be useful for something like dts2pts in some cases.
Is the existing -bsfs option not enough?
* Re: [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios
2023-11-29 21:55 ` James Almer
@ 2023-11-30 4:01 ` Gyan Doshi
0 siblings, 0 replies; 29+ messages in thread
From: Gyan Doshi @ 2023-11-30 4:01 UTC (permalink / raw)
To: ffmpeg-devel
On 2023-11-30 03:25 am, James Almer wrote:
> On 11/28/2023 1:00 AM, Gyan Doshi wrote:
>>
>>
>> On 2023-11-28 12:13 am, James Almer wrote:
>>> Packets will be passed to the bsf immediately after being generated
>>> by a
>>> demuxer, and no further data will be read from the input until all
>>> packets
>>> have been returned by the bsf.
>>
>> Do you plan to add a lib/cli option for user-specified insertions?
>
> No. This is internal to lavf, same as the muxing implementation.
>
>> Will be useful for something like dts2pts in some cases.
>
> Is the existing -bsfs option not enough?
It's not flagged with OPT_INPUT so can't be set for inputs.
Regards,
Gyan
* [FFmpeg-devel] [PATCH 12/13] avformat/mov: make MOVStreamContext refcounted
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (10 preceding siblings ...)
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios James Almer
@ 2023-11-27 18:43 ` James Almer
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 13/13] avformat/mov: add support for Immersive Audio Model and Formats in ISOBMFF James Almer
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-27 18:43 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavformat/mov.c | 257 ++++++++++++++++++++++++++--------------------
1 file changed, 145 insertions(+), 112 deletions(-)
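The mechanism in isolation: the per-stream context is wrapped in an AVBufferRef created with a custom free callback, so multiple holders can share it and the last av_buffer_unref() releases it. A minimal sketch of that pattern with a made-up context type (not the mov.c code itself):

    #include <stdint.h>

    #include "libavutil/buffer.h"
    #include "libavutil/error.h"
    #include "libavutil/mem.h"

    typedef struct ExampleStreamContext {
        uint8_t *extradata;
    } ExampleStreamContext;

    static void example_ctx_free(void *opaque, uint8_t *data)
    {
        ExampleStreamContext *ctx = (ExampleStreamContext *)data;

        av_freep(&ctx->extradata); /* release everything the context owns */
        av_free(data);             /* in this sketch the callback also frees the struct */
    }

    static int example_ctx_alloc(AVBufferRef **out)
    {
        ExampleStreamContext *ctx = av_mallocz(sizeof(*ctx));

        if (!ctx)
            return AVERROR(ENOMEM);

        *out = av_buffer_create((uint8_t *)ctx, sizeof(*ctx),
                                example_ctx_free, NULL, 0);
        if (!*out) {
            av_free(ctx);
            return AVERROR(ENOMEM);
        }
        return 0;
    }

Another holder shares the context with av_buffer_ref(); each holder calls av_buffer_unref() on its own reference and the free callback runs when the last one is released.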
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 34ca8095c2..d1f214a441 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -31,6 +31,7 @@
#include "libavutil/attributes.h"
#include "libavutil/bprint.h"
+#include "libavutil/buffer.h"
#include "libavutil/channel_layout.h"
#include "libavutil/dict_internal.h"
#include "libavutil/internal.h"
@@ -184,10 +185,20 @@ static int mov_read_mac_string(MOVContext *c, AVIOContext *pb, int len,
return p - dst;
}
+static void mov_free_stream_context(void *opaque, uint8_t *data);
+
+static inline MOVStreamContext *mov_get_stream_context(const AVStream *st)
+{
+ AVBufferRef *buf = st->priv_data;
+
+ return (MOVStreamContext *)buf->data;
+}
+
static int mov_read_covr(MOVContext *c, AVIOContext *pb, int type, int len)
{
AVStream *st;
- MOVStreamContext *sc;
+ AVBufferRef *buf;
+ uint8_t *data;
enum AVCodecID id;
int ret;
@@ -201,16 +212,22 @@ static int mov_read_covr(MOVContext *c, AVIOContext *pb, int type, int len)
return 0;
}
- sc = av_mallocz(sizeof(*sc));
- if (!sc)
+ data = av_mallocz(sizeof(MOVStreamContext));
+ if (!data)
+ return AVERROR(ENOMEM);
+ buf = av_buffer_create(data, sizeof(MOVStreamContext), mov_free_stream_context, c->fc, 0);
+ if (!buf) {
+ av_free(data);
return AVERROR(ENOMEM);
+ }
+
ret = ff_add_attached_pic(c->fc, NULL, pb, NULL, len);
if (ret < 0) {
- av_free(sc);
+ av_buffer_unref(&buf);
return ret;
}
st = c->fc->streams[c->fc->nb_streams - 1];
- st->priv_data = sc;
+ st->priv_data = buf;
if (st->attached_pic.size >= 8 && id != AV_CODEC_ID_BMP) {
if (AV_RB64(st->attached_pic.data) == 0x89504e470d0a1a0a) {
@@ -590,7 +607,7 @@ static int mov_read_dref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_rb32(pb); // version + flags
entries = avio_rb32(pb);
@@ -1369,7 +1386,7 @@ static int64_t get_frag_time(AVFormatContext *s, AVStream *dst_st,
MOVFragmentIndex *frag_index, int index)
{
MOVFragmentStreamInfo * frag_stream_info;
- MOVStreamContext *sc = dst_st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(dst_st);
int64_t timestamp;
int i, j;
@@ -1567,7 +1584,7 @@ static int mov_read_mdhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->time_scale) {
av_log(c->fc, AV_LOG_ERROR, "Multiple mdhd?\n");
@@ -1710,7 +1727,7 @@ static int mov_read_pcmc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return AVERROR_INVALIDDATA;
st = fc->streams[fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->format == MOV_MP4_FPCM_TAG) {
switch (pcm_sample_size) {
@@ -2213,7 +2230,7 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -2546,7 +2563,7 @@ static int mov_parse_stsd_data(MOVContext *c, AVIOContext *pb,
if (ret < 0)
return ret;
if (size > 16) {
- MOVStreamContext *tmcd_ctx = st->priv_data;
+ MOVStreamContext *tmcd_ctx = mov_get_stream_context(st);
int val;
val = AV_RB32(st->codecpar->extradata + 4);
tmcd_ctx->tmcd_flags = val;
@@ -2712,7 +2729,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
av_assert0 (c->fc->nb_streams >= 1);
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
for (pseudo_stream_id = 0;
pseudo_stream_id < entries && !pb->eof_reached;
@@ -2810,7 +2827,7 @@ static int mov_read_stsd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
sc->stsd_version = avio_r8(pb);
avio_rb24(pb); /* flags */
@@ -2875,7 +2892,7 @@ static int mov_read_stsc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -2971,7 +2988,7 @@ static int mov_read_stps(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_rb32(pb); // version + flags
@@ -3009,7 +3026,7 @@ static int mov_read_stss(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
sti = ffstream(st);
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3060,7 +3077,7 @@ static int mov_read_stsz(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3149,7 +3166,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3260,7 +3277,7 @@ static int mov_read_sdtp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3305,7 +3322,7 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3375,7 +3392,7 @@ static int mov_read_sgpd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
version = avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3431,7 +3448,7 @@ static int mov_read_sbgp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
version = avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -3532,7 +3549,7 @@ static int find_prev_closest_index(AVStream *st,
int64_t* ctts_index,
int64_t* ctts_sample)
{
- MOVStreamContext *msc = st->priv_data;
+ MOVStreamContext *msc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
AVIndexEntry *e_keep = sti->index_entries;
int nb_keep = sti->nb_index_entries;
@@ -3705,7 +3722,7 @@ static int64_t add_ctts_entry(MOVCtts** ctts_data, unsigned int* ctts_count, uns
#define MAX_REORDER_DELAY 16
static void mov_estimate_video_delay(MOVContext *c, AVStream* st)
{
- MOVStreamContext *msc = st->priv_data;
+ MOVStreamContext *msc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
int ctts_ind = 0;
int ctts_sample = 0;
@@ -3813,7 +3830,7 @@ static void mov_current_sample_set(MOVStreamContext *sc, int current_sample)
*/
static void mov_fix_index(MOVContext *mov, AVStream *st)
{
- MOVStreamContext *msc = st->priv_data;
+ MOVStreamContext *msc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
AVIndexEntry *e_old = sti->index_entries;
int nb_old = sti->nb_index_entries;
@@ -4127,7 +4144,7 @@ static int build_open_gop_key_points(AVStream *st)
int k;
int sample_id = 0;
uint32_t cra_index;
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
if (st->codecpar->codec_id != AV_CODEC_ID_HEVC || !sc->sync_group_count)
return 0;
@@ -4187,7 +4204,7 @@ static int build_open_gop_key_points(AVStream *st)
static void mov_build_index(MOVContext *mov, AVStream *st)
{
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
int64_t current_offset;
int64_t current_dts = 0;
@@ -4627,7 +4644,9 @@ static void fix_timescale(MOVContext *c, MOVStreamContext *sc)
static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
AVStream *st;
+ AVBufferRef *buf;
MOVStreamContext *sc;
+ uint8_t *data;
int ret;
if (c->is_still_picture_avif) {
@@ -4637,10 +4656,16 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
st = avformat_new_stream(c->fc, NULL);
if (!st) return AVERROR(ENOMEM);
st->id = -1;
- sc = av_mallocz(sizeof(MOVStreamContext));
- if (!sc) return AVERROR(ENOMEM);
+ data = av_mallocz(sizeof(MOVStreamContext));
+ if (!data) return AVERROR(ENOMEM);
+ buf = av_buffer_create(data, sizeof(MOVStreamContext), mov_free_stream_context, c->fc, 0);
+ if (!buf) {
+ av_free(data);
+ return AVERROR(ENOMEM);
+ }
- st->priv_data = sc;
+ st->priv_data = buf;
+ sc = mov_get_stream_context(st);
st->codecpar->codec_type = AVMEDIA_TYPE_DATA;
sc->ffindex = st->index;
c->trak_index = st->index;
@@ -4836,7 +4861,7 @@ static int mov_read_custom(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
for (i = 0; i < 3; i++) {
uint8_t **p;
@@ -4936,7 +4961,7 @@ static int avif_add_stream(MOVContext *c, int item_id)
st->time_base.num = st->time_base.den = 1;
st->nb_frames = 1;
sc->time_scale = 1;
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
sc->pb = c->fc->pb;
sc->pb_is_copied = 1;
@@ -5025,7 +5050,7 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
// Each stream (trak) should have exactly 1 tkhd. This catches bad files and
// avoids corrupting AVStreams mapped to an earlier tkhd.
@@ -5227,7 +5252,7 @@ static int mov_read_tfdt(MOVContext *c, AVIOContext *pb, MOVAtom atom)
av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", frag->track_id);
return 0;
}
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->pseudo_stream_id + 1 != frag->stsd_id && sc->pseudo_stream_id != -1)
return 0;
version = avio_r8(pb);
@@ -5281,7 +5306,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", frag->track_id);
return 0;
}
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->pseudo_stream_id+1 != frag->stsd_id && sc->pseudo_stream_id != -1)
return 0;
@@ -5584,7 +5609,7 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
}
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
timescale = av_make_q(1, avio_rb32(pb));
@@ -5667,14 +5692,14 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
si = &item->stream_info[j];
if (si->sidx_pts != AV_NOPTS_VALUE) {
ref_st = c->fc->streams[j];
- ref_sc = ref_st->priv_data;
+ ref_sc = mov_get_stream_context(ref_st);
break;
}
}
}
if (ref_st) for (i = 0; i < c->fc->nb_streams; i++) {
st = c->fc->streams[i];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (!sc->has_sidx) {
st->duration = sc->track_end = av_rescale(ref_st->duration, sc->time_scale, ref_sc->time_scale);
}
@@ -5770,7 +5795,7 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1 || c->ignore_editlist)
return 0;
- sc = c->fc->streams[c->fc->nb_streams-1]->priv_data;
+ sc = mov_get_stream_context(c->fc->streams[c->fc->nb_streams-1]);
version = avio_r8(pb); /* version */
avio_rb24(pb); /* flags */
@@ -5837,7 +5862,7 @@ static int mov_read_tmcd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return AVERROR_INVALIDDATA;
- sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+ sc = mov_get_stream_context(c->fc->streams[c->fc->nb_streams - 1]);
sc->timecode_track = avio_rb32(pb);
return 0;
}
@@ -5894,7 +5919,7 @@ static int mov_read_smdm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return AVERROR_INVALIDDATA;
- sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+ sc = mov_get_stream_context(c->fc->streams[c->fc->nb_streams - 1]);
if (atom.size < 5) {
av_log(c->fc, AV_LOG_ERROR, "Empty Mastering Display Metadata box\n");
@@ -5942,7 +5967,7 @@ static int mov_read_mdcv(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return AVERROR_INVALIDDATA;
- sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+ sc = mov_get_stream_context(c->fc->streams[c->fc->nb_streams - 1]);
if (atom.size < 24 || sc->mastering) {
av_log(c->fc, AV_LOG_ERROR, "Invalid Mastering Display Color Volume box\n");
@@ -5978,7 +6003,7 @@ static int mov_read_coll(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return AVERROR_INVALIDDATA;
- sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+ sc = mov_get_stream_context(c->fc->streams[c->fc->nb_streams - 1]);
if (atom.size < 5) {
av_log(c->fc, AV_LOG_ERROR, "Empty Content Light Level box\n");
@@ -6014,7 +6039,7 @@ static int mov_read_clli(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return AVERROR_INVALIDDATA;
- sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+ sc = mov_get_stream_context(c->fc->streams[c->fc->nb_streams - 1]);
if (atom.size < 4) {
av_log(c->fc, AV_LOG_ERROR, "Empty Content Light Level Info box\n");
@@ -6047,7 +6072,7 @@ static int mov_read_st3d(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (atom.size < 5) {
av_log(c->fc, AV_LOG_ERROR, "Empty stereoscopic video box\n");
@@ -6097,7 +6122,7 @@ static int mov_read_sv3d(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (atom.size < 8) {
av_log(c->fc, AV_LOG_ERROR, "Empty spherical video box\n");
@@ -6308,7 +6333,7 @@ static int mov_read_uuid(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
ret = ffio_read_size(pb, uuid, AV_UUID_LEN);
if (ret < 0)
@@ -6420,7 +6445,7 @@ static int mov_read_frma(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
switch (sc->format)
{
@@ -6473,7 +6498,7 @@ static int get_current_encryption_info(MOVContext *c, MOVEncryptionIndex **encry
}
if (i == c->fc->nb_streams)
return 0;
- *sc = st->priv_data;
+ *sc = mov_get_stream_context(st);
if (!frag_stream_info->encryption_index) {
// If this stream isn't encrypted, don't create the index.
@@ -6491,7 +6516,7 @@ static int get_current_encryption_info(MOVContext *c, MOVEncryptionIndex **encry
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams - 1];
- *sc = st->priv_data;
+ *sc = mov_get_stream_context(st);
if (!(*sc)->cenc.encryption_index) {
// If this stream isn't encrypted, don't create the index.
@@ -6984,7 +7009,7 @@ static int mov_read_schm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->pseudo_stream_id != 0) {
av_log(c->fc, AV_LOG_ERROR, "schm boxes are only supported in first sample descriptor\n");
@@ -7016,7 +7041,7 @@ static int mov_read_tenc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
if (c->fc->nb_streams < 1)
return 0;
st = c->fc->streams[c->fc->nb_streams-1];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->pseudo_stream_id != 0) {
av_log(c->fc, AV_LOG_ERROR, "tenc atom are only supported in first sample descriptor\n");
@@ -8198,7 +8223,7 @@ static void mov_read_chapters(AVFormatContext *s)
}
sti = ffstream(st);
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
cur_pos = avio_tell(sc->pb);
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
@@ -8288,7 +8313,7 @@ static int parse_timecode_in_framenum_format(AVFormatContext *s, AVStream *st,
static int mov_read_rtmd_track(AVFormatContext *s, AVStream *st)
{
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
char buf[AV_TIMECODE_STR_SIZE];
int64_t cur_pos = avio_tell(sc->pb);
@@ -8314,7 +8339,7 @@ static int mov_read_rtmd_track(AVFormatContext *s, AVStream *st)
static int mov_read_timecode_track(AVFormatContext *s, AVStream *st)
{
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
int flags = 0;
int64_t cur_pos = avio_tell(sc->pb);
@@ -8371,6 +8396,56 @@ static void mov_free_encryption_index(MOVEncryptionIndex **index) {
av_freep(index);
}
+static void mov_free_stream_context(void *opaque, uint8_t *data)
+{
+ AVFormatContext *s = opaque;
+ MOVStreamContext *sc = (MOVStreamContext *)data;
+
+ av_freep(&sc->ctts_data);
+ for (int i = 0; i < sc->drefs_count; i++) {
+ av_freep(&sc->drefs[i].path);
+ av_freep(&sc->drefs[i].dir);
+ }
+ av_freep(&sc->drefs);
+
+ sc->drefs_count = 0;
+
+ if (!sc->pb_is_copied)
+ ff_format_io_close(s, &sc->pb);
+
+ sc->pb = NULL;
+ av_freep(&sc->chunk_offsets);
+ av_freep(&sc->stsc_data);
+ av_freep(&sc->sample_sizes);
+ av_freep(&sc->keyframes);
+ av_freep(&sc->stts_data);
+ av_freep(&sc->sdtp_data);
+ av_freep(&sc->stps_data);
+ av_freep(&sc->elst_data);
+ av_freep(&sc->rap_group);
+ av_freep(&sc->sync_group);
+ av_freep(&sc->sgpd_sync);
+ av_freep(&sc->sample_offsets);
+ av_freep(&sc->open_key_samples);
+ av_freep(&sc->display_matrix);
+ av_freep(&sc->index_ranges);
+
+ if (sc->extradata)
+ for (int i = 0; i < sc->stsd_count; i++)
+ av_free(sc->extradata[i]);
+ av_freep(&sc->extradata);
+ av_freep(&sc->extradata_size);
+
+ mov_free_encryption_index(&sc->cenc.encryption_index);
+ av_encryption_info_free(sc->cenc.default_encrypted_sample);
+ av_aes_ctr_free(sc->cenc.aes_ctr);
+
+ av_freep(&sc->stereo3d);
+ av_freep(&sc->spherical);
+ av_freep(&sc->mastering);
+ av_freep(&sc->coll);
+}
+
static int mov_read_close(AVFormatContext *s)
{
MOVContext *mov = s->priv_data;
@@ -8378,54 +8453,12 @@ static int mov_read_close(AVFormatContext *s)
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
if (!sc)
continue;
- av_freep(&sc->ctts_data);
- for (j = 0; j < sc->drefs_count; j++) {
- av_freep(&sc->drefs[j].path);
- av_freep(&sc->drefs[j].dir);
- }
- av_freep(&sc->drefs);
-
- sc->drefs_count = 0;
-
- if (!sc->pb_is_copied)
- ff_format_io_close(s, &sc->pb);
-
- sc->pb = NULL;
- av_freep(&sc->chunk_offsets);
- av_freep(&sc->stsc_data);
- av_freep(&sc->sample_sizes);
- av_freep(&sc->keyframes);
- av_freep(&sc->stts_data);
- av_freep(&sc->sdtp_data);
- av_freep(&sc->stps_data);
- av_freep(&sc->elst_data);
- av_freep(&sc->rap_group);
- av_freep(&sc->sync_group);
- av_freep(&sc->sgpd_sync);
- av_freep(&sc->sample_offsets);
- av_freep(&sc->open_key_samples);
- av_freep(&sc->display_matrix);
- av_freep(&sc->index_ranges);
-
- if (sc->extradata)
- for (j = 0; j < sc->stsd_count; j++)
- av_free(sc->extradata[j]);
- av_freep(&sc->extradata);
- av_freep(&sc->extradata_size);
-
- mov_free_encryption_index(&sc->cenc.encryption_index);
- av_encryption_info_free(sc->cenc.default_encrypted_sample);
- av_aes_ctr_free(sc->cenc.aes_ctr);
-
- av_freep(&sc->stereo3d);
- av_freep(&sc->spherical);
- av_freep(&sc->mastering);
- av_freep(&sc->coll);
+ av_buffer_unref((AVBufferRef **)&st->priv_data);
}
av_freep(&mov->dv_demux);
@@ -8464,7 +8497,7 @@ static int tmcd_is_referenced(AVFormatContext *s, int tmcd_id)
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
sc->timecode_track == tmcd_id)
@@ -8644,7 +8677,7 @@ static int mov_read_header(AVFormatContext *s)
/* copy timecode metadata from tmcd tracks to the related video streams */
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
if (sc->timecode_track > 0) {
AVDictionaryEntry *tcr;
int tmcd_st_id = -1;
@@ -8665,7 +8698,7 @@ static int mov_read_header(AVFormatContext *s)
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
FFStream *const sti = ffstream(st);
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
fix_timescale(mov, sc);
if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
st->codecpar->codec_id == AV_CODEC_ID_AAC) {
@@ -8695,7 +8728,7 @@ static int mov_read_header(AVFormatContext *s)
if (mov->trex_data) {
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
if (st->duration > 0) {
/* Akin to sc->data_size * 8 * sc->time_scale / st->duration but accounting for overflows. */
st->codecpar->bit_rate = av_rescale(sc->data_size, ((int64_t) sc->time_scale) * 8, st->duration);
@@ -8713,7 +8746,7 @@ static int mov_read_header(AVFormatContext *s)
if (mov->use_mfra_for > 0) {
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
if (sc->duration_for_fps > 0) {
/* Akin to sc->data_size * 8 * sc->time_scale / sc->duration_for_fps but accounting for overflows. */
st->codecpar->bit_rate = av_rescale(sc->data_size, ((int64_t) sc->time_scale) * 8, sc->duration_for_fps);
@@ -8738,7 +8771,7 @@ static int mov_read_header(AVFormatContext *s)
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
switch (st->codecpar->codec_type) {
case AVMEDIA_TYPE_AUDIO:
@@ -8809,7 +8842,7 @@ static AVIndexEntry *mov_find_next_sample(AVFormatContext *s, AVStream **st)
for (i = 0; i < s->nb_streams; i++) {
AVStream *avst = s->streams[i];
FFStream *const avsti = ffstream(avst);
- MOVStreamContext *msc = avst->priv_data;
+ MOVStreamContext *msc = mov_get_stream_context(avst);
if (msc->pb && msc->current_sample < avsti->nb_index_entries) {
AVIndexEntry *current_sample = &avsti->index_entries[msc->current_sample];
int64_t dts = av_rescale(current_sample->timestamp, AV_TIME_BASE, msc->time_scale);
@@ -8934,7 +8967,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
return ret;
goto retry;
}
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
/* must be done just before reading, to avoid infinite loop on sample */
current_index = sc->current_index;
mov_current_sample_inc(sc);
@@ -9100,7 +9133,7 @@ static int is_open_key_sample(const MOVStreamContext *sc, int sample)
*/
static int can_seek_to_key_sample(AVStream *st, int sample, int64_t requested_pts)
{
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
int64_t key_sample_dts, key_sample_pts;
@@ -9126,7 +9159,7 @@ static int can_seek_to_key_sample(AVStream *st, int sample, int64_t requested_pt
static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp, int flags)
{
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
int sample, time_sample, ret;
unsigned int i;
@@ -9188,7 +9221,7 @@ static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp,
static int64_t mov_get_skip_samples(AVStream *st, int sample)
{
- MOVStreamContext *sc = st->priv_data;
+ MOVStreamContext *sc = mov_get_stream_context(st);
FFStream *const sti = ffstream(st);
int64_t first_ts = sti->index_entries[0].timestamp;
int64_t ts = sti->index_entries[sample].timestamp;
@@ -9242,7 +9275,7 @@ static int mov_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
for (i = 0; i < s->nb_streams; i++) {
MOVStreamContext *sc;
st = s->streams[i];
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
mov_current_sample_set(sc, 0);
}
while (1) {
@@ -9250,7 +9283,7 @@ static int mov_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
AVIndexEntry *entry = mov_find_next_sample(s, &st);
if (!entry)
return AVERROR_INVALIDDATA;
- sc = st->priv_data;
+ sc = mov_get_stream_context(st);
if (sc->ffindex == stream_index && sc->current_sample == sample)
break;
mov_current_sample_inc(sc);
--
2.42.1
* [FFmpeg-devel] [PATCH 13/13] avformat/mov: add support for Immersive Audio Model and Formats in ISOBMFF
2023-11-26 1:28 [FFmpeg-devel] [PATCH v5 0/9] avformat: introduce AVStreamGroup James Almer
` (11 preceding siblings ...)
2023-11-27 18:43 ` [FFmpeg-devel] [PATCH 12/13] avformat/mov: make MOVStreamContext refcounted James Almer
@ 2023-11-27 18:43 ` James Almer
12 siblings, 0 replies; 29+ messages in thread
From: James Almer @ 2023-11-27 18:43 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavformat/Makefile | 3 +-
libavformat/isom.h | 6 +
libavformat/mov.c | 290 +++++++++++++++++++++++++++++++++++++++----
3 files changed, 272 insertions(+), 27 deletions(-)
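As a quick way to inspect what this change exposes, here is a minimal, hypothetical test program built against the AVStreamGroup API from the earlier patches in this set (not part of the patch; error handling kept to a minimum):

#include <inttypes.h>
#include <stdio.h>
#include <libavformat/avformat.h>

int main(int argc, char **argv)
{
    AVFormatContext *fmt = NULL;

    if (argc < 2 || avformat_open_input(&fmt, argv[1], NULL, NULL) < 0)
        return 1;
    if (avformat_find_stream_info(fmt, NULL) < 0) {
        avformat_close_input(&fmt);
        return 1;
    }

    /* Each IAMF audio element and mix presentation parsed from the
     * descriptors becomes one stream group on the format context. */
    for (unsigned i = 0; i < fmt->nb_stream_groups; i++) {
        const AVStreamGroup *stg = fmt->stream_groups[i];
        const char *type =
            stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT    ? "audio element" :
            stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION ? "mix presentation" :
                                                                        "other";

        printf("group %u: id %" PRId64 ", %s, %u streams\n",
               i, (int64_t)stg->id, type, stg->nb_streams);
    }

    avformat_close_input(&fmt);
    return 0;
}

With this patch applied, an IAMF track in ISOBMFF should show one group per parsed audio element plus one group per mix presentation referencing those same streams, mirroring the loops in mov_read_iamf() below.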
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 521bf5fef6..0272311828 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -364,7 +364,8 @@ OBJS-$(CONFIG_MMF_MUXER) += mmf.o rawenc.o
OBJS-$(CONFIG_MODS_DEMUXER) += mods.o
OBJS-$(CONFIG_MOFLEX_DEMUXER) += moflex.o
OBJS-$(CONFIG_MOV_DEMUXER) += mov.o mov_chan.o mov_esds.o \
- qtpalette.o replaygain.o dovi_isom.o
+ qtpalette.o replaygain.o dovi_isom.o \
+ iamf.o
OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o vpcc.o \
movenchint.o mov_chan.o rtp.o \
movenccenc.o movenc_ttml.o rawutils.o \
diff --git a/libavformat/isom.h b/libavformat/isom.h
index 3d375d7a46..32d42490b5 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -33,6 +33,7 @@
#include "libavutil/stereo3d.h"
#include "avio.h"
+#include "iamf.h"
#include "internal.h"
#include "dv.h"
@@ -166,6 +167,7 @@ typedef struct MOVIndexRange {
typedef struct MOVStreamContext {
AVIOContext *pb;
int pb_is_copied;
+ int id; ///< AVStream id
int ffindex; ///< AVStream index
int next_chunk;
unsigned int chunk_count;
@@ -260,6 +262,10 @@ typedef struct MOVStreamContext {
AVEncryptionInfo *default_encrypted_sample;
MOVEncryptionIndex *encryption_index;
} cenc;
+
+ IAMFContext *iamf;
+ uint8_t *iamf_descriptors;
+ int iamf_descriptors_size;
} MOVStreamContext;
typedef struct MOVContext {
diff --git a/libavformat/mov.c b/libavformat/mov.c
index d1f214a441..11c68a2f6e 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -59,6 +59,7 @@
#include "internal.h"
#include "avio_internal.h"
#include "demux.h"
+#include "iamf.h"
#include "dovi_isom.h"
#include "riff.h"
#include "isom.h"
@@ -851,6 +852,163 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
}
+static int mov_read_iamf(MOVContext *c, AVIOContext *pb, int64_t size)
+{
+ AVStream *st;
+ MOVStreamContext *sc;
+ FFIOContext b;
+ AVIOContext *descriptor_pb;
+ AVDictionary *metadata;
+ IAMFContext *iamf;
+ char args[32];
+ int64_t start_time, duration;
+ int nb_frames, disposition;
+ int ret;
+
+ if ((int)size != size)
+ return AVERROR(ENOMEM);
+
+ if (c->fc->nb_streams < 1)
+ return 0;
+
+ st = c->fc->streams[c->fc->nb_streams - 1];
+ sc = mov_get_stream_context(st);
+
+ metadata = st->metadata;
+ st->metadata = NULL;
+ start_time = st->start_time;
+ nb_frames = st->nb_frames;
+ duration = st->duration;
+ disposition = st->disposition;
+
+ iamf = sc->iamf = av_mallocz(sizeof(*iamf));
+ if (!iamf) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ sc->iamf_descriptors = av_malloc(size);
+ if (!sc->iamf_descriptors) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ sc->iamf_descriptors_size = size;
+ ret = avio_read(pb, sc->iamf_descriptors, size);
+ if (ret != size) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
+ ffio_init_context(&b, sc->iamf_descriptors, size, 0, NULL, NULL, NULL, NULL);
+ descriptor_pb = &b.pub;
+
+ ret = ff_iamfdec_read_descriptors(iamf, descriptor_pb, size, c->fc);
+ if (ret < 0)
+ goto fail;
+
+ for (int i = 0; i < iamf->nb_audio_elements; i++) {
+ IAMFAudioElement *audio_element = &iamf->audio_elements[i];
+ AVStreamGroup *stg = avformat_stream_group_create(c->fc, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT, NULL);
+
+ if (!stg) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ stg->id = audio_element->audio_element_id;
+ stg->params.iamf_audio_element = audio_element->element;
+ audio_element->element = NULL;
+
+ for (int j = 0; j < audio_element->nb_substreams; j++) {
+ IAMFSubStream *substream = &audio_element->substreams[j];
+ AVStream *stream;
+
+ if (!i && !j)
+ stream = st;
+ else
+ stream = avformat_new_stream(c->fc, NULL);
+ if (!stream) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ stream->start_time = start_time;
+ stream->nb_frames = nb_frames;
+ stream->duration = duration;
+ stream->disposition = disposition;
+ if (stream != st && !(stream->priv_data = av_buffer_ref(st->priv_data))) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ ret = avcodec_parameters_copy(stream->codecpar, substream->codecpar);
+ if (ret < 0)
+ goto fail;
+
+ stream->id = substream->audio_substream_id;
+
+ avpriv_set_pts_info(stream, 64, 1, sc->time_scale);
+
+ ret = avformat_stream_group_add_stream(stg, stream);
+ if (ret < 0)
+ goto fail;
+ }
+
+ ret = av_dict_copy(&stg->metadata, metadata, 0);
+ if (ret < 0)
+ goto fail;
+ }
+
+ for (int i = 0; i < iamf->nb_mix_presentations; i++) {
+ IAMFMixPresentation *mix_presentation = &iamf->mix_presentations[i];
+ const AVIAMFMixPresentation *mix = mix_presentation->mix;
+ AVStreamGroup *stg = avformat_stream_group_create(c->fc, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION, NULL);
+
+ if (!stg) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ stg->id = mix_presentation->mix_presentation_id;
+ stg->params.iamf_mix_presentation = mix_presentation->mix;
+ mix_presentation->mix = NULL;
+
+ for (int j = 0; j < mix->num_submixes; j++) {
+ const AVIAMFSubmix *submix = mix->submixes[j];
+
+ for (int k = 0; k < submix->num_elements; k++) {
+ const AVIAMFSubmixElement *submix_element = submix->elements[k];
+ const AVStreamGroup *audio_element = NULL;
+
+ for (int l = 0; l < c->fc->nb_stream_groups; l++)
+ if (c->fc->stream_groups[l]->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT &&
+ c->fc->stream_groups[l]->id == submix_element->audio_element_id) {
+ audio_element = c->fc->stream_groups[l];
+ break;
+ }
+ av_assert0(audio_element);
+
+ for (int l = 0; l < audio_element->nb_streams; l++) {
+ ret = avformat_stream_group_add_stream(stg, audio_element->streams[l]);
+ if (ret < 0 && ret != AVERROR(EEXIST))
+ goto fail;
+ }
+ }
+ }
+
+ ret = av_dict_copy(&stg->metadata, metadata, 0);
+ if (ret < 0)
+ goto fail;
+ }
+
+ snprintf(args, sizeof(args), "first_index=%d", st->index);
+
+ ret = ff_stream_add_bitstream_filter(st, "iamf_stream_split", args);
+fail:
+ av_dict_free(&metadata);
+
+ return ret;
+}
+
static int mov_read_dec3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
AVStream *st;
@@ -1393,7 +1551,7 @@ static int64_t get_frag_time(AVFormatContext *s, AVStream *dst_st,
// If the stream is referenced by any sidx, limit the search
// to fragments that referenced this stream in the sidx
if (sc->has_sidx) {
- frag_stream_info = get_frag_stream_info(frag_index, index, dst_st->id);
+ frag_stream_info = get_frag_stream_info(frag_index, index, sc->id);
if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE)
return frag_stream_info->sidx_pts;
if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE)
@@ -1404,9 +1562,11 @@ static int64_t get_frag_time(AVFormatContext *s, AVStream *dst_st,
for (i = 0; i < frag_index->item[index].nb_stream_info; i++) {
AVStream *frag_stream = NULL;
frag_stream_info = &frag_index->item[index].stream_info[i];
- for (j = 0; j < s->nb_streams; j++)
- if (s->streams[j]->id == frag_stream_info->id)
+ for (j = 0; j < s->nb_streams; j++) {
+ MOVStreamContext *sc2 = mov_get_stream_context(s->streams[j]);
+ if (sc2->id == frag_stream_info->id)
frag_stream = s->streams[j];
+ }
if (!frag_stream) {
av_log(s, AV_LOG_WARNING, "No stream matching sidx ID found.\n");
continue;
@@ -1472,12 +1632,13 @@ static int update_frag_index(MOVContext *c, int64_t offset)
for (i = 0; i < c->fc->nb_streams; i++) {
// Avoid building frag index if streams lack track id.
- if (c->fc->streams[i]->id < 0) {
+ MOVStreamContext *sc = mov_get_stream_context(c->fc->streams[i]);
+ if (sc->id < 0) {
av_free(frag_stream_info);
return AVERROR_INVALIDDATA;
}
- frag_stream_info[i].id = c->fc->streams[i]->id;
+ frag_stream_info[i].id = sc->id;
frag_stream_info[i].sidx_pts = AV_NOPTS_VALUE;
frag_stream_info[i].tfdt_dts = AV_NOPTS_VALUE;
frag_stream_info[i].next_trun_dts = AV_NOPTS_VALUE;
@@ -2368,14 +2529,17 @@ static void mov_parse_stsd_video(MOVContext *c, AVIOContext *pb,
}
}
-static void mov_parse_stsd_audio(MOVContext *c, AVIOContext *pb,
- AVStream *st, MOVStreamContext *sc)
+static int mov_parse_stsd_audio(MOVContext *c, AVIOContext *pb,
+ AVStream *st, MOVStreamContext *sc,
+ int64_t size)
{
int bits_per_sample, flags;
+ int64_t start_pos = avio_tell(pb);
uint16_t version = avio_rb16(pb);
uint32_t id = 0;
AVDictionaryEntry *compatible_brands = av_dict_get(c->fc->metadata, "compatible_brands", NULL, AV_DICT_MATCH_CASE);
int channel_count;
+ int ret;
avio_rb16(pb); /* revision level */
id = avio_rl32(pb); /* vendor */
@@ -2436,7 +2600,9 @@ static void mov_parse_stsd_audio(MOVContext *c, AVIOContext *pb,
st->codecpar->codec_id = mov_codec_id(st, MKTAG('r','a','w',' '));
else if (st->codecpar->bits_per_coded_sample == 16)
st->codecpar->codec_id = mov_codec_id(st, MKTAG('t','w','o','s'));
- }
+ } else if (sc->format == MKTAG('i','a','m','f'))
+ if ((ret = mov_read_iamf(c, pb, size - (avio_tell(pb) - start_pos))) < 0)
+ return ret;
switch (st->codecpar->codec_id) {
case AV_CODEC_ID_PCM_S8:
@@ -2483,6 +2649,8 @@ static void mov_parse_stsd_audio(MOVContext *c, AVIOContext *pb,
st->codecpar->bits_per_coded_sample = bits_per_sample;
sc->sample_size = (bits_per_sample >> 3) * st->codecpar->ch_layout.nb_channels;
}
+
+ return 0;
}
static void mov_parse_stsd_subtitle(MOVContext *c, AVIOContext *pb,
@@ -2772,7 +2940,10 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
if (st->codecpar->codec_type==AVMEDIA_TYPE_VIDEO) {
mov_parse_stsd_video(c, pb, st, sc);
} else if (st->codecpar->codec_type==AVMEDIA_TYPE_AUDIO) {
- mov_parse_stsd_audio(c, pb, st, sc);
+ int ret = mov_parse_stsd_audio(c, pb, st, sc,
+ size - (avio_tell(pb) - start_pos));
+ if (ret < 0)
+ return ret;
if (st->codecpar->sample_rate < 0) {
av_log(c->fc, AV_LOG_ERROR, "Invalid sample rate %d\n", st->codecpar->sample_rate);
return AVERROR_INVALIDDATA;
@@ -3261,7 +3432,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
"All samples in data stream index:id [%d:%d] have zero "
"duration, stream set to be discarded by default. Override "
"using AVStream->discard or -discard for ffmpeg command.\n",
- st->index, st->id);
+ st->index, sc->id);
st->discard = AVDISCARD_ALL;
}
sc->track_end = duration;
@@ -4641,6 +4812,50 @@ static void fix_timescale(MOVContext *c, MOVStreamContext *sc)
}
}
+static int mov_update_iamf_streams(MOVContext *c, const AVStream *st)
+{
+ const MOVStreamContext *sc = mov_get_stream_context(st);
+
+ for (int i = 0; i < sc->iamf->nb_audio_elements; i++) {
+ const AVStreamGroup *stg = NULL;
+
+ for (int j = 0; j < c->fc->nb_stream_groups; j++)
+ if (c->fc->stream_groups[j]->id == sc->iamf->audio_elements[i].audio_element_id)
+ stg = c->fc->stream_groups[j];
+ av_assert0(stg);
+
+ for (int j = 0; j < stg->nb_streams; j++) {
+ const FFStream *sti = cffstream(st);
+ AVStream *out = stg->streams[j];
+ FFStream *out_sti = ffstream(stg->streams[j]);
+
+ out->codecpar->bit_rate = 0;
+
+ if (out == st)
+ continue;
+
+ out->time_base = st->time_base;
+ out->start_time = st->start_time;
+ out->duration = st->duration;
+ out->nb_frames = st->nb_frames;
+ out->disposition = st->disposition;
+ out->discard = st->discard;
+
+ av_assert0(!out_sti->index_entries);
+ out_sti->index_entries = av_malloc(sti->index_entries_allocated_size);
+ if (!out_sti->index_entries)
+ return AVERROR(ENOMEM);
+
+ out_sti->index_entries_allocated_size = sti->index_entries_allocated_size;
+ out_sti->nb_index_entries = sti->nb_index_entries;
+ out_sti->skip_samples = sti->skip_samples;
+ memcpy(out_sti->index_entries, sti->index_entries, sti->index_entries_allocated_size);
+ }
+ }
+
+ return 0;
+}
+
static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
AVStream *st;
@@ -4715,6 +4930,12 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
mov_build_index(c, st);
+ if (sc->iamf) {
+ ret = mov_update_iamf_streams(c, st);
+ if (ret < 0)
+ return ret;
+ }
+
if (sc->dref_id-1 < sc->drefs_count && sc->drefs[sc->dref_id-1].path) {
MOVDref *dref = &sc->drefs[sc->dref_id - 1];
if (c->enable_drefs) {
@@ -4955,6 +5176,7 @@ static int avif_add_stream(MOVContext *c, int item_id)
st->priv_data = sc;
st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
st->codecpar->codec_id = AV_CODEC_ID_AV1;
+ sc->id = st->id;
sc->ffindex = st->index;
c->trak_index = st->index;
st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
@@ -5069,6 +5291,7 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
avio_rb32(pb); /* modification time */
}
st->id = (int)avio_rb32(pb); /* track id (NOT 0 !)*/
+ sc->id = st->id;
avio_rb32(pb); /* reserved */
/* highlevel (considering edits) duration in movie timebase */
@@ -5243,7 +5466,8 @@ static int mov_read_tfdt(MOVContext *c, AVIOContext *pb, MOVAtom atom)
int64_t base_media_decode_time;
for (i = 0; i < c->fc->nb_streams; i++) {
- if (c->fc->streams[i]->id == frag->track_id) {
+ sc = mov_get_stream_context(c->fc->streams[i]);
+ if (sc->id == frag->track_id) {
st = c->fc->streams[i];
break;
}
@@ -5252,7 +5476,6 @@ static int mov_read_tfdt(MOVContext *c, AVIOContext *pb, MOVAtom atom)
av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", frag->track_id);
return 0;
}
- sc = mov_get_stream_context(st);
if (sc->pseudo_stream_id + 1 != frag->stsd_id && sc->pseudo_stream_id != -1)
return 0;
version = avio_r8(pb);
@@ -5296,7 +5519,8 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
}
for (i = 0; i < c->fc->nb_streams; i++) {
- if (c->fc->streams[i]->id == frag->track_id) {
+ sc = mov_get_stream_context(c->fc->streams[i]);
+ if (sc->id == frag->track_id) {
st = c->fc->streams[i];
sti = ffstream(st);
break;
@@ -5306,7 +5530,6 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", frag->track_id);
return 0;
}
- sc = mov_get_stream_context(st);
if (sc->pseudo_stream_id+1 != frag->stsd_id && sc->pseudo_stream_id != -1)
return 0;
@@ -5599,7 +5822,8 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
track_id = avio_rb32(pb); // Reference ID
for (i = 0; i < c->fc->nb_streams; i++) {
- if (c->fc->streams[i]->id == track_id) {
+ sc = mov_get_stream_context(c->fc->streams[i]);
+ if (sc->id == track_id) {
st = c->fc->streams[i];
break;
}
@@ -5609,8 +5833,6 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
}
- sc = mov_get_stream_context(st);
-
timescale = av_make_q(1, avio_rb32(pb));
if (timescale.den <= 0) {
@@ -6491,14 +6713,14 @@ static int get_current_encryption_info(MOVContext *c, MOVEncryptionIndex **encry
frag_stream_info = get_current_frag_stream_info(&c->frag_index);
if (frag_stream_info) {
for (i = 0; i < c->fc->nb_streams; i++) {
- if (c->fc->streams[i]->id == frag_stream_info->id) {
+ *sc = mov_get_stream_context(c->fc->streams[i]);
+ if ((*sc)->id == frag_stream_info->id) {
st = c->fc->streams[i];
break;
}
}
if (i == c->fc->nb_streams)
return 0;
- *sc = mov_get_stream_context(st);
if (!frag_stream_info->encryption_index) {
// If this stream isn't encrypted, don't create the index.
@@ -7435,7 +7657,7 @@ static int cenc_filter(MOVContext *mov, AVStream* st, MOVStreamContext *sc, AVPa
AVEncryptionInfo *encrypted_sample;
int encrypted_index, ret;
- frag_stream_info = get_frag_stream_info_from_pkt(&mov->frag_index, pkt, st->id);
+ frag_stream_info = get_frag_stream_info_from_pkt(&mov->frag_index, pkt, sc->id);
encrypted_index = current_index;
encryption_index = NULL;
if (frag_stream_info) {
@@ -8212,18 +8434,19 @@ static void mov_read_chapters(AVFormatContext *s)
AVStream *st = NULL;
FFStream *sti = NULL;
chapter_track = mov->chapter_tracks[j];
- for (i = 0; i < s->nb_streams; i++)
- if (s->streams[i]->id == chapter_track) {
+ for (i = 0; i < s->nb_streams; i++) {
+ sc = mov_get_stream_context(s->streams[i]);
+ if (sc->id == chapter_track) {
st = s->streams[i];
break;
}
+ }
if (!st) {
av_log(s, AV_LOG_ERROR, "Referenced QT chapter track not found\n");
continue;
}
sti = ffstream(st);
- sc = mov_get_stream_context(st);
cur_pos = avio_tell(sc->pb);
if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
@@ -8444,6 +8667,11 @@ static void mov_free_stream_context(void *opaque, uint8_t *data)
av_freep(&sc->spherical);
av_freep(&sc->mastering);
av_freep(&sc->coll);
+
+ ff_iamf_uninit_context(sc->iamf);
+ av_freep(&sc->iamf);
+ av_freep(&sc->iamf_descriptors);
+ sc->iamf_descriptors_size = 0;
}
static int mov_read_close(AVFormatContext *s)
@@ -8682,9 +8910,11 @@ static int mov_read_header(AVFormatContext *s)
AVDictionaryEntry *tcr;
int tmcd_st_id = -1;
- for (j = 0; j < s->nb_streams; j++)
- if (s->streams[j]->id == sc->timecode_track)
+ for (j = 0; j < s->nb_streams; j++) {
+ MOVStreamContext *sc2 = mov_get_stream_context(s->streams[j]);
+ if (sc2->id == sc->timecode_track)
tmcd_st_id = j;
+ }
if (tmcd_st_id < 0 || tmcd_st_id == i)
continue;
@@ -8997,7 +9227,15 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
if (st->codecpar->codec_id == AV_CODEC_ID_EIA_608 && sample->size > 8)
ret = get_eia608_packet(sc->pb, pkt, sample->size);
- else
+ else if (sc->iamf_descriptors_size) {
+ ret = av_new_packet(pkt, sc->iamf_descriptors_size);
+ if (ret < 0)
+ return ret;
+ pkt->pos = avio_tell(sc->pb);
+ memcpy(pkt->data, sc->iamf_descriptors, sc->iamf_descriptors_size);
+ sc->iamf_descriptors_size = 0;
+ ret = av_append_packet(sc->pb, pkt, sample->size);
+ } else
ret = av_get_packet(sc->pb, pkt, sample->size);
if (ret < 0) {
if (should_retry(sc->pb, ret)) {
--
2.42.1