Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Tomasz Rosinski via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: "ffmpeg-devel@ffmpeg.org" <ffmpeg-devel@ffmpeg.org>
Cc: Tomasz Rosinski <Tomasz.Rosinski@xperi.com>
Subject: [FFmpeg-devel] Parsing of loudness mp4 boxes ludt, tlou and alou FFmpeg patch
Date: Thu, 29 Jan 2026 14:05:51 +0000
Message-ID: <PH7PR15MB57650465A9D6F12816FCB5C5EB9EA@PH7PR15MB5765.namprd15.prod.outlook.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 127 bytes --]

Hi, please review the attached patch, which implements parsing of the loudness MP4 boxes.

Best regards,
Tomasz Rosinski


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Parsing-of-loudness-mp4-boxes-ludt-tlou-and-alou.patch --]
[-- Type: text/x-patch; name="0001-Parsing-of-loudness-mp4-boxes-ludt-tlou-and-alou.patch", Size: 7549 bytes --]

From 1d10836c3693be2b4054633d64ee8445af747a24 Mon Sep 17 00:00:00 2001
From: Tomasz Rosinski <tomasz.rosinski@xperi.com>
Date: Thu, 29 Jan 2026 12:37:03 +0100
Subject: [PATCH] Parsing of loudness mp4 boxes ludt, tlou and alou

This change adds parsing of loudness mp4 boxes to mov.c source file.
Parsing of loudness information from mp4 boxes was implemented
according to ISO/IEC 14496-12, section 12.2.7 Audio stream loudness,
so the following mp4 boxes are processed:
- ludt (LoudnessBox),
and its child boxes:
- tlou (TrackLoudnessInfo),
- alou (AlbumLoudnessInfo).
---
 libavformat/mov.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index d19b213ffa..3a74922fb5 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -9399,6 +9399,174 @@ fail:
     return ret;
 }
 
+/* One named integer field of a loudness box */
+typedef struct MOVLoudnessField {
+    const char *name;
+    int64_t     value;
+} MOVLoudnessField;
+
+/* Store fields in *md as "loudness.<scope>[.<base>[.<meas>]].<name>"; a
+ * negative base or meas leaves that index out of the key. Keys are built on
+ * the stack, so nothing is leaked. Returns 0 or the first negative error. */
+static int mov_loudness_export(AVDictionary **md, const char *scope,
+    int base, int meas, const MOVLoudnessField *fields, int nb_fields)
+{
+    for (int i = 0; i < nb_fields; i++) {
+        const char *name = fields[i].name;
+        char key[64];
+        int ret;
+        if (base < 0)
+            snprintf(key, sizeof(key), "loudness.%s.%s", scope, name);
+        else if (meas < 0)
+            snprintf(key, sizeof(key), "loudness.%s.%d.%s", scope, base, name);
+        else
+            snprintf(key, sizeof(key), "loudness.%s.%d.%d.%s",
+                scope, base, meas, name);
+        ret = av_dict_set_int(md, key, fields[i].value, 0);
+        if (ret < 0)
+            return ret;
+    }
+    return 0;
+}
+
+/* Based on ISO 14496-12:2021(E), section 12.2.7 Audio stream loudness.
+ * Parses the payload of a 'tlou'/'alou' box (named by scope) and exports its
+ * fields as metadata of the current stream; without a current stream the
+ * payload is skipped. Returns 0 on success or a negative AVERROR code. */
+static int mov_read_loudness_base(MOVContext *c, AVIOContext *pb, MOVAtom atom,
+    const char *scope)
+{
+    AVStream *st = get_curr_st(c);
+    if (!st) {
+        avio_skip(pb, atom.size);
+        return 0;
+    }
+
+    /* Point at st->metadata itself so a newly created dictionary is kept */
+    AVDictionary **md = &st->metadata;
+    int64_t end = avio_tell(pb) + atom.size;
+    int ret;
+    if (atom.size < 1)
+        return AVERROR_INVALIDDATA;
+    uint8_t version = avio_r8(pb);
+    /* Header: version, flags, count byte (v > 0), MAE byte (v > 1) */
+    const int header_size = 4 + (version > 0) + (version > 1);
+    /* Entry: EQ set byte (v > 0), 16-bit IDs, 32-bit peaks, count byte */
+    const int entry_size = (version > 0) + 2 + 4 + 1;
+    const int measurement_size = 3;
+    if (atom.size < header_size) {
+        av_log(c->fc, AV_LOG_ERROR,
+            "atom data [%"PRId64"B] too small for reading loudness box [%dB].\n",
+            atom.size, header_size);
+        return AVERROR_INVALIDDATA;
+    }
+    uint32_t flags = avio_rb24(pb);
+    uint8_t data_8b = 0, base_count = 1;
+    if (version > 0) {
+        data_8b = avio_r8(pb);
+        base_count = data_8b & 0x3f;
+    }
+    const MOVLoudnessField header[] = {
+        { "version", version }, { "flags", flags }, { "baseCount", base_count },
+    };
+    ret = mov_loudness_export(md, scope, -1, -1, header, 3);
+    if (ret < 0)
+        return ret;
+
+    if (version > 1) {
+        uint8_t info_type = data_8b >> 6;
+        MOVLoudnessField mae = { NULL, 0 };
+        data_8b = avio_r8(pb);
+        if (info_type == 1 || info_type == 2)
+            mae = (MOVLoudnessField){ "maeGroupID", data_8b & 0x7f };
+        else if (info_type == 3)
+            mae = (MOVLoudnessField){ "maeGroupPresetID", data_8b & 0x1f };
+        ret = mae.name ? mov_loudness_export(md, scope, -1, -1, &mae, 1) : 0;
+        if (ret < 0)
+            return ret;
+    }
+
+    for (int b = 0; b < base_count; ++b) {
+        /* Never start an entry that does not fit in this box */
+        if (avio_tell(pb) + entry_size > end)
+            return AVERROR_INVALIDDATA;
+        int eq_set_id = version > 0 ? avio_r8(pb) & 0x3f : 0;
+        uint16_t ids = avio_rb16(pb);
+        uint32_t peaks = avio_rb32(pb);
+        uint8_t num_measurements = avio_r8(pb);
+        /* (v ^ 0x800) - 0x800 sign-extends the 12-bit peak levels */
+        const MOVLoudnessField entry[] = {
+            { "eqSetID",             eq_set_id }, /* version > 0 only */
+            { "drcSetID",            ids & 0x3f },
+            { "downmixID",           (ids >> 6) & 0x7f },
+            { "reliabilityTP",       peaks & 0xf },
+            { "measurementSystemTP", (peaks >> 4) & 0xf },
+            { "bsTruePeakLevel",     (int)(((peaks >> 8) & 0xfff) ^ 0x800) - 0x800 },
+            { "bsSamplePeakLevel",   (int)((peaks >> 20) ^ 0x800) - 0x800 },
+            { "numMeasurements",     num_measurements },
+        };
+        const int first = version > 0 ? 0 : 1; /* drop eqSetID for version 0 */
+        ret = mov_loudness_export(md, scope, b, -1, entry + first, 8 - first);
+        if (ret < 0)
+            return ret;
+        for (int m = 0; m < num_measurements &&
+            avio_tell(pb) + measurement_size <= end; ++m) {
+            uint8_t method_definition = avio_r8(pb);
+            uint8_t method_value = avio_r8(pb);
+            data_8b = avio_r8(pb);
+            const MOVLoudnessField measurement[] = {
+                { "methodDefinition",  method_definition },
+                { "methodValue",       method_value },
+                { "reliability",       data_8b & 0xf },
+                { "measurementSystem", data_8b >> 4 },
+            };
+            ret = mov_loudness_export(md, scope, b, m, measurement, 4);
+            if (ret < 0)
+                return ret;
+        }
+    }
+    /* Skip to end if there is any padding */
+    if (avio_tell(pb) < end)
+        avio_skip(pb, end - avio_tell(pb));
+    return 0;
+}
+
+/*
+ * Parse 'ludt': pass 'tlou'/'alou' children to mov_read_loudness_base() and
+ * skip any other child. Returns 0 on success or a negative AVERROR code.
+ */
+static int mov_read_ludt(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    const int tag_size = 8; /* 32-bit size + fourcc */
+    int64_t end = avio_tell(pb) + atom.size;
+
+    while (avio_tell(pb) + tag_size <= end) {
+        MOVAtom a;
+        int ret = 0;
+
+        a.size = avio_rb32(pb);
+        a.type = avio_rl32(pb);
+        /* A child must hold its own header and fit in the rest of 'ludt',
+         * otherwise parsing it would consume bytes of following atoms. */
+        if (a.size < tag_size || a.size - tag_size > end - avio_tell(pb))
+            return AVERROR_INVALIDDATA;
+        a.size -= tag_size;
+
+        if (a.type == MKTAG('t','l','o','u'))
+            ret = mov_read_loudness_base(c, pb, a, "tlou");
+        else if (a.type == MKTAG('a','l','o','u'))
+            ret = mov_read_loudness_base(c, pb, a, "alou");
+        else
+            avio_skip(pb, a.size); /* unknown child */
+        if (ret < 0)
+            return ret;
+    }
+    /* Skip to end if there is any padding */
+    if (avio_tell(pb) < end)
+        avio_skip(pb, end - avio_tell(pb));
+    return 0;
+}
+
 static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('A','C','L','R'), mov_read_aclr },
 { MKTAG('A','P','R','G'), mov_read_avid },
@@ -9522,6 +9690,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('l','h','v','C'), mov_read_lhvc },
 { MKTAG('l','v','c','C'), mov_read_glbl },
 { MKTAG('a','p','v','C'), mov_read_glbl },
+{ MKTAG('l','u','d','t'), mov_read_ludt }, /* loudness box */
 #if CONFIG_IAMFDEC
 { MKTAG('i','a','c','b'), mov_read_iacb },
 #endif
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 163 bytes --]

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2026-01-29 14:06 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=PH7PR15MB57650465A9D6F12816FCB5C5EB9EA@PH7PR15MB5765.namprd15.prod.outlook.com \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=Tomasz.Rosinski@xperi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git