Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
@ 2025-05-27 10:28 Marcos Del Sol Vives via ffmpeg-devel
  2025-05-27 10:40 ` Marcos Del Sol via ffmpeg-devel
  0 siblings, 1 reply; 9+ messages in thread
From: Marcos Del Sol Vives via ffmpeg-devel @ 2025-05-27 10:28 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Marcos Del Sol Vives

The parser will now strictly check if WebVTT files start with the correct
"WEBVTT" marker. Before, files were not checked if they truly started
with it.

It will also now ignore all non-cue blocks, instead of only a hardcoded
list. This is closer to the specification that calls for no action
if unknown blocks are encountered.

Signed-off-by: Marcos Del Sol Vives <marcos@orca.pet>
---
 libavformat/webvttdec.c | 178 ++++++++++++++++++++++------------------
 1 file changed, 98 insertions(+), 80 deletions(-)

diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
index 6feda1585e..b454b2c1cf 100644
--- a/libavformat/webvttdec.c
+++ b/libavformat/webvttdec.c
@@ -58,6 +58,79 @@ static int64_t read_ts(const char *s)
     return AV_NOPTS_VALUE;
 }
 
+static int webvtt_parse_cue(WebVTTContext *webvtt, AVBPrint *cue, int64_t pos)
+{
+    int i;
+    AVPacket *sub;
+    const char *p, *identifier, *settings;
+    size_t identifier_len, settings_len;
+    int64_t ts_start, ts_end;
+
+    p = identifier = cue->str;
+
+    /* optional cue identifier (can be a number like in SRT or some kind of
+     * chaptering id) */
+    for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
+        if (!strncmp(p + i, "-->", 3)) {
+            identifier = NULL;
+            break;
+        }
+    }
+    if (!identifier)
+        identifier_len = 0;
+    else {
+        identifier_len = strcspn(p, "\r\n");
+        p += identifier_len;
+        if (*p == '\r')
+            p++;
+        if (*p == '\n')
+            p++;
+    }
+
+    /* cue timestamps */
+    if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
+        return AVERROR_INVALIDDATA;
+    if (!(p = strstr(p, "-->")))
+        return AVERROR_INVALIDDATA;
+    p += 2;
+    do p++; while (*p == ' ' || *p == '\t');
+    if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
+        return AVERROR_INVALIDDATA;
+
+    /* optional cue settings */
+    p += strcspn(p, "\n\r\t ");
+    while (*p == '\t' || *p == ' ')
+        p++;
+    settings = p;
+    settings_len = strcspn(p, "\r\n");
+    p += settings_len;
+    if (*p == '\r')
+        p++;
+    if (*p == '\n')
+        p++;
+
+    /* create packet */
+    sub = ff_subtitles_queue_insert(&webvtt->q, p, strlen(p), 0);
+    if (!sub)
+        return AVERROR(ENOMEM);
+    sub->pos = pos;
+    sub->pts = ts_start;
+    sub->duration = ts_end - ts_start;
+
+#define SET_SIDE_DATA(name, type) do {                                  \
+    if (name##_len) {                                                   \
+        uint8_t *buf = av_packet_new_side_data(sub, type, name##_len);  \
+        if (!buf)                                                       \
+            return AVERROR(ENOMEM);                                     \
+        memcpy(buf, name, name##_len);                                  \
+    }                                                                   \
+} while (0)
+
+    SET_SIDE_DATA(identifier, AV_PKT_DATA_WEBVTT_IDENTIFIER);
+    SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
+    return 0;
+}
+
 static int webvtt_read_header(AVFormatContext *s)
 {
     WebVTTContext *webvtt = s->priv_data;
@@ -74,13 +147,27 @@ static int webvtt_read_header(AVFormatContext *s)
 
     av_bprint_init(&cue,    0, AV_BPRINT_SIZE_UNLIMITED);
 
+    res = ff_subtitles_read_chunk(s->pb, &cue);
+    if (res < 0) {
+        av_log(s, AV_LOG_ERROR, "Unable to read file header\n");
+        goto end;
+    }
+
+    if (!cue.len) {
+        av_log(s, AV_LOG_ERROR, "Unable to read file header\n");
+        res = AVERROR_EOF;
+        goto end;
+    }
+
+    if (!strncmp(cue.str, "\xEF\xBB\xBFWEBVTT", 9) &&
+        !strncmp(cue.str, "WEBVTT", 6)) {
+        av_log(s, AV_LOG_ERROR, "Invalid file header\n");
+        res = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
     for (;;) {
-        int i;
-        int64_t pos;
-        AVPacket *sub;
-        const char *p, *identifier, *settings;
-        size_t identifier_len, settings_len;
-        int64_t ts_start, ts_end;
+        int64_t pos = avio_tell(s->pb);
 
         res = ff_subtitles_read_chunk(s->pb, &cue);
         if (res < 0)
@@ -89,81 +176,12 @@ static int webvtt_read_header(AVFormatContext *s)
         if (!cue.len)
             break;
 
-        p = identifier = cue.str;
-        pos = avio_tell(s->pb);
-
-        /* ignore header chunk */
-        if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
-            !strncmp(p, "WEBVTT", 6) ||
-            !strncmp(p, "STYLE", 5) ||
-            !strncmp(p, "REGION", 6) ||
-            !strncmp(p, "NOTE", 4))
-            continue;
-
-        /* optional cue identifier (can be a number like in SRT or some kind of
-         * chaptering id) */
-        for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
-            if (!strncmp(p + i, "-->", 3)) {
-                identifier = NULL;
-                break;
-            }
-        }
-        if (!identifier)
-            identifier_len = 0;
-        else {
-            identifier_len = strcspn(p, "\r\n");
-            p += identifier_len;
-            if (*p == '\r')
-                p++;
-            if (*p == '\n')
-                p++;
+        res = webvtt_parse_cue(webvtt, &cue, pos);
+        if (res < 0) {
+            if (res != AVERROR_INVALIDDATA)
+                goto end;
+            av_log(s, AV_LOG_DEBUG, "Ignoring non-cue block at 0x%"PRIx64"\n", pos);
         }
-
-        /* cue timestamps */
-        if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
-            break;
-        if (!(p = strstr(p, "-->")))
-            break;
-        p += 2;
-        do p++; while (*p == ' ' || *p == '\t');
-        if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
-            break;
-
-        /* optional cue settings */
-        p += strcspn(p, "\n\r\t ");
-        while (*p == '\t' || *p == ' ')
-            p++;
-        settings = p;
-        settings_len = strcspn(p, "\r\n");
-        p += settings_len;
-        if (*p == '\r')
-            p++;
-        if (*p == '\n')
-            p++;
-
-        /* create packet */
-        sub = ff_subtitles_queue_insert(&webvtt->q, p, strlen(p), 0);
-        if (!sub) {
-            res = AVERROR(ENOMEM);
-            goto end;
-        }
-        sub->pos = pos;
-        sub->pts = ts_start;
-        sub->duration = ts_end - ts_start;
-
-#define SET_SIDE_DATA(name, type) do {                                  \
-    if (name##_len) {                                                   \
-        uint8_t *buf = av_packet_new_side_data(sub, type, name##_len);  \
-        if (!buf) {                                                     \
-            res = AVERROR(ENOMEM);                                      \
-            goto end;                                                   \
-        }                                                               \
-        memcpy(buf, name, name##_len);                                  \
-    }                                                                   \
-} while (0)
-
-        SET_SIDE_DATA(identifier, AV_PKT_DATA_WEBVTT_IDENTIFIER);
-        SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
     }
 
     ff_subtitles_queue_finalize(s, &webvtt->q);
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-05-27 10:28 [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing Marcos Del Sol Vives via ffmpeg-devel
@ 2025-05-27 10:40 ` Marcos Del Sol via ffmpeg-devel
  2025-06-06 19:43   ` Tomas Härdin
  0 siblings, 1 reply; 9+ messages in thread
From: Marcos Del Sol via ffmpeg-devel @ 2025-05-27 10:40 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Marcos Del Sol

A note on this change: I found some .vtt files while using yt-dlp that follow
a draft version of the WebVTT standard (probably
https://www.w3.org/2013/07/webvtt.html) that use "Region:" for regions instead
of "REGION", and that was causing the conversion to .srt to fail completely.

An example of those files is available at
https://statics.3cat.cat/multimedia/vtt/2/4/1745967620742.vtt, which belongs
to a state-owned free online streaming service in Spain.

While the diff might seem large, I basically moved the current cue parsing
to a function because it made error handling much easier. The logic is
the exact same.

-----Mensaje original-----
De: Marcos <marcos@orca.pet>
Para: ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
CC: Marcos <marcos@orca.pet>
Fecha: martes, 27 de mayo de 2025 12:29 CEST
Asunto: [PATCH] avformat/webvttdec: improve WebVTT parsing


The parser will now strictly check if WebVTT files start with the correct
"WEBVTT" marker. Before, files were not checked if they truly started
with it.

It will also now ignore all non-cue blocks, instead of only a hardcoded
list. This is closer to the specification that calls for no action
if unknown blocks are encountered.

Signed-off-by: Marcos Del Sol Vives <marcos@orca.pet>
---
 libavformat/webvttdec.c | 178 ++++++++++++++++++++++------------------
 1 file changed, 98 insertions(+), 80 deletions(-)

diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
index 6feda1585e..b454b2c1cf 100644
--- a/libavformat/webvttdec.c
+++ b/libavformat/webvttdec.c
@@ -58,6 +58,79 @@ static int64_t read_ts(const char *s)
     return AV_NOPTS_VALUE;
 }
 
+static int webvtt_parse_cue(WebVTTContext *webvtt, AVBPrint *cue, int64_t pos)
+{
+    int i;
+    AVPacket *sub;
+    const char *p, *identifier, *settings;
+    size_t identifier_len, settings_len;
+    int64_t ts_start, ts_end;
+
+    p = identifier = cue->str;
+
+    /* optional cue identifier (can be a number like in SRT or some kind of
+     * chaptering id) */
+    for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
+        if (!strncmp(p + i, "-->", 3)) {
+            identifier = NULL;
+            break;
+        }
+    }
+    if (!identifier)
+        identifier_len = 0;
+    else {
+        identifier_len = strcspn(p, "\r\n");
+        p += identifier_len;
+        if (*p == '\r')
+            p++;
+        if (*p == '\n')
+            p++;
+    }
+
+    /* cue timestamps */
+    if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
+        return AVERROR_INVALIDDATA;
+    if (!(p = strstr(p, "-->")))
+        return AVERROR_INVALIDDATA;
+    p += 2;
+    do p++; while (*p == ' ' || *p == '\t');
+    if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
+        return AVERROR_INVALIDDATA;
+
+    /* optional cue settings */
+    p += strcspn(p, "\n\r\t ");
+    while (*p == '\t' || *p == ' ')
+        p++;
+    settings = p;
+    settings_len = strcspn(p, "\r\n");
+    p += settings_len;
+    if (*p == '\r')
+        p++;
+    if (*p == '\n')
+        p++;
+
+    /* create packet */
+    sub = ff_subtitles_queue_insert(&webvtt->q, p, strlen(p), 0);
+    if (!sub)
+        return AVERROR(ENOMEM);
+    sub->pos = pos;
+    sub->pts = ts_start;
+    sub->duration = ts_end - ts_start;
+
+#define SET_SIDE_DATA(name, type) do {                                  \
+    if (name##_len) {                                                   \
+        uint8_t *buf = av_packet_new_side_data(sub, type, name##_len);  \
+        if (!buf)                                                       \
+            return AVERROR(ENOMEM);                                     \
+        memcpy(buf, name, name##_len);                                  \
+    }                                                                   \
+} while (0)
+
+    SET_SIDE_DATA(identifier, AV_PKT_DATA_WEBVTT_IDENTIFIER);
+    SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
+    return 0;
+}
+
 static int webvtt_read_header(AVFormatContext *s)
 {
     WebVTTContext *webvtt = s->priv_data;
@@ -74,13 +147,27 @@ static int webvtt_read_header(AVFormatContext *s)
 
     av_bprint_init(&cue,    0, AV_BPRINT_SIZE_UNLIMITED);
 
+    res = ff_subtitles_read_chunk(s->pb, &cue);
+    if (res < 0) {
+        av_log(s, AV_LOG_ERROR, "Unable to read file header\n");
+        goto end;
+    }
+
+    if (!cue.len) {
+        av_log(s, AV_LOG_ERROR, "Unable to read file header\n");
+        res = AVERROR_EOF;
+        goto end;
+    }
+
+    if (!strncmp(cue.str, "\xEF\xBB\xBFWEBVTT", 9) &&
+        !strncmp(cue.str, "WEBVTT", 6)) {
+        av_log(s, AV_LOG_ERROR, "Invalid file header\n");
+        res = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
     for (;;) {
-        int i;
-        int64_t pos;
-        AVPacket *sub;
-        const char *p, *identifier, *settings;
-        size_t identifier_len, settings_len;
-        int64_t ts_start, ts_end;
+        int64_t pos = avio_tell(s->pb);
 
         res = ff_subtitles_read_chunk(s->pb, &cue);
         if (res < 0)
@@ -89,81 +176,12 @@ static int webvtt_read_header(AVFormatContext *s)
         if (!cue.len)
             break;
 
-        p = identifier = cue.str;
-        pos = avio_tell(s->pb);
-
-        /* ignore header chunk */
-        if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
-            !strncmp(p, "WEBVTT", 6) ||
-            !strncmp(p, "STYLE", 5) ||
-            !strncmp(p, "REGION", 6) ||
-            !strncmp(p, "NOTE", 4))
-            continue;
-
-        /* optional cue identifier (can be a number like in SRT or some kind of
-         * chaptering id) */
-        for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
-            if (!strncmp(p + i, "-->", 3)) {
-                identifier = NULL;
-                break;
-            }
-        }
-        if (!identifier)
-            identifier_len = 0;
-        else {
-            identifier_len = strcspn(p, "\r\n");
-            p += identifier_len;
-            if (*p == '\r')
-                p++;
-            if (*p == '\n')
-                p++;
+        res = webvtt_parse_cue(webvtt, &cue, pos);
+        if (res < 0) {
+            if (res != AVERROR_INVALIDDATA)
+                goto end;
+            av_log(s, AV_LOG_DEBUG, "Ignoring non-cue block at 0x%"PRIx64"\n", pos);
         }
-
-        /* cue timestamps */
-        if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
-            break;
-        if (!(p = strstr(p, "-->")))
-            break;
-        p += 2;
-        do p++; while (*p == ' ' || *p == '\t');
-        if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
-            break;
-
-        /* optional cue settings */
-        p += strcspn(p, "\n\r\t ");
-        while (*p == '\t' || *p == ' ')
-            p++;
-        settings = p;
-        settings_len = strcspn(p, "\r\n");
-        p += settings_len;
-        if (*p == '\r')
-            p++;
-        if (*p == '\n')
-            p++;
-
-        /* create packet */
-        sub = ff_subtitles_queue_insert(&webvtt->q, p, strlen(p), 0);
-        if (!sub) {
-            res = AVERROR(ENOMEM);
-            goto end;
-        }
-        sub->pos = pos;
-        sub->pts = ts_start;
-        sub->duration = ts_end - ts_start;
-
-#define SET_SIDE_DATA(name, type) do {                                  \
-    if (name##_len) {                                                   \
-        uint8_t *buf = av_packet_new_side_data(sub, type, name##_len);  \
-        if (!buf) {                                                     \
-            res = AVERROR(ENOMEM);                                      \
-            goto end;                                                   \
-        }                                                               \
-        memcpy(buf, name, name##_len);                                  \
-    }                                                                   \
-} while (0)
-
-        SET_SIDE_DATA(identifier, AV_PKT_DATA_WEBVTT_IDENTIFIER);
-        SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
     }
 
     ff_subtitles_queue_finalize(s, &webvtt->q);
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-05-27 10:40 ` Marcos Del Sol via ffmpeg-devel
@ 2025-06-06 19:43   ` Tomas Härdin
  2025-06-06 20:22     ` Marcos Del Sol Vives
  0 siblings, 1 reply; 9+ messages in thread
From: Tomas Härdin @ 2025-06-06 19:43 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

tis 2025-05-27 klockan 10:40 +0000 skrev Marcos Del Sol via ffmpeg-
devel:
> A note on this change: I found some .vtt files while using yt-dlp
> that follow
> a draft version of the WebVTT standard (probably
> https://www.w3.org/2013/07/webvtt.html) that use "Region:" for
> regions instead
> of "REGION", and that was causing the conversion to .srt to fail
> completely.

Sounds like the demuxer correctly rejected some broken files

> +        res = webvtt_parse_cue(webvtt, &cue, pos);
> +        if (res < 0) {
> +            if (res != AVERROR_INVALIDDATA)
> +                goto end;
> +            av_log(s, AV_LOG_DEBUG, "Ignoring non-cue block at
> 0x%"PRIx64"\n", pos);
>          }

This kind of change will cause broken files to proliferate, which is
bad for the WebVTT ecosystem, bringing it closer to the mess that is
the SubRip ecosystem

/Tomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-06-06 19:43   ` Tomas Härdin
@ 2025-06-06 20:22     ` Marcos Del Sol Vives
  2025-06-09 21:51       ` Tomas Härdin
  0 siblings, 1 reply; 9+ messages in thread
From: Marcos Del Sol Vives @ 2025-06-06 20:22 UTC (permalink / raw)
  To: FFmpeg development discussions and patches, Tomas Härdin



El 6 de junio de 2025 21:43:58 CEST, "Tomas Härdin" <git@haerdin.se> escribió:
>
>Sounds like the demuxer correctly rejected some broken files
>

The WebVTT standard does not call for a fatal error unless the magic header does not match. The current implementation is not only non-compliant with the standard, but will also break on future changes.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-06-06 20:22     ` Marcos Del Sol Vives
@ 2025-06-09 21:51       ` Tomas Härdin
  2025-06-09 23:51         ` Marcos Del Sol
  0 siblings, 1 reply; 9+ messages in thread
From: Tomas Härdin @ 2025-06-09 21:51 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

fre 2025-06-06 klockan 22:22 +0200 skrev Marcos Del Sol Vives:
> 
> 
> El 6 de junio de 2025 21:43:58 CEST, "Tomas Härdin" <git@haerdin.se>
> escribió:
> > 
> > Sounds like the demuxer correctly rejected some broken files
> > 
> 
> The WebVTT standard does not call for a fatal error unless the magic
> header does not match. The current implementation is not only non-
> compliant with the standard, but will also break on future changes.

The linked file does not follow the syntax specified in section 4 of
the WebVTT standard. Therefore it is not WebVTT.

The probe function should ensure that the file starts with the
necessary bytes. I see it can let some non-compliant files slip by,
since it does not check whether [BOM]WEBVTT is followed by a newline,
and possibly a space or tab and any non-newline characters. We could
fix that in the main parsing loop. We also shouldn't expect any
"WEBVTT" chunks after the first one.

webvttdec.c also allows REGION etc chunks outside of where section 4
says they are allowed. In my opinion this is bad, since it means the
demuxer allows more than just WebVTT.

I've been harping on the permissive attitude towards parsing on this
list for a while. The reason why I do this is because every time we're
lax with parsing, some user will come to rely on said laxness rather
than fixing their workflow. Therefore we're perpetually unable to fix
our demuxers. My opinion is that it is best to nip this permissiveness
in the bud. The fact that the demuxer does the wrong thing right now is
no excuse to make it behave even more incorrectly.

What I'd like to see is the project moving towards either parser
combinators or a domain specific language for grammars like a PEG
variant extended with length fields.

We can't do much about the W3C making breaking changes to their
standards in the future, other than updating our code when that
happens. We're lucky that the spec is quite narrow. The space for
making non-breaking changes to it is quite small. They could for
example reuse NOTE chunks for future functionality. For example, if W3C
wants to allow STYLE chunks in the middle of the file the current
syntax does not allow that. But they could amend it by using "NOTE
STYLE" for stylesheets between cues.

All this is just my views of course. Other devs might feel very
differently. I'd point out it's no longer the wild west of the early
2000's.

/Tomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-06-09 21:51       ` Tomas Härdin
@ 2025-06-09 23:51         ` Marcos Del Sol
  2025-06-10 11:42           ` Marcos Del Sol
  0 siblings, 1 reply; 9+ messages in thread
From: Marcos Del Sol @ 2025-06-09 23:51 UTC (permalink / raw)
  To: Tomas Härdin; +Cc: FFmpeg development discussions and patches

> The linked file does not follow the syntax specified in section 4 of
> the WebVTT standard. Therefore it is not WebVTT.

The section 6.1 on file parsing, says explicitely that processing should
be aborted if, and only if:

 - The file is too small (point 4)
 - The file does not start with "WEBVTT" (point 5)
 - The character after "WEBVTT" is not a whitespace (point 6)
 - There is no data after "WEBVTT" (point 8 and 10)

Note on the point 14, the main block handling loop, it not even once talks
about aborting parsing. If an extraneous block is found, no action should
be taken. It is just ignored and keeps processing the next ones.

WebVTT is supposed to be an extensible format. Limiting to a small set of
known values and silently aborting when anything new is introduced does
not seem like the best option to me. Web browsers do not stop rendering
pages when they see a new, unknown HTML tag or CSS option.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-06-09 23:51         ` Marcos Del Sol
@ 2025-06-10 11:42           ` Marcos Del Sol
  2025-06-13 13:03             ` Marcos Del Sol
  0 siblings, 1 reply; 9+ messages in thread
From: Marcos Del Sol @ 2025-06-10 11:42 UTC (permalink / raw)
  To: Tomas Härdin; +Cc: FFmpeg development discussions and patches

> WebVTT is supposed to be an extensible format. Limiting to a small set of
> known values and silently aborting when anything new is introduced does
> not seem like the best option to me. Web browsers do not stop rendering
> pages when they see a new, unknown HTML tag or CSS option.

About this, for interoperability reasons I've checked other multiple
implementations of WebVTT parsers and these all follow the approach of
silently ignoring unknown chunks:

 - WebKit (https://github.com/WebKit/WebKit/blob/main/Source/WebCore/html/track/WebVTTParser.cpp#L224-L227)
 - Chromium's Blink parser (https://chromium.googlesource.com/chromium/src/+/main/third_party/blink/renderer/core/html/track/vtt/vtt_parser.cc#199)
 - Mozilla's Gecko (https://searchfox.org/mozilla-central/source/dom/media/webvtt/vtt.sys.mjs#1674-1677)
 - VLC (https://code.videolan.org/videolan/vlc/-/blob/master/modules/codec/webvtt/webvtt.c)

The only one that would report an error, but will still keep parsing anyway,
is W3C's own WebVTT.js parser (https://github.com/w3c/webvtt.js/blob/main/parser.js#L150-L153).

If that's not enough, I found also in part 2.1 of the WebVTT specification,
also copied verbatim:

"The parsing rules are more tolerant to author errors than the syntax
allows, in order to provide for extensibility and to still render cues that
have some syntax errors."

Basically, the standard asks to follow Postel's law: "Be conservative in
what you send, be liberal in what you accept". ffmpeg should do that.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-06-10 11:42           ` Marcos Del Sol
@ 2025-06-13 13:03             ` Marcos Del Sol
  2025-06-18  7:01               ` Tomas Härdin
  0 siblings, 1 reply; 9+ messages in thread
From: Marcos Del Sol @ 2025-06-13 13:03 UTC (permalink / raw)
  To: Tomas Härdin; +Cc: FFmpeg development discussions and patches

Tomas Härdin:
> tis 2025-06-10 klockan 11:42 +0000 skrev Marcos Del Sol:
> > WebVTT is supposed to be an extensible format.
>
> The syntax says otherwise. Why the W3C feels the need to specify a
> particular imperative algorithm for parsing I cannot know, but this is
> not how RFCs are authored. It also makes implementing WebVTT in
> functional languages impossible. It is a shotgun parser to boot.

What do you mean that's not how RFCs are authored? Go read RFC2083
from 1997 where it has literal C code in it. You should consider writing
an irate email to the IETF and tell them that has to go. This TLV-based
standard, by the way, also asks you to ignore unknown tags.

> > Basically, the standard asks to follow Postel's law: "Be conservative in
> > what you send, be liberal in what you accept". ffmpeg should do that.
>
> No, it should not. That is a bourgeois attitude to parsing. If the W3C
> needs extensibility then it should amend the syntax to make that
> possible. Or they should just have used XML I guess, but here we are.

If they had used XML you would be here complaining that we should crash
on unknown tags that don't appear in the XML DOCTYPE, so that'd change
nothing.

Would you say that'd be the correct approach on unknown Matroska EBML
tags, which the standard (RFC9559) also tells you to accept and ignore?

If you wanna go proprietary, let's read what QuickTime says about
unknown atoms:

"If your application encounters an atom of an unknown type, it should
not attempt to interpret the atom’s data. Use the atom’s size field to
skip this atom and all of its contents. This allows a degree of forward
compatibility with extensions to the QuickTime file format."

I haven't checked any others, but I am fairly sure no tag-based file
format standard would say "be as strict as possible, treat any
recoverable parsing errors as fatal and do not notify the user".

> A liberal attitude towards parsing begets liberal attitudes elsewhere,
> for example in the very sample you linked. Rather than this broadcaster
> fixing their workflow, are we to be saddled with the maintenance burden
> of their broken files for free, in perpetuity? To give our labour away
> gratis? Of course not.

Isn't this what you're exactly advocating for though? Spent time and
effort perpetually updating the list of known tags just to ultimately
ignore them?

PS: Remember to send the emails to the list, not to me alone.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing
  2025-06-13 13:03             ` Marcos Del Sol
@ 2025-06-18  7:01               ` Tomas Härdin
  0 siblings, 0 replies; 9+ messages in thread
From: Tomas Härdin @ 2025-06-18  7:01 UTC (permalink / raw)
  To: ffmpeg-devel

fre 2025-06-13 klockan 13:03 +0000 skrev Marcos Del Sol:
> Tomas Härdin:
> > tis 2025-06-10 klockan 11:42 +0000 skrev Marcos Del Sol:
> > > WebVTT is supposed to be an extensible format.
> > 
> > The syntax says otherwise. Why the W3C feels the need to specify a
> > particular imperative algorithm for parsing I cannot know, but this
> > is
> > not how RFCs are authored. It also makes implementing WebVTT in
> > functional languages impossible. It is a shotgun parser to boot.
> 
> What do you mean that's not how RFCs are authored? Go read RFC2083
> from 1997 where it has literal C code in it. You should consider
> writing
> an irate email to the IETF and tell them that has to go. This TLV-
> based
> standard, by the way, also asks you to ignore unknown tags.

The important difference is that KLV based formats allow us to
*recognize* unknown tags before attempting to process them. RFC2083
does not specify two mutually incompatible languages as far as I can
tell. The C code in for example section 10.8 specifies how to *process*
pixel data already recognized (parsed), assuming the file is sRGB. It
also appears to be wrong, but let's ignore that

This stuff is important because every CVE relates to parsing. Language
ambiguities can and have lead to CVEs. The parsing of URIs is one
example, for which curl caught flak since it does not adhere to the
regex specified in the URI RFC. lavf has similar URI issues I'm sure,
which is why I'm adamant that the codebase needs to be de-postelized.
If for example a PNG file has more than one IHDR chunk then it should
be rejected. We should not attempt to guess what should be done in this
case, but loudly abort

With WebVTT this may seem academic, until you realize that ambiguities
in the spec can be abused to make two different decoders display
different things. In places with strict legislation on certain kinds of
speech this can have legal consequences.

Anyway, I have said my peace and placated the langsec spirits. When the
time comes to hand out I-told-you-so's a few years down the line, I can
point to this and other posts in this vein

/Tomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-06-18  7:01 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-27 10:28 [FFmpeg-devel] [PATCH] avformat/webvttdec: improve WebVTT parsing Marcos Del Sol Vives via ffmpeg-devel
2025-05-27 10:40 ` Marcos Del Sol via ffmpeg-devel
2025-06-06 19:43   ` Tomas Härdin
2025-06-06 20:22     ` Marcos Del Sol Vives
2025-06-09 21:51       ` Tomas Härdin
2025-06-09 23:51         ` Marcos Del Sol
2025-06-10 11:42           ` Marcos Del Sol
2025-06-13 13:03             ` Marcos Del Sol
2025-06-18  7:01               ` Tomas Härdin

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git