Re: [FFmpeg-devel] [PATCH 2/3] lavf/srtdec: Permit streaming input

From: "Tomas Härdin" <git@haerdin.se>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH 2/3] lavf/srtdec: Permit streaming input
Date: Thu, 28 Mar 2024 23:57:57 +0100
Message-ID: <32b802fec68dea20c8744edf2e4aab80d3999f62.camel@haerdin.se> (raw)
In-Reply-To: <6f6d79bb1b7421e409b74e767d5dc07bf18241fe.camel@haerdin.se>

[-- Attachment #1: Type: text/plain, Size: 13 bytes --]

Here as well

[-- Attachment #2: 0002-lavf-srtdec-Permit-streaming-input.patch --]
[-- Type: text/x-patch, Size: 11574 bytes --]

From 6d0684ca6fe02d80fc07a622fb85445a6917c29f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Thu, 28 Mar 2024 22:15:18 +0100
Subject: [PATCH 2/3] lavf/srtdec: Permit streaming input

This is largely a rewrite.

Read packets in srt_read_packet() rather than reading the entire file in srt_read_header().
Rely on AVFMT_GENERIC_INDEX for seeking.
Allow zero-length packets (same as WebVTT).
The implementation before this is broken in at least the following ways:

Decimals like .999999 are silently accepted and converted to 999.999 seconds.
This is because no verification is done on the milliseconds part.
This patch enforces that the minutes and seconds parts are 00-59, and the milliseconds 000-999.
It's not perfect since FFmpeg doesn't have regex functionality or indeed any kind of parsing framework,
but it's better than before.

Segmenting cues by lines that consist of just a single integer is incredibly wrong,
since the subtitle text itself may well contain lines that are just a lone integer.
This means files written with CR line endings that have text with lone integers are
parsed in a completely broken manner. Neither can we segment by lines containing -->
since this is permissible in SubRip (as far as I can tell). WebVTT explicitly forbids it however.
---
 libavformat/srtdec.c             | 211 ++++++++++++++++---------------
 tests/ref/fate/sub-srt-rrn-remux |   4 +
 2 files changed, 116 insertions(+), 99 deletions(-)

diff --git a/libavformat/srtdec.c b/libavformat/srtdec.c
index 6bf73599a7..c74d40b726 100644
--- a/libavformat/srtdec.c
+++ b/libavformat/srtdec.c
@@ -28,7 +28,8 @@
 #include "libavutil/intreadwrite.h"
 
 typedef struct {
-    FFDemuxSubtitlesQueue q;
+    AVBPrint buf;
+    FFTextReader tr;
 } SRTContext;
 
 static int srt_probe(const AVProbeData *p)
@@ -72,54 +73,66 @@ struct event_info {
 
 static int get_event_info(const char *line, struct event_info *ei)
 {
-    int hh1, mm1, ss1, ms1;
-    int hh2, mm2, ss2, ms2;
-
-    ei->x1 = ei->x2 = ei->y1 = ei->y2 = ei->duration = -1;
-    ei->pts = AV_NOPTS_VALUE;
-    ei->pos = -1;
-    if (sscanf(line, "%d:%d:%d%*1[,.]%d --> %d:%d:%d%*1[,.]%d"
+    unsigned int hh1, mm1, ss1, ms1;
+    unsigned int hh2, mm2, ss2, ms2;
+    int n, m = 0;
+
+    // require exactly two digits for mm and ss, three digits for ms
+    n = sscanf(line, "%*u:%*1u%*1u:%*1u%*1u%*1[,.]%*1u%*1u%*1u --> %*u:%*1u%*1u:%*1u%*1u%*1[,.]%*1u%*1u%*1u%n", &m);
+    if (n < 0 || m <= 0)
+        return -1;
+    n = sscanf(line, "%u:%u:%u%*1[,.]%u --> %u:%u:%u%*1[,.]%u"
                "%*[ ]X1:%"PRId32" X2:%"PRId32" Y1:%"PRId32" Y2:%"PRId32,
                &hh1, &mm1, &ss1, &ms1,
                &hh2, &mm2, &ss2, &ms2,
-               &ei->x1, &ei->x2, &ei->y1, &ei->y2) >= 8) {
+               &ei->x1, &ei->x2, &ei->y1, &ei->y2);
+    // do not accept partial positions (9 <= n <= 11)
+    if ((n == 8 || n == 12) &&
+            // require timestamps to be well-formed
+            mm1 < 60 && ss1 < 60 && ms1 < 1000 &&
+            mm2 < 60 && ss2 < 60 && ms2 < 1000) {
+        // hh is converted to long long before the multiplication
+        // hence this cannot overflow even if hh == UINT_MAX
         const int64_t start = (hh1*3600LL + mm1*60LL + ss1) * 1000LL + ms1;
         const int64_t end   = (hh2*3600LL + mm2*60LL + ss2) * 1000LL + ms2;
-        ei->duration = end - start;
-        ei->pts = start;
-        return 0;
+        // accept start == end (hidden subtitles) since a FATE test requires it
+        // WebVTT by contrast does not allow this
+        if (start <= end) {
+            ei->duration = end - start;
+            ei->pts = start;
+            return 0;
+        }
     }
     return -1;
 }
 
-static int add_event(FFDemuxSubtitlesQueue *q, AVBPrint *buf, char *line_cache,
-                     const struct event_info *ei, int append_cache)
+static int output_packet(AVBPrint *buf,
+                     const struct event_info *ei, AVPacket *pkt)
 {
-    if (append_cache && line_cache[0])
-        av_bprintf(buf, "%s\n", line_cache);
-    line_cache[0] = 0;
+    int ret;
     if (!av_bprint_is_complete(buf))
         return AVERROR(ENOMEM);
 
+    // len < size throughout this loop, so we don't need to
+    // call av_bprint_is_complete() twice like the old code did
     while (buf->len > 0 && buf->str[buf->len - 1] == '\n')
         buf->str[--buf->len] = 0;
 
-    if (buf->len) {
-        AVPacket *sub = ff_subtitles_queue_insert_bprint(q, buf, 0);
-        if (!sub)
-            return AVERROR(ENOMEM);
-        av_bprint_clear(buf);
-        sub->pos = ei->pos;
-        sub->pts = ei->pts;
-        sub->duration = ei->duration;
-        if (ei->x1 != -1) {
-            uint8_t *p = av_packet_new_side_data(sub, AV_PKT_DATA_SUBTITLE_POSITION, 16);
-            if (p) {
-                AV_WL32(p,      ei->x1);
-                AV_WL32(p +  4, ei->y1);
-                AV_WL32(p +  8, ei->x2);
-                AV_WL32(p + 12, ei->y2);
-            }
+    ret = av_new_packet(pkt, buf->len);
+    if (ret)
+        return ret;
+    memcpy(pkt->data, buf->str, buf->len);
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    pkt->pos = ei->pos;
+    pkt->dts = pkt->pts = ei->pts;
+    pkt->duration = ei->duration;
+    if (ei->x1 != -1) {
+        uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SUBTITLE_POSITION, 16);
+        if (p) {
+            AV_WL32(p,      ei->x1);
+            AV_WL32(p +  4, ei->y1);
+            AV_WL32(p +  8, ei->x2);
+            AV_WL32(p + 12, ei->y2);
         }
     }
 
@@ -129,14 +142,8 @@ static int add_event(FFDemuxSubtitlesQueue *q, AVBPrint *buf, char *line_cache,
 static int srt_read_header(AVFormatContext *s)
 {
     SRTContext *srt = s->priv_data;
-    AVBPrint buf;
     AVStream *st = avformat_new_stream(s, NULL);
-    int res = 0;
-    char line[4096], line_cache[4096];
-    int has_event_info = 0;
-    struct event_info ei;
-    FFTextReader tr;
-    ff_text_init_avio(s, &tr, s->pb);
+    ff_text_init_avio(s, &srt->tr, s->pb);
 
     if (!st)
         return AVERROR(ENOMEM);
@@ -144,84 +151,90 @@ static int srt_read_header(AVFormatContext *s)
     st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
     st->codecpar->codec_id   = AV_CODEC_ID_SUBRIP;
 
-    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    av_bprint_init(&srt->buf, 0, AV_BPRINT_SIZE_UNLIMITED);
 
-    line_cache[0] = 0;
+    return 0;
+}
 
-    while (!ff_text_eof(&tr)) {
-        struct event_info tmp_ei;
-        const int64_t pos = ff_text_pos(&tr);
-        ptrdiff_t len = ff_subtitles_read_line(&tr, line, sizeof(line));
+static int srt_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    SRTContext *srt = s->priv_data;
+    struct event_info ei = {-1, -1, -1, -1};
+    /**
+     * Three states:
+     *
+     *  0: discarding any leading blank lines
+     *  1: have cue number, will read event info
+     *  2: have event info, will read lines until blank line or EOF
+     */
+    int state = 0;
+    av_bprint_clear(&srt->buf);
+
+    if (ff_text_eof(&srt->tr))
+        return AVERROR_EOF;
+
+    while (!ff_text_eof(&srt->tr)) {
+        char line[4096];
+        const int64_t pos = ff_text_pos(&srt->tr);
+        ptrdiff_t len = ff_subtitles_read_line(&srt->tr, line, sizeof(line));
+
+        if (len < 0) {
+            return len;
+        }
 
-        if (len < 0)
-            break;
+        if (!len || !line[0]) {
+            if (state == 0)
+                continue;   // skip leading blank lines
+            else
+                break;
+        }
 
-        if (!len || !line[0])
+        if (state == 0) {
+            state = 1;
+            ei.pos = pos;
+            // we don't care about the line number
             continue;
-
-        if (get_event_info(line, &tmp_ei) < 0) {
-            char *pline;
-
-            if (!has_event_info)
-                continue;
-
-            if (line_cache[0]) {
-                /* We got some cache and a new line so we assume the cached
-                 * line was actually part of the payload */
-                av_bprintf(&buf, "%s\n", line_cache);
-                line_cache[0] = 0;
+        } else if (state == 1) {
+            // expect event into
+            int ret = get_event_info(line, &ei);
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "malformed event info\n");
+                return AVERROR_INVALIDDATA;
             }
-
-            /* If the line doesn't start with a number, we assume it's part of
-             * the payload, otherwise is likely an event number preceding the
-             * timing information... but we can't be sure of this yet, so we
-             * cache it */
-            if (strtol(line, &pline, 10) < 0 || line == pline)
-                av_bprintf(&buf, "%s\n", line);
-            else
-                strcpy(line_cache, line);
+            state = 2;
         } else {
-            if (has_event_info) {
-                /* We have the information of previous event, append it to the
-                 * queue. We insert the cached line if and only if the payload
-                 * is empty and the cached line is not a standalone number. */
-                char *pline = NULL;
-                const int standalone_number = strtol(line_cache, &pline, 10) >= 0 && pline && !*pline;
-                res = add_event(&srt->q, &buf, line_cache, &ei, !buf.len && !standalone_number);
-                if (res < 0)
-                    goto end;
-            } else {
-                has_event_info = 1;
-            }
-            tmp_ei.pos = pos;
-            ei = tmp_ei;
+            av_bprintf(&srt->buf, "%s\n", line);
         }
     }
 
-    /* Append the last event. Here we force the cache to be flushed, because a
-     * trailing number is more likely to be geniune (for example a copyright
-     * date) and not the event index of an inexistant event */
-    if (has_event_info) {
-        res = add_event(&srt->q, &buf, line_cache, &ei, 1);
-        if (res < 0)
-            goto end;
+    if (state == 2) {
+        return output_packet(&srt->buf, &ei, pkt);
+    } else if (state != 0) {
+        // we got a line number but no event info
+        av_log(s, AV_LOG_ERROR, "malformed end of file\n");
+        return AVERROR_INVALIDDATA;
+    } else {
+        // ignore trailing blank lines
+        return AVERROR_EOF;
     }
+}
 
-    ff_subtitles_queue_finalize(s, &srt->q);
-
-end:
-    av_bprint_finalize(&buf, NULL);
-    return res;
+static int srt_read_close(AVFormatContext *s)
+{
+    SRTContext *srt = s->priv_data;
+    av_bprint_finalize(&srt->buf, NULL);
+    return 0;
 }
 
+
 const FFInputFormat ff_srt_demuxer = {
     .p.name      = "srt",
     .p.long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
     .priv_data_size = sizeof(SRTContext),
+    .p.flags        = AVFMT_GENERIC_INDEX,
     .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP,
     .read_probe  = srt_probe,
     .read_header = srt_read_header,
-    .read_packet = ff_subtitles_read_packet,
-    .read_seek2  = ff_subtitles_read_seek,
-    .read_close  = ff_subtitles_read_close,
+    .read_packet = srt_read_packet,
+    .read_close  = srt_read_close,
 };
diff --git a/tests/ref/fate/sub-srt-rrn-remux b/tests/ref/fate/sub-srt-rrn-remux
index 1cb66d21bc..48c49e6fa1 100644
--- a/tests/ref/fate/sub-srt-rrn-remux
+++ b/tests/ref/fate/sub-srt-rrn-remux
@@ -2111,3 +2111,7 @@ girly, girly girl ¶
 00:20:53,053 --> 00:20:55,954
 ¶ girly, girly girl-- ¶
 
+452
+00:20:56,000 --> 00:20:59,128
+
+
-- 
2.39.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".