Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Leon Grutters <gruttersleonbot2@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Cc: Leon Grutters <gruttersleonbot2@gmail.com>
Subject: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
Date: Thu, 20 Mar 2025 08:24:50 +0100
Message-ID: <20250320072450.1164-1-gruttersleonbot2@gmail.com> (raw)

If a supported tag has a class, e.g. "<i.bold>", it is ignored entirely;
so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
instead of the intended "{\i1}Hello{\i0}".

Signed-off-by: Leon Grutters <gruttersleonbot2@gmail.com>
---
 libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 35bdbe805d..4111d138c4 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -29,25 +29,53 @@
 #include "ass.h"
 #include "codec_internal.h"
 #include "libavutil/bprint.h"
+#include "libavutil/mem.h"
 
 static const struct {
     const char *from;
     const char *to;
 } webvtt_tag_replace[] = {
-    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
-    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
-    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
     {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
     {"&gt;", ">"}, {"&lt;", "<"},
     {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
     {"&amp;", "&"}, {"&nbsp;", "\\h"},
 };
+static const struct {
+    const char *from;
+    const char *to;
+} webvtt_valid_tags[] = {
+    {"i", "{\\i1}"}, {"/i", "{\\i0}"},
+    {"b", "{\\b1}"}, {"/b", "{\\b0}"},
+    {"u", "{\\u1}"}, {"/u", "{\\u0}"},
+};
 
 static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
 {
-    int i, again = 0, skip = 0;
+    int i, again = 0/*, skip = 0*/;
 
     while (*p) {
+        if (*p == '<') {
+            const char *tag_end = strchr(p, '>');
+            char *tag_body, *tag_name, *saveptr = NULL;
+            ptrdiff_t len;
+            if (!tag_end)
+                break;
+            len = tag_end - p + 1;
+            tag_body = av_strndup(p + 1, len - 2);
+            if (!tag_body)
+                return AVERROR(ENOMEM);
+            tag_name = av_strtok(tag_body, ".", &saveptr);
+            for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
+                const char *from = webvtt_valid_tags[i].from;
+                if(!strcmp(tag_name, from)) {
+                    av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
+                    break;
+                }
+            }
+            p += len;
+            again = 1;
+            av_freep(&tag_body);
+        }
 
         for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
             const char *from = webvtt_tag_replace[i].from;
@@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
                 break;
             }
         }
+
         if (!*p)
             break;
 
         if (again) {
             again = 0;
-            skip = 0;
+            // skip = 0;
             continue;
         }
-        if (*p == '<')
-            skip = 1;
-        else if (*p == '>')
-            skip = 0;
-        else if (p[0] == '\n' && p[1])
+        // if (*p == '<')
+        //     skip = 1;
+        // else if (*p == '>')
+        //     skip = 0;
+        if (p[0] == '\n' && p[1])
             av_bprintf(buf, "\\N");
-        else if (!skip && *p != '\r')
+        else if (/*!skip && */*p != '\r')
             av_bprint_chars(buf, *p, 1);
         p++;
     }
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

             reply	other threads:[~2025-03-20  7:25 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-20  7:24 Leon Grutters [this message]
2025-03-20  8:02 ` Andreas Rheinhardt
2025-03-20 18:16   ` Leon Grutters
2025-03-21  2:48     ` Andreas Rheinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250320072450.1164-1-gruttersleonbot2@gmail.com \
    --to=gruttersleonbot2@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git