Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
@ 2025-03-20  7:24 Leon Grutters
  2025-03-20  8:02 ` Andreas Rheinhardt
  0 siblings, 1 reply; 4+ messages in thread
From: Leon Grutters @ 2025-03-20  7:24 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Leon Grutters

If a supported tag has a class, e.g. "<i.bold>", it is ignored entirely;
so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
instead of the intended "{\i1}Hello{\i0}".

Signed-off-by: Leon Grutters <gruttersleonbot2@gmail.com>
---
 libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 35bdbe805d..4111d138c4 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -29,25 +29,53 @@
 #include "ass.h"
 #include "codec_internal.h"
 #include "libavutil/bprint.h"
+#include "libavutil/mem.h"
 
 static const struct {
     const char *from;
     const char *to;
 } webvtt_tag_replace[] = {
-    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
-    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
-    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
     {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
     {"&gt;", ">"}, {"&lt;", "<"},
     {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
     {"&amp;", "&"}, {"&nbsp;", "\\h"},
 };
+static const struct {
+    const char *from;
+    const char *to;
+} webvtt_valid_tags[] = {
+    {"i", "{\\i1}"}, {"/i", "{\\i0}"},
+    {"b", "{\\b1}"}, {"/b", "{\\b0}"},
+    {"u", "{\\u1}"}, {"/u", "{\\u0}"},
+};
 
 static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
 {
-    int i, again = 0, skip = 0;
+    int i, again = 0/*, skip = 0*/;
 
     while (*p) {
+        if (*p == '<') {
+            const char *tag_end = strchr(p, '>');
+            char *tag_body, *tag_name, *saveptr = NULL;
+            ptrdiff_t len;
+            if (!tag_end)
+                break;
+            len = tag_end - p + 1;
+            tag_body = av_strndup(p + 1, len - 2);
+            if (!tag_body)
+                return AVERROR(ENOMEM);
+            tag_name = av_strtok(tag_body, ".", &saveptr);
+            for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
+                const char *from = webvtt_valid_tags[i].from;
+                if(!strcmp(tag_name, from)) {
+                    av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
+                    break;
+                }
+            }
+            p += len;
+            again = 1;
+            av_freep(&tag_body);
+        }
 
         for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
             const char *from = webvtt_tag_replace[i].from;
@@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
                 break;
             }
         }
+
         if (!*p)
             break;
 
         if (again) {
             again = 0;
-            skip = 0;
+            // skip = 0;
             continue;
         }
-        if (*p == '<')
-            skip = 1;
-        else if (*p == '>')
-            skip = 0;
-        else if (p[0] == '\n' && p[1])
+        // if (*p == '<')
+        //     skip = 1;
+        // else if (*p == '>')
+        //     skip = 0;
+        if (p[0] == '\n' && p[1])
             av_bprintf(buf, "\\N");
-        else if (!skip && *p != '\r')
+        else if (/*!skip && */*p != '\r')
             av_bprint_chars(buf, *p, 1);
         p++;
     }
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
  2025-03-20  7:24 [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes Leon Grutters
@ 2025-03-20  8:02 ` Andreas Rheinhardt
  2025-03-20 18:16   ` Leon Grutters
  0 siblings, 1 reply; 4+ messages in thread
From: Andreas Rheinhardt @ 2025-03-20  8:02 UTC (permalink / raw)
  To: ffmpeg-devel

Leon Grutters:
> If a supported tag has a class, e.g "<i.bold>" it is ignored entirely;
> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
> instead of the intended "{\i1}Hello{\i0}".
> 
> Signed-off-by: Leon Grutters <gruttersleonbot2@gmail.com>
> ---
>  libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
>  1 file changed, 40 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
> index 35bdbe805d..4111d138c4 100644
> --- a/libavcodec/webvttdec.c
> +++ b/libavcodec/webvttdec.c
> @@ -29,25 +29,53 @@
>  #include "ass.h"
>  #include "codec_internal.h"
>  #include "libavutil/bprint.h"
> +#include "libavutil/mem.h"
>  
>  static const struct {
>      const char *from;
>      const char *to;
>  } webvtt_tag_replace[] = {
> -    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
> -    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
> -    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>      {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
>      {"&gt;", ">"}, {"&lt;", "<"},
>      {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
>      {"&amp;", "&"}, {"&nbsp;", "\\h"},
>  };
> +static const struct {
> +    const char *from;
> +    const char *to;
> +} webvtt_valid_tags[] = {
> +    {"i", "{\\i1}"}, {"/i", "{\\i0}"},
> +    {"b", "{\\b1}"}, {"/b", "{\\b0}"},
> +    {"u", "{\\u1}"}, {"/u", "{\\u0}"},
> +};

These strings are so small that one can avoid the relocations by using
fixed-size buffers.

>  
>  static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>  {
> -    int i, again = 0, skip = 0;
> +    int i, again = 0/*, skip = 0*/;
>  
>      while (*p) {
> +        if (*p == '<') {
> +            const char *tag_end = strchr(p, '>');
> +            char *tag_body, *tag_name, *saveptr = NULL;
> +            ptrdiff_t len;
> +            if (!tag_end)
> +                break;
> +            len = tag_end - p + 1;
> +            tag_body = av_strndup(p + 1, len - 2);
> +            if (!tag_body)
> +                return AVERROR(ENOMEM);

This allocation seems unnecessary. You can inspect the string without
modifying it by using strncmp() below and by passing a maximum field width
(precision) for the %s directive as a parameter, i.e. "%.*s".

> +            tag_name = av_strtok(tag_body, ".", &saveptr);
> +            for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
> +                const char *from = webvtt_valid_tags[i].from;
> +                if(!strcmp(tag_name, from)) {
> +                    av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
> +                    break;
> +                }
> +            }
> +            p += len;
> +            again = 1;
> +            av_freep(&tag_body);
> +        }
>  
>          for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
>              const char *from = webvtt_tag_replace[i].from;
> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>                  break;
>              }
>          }
> +
>          if (!*p)
>              break;
>  
>          if (again) {
>              again = 0;
> -            skip = 0;
> +            // skip = 0;
>              continue;
>          }
> -        if (*p == '<')
> -            skip = 1;
> -        else if (*p == '>')
> -            skip = 0;
> -        else if (p[0] == '\n' && p[1])
> +        // if (*p == '<')
> +        //     skip = 1;
> +        // else if (*p == '>')
> +        //     skip = 0;
> +        if (p[0] == '\n' && p[1])
>              av_bprintf(buf, "\\N");
> -        else if (!skip && *p != '\r')
> +        else if (/*!skip && */*p != '\r')
>              av_bprint_chars(buf, *p, 1);
>          p++;
>      }

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
  2025-03-20  8:02 ` Andreas Rheinhardt
@ 2025-03-20 18:16   ` Leon Grutters
  2025-03-21  2:48     ` Andreas Rheinhardt
  0 siblings, 1 reply; 4+ messages in thread
From: Leon Grutters @ 2025-03-20 18:16 UTC (permalink / raw)
  To: ffmpeg-devel

On 3/20/25 9:02 AM, Andreas Rheinhardt wrote:
> Leon Grutters:
>> If a supported tag has a class, e.g "<i.bold>" it is ignored entirely;
>> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
>> instead of the intended "{\i1}Hello{\i0}".
>>
>> Signed-off-by: Leon Grutters<gruttersleonbot2@gmail.com>
>> ---
>>   libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
>>   1 file changed, 40 insertions(+), 11 deletions(-)
>>
>> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
>> index 35bdbe805d..4111d138c4 100644
>> --- a/libavcodec/webvttdec.c
>> +++ b/libavcodec/webvttdec.c
>> @@ -29,25 +29,53 @@
>>   #include "ass.h"
>>   #include "codec_internal.h"
>>   #include "libavutil/bprint.h"
>> +#include "libavutil/mem.h"
>>   
>>   static const struct {
>>       const char *from;
>>       const char *to;
>>   } webvtt_tag_replace[] = {
>> -    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
>> -    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
>> -    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>>       {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
>>       {"&gt;", ">"}, {"&lt;", "<"},
>>       {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
>>       {"&amp;", "&"}, {"&nbsp;", "\\h"},
>>   };
>> +static const struct {
>> +    const char *from;
>> +    const char *to;
>> +} webvtt_valid_tags[] = {
>> +    {"i", "{\\i1}"}, {"/i", "{\\i0}"},
>> +    {"b", "{\\b1}"}, {"/b", "{\\b0}"},
>> +    {"u", "{\\u1}"}, {"/u", "{\\u0}"},
>> +};
> These strings are so small that one can avoid the relocations by using
> fixed-size buffers.
What do you mean exactly? I'm not sure I understand.
>>   
>>   static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>>   {
>> -    int i, again = 0, skip = 0;
>> +    int i, again = 0/*, skip = 0*/;
>>   
>>       while (*p) {
>> +        if (*p == '<') {
>> +            const char *tag_end = strchr(p, '>');
>> +            char *tag_body, *tag_name, *saveptr = NULL;
>> +            ptrdiff_t len;
>> +            if (!tag_end)
>> +                break;
>> +            len = tag_end - p + 1;
>> +            tag_body = av_strndup(p + 1, len - 2);
>> +            if (!tag_body)
>> +                return AVERROR(ENOMEM);
> This allocation seems unnecessary. You can inspect the string without
> modifying it by using strncmp() below and by using a maximum field width
> for the %s directive in a parameter.
My thought process here was that if it's done this way, you have saveptr
for possibly getting the classes (and tag annotation) later on.
>> +            tag_name = av_strtok(tag_body, ".", &saveptr);
>> +            for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
>> +                const char *from = webvtt_valid_tags[i].from;
>> +                if(!strcmp(tag_name, from)) {
>> +                    av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
>> +                    break;
>> +                }
>> +            }
>> +            p += len;
>> +            again = 1;
>> +            av_freep(&tag_body);
>> +        }
>>   
>>           for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
>>               const char *from = webvtt_tag_replace[i].from;
>> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>>                   break;
>>               }
>>           }
>> +
>>           if (!*p)
>>               break;
>>   
>>           if (again) {
>>               again = 0;
>> -            skip = 0;
>> +            // skip = 0;
>>               continue;
>>           }
>> -        if (*p == '<')
>> -            skip = 1;
>> -        else if (*p == '>')
>> -            skip = 0;
>> -        else if (p[0] == '\n' && p[1])
>> +        // if (*p == '<')
>> +        //     skip = 1;
>> +        // else if (*p == '>')
>> +        //     skip = 0;
>> +        if (p[0] == '\n' && p[1])
>>               av_bprintf(buf, "\\N");
>> -        else if (!skip && *p != '\r')
>> +        else if (/*!skip && */*p != '\r')
>>               av_bprint_chars(buf, *p, 1);
>>           p++;
>>       }
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
  2025-03-20 18:16   ` Leon Grutters
@ 2025-03-21  2:48     ` Andreas Rheinhardt
  0 siblings, 0 replies; 4+ messages in thread
From: Andreas Rheinhardt @ 2025-03-21  2:48 UTC (permalink / raw)
  To: ffmpeg-devel

Leon Grutters:
> On 3/20/25 9:02 AM, Andreas Rheinhardt wrote:
>> Leon Grutters:
>>> If a supported tag has a class, e.g "<i.bold>" it is ignored entirely;
>>> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
>>> instead of the intended "{\i1}Hello{\i0}".
>>>
>>> Signed-off-by: Leon Grutters<gruttersleonbot2@gmail.com>
>>> ---
>>>   libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
>>>   1 file changed, 40 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
>>> index 35bdbe805d..4111d138c4 100644
>>> --- a/libavcodec/webvttdec.c
>>> +++ b/libavcodec/webvttdec.c
>>> @@ -29,25 +29,53 @@
>>>   #include "ass.h"
>>>   #include "codec_internal.h"
>>>   #include "libavutil/bprint.h"
>>> +#include "libavutil/mem.h"
>>>     static const struct {
>>>       const char *from;
>>>       const char *to;
>>>   } webvtt_tag_replace[] = {
>>> -    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
>>> -    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
>>> -    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>>>       {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid
>>> ASS markup conflicts
>>>       {"&gt;", ">"}, {"&lt;", "<"},
>>>       {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
>>>       {"&amp;", "&"}, {"&nbsp;", "\\h"},
>>>   };
>>> +static const struct {
>>> +    const char *from;
>>> +    const char *to;
>>> +} webvtt_valid_tags[] = {
>>> +    {"i", "{\\i1}"}, {"/i", "{\\i0}"},
>>> +    {"b", "{\\b1}"}, {"/b", "{\\b0}"},
>>> +    {"u", "{\\u1}"}, {"/u", "{\\u0}"},
>>> +};
>> These strings are so small that one can avoid the relocations by using
>> fixed-size buffers.
> What do you mean exactly? I'm not sure I understand.

Use a fixed-size buffer in the struct instead of a pointer to a string
stored somewhere else: "char from[3]; char to[6];" should work.

>>>     static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>>>   {
>>> -    int i, again = 0, skip = 0;
>>> +    int i, again = 0/*, skip = 0*/;
>>>         while (*p) {
>>> +        if (*p == '<') {
>>> +            const char *tag_end = strchr(p, '>');
>>> +            char *tag_body, *tag_name, *saveptr = NULL;
>>> +            ptrdiff_t len;
>>> +            if (!tag_end)
>>> +                break;
>>> +            len = tag_end - p + 1;
>>> +            tag_body = av_strndup(p + 1, len - 2);
>>> +            if (!tag_body)
>>> +                return AVERROR(ENOMEM);
>> This allocation seems unnecessary. You can inspect the string without
>> modifying it by using strncmp() below and by using a maximum field width
>> for the %s directive in a parameter.
> My thought process here was that if it's done this way, you have saveptr
> for possibly getting the classes (and tag annotation) later on.

You can always save a pointer to start and end of the current component
manually.

>>> +            tag_name = av_strtok(tag_body, ".", &saveptr);
>>> +            for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
>>> +                const char *from = webvtt_valid_tags[i].from;
>>> +                if(!strcmp(tag_name, from)) {
>>> +                    av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
>>> +                    break;
>>> +                }
>>> +            }
>>> +            p += len;
>>> +            again = 1;
>>> +            av_freep(&tag_body);
>>> +        }
>>>             for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
>>>               const char *from = webvtt_tag_replace[i].from;
>>> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf,
>>> const char *p)
>>>                   break;
>>>               }
>>>           }
>>> +
>>>           if (!*p)
>>>               break;
>>>             if (again) {
>>>               again = 0;
>>> -            skip = 0;
>>> +            // skip = 0;
>>>               continue;
>>>           }
>>> -        if (*p == '<')
>>> -            skip = 1;
>>> -        else if (*p == '>')
>>> -            skip = 0;
>>> -        else if (p[0] == '\n' && p[1])
>>> +        // if (*p == '<')
>>> +        //     skip = 1;
>>> +        // else if (*p == '>')
>>> +        //     skip = 0;
>>> +        if (p[0] == '\n' && p[1])
>>>               av_bprintf(buf, "\\N");
>>> -        else if (!skip && *p != '\r')
>>> +        else if (/*!skip && */*p != '\r')
>>>               av_bprint_chars(buf, *p, 1);
>>>           p++;
>>>       }
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-03-21  2:49 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-20  7:24 [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes Leon Grutters
2025-03-20  8:02 ` Andreas Rheinhardt
2025-03-20 18:16   ` Leon Grutters
2025-03-21  2:48     ` Andreas Rheinhardt

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git