* [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
@ 2025-03-20 7:24 Leon Grutters
2025-03-20 8:02 ` Andreas Rheinhardt
0 siblings, 1 reply; 4+ messages in thread
From: Leon Grutters @ 2025-03-20 7:24 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Leon Grutters
If a supported tag has a class, e.g. "<i.bold>", it is ignored entirely;
so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
instead of the intended "{\i1}Hello{\i0}".
Signed-off-by: Leon Grutters <gruttersleonbot2@gmail.com>
---
libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
1 file changed, 40 insertions(+), 11 deletions(-)
diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 35bdbe805d..4111d138c4 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -29,25 +29,53 @@
#include "ass.h"
#include "codec_internal.h"
#include "libavutil/bprint.h"
+#include "libavutil/mem.h"
static const struct {
const char *from;
const char *to;
} webvtt_tag_replace[] = {
- {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
- {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
- {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
{"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
{"&gt;", ">"}, {"&lt;", "<"},
{"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
{"&amp;", "&"}, {"&nbsp;", "\\h"},
};
+static const struct {
+ const char *from;
+ const char *to;
+} webvtt_valid_tags[] = {
+ {"i", "{\\i1}"}, {"/i", "{\\i0}"},
+ {"b", "{\\b1}"}, {"/b", "{\\b0}"},
+ {"u", "{\\u1}"}, {"/u", "{\\u0}"},
+};
static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
{
- int i, again = 0, skip = 0;
+ int i, again = 0/*, skip = 0*/;
while (*p) {
+ if (*p == '<') {
+ const char *tag_end = strchr(p, '>');
+ char *tag_body, *tag_name, *saveptr = NULL;
+ ptrdiff_t len;
+ if (!tag_end)
+ break;
+ len = tag_end - p + 1;
+ tag_body = av_strndup(p + 1, len - 2);
+ if (!tag_body)
+ return AVERROR(ENOMEM);
+ tag_name = av_strtok(tag_body, ".", &saveptr);
+ for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
+ const char *from = webvtt_valid_tags[i].from;
+ if(!strcmp(tag_name, from)) {
+ av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
+ break;
+ }
+ }
+ p += len;
+ again = 1;
+ av_freep(&tag_body);
+ }
for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
const char *from = webvtt_tag_replace[i].from;
@@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
break;
}
}
+
if (!*p)
break;
if (again) {
again = 0;
- skip = 0;
+ // skip = 0;
continue;
}
- if (*p == '<')
- skip = 1;
- else if (*p == '>')
- skip = 0;
- else if (p[0] == '\n' && p[1])
+ // if (*p == '<')
+ // skip = 1;
+ // else if (*p == '>')
+ // skip = 0;
+ if (p[0] == '\n' && p[1])
av_bprintf(buf, "\\N");
- else if (!skip && *p != '\r')
+ else if (/*!skip && */*p != '\r')
av_bprint_chars(buf, *p, 1);
p++;
}
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
2025-03-20 7:24 [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes Leon Grutters
@ 2025-03-20 8:02 ` Andreas Rheinhardt
2025-03-20 18:16 ` Leon Grutters
0 siblings, 1 reply; 4+ messages in thread
From: Andreas Rheinhardt @ 2025-03-20 8:02 UTC (permalink / raw)
To: ffmpeg-devel
Leon Grutters:
> If a supported tag has a class, e.g "<i.bold>" it is ignored entirely;
> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
> instead of the intended "{\i1}Hello{\i0}".
>
> Signed-off-by: Leon Grutters <gruttersleonbot2@gmail.com>
> ---
> libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
> 1 file changed, 40 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
> index 35bdbe805d..4111d138c4 100644
> --- a/libavcodec/webvttdec.c
> +++ b/libavcodec/webvttdec.c
> @@ -29,25 +29,53 @@
> #include "ass.h"
> #include "codec_internal.h"
> #include "libavutil/bprint.h"
> +#include "libavutil/mem.h"
>
> static const struct {
> const char *from;
> const char *to;
> } webvtt_tag_replace[] = {
> - {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
> - {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
> - {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
> {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
> {"&gt;", ">"}, {"&lt;", "<"},
> {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
> {"&amp;", "&"}, {"&nbsp;", "\\h"},
> };
> +static const struct {
> + const char *from;
> + const char *to;
> +} webvtt_valid_tags[] = {
> + {"i", "{\\i1}"}, {"/i", "{\\i0}"},
> + {"b", "{\\b1}"}, {"/b", "{\\b0}"},
> + {"u", "{\\u1}"}, {"/u", "{\\u0}"},
> +};
These strings are so small that one can avoid the relocations by using
fixed-size buffers.
>
> static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
> {
> - int i, again = 0, skip = 0;
> + int i, again = 0/*, skip = 0*/;
>
> while (*p) {
> + if (*p == '<') {
> + const char *tag_end = strchr(p, '>');
> + char *tag_body, *tag_name, *saveptr = NULL;
> + ptrdiff_t len;
> + if (!tag_end)
> + break;
> + len = tag_end - p + 1;
> + tag_body = av_strndup(p + 1, len - 2);
> + if (!tag_body)
> + return AVERROR(ENOMEM);
This allocation seems unnecessary. You can inspect the string without
modifying it by using strncmp() below and by using a maximum field width
for the %s directive in a parameter.
> + tag_name = av_strtok(tag_body, ".", &saveptr);
> + for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
> + const char *from = webvtt_valid_tags[i].from;
> + if(!strcmp(tag_name, from)) {
> + av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
> + break;
> + }
> + }
> + p += len;
> + again = 1;
> + av_freep(&tag_body);
> + }
>
> for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
> const char *from = webvtt_tag_replace[i].from;
> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
> break;
> }
> }
> +
> if (!*p)
> break;
>
> if (again) {
> again = 0;
> - skip = 0;
> + // skip = 0;
> continue;
> }
> - if (*p == '<')
> - skip = 1;
> - else if (*p == '>')
> - skip = 0;
> - else if (p[0] == '\n' && p[1])
> + // if (*p == '<')
> + // skip = 1;
> + // else if (*p == '>')
> + // skip = 0;
> + if (p[0] == '\n' && p[1])
> av_bprintf(buf, "\\N");
> - else if (!skip && *p != '\r')
> + else if (/*!skip && */*p != '\r')
> av_bprint_chars(buf, *p, 1);
> p++;
> }
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
2025-03-20 8:02 ` Andreas Rheinhardt
@ 2025-03-20 18:16 ` Leon Grutters
2025-03-21 2:48 ` Andreas Rheinhardt
0 siblings, 1 reply; 4+ messages in thread
From: Leon Grutters @ 2025-03-20 18:16 UTC (permalink / raw)
To: ffmpeg-devel
On 3/20/25 9:02 AM, Andreas Rheinhardt wrote:
> Leon Grutters:
>> If a supported tag has a class, e.g "<i.bold>" it is ignored entirely;
>> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
>> instead of the intended "{\i1}Hello{\i0}".
>>
>> Signed-off-by: Leon Grutters<gruttersleonbot2@gmail.com>
>> ---
>> libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
>> 1 file changed, 40 insertions(+), 11 deletions(-)
>>
>> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
>> index 35bdbe805d..4111d138c4 100644
>> --- a/libavcodec/webvttdec.c
>> +++ b/libavcodec/webvttdec.c
>> @@ -29,25 +29,53 @@
>> #include "ass.h"
>> #include "codec_internal.h"
>> #include "libavutil/bprint.h"
>> +#include "libavutil/mem.h"
>>
>> static const struct {
>> const char *from;
>> const char *to;
>> } webvtt_tag_replace[] = {
>> - {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
>> - {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
>> - {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>> {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
>> {"&gt;", ">"}, {"&lt;", "<"},
>> {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
>> {"&amp;", "&"}, {"&nbsp;", "\\h"},
>> };
>> +static const struct {
>> + const char *from;
>> + const char *to;
>> +} webvtt_valid_tags[] = {
>> + {"i", "{\\i1}"}, {"/i", "{\\i0}"},
>> + {"b", "{\\b1}"}, {"/b", "{\\b0}"},
>> + {"u", "{\\u1}"}, {"/u", "{\\u0}"},
>> +};
> These strings are so small that one can avoid the relocations by using
> fixed-size buffers.
What do you mean exactly? I'm not sure I understand.
>>
>> static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>> {
>> - int i, again = 0, skip = 0;
>> + int i, again = 0/*, skip = 0*/;
>>
>> while (*p) {
>> + if (*p == '<') {
>> + const char *tag_end = strchr(p, '>');
>> + char *tag_body, *tag_name, *saveptr = NULL;
>> + ptrdiff_t len;
>> + if (!tag_end)
>> + break;
>> + len = tag_end - p + 1;
>> + tag_body = av_strndup(p + 1, len - 2);
>> + if (!tag_body)
>> + return AVERROR(ENOMEM);
> This allocation seems unnecessary. You can inspect the string without
> modifying it by using strncmp() below and by using a maximum field width
> for the %s directive in a parameter.
My thought process here was that if it's done this way, you have saveptr
for possibly getting the classes (and tag annotation) later on.
>> + tag_name = av_strtok(tag_body, ".", &saveptr);
>> + for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
>> + const char *from = webvtt_valid_tags[i].from;
>> + if(!strcmp(tag_name, from)) {
>> + av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
>> + break;
>> + }
>> + }
>> + p += len;
>> + again = 1;
>> + av_freep(&tag_body);
>> + }
>>
>> for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
>> const char *from = webvtt_tag_replace[i].from;
>> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>> break;
>> }
>> }
>> +
>> if (!*p)
>> break;
>>
>> if (again) {
>> again = 0;
>> - skip = 0;
>> + // skip = 0;
>> continue;
>> }
>> - if (*p == '<')
>> - skip = 1;
>> - else if (*p == '>')
>> - skip = 0;
>> - else if (p[0] == '\n' && p[1])
>> + // if (*p == '<')
>> + // skip = 1;
>> + // else if (*p == '>')
>> + // skip = 0;
>> + if (p[0] == '\n' && p[1])
>> av_bprintf(buf, "\\N");
>> - else if (!skip && *p != '\r')
>> + else if (/*!skip && */*p != '\r')
>> av_bprint_chars(buf, *p, 1);
>> p++;
>> }
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes
2025-03-20 18:16 ` Leon Grutters
@ 2025-03-21 2:48 ` Andreas Rheinhardt
0 siblings, 0 replies; 4+ messages in thread
From: Andreas Rheinhardt @ 2025-03-21 2:48 UTC (permalink / raw)
To: ffmpeg-devel
Leon Grutters:
> On 3/20/25 9:02 AM, Andreas Rheinhardt wrote:
>> Leon Grutters:
>>> If a supported tag has a class, e.g "<i.bold>" it is ignored entirely;
>>> so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
>>> instead of the intended "{\i1}Hello{\i0}".
>>>
>>> Signed-off-by: Leon Grutters<gruttersleonbot2@gmail.com>
>>> ---
>>> libavcodec/webvttdec.c | 51 +++++++++++++++++++++++++++++++++---------
>>> 1 file changed, 40 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
>>> index 35bdbe805d..4111d138c4 100644
>>> --- a/libavcodec/webvttdec.c
>>> +++ b/libavcodec/webvttdec.c
>>> @@ -29,25 +29,53 @@
>>> #include "ass.h"
>>> #include "codec_internal.h"
>>> #include "libavutil/bprint.h"
>>> +#include "libavutil/mem.h"
>>> static const struct {
>>> const char *from;
>>> const char *to;
>>> } webvtt_tag_replace[] = {
>>> - {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
>>> - {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
>>> - {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>>> {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid
>>> ASS markup conflicts
>>> {"&gt;", ">"}, {"&lt;", "<"},
>>> {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
>>> {"&amp;", "&"}, {"&nbsp;", "\\h"},
>>> };
>>> +static const struct {
>>> + const char *from;
>>> + const char *to;
>>> +} webvtt_valid_tags[] = {
>>> + {"i", "{\\i1}"}, {"/i", "{\\i0}"},
>>> + {"b", "{\\b1}"}, {"/b", "{\\b0}"},
>>> + {"u", "{\\u1}"}, {"/u", "{\\u0}"},
>>> +};
>> These strings are so small that one can avoid the relocations by using
>> fixed-size buffers.
> What do you mean exactly? I'm not sure I understand.
A fixed size buffer in the struct instead of a pointer to a string
somewhere else: char from[3]; char to[6]; should work.
>>> static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
>>> {
>>> - int i, again = 0, skip = 0;
>>> + int i, again = 0/*, skip = 0*/;
>>> while (*p) {
>>> + if (*p == '<') {
>>> + const char *tag_end = strchr(p, '>');
>>> + char *tag_body, *tag_name, *saveptr = NULL;
>>> + ptrdiff_t len;
>>> + if (!tag_end)
>>> + break;
>>> + len = tag_end - p + 1;
>>> + tag_body = av_strndup(p + 1, len - 2);
>>> + if (!tag_body)
>>> + return AVERROR(ENOMEM);
>> This allocation seems unnecessary. You can inspect the string without
>> modifying it by using strncmp() below and by using a maximum field width
>> for the %s directive in a parameter.
> My thought process here was that if it's done this way, you have saveptr
> for possibly getting the classes (and tag annotation) later on.
You can always save a pointer to start and end of the current component
manually.
>>> + tag_name = av_strtok(tag_body, ".", &saveptr);
>>> + for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
>>> + const char *from = webvtt_valid_tags[i].from;
>>> + if(!strcmp(tag_name, from)) {
>>> + av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
>>> + break;
>>> + }
>>> + }
>>> + p += len;
>>> + again = 1;
>>> + av_freep(&tag_body);
>>> + }
>>> for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
>>> const char *from = webvtt_tag_replace[i].from;
>>> @@ -59,21 +87,22 @@ static int webvtt_event_to_ass(AVBPrint *buf,
>>> const char *p)
>>> break;
>>> }
>>> }
>>> +
>>> if (!*p)
>>> break;
>>> if (again) {
>>> again = 0;
>>> - skip = 0;
>>> + // skip = 0;
>>> continue;
>>> }
>>> - if (*p == '<')
>>> - skip = 1;
>>> - else if (*p == '>')
>>> - skip = 0;
>>> - else if (p[0] == '\n' && p[1])
>>> + // if (*p == '<')
>>> + // skip = 1;
>>> + // else if (*p == '>')
>>> + // skip = 0;
>>> + if (p[0] == '\n' && p[1])
>>> av_bprintf(buf, "\\N");
>>> - else if (!skip && *p != '\r')
>>> + else if (/*!skip && */*p != '\r')
>>> av_bprint_chars(buf, *p, 1);
>>> p++;
>>> }
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-03-21 2:49 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-20 7:24 [FFmpeg-devel] [PATCH] avcodec/webvttdec: strip classes Leon Grutters
2025-03-20 8:02 ` Andreas Rheinhardt
2025-03-20 18:16 ` Leon Grutters
2025-03-21 2:48 ` Andreas Rheinhardt
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git