From: <m.kaindl0208@gmail.com>
To: <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH FFmpeg 3/15] libavfilter: tokenizer implementation for batch tokenization using tokenizers-cpp library
Date: Sat, 8 Mar 2025 15:59:01 +0100
Message-ID: <007501db903a$a0ecd4a0$e2c67de0$@gmail.com>

Implements batch tokenization support using the tokenizers-cpp library,
providing functions to load tokenizers and encode text batches. This is
crucial for CLIP/CLAP models that need to process text prompts.

https://github.com/mlc-ai/tokenizers-cpp

Try the new filters using my GitHub repo:
https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification
Any feedback is appreciated!

Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
---
 libavfilter/dnn_filter_common.c | 155 ++++++++++++++++++++++++++++++++
 libavfilter/dnn_filter_common.h |  19 ++++
 2 files changed, 174 insertions(+)

diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 6b9c6f8d7f..6a1e9ace2e 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -20,6 +20,11 @@
 #include "libavutil/avstring.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavformat/avio.h"
+
+#if (CONFIG_LIBTOKENIZERS == 1)
+#include "tokenizers_c.h"
+#endif
 
 #define MAX_SUPPORTED_OUTPUTS_NB 4
 
@@ -217,3 +222,153 @@ void ff_dnn_uninit(DnnContext *ctx)
         av_freep(&ctx->model_outputnames);
     }
 }
+
+static int load_file_content(const char *path, char **data, size_t *data_size,
+                             void *log_ctx) {
+    AVIOContext *avio_ctx = NULL;
+    int ret;
+    int64_t size;
+
+    ret = avio_open(&avio_ctx, path, AVIO_FLAG_READ);
+    if (ret < 0) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Cannot open file: %s\n", path);
+        return ret;
+    }
+
+    size = avio_size(avio_ctx);
+    if (size < 0) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Failed to determine file size: %s\n", path);
+        avio_closep(&avio_ctx);
+        return size;
+    }
+
+    *data = av_malloc(size + 1);
+    if (!*data) {
+        avio_closep(&avio_ctx);
+        return AVERROR(ENOMEM);
+    }
+
+    ret = avio_read(avio_ctx, (unsigned char *)*data, size);
+    avio_closep(&avio_ctx);
+
+    if (ret < 0) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Failed to read file: %s\n", path);
+        av_freep(data);
+        return ret;
+    }
+
+    if (ret != size) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Incomplete read: %s\n", path);
+        av_freep(data);
+        return AVERROR(EIO);
+    }
+
+    // Null-terminate the data
+    (*data)[size] = '\0';
+    *data_size = size;
+
+    return 0;
+}
+
+#if (CONFIG_LIBTOKENIZERS == 1)
+TokenizerHandle ff_dnn_tokenizer_create(const char *path, void *log_ctx)
+{
+    char *blob = NULL;
+    size_t blob_size = 0;
+    TokenizerHandle handle = NULL;
+    int ret;
+
+    if (!path) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Tokenizer path is NULL\n");
+        return NULL;
+    }
+
+    ret = load_file_content(path, &blob, &blob_size, log_ctx);
+    if (ret < 0)
+        return NULL;
+
+    handle = tokenizers_new_from_str(blob, blob_size);
+    av_freep(&blob);
+
+    if (!handle && log_ctx)
+        av_log(log_ctx, AV_LOG_ERROR, "Error creating tokenizer\n");
+
+    return handle;
+}
+
+int ff_dnn_tokenizer_encode_batch(TokenizerHandle tokenizer, const char **texts, int text_count,
+                                  TokenizerEncodeResult **results, void *log_ctx)
+{
+    size_t *lengths = NULL;
+    int ret = 0;
+
+    if (!tokenizer) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Tokenizer is NULL\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!texts || text_count <= 0 || !results) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Invalid parameters\n");
+        return AVERROR(EINVAL);
+    }
+
+    *results = av_calloc(text_count, sizeof(**results));
+    if (!*results) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    lengths = av_calloc(text_count, sizeof(*lengths));
+    if (!lengths) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    // Calculate text lengths
+    for (int i = 0; i < text_count; i++) {
+        lengths[i] = texts[i] ? strlen(texts[i]) : 0;
+    }
+
+    // Tokenize all texts in batch - directly store results in the output array
+    tokenizers_encode_batch(tokenizer, texts, lengths, text_count, 1, *results);
+
+    av_freep(&lengths);
+    return 0;
+
+fail:
+    av_freep(results);
+    av_freep(&lengths);
+    return ret;
+}
+
+int ff_dnn_create_tokenizer_and_encode_batch(const char *path, const char **texts, int text_count,
+                                             TokenizerEncodeResult **results, void *log_ctx)
+{
+    int ret;
+
+    // Create tokenizer
+    TokenizerHandle tokenizer = ff_dnn_tokenizer_create(path, log_ctx);
+    if (!tokenizer) {
+        av_log(log_ctx, AV_LOG_ERROR, "Error creating tokenizer\n");
+        return AVERROR(EINVAL);
+    }
+
+    // Tokenize batch
+    ret = ff_dnn_tokenizer_encode_batch(tokenizer, texts, text_count, results, log_ctx);
+
+    if (ret < 0) {
+        av_log(log_ctx, AV_LOG_ERROR, "Failed to tokenize batch text\n");
+    }
+
+    // Clean up tokenizer
+    ff_dnn_tokenizer_free(tokenizer);
+    return ret;
+}
+#endif
\ No newline at end of file
diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h
index 42a4719997..fffa676a9e 100644
--- a/libavfilter/dnn_filter_common.h
+++ b/libavfilter/dnn_filter_common.h
@@ -25,6 +25,9 @@
 #define AVFILTER_DNN_FILTER_COMMON_H
 
 #include "dnn_interface.h"
+#if(CONFIG_LIBTOKENIZERS == 1)
+#include "tokenizers_c.h"
+#endif
 
 #define DNN_FILTER_CHILD_CLASS_ITERATE(name, backend_mask)            \
     static const AVClass *name##_child_class_iterate(void **iter)     \
@@ -63,4 +66,20 @@ DNNAsyncStatusType ff_dnn_get_result(DnnContext *ctx, AVFrame **in_frame, AVFram
 int ff_dnn_flush(DnnContext *ctx);
 void ff_dnn_uninit(DnnContext *ctx);
 
+#if(CONFIG_LIBTOKENIZERS == 1)
+TokenizerHandle ff_dnn_tokenizer_create(const char *path, void *log_ctx);
+int ff_dnn_tokenizer_encode_batch(TokenizerHandle tokenizer, const char **texts, int text_count, TokenizerEncodeResult **results, void *log_ctx);
+int ff_dnn_create_tokenizer_and_encode_batch(const char *path, const char **texts, int text_count, TokenizerEncodeResult **results, void *log_ctx);
+
+inline void ff_dnn_tokenizer_free(TokenizerHandle tokenizer) {
+    if (tokenizer)
+        tokenizers_free(tokenizer);
+}
+inline void ff_dnn_tokenizer_free_results(TokenizerEncodeResult *results, int count) {
+    if (results) {
+        tokenizers_free_encode_results(results, count);
+    }
+}
+#endif
+
 #endif
-- 
2.34.1
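
A minimal usage sketch of the new helpers, assuming a build where
CONFIG_LIBTOKENIZERS is enabled and only the prototypes added to
dnn_filter_common.h above. This is not part of the patch: the tokenizer
JSON path, the prompt strings and the helper name tokenize_prompts_example
are purely illustrative, and reading results[i].len assumes the
TokenizerEncodeResult layout exposed by tokenizers-cpp's tokenizers_c.h.

#include "libavutil/log.h"
#include "libavutil/mem.h"
#include "dnn_filter_common.h"

/* Sketch, not part of the patch: tokenize two prompts in one batch and
 * report how many tokens each produced.  "clip_tokenizer.json" and the
 * prompt strings are placeholders; results[i].len assumes the
 * TokenizerEncodeResult layout from tokenizers-cpp's tokenizers_c.h. */
static int tokenize_prompts_example(void *log_ctx)
{
    static const char *prompts[] = { "a photo of a cat", "a photo of a dog" };
    const int count = 2;
    TokenizerEncodeResult *results = NULL;
    int ret;

    // Load the tokenizer, encode the whole batch, then free the tokenizer
    ret = ff_dnn_create_tokenizer_and_encode_batch("clip_tokenizer.json",
                                                   prompts, count, &results,
                                                   log_ctx);
    if (ret < 0)
        return ret;

    for (int i = 0; i < count; i++)
        av_log(log_ctx, AV_LOG_INFO, "prompt %d -> %zu tokens\n",
               i, results[i].len);

    // Token buffers are owned by tokenizers-cpp; the array itself was
    // allocated with av_calloc() by the encode helper, so free both.
    ff_dnn_tokenizer_free_results(results, count);
    av_freep(&results);
    return 0;
}

The two-step cleanup mirrors the ownership split in the patch:
ff_dnn_tokenizer_free_results() releases the per-result token buffers via
tokenizers_free_encode_results(), while the results array is allocated with
av_calloc() inside ff_dnn_tokenizer_encode_batch() and is therefore released
by the caller with av_freep().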