From: WyattBlue via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: WyattBlue <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] avfilter/af_whisper: Add max_len parameter (PR #21259)
Date: Mon, 22 Dec 2025 04:52:35 -0000
Message-ID: <176637915645.60.15086184433482053711@2cb04c0e5124> (raw)
PR #21259 opened by WyattBlue
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21259
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21259.patch
This closes #20333
From d387c186321ab5e8ebff92521c178dbd90475388 Mon Sep 17 00:00:00 2001
From: WyattBlue <wyattblue@auto-editor.com>
Date: Sun, 21 Dec 2025 23:51:15 -0500
Subject: [PATCH] avfilter/af_whisper: Add max_len parameter
---
libavfilter/af_whisper.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/libavfilter/af_whisper.c b/libavfilter/af_whisper.c
index 3c0eba42f0..7e1b27e21b 100644
--- a/libavfilter/af_whisper.c
+++ b/libavfilter/af_whisper.c
@@ -52,6 +52,7 @@ typedef struct WhisperContext {
int64_t queue;
char *destination;
char *format;
+ int max_len;
struct whisper_context *ctx_wsp;
struct whisper_vad_context *ctx_vad;
@@ -204,6 +205,8 @@ static void run_transcription(AVFilterContext *ctx, AVFrame *frame, int samples)
params.print_progress = 0;
params.print_realtime = 0;
params.print_timestamps = 0;
+ params.max_len = wctx->max_len;
+ params.token_timestamps = (wctx->max_len > 0);
if (whisper_full(wctx->ctx_wsp, params, wctx->audio_buffer, samples) != 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to process audio with whisper.cpp\n");
@@ -224,6 +227,14 @@ static void run_transcription(AVFilterContext *ctx, AVFrame *frame, int samples)
continue;
}
+ // Skip segments that are parts of [BLANK_AUDIO] when max_len splits them
+ if (wctx->max_len > 0 && (strcmp(text_cleaned, "[") == 0 || strcmp(text_cleaned, "]") == 0 ||
+ strcmp(text_cleaned, "BLANK") == 0 || strcmp(text_cleaned, "_") == 0 ||
+ strcmp(text_cleaned, "AUDIO") == 0)) {
+ av_freep(&text_cleaned);
+ continue;
+ }
+
const bool turn = whisper_full_get_segment_speaker_turn_next(wctx->ctx_wsp, i);
const int64_t t0_ms = whisper_full_get_segment_t0(wctx->ctx_wsp, i) * 10;
const int64_t t1_ms = whisper_full_get_segment_t1(wctx->ctx_wsp, i) * 10;
@@ -437,6 +448,7 @@ static const AVOption whisper_options[] = {
{ "gpu_device", "GPU device to use", OFFSET(gpu_device), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS },
{ "destination", "Output destination", OFFSET(destination), AV_OPT_TYPE_STRING, {.str = ""}, .flags = FLAGS },
{ "format", "Output format (text|srt|json)", OFFSET(format), AV_OPT_TYPE_STRING, {.str = "text"},.flags = FLAGS },
+ { "max_len", "Max segment length in characters", OFFSET(max_len), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS },
{ "vad_model", "Path to the VAD model file", OFFSET(vad_model_path), AV_OPT_TYPE_STRING,.flags = FLAGS },
{ "vad_threshold", "VAD threshold", OFFSET(vad_threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0.0, 1.0, .flags = FLAGS },
{ "vad_min_speech_duration", "Minimum speech duration for VAD", OFFSET(vad_min_speech_duration), AV_OPT_TYPE_DURATION, {.i64 = 100000}, 20000, HOURS, .flags = FLAGS },
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-12-22 4:54 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=176637915645.60.15086184433482053711@2cb04c0e5124 \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git