* [FFmpeg-devel] [PR] avfilter/af_afade: add ring buffer for memory-efficient crossfade (PR #21448)
@ 2026-01-13 14:21 realies via ffmpeg-devel
From: realies via ffmpeg-devel @ 2026-01-13 14:21 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: realies
PR #21448 opened by realies
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21448
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21448.patch
This patch implements a ring buffer approach for acrossfade that processes crossfades incrementally, addressing memory efficiency concerns for long crossfades.
## Key changes
- Added a ring buffer that holds exactly nb_samples (the crossfade duration); a minimal sketch of the bookkeeping follows this list
- Process crossfade frame-by-frame instead of buffering everything and outputting one giant frame
- For overlap mode: delay output by nb_samples to ensure correct timing
- Memory no longer scales with INPUT length - only with crossfade duration
- Removed the 60-second limit
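To make the review easier, here is a minimal standalone sketch of the circular-write / oldest-read bookkeeping the ring buffer relies on. The names (`RING_LEN`, `ring_write`, `ring_read_oldest`) are illustrative only; the actual patch operates on AVFrame planes and handles planar vs. packed layouts, as shown in the diff below.

```c
#include <stdio.h>

#define RING_LEN 8   /* stands in for nb_samples (the crossfade duration) */

static float ring[RING_LEN];
static long  write_pos = 0;  /* total samples ever written */
static long  filled    = 0;  /* valid samples currently held (<= RING_LEN) */

/* Circular overwrite: always keeps the most recent RING_LEN samples. */
static void ring_write(const float *src, int n)
{
    for (int i = 0; i < n; i++)
        ring[(write_pos + i) % RING_LEN] = src[i];
    write_pos += n;
    filled = (filled + n > RING_LEN) ? RING_LEN : filled + n;
}

/* Copy the oldest n retained samples into dst (requires n <= filled). */
static void ring_read_oldest(float *dst, int n)
{
    long oldest = (write_pos - filled) % RING_LEN;
    for (int i = 0; i < n; i++)
        dst[i] = ring[(oldest + i) % RING_LEN];
}

int main(void)
{
    float in[12], out[RING_LEN];
    for (int i = 0; i < 12; i++)
        in[i] = (float)i;

    ring_write(in, 12);               /* push 12 samples through an 8-sample ring */
    ring_read_oldest(out, RING_LEN);  /* ring now holds samples 4..11 */

    for (int i = 0; i < RING_LEN; i++)
        printf("%g ", out[i]);
    printf("\n");                     /* prints: 4 5 6 7 8 9 10 11 */
    return 0;
}
```

Keeping only the most recent RING_LEN samples is also what implements the nb_samples output delay in overlap mode: a sample becomes safe to emit only once newer input would push it out of the ring.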
## Memory profiling results
With 60s crossfade duration:
| Input Length | Old Memory | New Memory |
|--------------|------------|------------|
| 2 minutes | ~80 MB | 34 MB |
| 10 minutes | ~200 MB | 34 MB |
| 30 minutes | ~500 MB | 34 MB |
Memory is now constant regardless of input length.
Note: Memory still scales with crossfade duration itself (~0.1 MB per second of crossfade). This seems unavoidable with the current EOF-triggered design - we need to buffer the last nb_samples from input 0 because we don't know which samples are "last" until EOF.
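For a rough sense of scale, evaluating the buffer-size formula from the commit message below for a 60s crossfade (assuming 48 kHz stereo float; the sample rate and format of the profiling runs are not stated):

    nb_samples * bytes_per_sample * channels
      = (60 s * 48000 samples/s) * 4 bytes * 2 channels
      = 23,040,000 bytes ≈ 23 MB

The gap to the measured 34 MB is presumably the process baseline rather than the crossfade buffer itself.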
>From 4f8d569c72643d6047f01e8bf8a39badf6af9dd1 Mon Sep 17 00:00:00 2001
From: realies <ffmpeg@reali.es>
Date: Mon, 12 Jan 2026 21:15:49 +0200
Subject: [PATCH] avfilter/af_afade: add ring buffer for memory-efficient
crossfade
Implement lazy crossfade processing using a ring buffer to cap memory
usage at O(nb_samples) regardless of total audio length. Previously,
memory scaled with the total input length.
Key changes:
- Add ring buffer (ring_buf) to AudioFadeContext for storing last
nb_samples from input 0
- For overlap mode: delay output by nb_samples to ensure correct
crossfade timing
- For non-overlap mode: pass through immediately while maintaining
ring buffer
- Process crossfade incrementally frame-by-frame instead of buffering
everything
- Remove 60-second duration limit (now INT64_MAX/2)
- Add offset/total parameters to crossfade macros for position-based
gain calculation
This addresses Paul B Mahol's suggestion to use "lazy logic" like afade
does, processing the crossfade incrementally rather than buffering both
inputs and emitting the whole crossfade as one giant frame.
Memory usage is now capped at approximately:
nb_samples * bytes_per_sample * channels
For a 60-second crossfade at 48kHz stereo float, this is ~23MB instead
of scaling with input length.
---
libavfilter/af_afade.c | 649 +++++++++++++++++++++++++++++++----------
1 file changed, 492 insertions(+), 157 deletions(-)
diff --git a/libavfilter/af_afade.c b/libavfilter/af_afade.c
index 055f234f7c..686ad77729 100644
--- a/libavfilter/af_afade.c
+++ b/libavfilter/af_afade.c
@@ -47,6 +47,14 @@ typedef struct AudioFadeContext {
int64_t pts;
int xfade_idx;
+ /* Ring buffer for lazy crossfade (memory-efficient) */
+ AVFrame *ring_buf; /* Ring buffer holding last nb_samples from input 0 */
+ int64_t ring_write_pos; /* Write position in ring buffer (circular, 0 to nb_samples-1) */
+ int64_t ring_filled; /* Number of valid samples in ring buffer */
+ int64_t crossfade_pos; /* Current read position within crossfade (0 to nb_samples) */
+ int crossfade_active; /* Flag: currently in crossfade processing mode */
+ int passthrough_done; /* Flag: input 0 EOF reached, passthrough complete */
+
void (*fade_samples)(uint8_t **dst, uint8_t * const *src,
int nb_samples, int channels, int direction,
int64_t start, int64_t range, int curve,
@@ -56,7 +64,8 @@ typedef struct AudioFadeContext {
void (*crossfade_samples)(uint8_t **dst, uint8_t * const *cf0,
uint8_t * const *cf1,
int nb_samples, int channels,
- int curve0, int curve1);
+ int curve0, int curve1,
+ int64_t offset, int64_t total);
} AudioFadeContext;
enum CurveType { NONE = -1, TRI, QSIN, ESIN, HSIN, LOG, IPAR, QUA, CUB, SQU, CBR, PAR, EXP, IQSIN, IHSIN, DESE, DESI, LOSI, SINC, ISINC, QUAT, QUATR, QSIN2, HSIN2, NB_CURVES };
@@ -455,10 +464,10 @@ const FFFilter ff_af_afade = {
static const AVOption acrossfade_options[] = {
{ "inputs", "set number of input files to cross fade", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 2}, 1, INT32_MAX, FLAGS },
{ "n", "set number of input files to cross fade", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 2}, 1, INT32_MAX, FLAGS },
- { "nb_samples", "set number of samples for cross fade duration", OFFSET(nb_samples), AV_OPT_TYPE_INT64, {.i64 = 44100}, 1, INT32_MAX/10, FLAGS },
- { "ns", "set number of samples for cross fade duration", OFFSET(nb_samples), AV_OPT_TYPE_INT64, {.i64 = 44100}, 1, INT32_MAX/10, FLAGS },
- { "duration", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, 60000000, FLAGS },
- { "d", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, 60000000, FLAGS },
+ { "nb_samples", "set number of samples for cross fade duration", OFFSET(nb_samples), AV_OPT_TYPE_INT64, {.i64 = 44100}, 1, INT64_MAX/2, FLAGS },
+ { "ns", "set number of samples for cross fade duration", OFFSET(nb_samples), AV_OPT_TYPE_INT64, {.i64 = 44100}, 1, INT64_MAX/2, FLAGS },
+ { "duration", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX/2, FLAGS },
+ { "d", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64 = 0 }, 0, INT64_MAX/2, FLAGS },
{ "overlap", "overlap 1st stream end with 2nd stream start", OFFSET(overlap), AV_OPT_TYPE_BOOL, {.i64 = 1 }, 0, 1, FLAGS },
{ "o", "overlap 1st stream end with 2nd stream start", OFFSET(overlap), AV_OPT_TYPE_BOOL, {.i64 = 1 }, 0, 1, FLAGS },
{ "curve1", "set fade curve type for 1st stream", OFFSET(curve), AV_OPT_TYPE_INT, {.i64 = TRI }, NONE, NB_CURVES - 1, FLAGS, .unit = "curve" },
@@ -498,13 +507,15 @@ AVFILTER_DEFINE_CLASS(acrossfade);
static void crossfade_samples_## name ##p(uint8_t **dst, uint8_t * const *cf0, \
uint8_t * const *cf1, \
int nb_samples, int channels, \
- int curve0, int curve1) \
+ int curve0, int curve1, \
+ int64_t offset, int64_t total) \
{ \
int i, c; \
\
for (i = 0; i < nb_samples; i++) { \
- double gain0 = fade_gain(curve0, nb_samples - 1 - i, nb_samples,0.,1.);\
- double gain1 = fade_gain(curve1, i, nb_samples, 0., 1.); \
+ int64_t pos = offset + i; \
+ double gain0 = fade_gain(curve0, total - 1 - pos, total, 0., 1.); \
+ double gain1 = fade_gain(curve1, pos, total, 0., 1.); \
for (c = 0; c < channels; c++) { \
type *d = (type *)dst[c]; \
const type *s0 = (type *)cf0[c]; \
@@ -519,7 +530,8 @@ static void crossfade_samples_## name ##p(uint8_t **dst, uint8_t * const *cf0, \
static void crossfade_samples_## name (uint8_t **dst, uint8_t * const *cf0, \
uint8_t * const *cf1, \
int nb_samples, int channels, \
- int curve0, int curve1) \
+ int curve0, int curve1, \
+ int64_t offset, int64_t total) \
{ \
type *d = (type *)dst[0]; \
const type *s0 = (type *)cf0[0]; \
@@ -527,8 +539,9 @@ static void crossfade_samples_## name (uint8_t **dst, uint8_t * const *cf0, \
int i, c, k = 0; \
\
for (i = 0; i < nb_samples; i++) { \
- double gain0 = fade_gain(curve0, nb_samples - 1-i,nb_samples,0.,1.);\
- double gain1 = fade_gain(curve1, i, nb_samples, 0., 1.); \
+ int64_t pos = offset + i; \
+ double gain0 = fade_gain(curve0, total - 1 - pos, total, 0., 1.); \
+ double gain1 = fade_gain(curve1, pos, total, 0., 1.); \
for (c = 0; c < channels; c++, k++) \
d[k] = s0[k] * gain0 + s1[k] * gain1; \
} \
@@ -557,143 +570,308 @@ static int pass_frame(AVFilterLink *inlink, AVFilterLink *outlink, int64_t *pts)
return ff_filter_frame(outlink, in);
}
-static int pass_samples(AVFilterLink *inlink, AVFilterLink *outlink, unsigned nb_samples, int64_t *pts)
+/* Copy samples from frame to ring buffer (circular overwrite) */
+static void copy_to_ring_buffer(AudioFadeContext *s, AVFrame *frame, int nb_channels, int is_planar)
{
- AVFrame *in;
- int ret = ff_inlink_consume_samples(inlink, nb_samples, nb_samples, &in);
- if (ret < 0)
- return ret;
- av_assert1(ret);
- in->pts = *pts;
- *pts += av_rescale_q(in->nb_samples,
- (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
- return ff_filter_frame(outlink, in);
+ int samples_to_copy = frame->nb_samples;
+ int bytes_per_sample = av_get_bytes_per_sample(frame->format);
+
+ for (int i = 0; i < samples_to_copy; i++) {
+ int64_t dst_pos = s->ring_write_pos % s->nb_samples;
+
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(s->ring_buf->extended_data[c] + dst_pos * bytes_per_sample,
+ frame->extended_data[c] + i * bytes_per_sample,
+ bytes_per_sample);
+ }
+ } else {
+ memcpy(s->ring_buf->extended_data[0] + dst_pos * nb_channels * bytes_per_sample,
+ frame->extended_data[0] + i * nb_channels * bytes_per_sample,
+ nb_channels * bytes_per_sample);
+ }
+
+ s->ring_write_pos++;
+ }
+ s->ring_filled = FFMIN(s->ring_filled + samples_to_copy, s->nb_samples);
}
-static int pass_crossfade(AVFilterContext *ctx, const int idx0, const int idx1)
+/* Read samples from ring buffer starting at crossfade_pos (circular read) */
+static void read_from_ring_buffer(AudioFadeContext *s, uint8_t **dst, int nb_samples,
+ int nb_channels, int is_planar, int bytes_per_sample)
+{
+ /* The ring buffer contains the last ring_filled samples from input 0.
+ * We need to read starting from crossfade_pos within those samples.
+ * ring_write_pos points to where the NEXT write would go, so the oldest
+ * valid sample is at (ring_write_pos - ring_filled) % nb_samples */
+ int64_t oldest_pos = (s->ring_write_pos - s->ring_filled + s->nb_samples) % s->nb_samples;
+ int64_t read_start = (oldest_pos + s->crossfade_pos) % s->nb_samples;
+
+ for (int i = 0; i < nb_samples; i++) {
+ int64_t src_pos = (read_start + i) % s->nb_samples;
+
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(dst[c] + i * bytes_per_sample,
+ s->ring_buf->extended_data[c] + src_pos * bytes_per_sample,
+ bytes_per_sample);
+ }
+ } else {
+ memcpy(dst[0] + i * nb_channels * bytes_per_sample,
+ s->ring_buf->extended_data[0] + src_pos * nb_channels * bytes_per_sample,
+ nb_channels * bytes_per_sample);
+ }
+ }
+}
+
+/* Process crossfade for non-overlap mode (fade-out then fade-in) */
+static int process_non_overlap_crossfade(AVFilterContext *ctx, const int idx0, const int idx1)
{
AudioFadeContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
- AVFrame *out, *cf[2] = { NULL };
- int ret;
-
AVFilterLink *in0 = ctx->inputs[idx0];
AVFilterLink *in1 = ctx->inputs[idx1];
- int queued_samples0 = ff_inlink_queued_samples(in0);
- int queued_samples1 = ff_inlink_queued_samples(in1);
+ AVFrame *out, *cf = NULL;
+ int ret;
- /* Limit to the relevant region */
- av_assert1(queued_samples0 <= s->nb_samples);
- if (ff_outlink_get_status(in1) && idx1 < s->nb_inputs - 1)
- queued_samples1 /= 2; /* reserve second half for next fade-out */
- queued_samples1 = FFMIN(queued_samples1, s->nb_samples);
+ /* Phase 1: Fade-out from ring buffer */
+ if (s->crossfade_pos < s->ring_filled) {
+ int64_t remaining = s->ring_filled - s->crossfade_pos;
+ int process_samples = FFMIN(remaining, 4096); /* Process in chunks */
+ int bytes_per_sample = av_get_bytes_per_sample(outlink->format);
+ int is_planar = av_sample_fmt_is_planar(outlink->format);
+ int nb_channels = outlink->ch_layout.nb_channels;
- if (s->overlap) {
- int nb_samples = FFMIN(queued_samples0, queued_samples1);
- if (nb_samples < s->nb_samples) {
- av_log(ctx, AV_LOG_WARNING, "Input %d duration (%d samples) "
- "is shorter than crossfade duration (%"PRId64" samples), "
- "crossfade will be shorter by %"PRId64" samples.\n",
- queued_samples0 <= queued_samples1 ? idx0 : idx1,
- nb_samples, s->nb_samples, s->nb_samples - nb_samples);
-
- if (queued_samples0 > nb_samples) {
- ret = pass_samples(in0, outlink, queued_samples0 - nb_samples, &s->pts);
- if (ret < 0)
- return ret;
- }
-
- if (!nb_samples)
- return 0; /* either input was completely empty */
- }
-
- av_assert1(nb_samples > 0);
- out = ff_get_audio_buffer(outlink, nb_samples);
+ out = ff_get_audio_buffer(outlink, process_samples);
if (!out)
return AVERROR(ENOMEM);
- ret = ff_inlink_consume_samples(in0, nb_samples, nb_samples, &cf[0]);
- if (ret < 0) {
+ /* Allocate temp buffer for ring buffer read */
+ AVFrame *temp = ff_get_audio_buffer(outlink, process_samples);
+ if (!temp) {
av_frame_free(&out);
- return ret;
- }
-
- ret = ff_inlink_consume_samples(in1, nb_samples, nb_samples, &cf[1]);
- if (ret < 0) {
- av_frame_free(&cf[0]);
- av_frame_free(&out);
- return ret;
- }
-
- s->crossfade_samples(out->extended_data, cf[0]->extended_data,
- cf[1]->extended_data, nb_samples,
- out->ch_layout.nb_channels, s->curve, s->curve2);
- out->pts = s->pts;
- s->pts += av_rescale_q(nb_samples,
- (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
- av_frame_free(&cf[0]);
- av_frame_free(&cf[1]);
- return ff_filter_frame(outlink, out);
- } else {
- if (queued_samples0 < s->nb_samples) {
- av_log(ctx, AV_LOG_WARNING, "Input %d duration (%d samples) "
- "is shorter than crossfade duration (%"PRId64" samples), "
- "fade-out will be shorter by %"PRId64" samples.\n",
- idx0, queued_samples0, s->nb_samples,
- s->nb_samples - queued_samples0);
- if (!queued_samples0)
- goto fade_in;
- }
-
- out = ff_get_audio_buffer(outlink, queued_samples0);
- if (!out)
return AVERROR(ENOMEM);
-
- ret = ff_inlink_consume_samples(in0, queued_samples0, queued_samples0, &cf[0]);
- if (ret < 0) {
- av_frame_free(&out);
- return ret;
}
- s->fade_samples(out->extended_data, cf[0]->extended_data, cf[0]->nb_samples,
- outlink->ch_layout.nb_channels, -1, cf[0]->nb_samples - 1, cf[0]->nb_samples, s->curve, 0., 1.);
+ read_from_ring_buffer(s, temp->extended_data, process_samples,
+ nb_channels, is_planar, bytes_per_sample);
+
+ /* Apply fade-out */
+ s->fade_samples(out->extended_data, temp->extended_data, process_samples,
+ nb_channels, -1, s->ring_filled - 1 - s->crossfade_pos,
+ s->ring_filled, s->curve, 0., 1.);
+
+ s->crossfade_pos += process_samples;
out->pts = s->pts;
- s->pts += av_rescale_q(cf[0]->nb_samples,
+ s->pts += av_rescale_q(process_samples,
(AVRational){ 1, outlink->sample_rate }, outlink->time_base);
- av_frame_free(&cf[0]);
- ret = ff_filter_frame(outlink, out);
- if (ret < 0)
- return ret;
-
- fade_in:
- if (queued_samples1 < s->nb_samples) {
- av_log(ctx, AV_LOG_WARNING, "Input %d duration (%d samples) "
- "is shorter than crossfade duration (%"PRId64" samples), "
- "fade-in will be shorter by %"PRId64" samples.\n",
- idx1, ff_inlink_queued_samples(in1), s->nb_samples,
- s->nb_samples - queued_samples1);
- if (!queued_samples1)
- return 0;
- }
-
- out = ff_get_audio_buffer(outlink, queued_samples1);
- if (!out)
- return AVERROR(ENOMEM);
-
- ret = ff_inlink_consume_samples(in1, queued_samples1, queued_samples1, &cf[1]);
- if (ret < 0) {
- av_frame_free(&out);
- return ret;
- }
-
- s->fade_samples(out->extended_data, cf[1]->extended_data, cf[1]->nb_samples,
- outlink->ch_layout.nb_channels, 1, 0, cf[1]->nb_samples, s->curve2, 0., 1.);
- out->pts = s->pts;
- s->pts += av_rescale_q(cf[1]->nb_samples,
- (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
- av_frame_free(&cf[1]);
+ av_frame_free(&temp);
return ff_filter_frame(outlink, out);
}
+
+ /* Phase 2: Fade-in from input 1 */
+ s->passthrough_done = 1; /* Mark fade-out complete */
+
+ if (!ff_inlink_queued_samples(in1)) {
+ if (ff_outlink_get_status(in1))
+ return 0; /* Input 1 is empty */
+ FF_FILTER_FORWARD_WANTED(outlink, in1);
+ return FFERROR_NOT_READY;
+ }
+
+ ret = ff_inlink_consume_frame(in1, &cf);
+ if (ret < 0)
+ return ret;
+ if (!ret) {
+ FF_FILTER_FORWARD_WANTED(outlink, in1);
+ return FFERROR_NOT_READY;
+ }
+
+ int64_t fadein_pos = s->crossfade_pos - s->ring_filled; /* Position in fade-in */
+ int64_t fadein_remaining = s->nb_samples - fadein_pos;
+
+ if (fadein_pos < s->nb_samples && fadein_remaining > 0) {
+ int process_samples = FFMIN(cf->nb_samples, fadein_remaining);
+
+ out = ff_get_audio_buffer(outlink, cf->nb_samples);
+ if (!out) {
+ av_frame_free(&cf);
+ return AVERROR(ENOMEM);
+ }
+
+ /* Apply fade-in to the portion within crossfade region */
+ s->fade_samples(out->extended_data, cf->extended_data, process_samples,
+ outlink->ch_layout.nb_channels, 1, fadein_pos,
+ s->nb_samples, s->curve2, 0., 1.);
+
+ /* Copy remainder unchanged if frame extends past crossfade */
+ if (cf->nb_samples > process_samples) {
+ int bytes_per_sample = av_get_bytes_per_sample(outlink->format);
+ int is_planar = av_sample_fmt_is_planar(outlink->format);
+ int nb_channels = outlink->ch_layout.nb_channels;
+
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(out->extended_data[c] + process_samples * bytes_per_sample,
+ cf->extended_data[c] + process_samples * bytes_per_sample,
+ (cf->nb_samples - process_samples) * bytes_per_sample);
+ }
+ } else {
+ memcpy(out->extended_data[0] + process_samples * nb_channels * bytes_per_sample,
+ cf->extended_data[0] + process_samples * nb_channels * bytes_per_sample,
+ (cf->nb_samples - process_samples) * nb_channels * bytes_per_sample);
+ }
+ }
+
+ s->crossfade_pos += cf->nb_samples;
+ out->pts = s->pts;
+ s->pts += av_rescale_q(cf->nb_samples,
+ (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+ av_frame_free(&cf);
+
+ /* Check if crossfade is complete */
+ if (s->crossfade_pos >= s->ring_filled + s->nb_samples) {
+ s->crossfade_active = 0;
+ }
+
+ return ff_filter_frame(outlink, out);
+ }
+
+ /* Past crossfade region - pass through */
+ s->crossfade_active = 0;
+ cf->pts = s->pts;
+ s->pts += av_rescale_q(cf->nb_samples,
+ (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+ return ff_filter_frame(outlink, cf);
+}
+
+/* Process one frame of overlapping crossfade using ring buffer + input 1 */
+static int process_overlap_crossfade(AVFilterContext *ctx, const int idx1)
+{
+ AudioFadeContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+ AVFilterLink *in1 = ctx->inputs[idx1];
+ AVFrame *out, *cf1 = NULL;
+ int ret;
+
+ /* Check if crossfade is complete */
+ if (s->crossfade_pos >= s->ring_filled) {
+ s->crossfade_active = 0;
+ return 0;
+ }
+
+ /* Get frame from input 1 */
+ if (!ff_inlink_queued_samples(in1)) {
+ if (ff_outlink_get_status(in1)) {
+ /* Input 1 ended early - output remaining ring buffer with fade-out */
+ int64_t remaining = s->ring_filled - s->crossfade_pos;
+ if (remaining <= 0) {
+ s->crossfade_active = 0;
+ return 0;
+ }
+ int process_samples = FFMIN(remaining, 4096);
+ int bytes_per_sample = av_get_bytes_per_sample(outlink->format);
+ int is_planar = av_sample_fmt_is_planar(outlink->format);
+ int nb_channels = outlink->ch_layout.nb_channels;
+
+ out = ff_get_audio_buffer(outlink, process_samples);
+ if (!out)
+ return AVERROR(ENOMEM);
+
+ AVFrame *temp = ff_get_audio_buffer(outlink, process_samples);
+ if (!temp) {
+ av_frame_free(&out);
+ return AVERROR(ENOMEM);
+ }
+
+ read_from_ring_buffer(s, temp->extended_data, process_samples,
+ nb_channels, is_planar, bytes_per_sample);
+
+ s->fade_samples(out->extended_data, temp->extended_data, process_samples,
+ nb_channels, -1, s->ring_filled - 1 - s->crossfade_pos,
+ s->ring_filled, s->curve, 0., 1.);
+
+ s->crossfade_pos += process_samples;
+ out->pts = s->pts;
+ s->pts += av_rescale_q(process_samples,
+ (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+ av_frame_free(&temp);
+ return ff_filter_frame(outlink, out);
+ }
+ FF_FILTER_FORWARD_WANTED(outlink, in1);
+ return FFERROR_NOT_READY;
+ }
+
+ ret = ff_inlink_consume_frame(in1, &cf1);
+ if (ret < 0)
+ return ret;
+ if (!ret) {
+ FF_FILTER_FORWARD_WANTED(outlink, in1);
+ return FFERROR_NOT_READY;
+ }
+
+ int64_t remaining_crossfade = s->ring_filled - s->crossfade_pos;
+ int crossfade_samples = FFMIN(cf1->nb_samples, remaining_crossfade);
+ int passthrough_samples = cf1->nb_samples - crossfade_samples;
+ int bytes_per_sample = av_get_bytes_per_sample(outlink->format);
+ int is_planar = av_sample_fmt_is_planar(outlink->format);
+ int nb_channels = outlink->ch_layout.nb_channels;
+
+ out = ff_get_audio_buffer(outlink, cf1->nb_samples);
+ if (!out) {
+ av_frame_free(&cf1);
+ return AVERROR(ENOMEM);
+ }
+
+ if (crossfade_samples > 0) {
+ /* Allocate temp buffer for ring buffer samples */
+ AVFrame *temp = ff_get_audio_buffer(outlink, crossfade_samples);
+ if (!temp) {
+ av_frame_free(&out);
+ av_frame_free(&cf1);
+ return AVERROR(ENOMEM);
+ }
+
+ read_from_ring_buffer(s, temp->extended_data, crossfade_samples,
+ nb_channels, is_planar, bytes_per_sample);
+
+ /* Apply crossfade */
+ s->crossfade_samples(out->extended_data, temp->extended_data,
+ cf1->extended_data, crossfade_samples,
+ nb_channels, s->curve, s->curve2,
+ s->crossfade_pos, s->ring_filled);
+
+ av_frame_free(&temp);
+ }
+
+ /* Copy any passthrough samples after crossfade region */
+ if (passthrough_samples > 0) {
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(out->extended_data[c] + crossfade_samples * bytes_per_sample,
+ cf1->extended_data[c] + crossfade_samples * bytes_per_sample,
+ passthrough_samples * bytes_per_sample);
+ }
+ } else {
+ memcpy(out->extended_data[0] + crossfade_samples * nb_channels * bytes_per_sample,
+ cf1->extended_data[0] + crossfade_samples * nb_channels * bytes_per_sample,
+ passthrough_samples * nb_channels * bytes_per_sample);
+ }
+ }
+
+ s->crossfade_pos += crossfade_samples;
+ out->pts = s->pts;
+ s->pts += av_rescale_q(cf1->nb_samples,
+ (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+
+ av_frame_free(&cf1);
+
+ /* Check if crossfade is complete */
+ if (s->crossfade_pos >= s->ring_filled) {
+ s->crossfade_active = 0;
+ }
+
+ return ff_filter_frame(outlink, out);
}
static int activate(AVFilterContext *ctx)
@@ -706,8 +884,8 @@ static int activate(AVFilterContext *ctx)
FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
+ /* Last active input - just pass through */
if (idx0 == s->nb_inputs - 1) {
- /* Last active input, read until EOF */
if (ff_inlink_queued_frames(in0))
return pass_frame(in0, outlink, &s->pts);
FF_FILTER_FORWARD_STATUS(in0, outlink);
@@ -716,45 +894,195 @@ static int activate(AVFilterContext *ctx)
}
AVFilterLink *in1 = ctx->inputs[idx1];
- int queued_samples0 = ff_inlink_queued_samples(in0);
- if (queued_samples0 > s->nb_samples) {
- AVFrame *frame = ff_inlink_peek_frame(in0, 0);
- if (queued_samples0 - s->nb_samples >= frame->nb_samples)
- return pass_frame(in0, outlink, &s->pts);
+
+ /* If crossfade is active, process it */
+ if (s->crossfade_active) {
+ int ret;
+ if (s->overlap) {
+ ret = process_overlap_crossfade(ctx, idx1);
+ } else {
+ ret = process_non_overlap_crossfade(ctx, idx0, idx1);
+ }
+
+ if (ret < 0)
+ return ret;
+
+ /* If crossfade completed, move to next input pair */
+ if (!s->crossfade_active) {
+ s->xfade_idx++;
+ s->passthrough_done = 0;
+ s->crossfade_pos = 0;
+ s->ring_filled = 0;
+ s->ring_write_pos = 0;
+ ff_filter_set_ready(ctx, 10);
+ }
+ return ret;
}
- /* Continue reading until EOF */
- if (ff_outlink_get_status(in0)) {
- if (queued_samples0 > s->nb_samples)
- return pass_samples(in0, outlink, queued_samples0 - s->nb_samples, &s->pts);
- } else {
+ /* Allocate ring buffer if needed */
+ if (!s->ring_buf) {
+ s->ring_buf = ff_get_audio_buffer(outlink, s->nb_samples);
+ if (!s->ring_buf)
+ return AVERROR(ENOMEM);
+ }
+
+ /* Check if input 0 has reached EOF */
+ int in0_eof = ff_outlink_get_status(in0);
+
+ if (!in0_eof) {
+ /* Still receiving from input 0 */
+ if (ff_inlink_queued_frames(in0)) {
+ AVFrame *frame;
+ int ret = ff_inlink_consume_frame(in0, &frame);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ int bytes_per_sample = av_get_bytes_per_sample(outlink->format);
+ int is_planar = av_sample_fmt_is_planar(outlink->format);
+ int nb_channels = outlink->ch_layout.nb_channels;
+
+ if (s->overlap) {
+ /* For overlap mode: delay output by nb_samples.
+ * We buffer samples in ring_buf and only output when we have
+ * more than nb_samples buffered (the excess is safe to output).
+ *
+ * Strategy:
+ * 1. Add new frame samples to ring buffer
+ * 2. If ring buffer has more than nb_samples, output the excess
+ * 3. Keep exactly nb_samples in ring buffer for crossfade
+ */
+ int64_t total_after_add = s->ring_filled + frame->nb_samples;
+
+ if (total_after_add <= s->nb_samples) {
+ /* Still filling up - just buffer, don't output */
+ copy_to_ring_buffer(s, frame, nb_channels, is_planar);
+ av_frame_free(&frame);
+ return 0;
+ } else {
+ /* We have excess samples to output */
+ int64_t excess = total_after_add - s->nb_samples;
+
+ /* The excess comes from the oldest samples in ring buffer
+ * plus potentially some from the new frame */
+ int64_t from_ring = FFMIN(excess, s->ring_filled);
+ int64_t from_frame = excess - from_ring;
+
+ if (excess > 0) {
+ AVFrame *out = ff_get_audio_buffer(outlink, excess);
+ if (!out) {
+ av_frame_free(&frame);
+ return AVERROR(ENOMEM);
+ }
+
+ /* Copy from ring buffer first */
+ if (from_ring > 0) {
+ int64_t oldest_pos = (s->ring_write_pos - s->ring_filled + s->nb_samples) % s->nb_samples;
+ for (int i = 0; i < from_ring; i++) {
+ int64_t src_pos = (oldest_pos + i) % s->nb_samples;
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(out->extended_data[c] + i * bytes_per_sample,
+ s->ring_buf->extended_data[c] + src_pos * bytes_per_sample,
+ bytes_per_sample);
+ }
+ } else {
+ memcpy(out->extended_data[0] + i * nb_channels * bytes_per_sample,
+ s->ring_buf->extended_data[0] + src_pos * nb_channels * bytes_per_sample,
+ nb_channels * bytes_per_sample);
+ }
+ }
+ /* Adjust ring buffer: remove the samples we just output */
+ s->ring_filled -= from_ring;
+ }
+
+ /* Copy from new frame */
+ if (from_frame > 0) {
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(out->extended_data[c] + from_ring * bytes_per_sample,
+ frame->extended_data[c],
+ from_frame * bytes_per_sample);
+ }
+ } else {
+ memcpy(out->extended_data[0] + from_ring * nb_channels * bytes_per_sample,
+ frame->extended_data[0],
+ from_frame * nb_channels * bytes_per_sample);
+ }
+ }
+
+ out->pts = s->pts;
+ s->pts += av_rescale_q(excess,
+ (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+
+ /* Now add remaining samples from frame to ring buffer */
+ int remaining = frame->nb_samples - from_frame;
+ if (remaining > 0) {
+ /* Create a temporary view of the remaining samples */
+ for (int i = 0; i < remaining; i++) {
+ int64_t dst_pos = s->ring_write_pos % s->nb_samples;
+ int src_idx = from_frame + i;
+ if (is_planar) {
+ for (int c = 0; c < nb_channels; c++) {
+ memcpy(s->ring_buf->extended_data[c] + dst_pos * bytes_per_sample,
+ frame->extended_data[c] + src_idx * bytes_per_sample,
+ bytes_per_sample);
+ }
+ } else {
+ memcpy(s->ring_buf->extended_data[0] + dst_pos * nb_channels * bytes_per_sample,
+ frame->extended_data[0] + src_idx * nb_channels * bytes_per_sample,
+ nb_channels * bytes_per_sample);
+ }
+ s->ring_write_pos++;
+ }
+ s->ring_filled += remaining;
+ }
+
+ av_frame_free(&frame);
+ return ff_filter_frame(outlink, out);
+ }
+ }
+ } else {
+ /* Non-overlap mode: pass through immediately, keep copy in ring buffer */
+ copy_to_ring_buffer(s, frame, nb_channels, is_planar);
+
+ frame->pts = s->pts;
+ s->pts += av_rescale_q(frame->nb_samples,
+ (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+ return ff_filter_frame(outlink, frame);
+ }
+ }
+ }
FF_FILTER_FORWARD_WANTED(outlink, in0);
return FFERROR_NOT_READY;
}
- /* At this point, in0 has reached EOF with no more samples remaining
- * except those that we want to crossfade */
- av_assert0(queued_samples0 <= s->nb_samples);
- int queued_samples1 = ff_inlink_queued_samples(in1);
+ /* Input 0 has reached EOF - start crossfade */
+ if (!s->crossfade_active) {
+ /* Handle case where input 0 was shorter than crossfade duration */
+ if (s->ring_filled < s->nb_samples && s->ring_filled > 0) {
+ av_log(ctx, AV_LOG_WARNING, "Input %d duration (%"PRId64" samples) "
+ "is shorter than crossfade duration (%"PRId64" samples), "
+ "crossfade will be shorter.\n",
+ idx0, s->ring_filled, s->nb_samples);
+ }
- /* If this clip is sandwiched between two other clips, buffer at least
- * twice the total crossfade duration to ensure that we won't reach EOF
- * during the second fade (in which case we would shorten the fade) */
- int needed_samples = s->nb_samples;
- if (idx1 < s->nb_inputs - 1)
- needed_samples *= 2;
+ if (s->ring_filled == 0) {
+ /* Input 0 was empty, skip to next */
+ s->xfade_idx++;
+ ff_filter_set_ready(ctx, 10);
+ return 0;
+ }
- if (queued_samples1 >= needed_samples || ff_outlink_get_status(in1)) {
- /* The first filter may EOF before delivering any samples, in which
- * case it's possible for pass_crossfade() to be a no-op. Just ensure
- * the activate() function runs again after incrementing the index to
- * ensure we correctly move on to the next input in that case. */
- s->xfade_idx++;
+ s->crossfade_active = 1;
+ s->crossfade_pos = 0;
ff_filter_set_ready(ctx, 10);
- return pass_crossfade(ctx, idx0, idx1);
+ }
+
+ /* Process crossfade */
+ if (s->overlap) {
+ return process_overlap_crossfade(ctx, idx1);
} else {
- FF_FILTER_FORWARD_WANTED(outlink, in1);
- return FFERROR_NOT_READY;
+ return process_non_overlap_crossfade(ctx, idx0, idx1);
}
}
@@ -779,6 +1107,12 @@ static av_cold int acrossfade_init(AVFilterContext *ctx)
return 0;
}
+static av_cold void acrossfade_uninit(AVFilterContext *ctx)
+{
+ AudioFadeContext *s = ctx->priv;
+ av_frame_free(&s->ring_buf);
+}
+
static int acrossfade_config_output(AVFilterLink *outlink)
{
AVFilterContext *ctx = outlink->src;
@@ -817,6 +1151,7 @@ const FFFilter ff_af_acrossfade = {
.p.flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
.priv_size = sizeof(AudioFadeContext),
.init = acrossfade_init,
+ .uninit = acrossfade_uninit,
.activate = activate,
FILTER_OUTPUTS(avfilter_af_acrossfade_outputs),
FILTER_SAMPLEFMTS_ARRAY(sample_fmts),
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org