[FFmpeg-devel] [PR] libavfilter/volume: do fade when adjusting the volume. (PR #21709)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PR] libavfilter/volume: do fade when adjusting the volume. (PR #21709)
@ 2026-02-10  4:18 cenzhanquan1 via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: cenzhanquan1 via ffmpeg-devel @ 2026-02-10  4:18 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: cenzhanquan1

PR #21709 opened by cenzhanquan1
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21709
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21709.patch

1. add simple fade when volume.
2. do fade when adjust volume to maximal.

when to adjust volume we hope that the audio sample is smooth
we need to calculate the gradient step between each sample based
on the total change of the gradient (i.e. dst_volume - src_volume)
and the number of samples (nb_samples) and apply it to the target sample.

Signed-off-by: cenzhanquan1 <cenzhanquan1@xiaomi.com>


>From f7545e0bf8979d3a91b6f9484b656abaafa50691 Mon Sep 17 00:00:00 2001
From: cenzhanquan1 <cenzhanquan1@xiaomi.com>
Date: Tue, 10 Feb 2026 11:29:12 +0800
Subject: [PATCH] libavfilter/volume: do fade when adjusting the volume.

1. add simple fade when volume.
2. do fade when adjust volume to maximal.

when to adjust volume we hope that the audio sample is smooth
we need to calculate the gradient step between each sample based
on the total change of the gradient (i.e. dst_volume - src_volume)
and the number of samples (nb_samples) and apply it to the target sample.

Signed-off-by: cenzhanquan1 <cenzhanquan1@xiaomi.com>
---
 libavfilter/af_volume.c | 211 ++++++++++++++++++++++++++++++++++++----
 libavfilter/af_volume.h |   9 ++
 2 files changed, 201 insertions(+), 19 deletions(-)

diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c
index cd40014345..7016c1950d 100644
--- a/libavfilter/af_volume.c
+++ b/libavfilter/af_volume.c
@@ -84,6 +84,8 @@ static const AVOption volume_options[] = {
             OFFSET(replaygain_preamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, -15.0, 15.0, A|F },
     { "replaygain_noclip", "Apply replaygain clipping prevention",
             OFFSET(replaygain_noclip), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, A|F },
+    { "transition", "transition time in seconds when volume changes via command",
+            OFFSET(transition), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, 0, 60.0, A|F|T },
     { NULL }
 };
 
@@ -170,6 +172,22 @@ static inline void scale_samples_u8(uint8_t *dst, const uint8_t *src,
         dst[i] = av_clip_uint8(((((int64_t)src[i] - 128) * volume + 128) >> 8) + 128);
 }
 
+static inline void fade_samples_u8(uint8_t *dst, const uint8_t *src,
+                                   int nb_samples, int chs, int dst_volume, int src_volume)
+{
+    int i, j, k = 0;
+    int64_t sample;
+    int64_t step;
+
+    step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples;
+    for (i = 0; i < nb_samples; i++) {
+        for (j = 0; j < chs; j++, k++) {
+            sample = (int64_t)(src[k] - 128) * (src_volume + (step * i) >> 8) + 128;
+            dst[k] = av_clip_uint8((sample >> 8) + 128);
+        }
+    }
+}
+
 static inline void scale_samples_u8_small(uint8_t *dst, const uint8_t *src,
                                           int nb_samples, int volume)
 {
@@ -178,6 +196,22 @@ static inline void scale_samples_u8_small(uint8_t *dst, const uint8_t *src,
         dst[i] = av_clip_uint8((((src[i] - 128) * volume + 128) >> 8) + 128);
 }
 
+static inline void fade_samples_u8_small(uint8_t *dst, const uint8_t *src,
+                                         int nb_samples, int chs, int dst_volume, int src_volume)
+{
+    int i, j, k = 0;
+    int sample;
+    int step;
+
+    step = ((dst_volume - src_volume) * 256) / nb_samples;
+    for (i = 0; i < nb_samples; i++) {
+        for (j = 0; j < chs; j++, k++) {
+            sample = (src[k] - 128) * (src_volume + (step * i >> 8)) + 128;
+            dst[k] = av_clip_uint8((sample >> 8) + 128);
+        }
+    }
+}
+
 static inline void scale_samples_s16(uint8_t *dst, const uint8_t *src,
                                      int nb_samples, int volume)
 {
@@ -188,6 +222,22 @@ static inline void scale_samples_s16(uint8_t *dst, const uint8_t *src,
         smp_dst[i] = av_clip_int16(((int64_t)smp_src[i] * volume + 128) >> 8);
 }
 
+static inline void fade_samples_s16(uint8_t *dst, const uint8_t *src,
+                                    int nb_samples, int chs, int dst_volume, int src_volume)
+{
+    const int16_t *smp_src = (const int16_t *)src;
+    int16_t *smp_dst = (int16_t *)dst;
+    int i, j, k = 0;
+    int64_t step;
+
+    step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples;
+    for (i = 0; i < nb_samples; i++) {
+        for (j = 0; j < chs; j++, k++) {
+            smp_dst[k] = av_clip_int16((int64_t)(smp_src[k] * (src_volume + (step * i >> 8)) + 128) >> 8);
+        }
+    }
+}
+
 static inline void scale_samples_s16_small(uint8_t *dst, const uint8_t *src,
                                            int nb_samples, int volume)
 {
@@ -198,6 +248,22 @@ static inline void scale_samples_s16_small(uint8_t *dst, const uint8_t *src,
         smp_dst[i] = av_clip_int16((smp_src[i] * volume + 128) >> 8);
 }
 
+static inline void fade_samples_s16_small(uint8_t *dst, const uint8_t *src,
+                                          int nb_samples, int chs, int dst_volume, int src_volume)
+{
+    const int16_t *smp_src = (const int16_t *)src;
+    int16_t *smp_dst = (int16_t *)dst;
+    int i, j, k = 0;
+    int step;
+
+    step = (((int64_t)dst_volume - src_volume) * 256) / nb_samples;
+    for (i = 0; i < nb_samples; i++) {
+        for (j = 0; j < chs; j++, k++) {
+            smp_dst[k] = av_clip_int16((int64_t)(smp_src[k] * (src_volume + (step * i >> 8)) + 128) >> 8);
+        }
+    }
+}
+
 static inline void scale_samples_s32(uint8_t *dst, const uint8_t *src,
                                      int nb_samples, int volume)
 {
@@ -208,25 +274,48 @@ static inline void scale_samples_s32(uint8_t *dst, const uint8_t *src,
         smp_dst[i] = av_clipl_int32((((int64_t)smp_src[i] * volume + 128) >> 8));
 }
 
+static inline void fade_samples_s32(uint8_t *dst, const uint8_t *src,
+                                    int nb_samples, int chs, int dst_volume, int src_volume)
+{
+    const int32_t *smp_src = (const int32_t *)src;
+    int32_t *smp_dst = (int32_t *)dst;
+    int i, j, k = 0;
+    int64_t step;
+
+    step = (((int64_t)dst_volume - src_volume)  * 256) / nb_samples;
+    for (i = 0; i < nb_samples; i++) {
+        for (j = 0; j < chs; j++, k++) {
+            smp_dst[k] = av_clipl_int32((int64_t)(smp_src[k] * (src_volume + (step * i >> 8)) + 128) >> 8);
+        }
+    }
+}
+
 static av_cold void volume_init(VolumeContext *vol)
 {
     vol->samples_align = 1;
 
     switch (av_get_packed_sample_fmt(vol->sample_fmt)) {
     case AV_SAMPLE_FMT_U8:
-        if (vol->volume_i < 0x1000000)
+        if (vol->volume_i < 0x1000000) {
             vol->scale_samples = scale_samples_u8_small;
-        else
+            vol->fade_samples  = fade_samples_u8_small;
+        } else {
             vol->scale_samples = scale_samples_u8;
+            vol->fade_samples  = fade_samples_u8;
+        }
         break;
     case AV_SAMPLE_FMT_S16:
-        if (vol->volume_i < 0x10000)
+        if (vol->volume_i < 0x10000) {
             vol->scale_samples = scale_samples_s16_small;
-        else
+            vol->fade_samples  = fade_samples_s16_small;
+        } else {
             vol->scale_samples = scale_samples_s16;
+            vol->fade_samples  = fade_samples_s16;
+        }
         break;
     case AV_SAMPLE_FMT_S32:
         vol->scale_samples = scale_samples_s32;
+        vol->fade_samples  = fade_samples_s32;
         break;
     case AV_SAMPLE_FMT_FLT:
         vol->samples_align = 4;
@@ -313,8 +402,18 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
     if (!strcmp(cmd, "volume")) {
         if ((ret = set_expr(&vol->volume_pexpr, args, ctx)) < 0)
             return ret;
-        if (vol->eval_mode == EVAL_MODE_ONCE)
+        if (vol->eval_mode == EVAL_MODE_ONCE) {
+            vol->volume_isrc = vol->volume_i;
+            vol->volume_src  = vol->volume;
             set_volume(ctx);
+            if (vol->transition > 0 && vol->var_values[VAR_SAMPLE_RATE] > 0) {
+                vol->fade_total     = (int)(vol->transition * vol->var_values[VAR_SAMPLE_RATE] + 0.5);
+                vol->fade_remaining = vol->fade_total;
+            } else {
+                vol->fade_total     = 0;
+                vol->fade_remaining = 0;
+            }
+        }
     }
 
     return ret;
@@ -380,7 +479,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
     if (vol->eval_mode == EVAL_MODE_FRAME)
         set_volume(ctx);
 
-    if (vol->volume == 1.0 || vol->volume_i == 256) {
+    if ((vol->volume == 1.0 || vol->volume_i == 256) && vol->fade_remaining <= 0) {
         out_buf = buf;
         goto end;
     }
@@ -405,29 +504,103 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 
     if (vol->precision != PRECISION_FIXED || vol->volume_i > 0) {
         int p, plane_samples;
+        int planar = av_sample_fmt_is_planar(buf->format);
 
-        if (av_sample_fmt_is_planar(buf->format))
+        if (planar)
             plane_samples = FFALIGN(nb_samples, vol->samples_align);
         else
             plane_samples = FFALIGN(nb_samples * vol->channels, vol->samples_align);
 
         if (vol->precision == PRECISION_FIXED) {
-            for (p = 0; p < vol->planes; p++) {
-                vol->scale_samples(out_buf->extended_data[p],
-                                   buf->extended_data[p], plane_samples,
-                                   vol->volume_i);
+            if (vol->fade_remaining > 0 && vol->fade_samples && vol->fade_total > 0) {
+                int fade_done  = vol->fade_total - vol->fade_remaining;
+                int fade_count = FFMIN(nb_samples, vol->fade_remaining);
+                int chs = planar ? 1 : vol->channels;
+                int vol_start  = vol->volume_isrc +
+                    (int64_t)(vol->volume_i - vol->volume_isrc) * fade_done / vol->fade_total;
+                int vol_end;
+
+                if (fade_count >= nb_samples) {
+                    /* Entire frame is within the fade region */
+                    vol_end = vol->volume_isrc +
+                        (int64_t)(vol->volume_i - vol->volume_isrc) * (fade_done + fade_count) / vol->fade_total;
+                    for (p = 0; p < vol->planes; p++) {
+                        vol->fade_samples(out_buf->extended_data[p], buf->extended_data[p],
+                                          plane_samples, chs, vol_end, vol_start);
+                    }
+                } else {
+                    /* Fade ends mid-frame: fade the whole frame from vol_start to target volume.
+                     * This slightly extends the fade to cover the full frame, which is acceptable
+                     * since the difference is at most one frame duration. */
+                    vol_end = vol->volume_i;
+                    for (p = 0; p < vol->planes; p++) {
+                        vol->fade_samples(out_buf->extended_data[p], buf->extended_data[p],
+                                          plane_samples, chs, vol_end, vol_start);
+                    }
+                }
+                vol->fade_remaining -= fade_count;
+            } else {
+                for (p = 0; p < vol->planes; p++) {
+                    vol->scale_samples(out_buf->extended_data[p],
+                                       buf->extended_data[p], plane_samples,
+                                       vol->volume_i);
+                }
             }
         } else if (av_get_packed_sample_fmt(vol->sample_fmt) == AV_SAMPLE_FMT_FLT) {
-            for (p = 0; p < vol->planes; p++) {
-                vol->fdsp->vector_fmul_scalar((float *)out_buf->extended_data[p],
-                                             (const float *)buf->extended_data[p],
-                                             vol->volume, plane_samples);
+            if (vol->fade_remaining > 0 && vol->fade_total > 0) {
+                int fade_done  = vol->fade_total - vol->fade_remaining;
+                int fade_count = FFMIN(nb_samples, vol->fade_remaining);
+                int chs = planar ? 1 : vol->channels;
+                for (p = 0; p < vol->planes; p++) {
+                    float *dst = (float *)out_buf->extended_data[p];
+                    const float *src = (const float *)buf->extended_data[p];
+                    int i, j, k = 0;
+                    for (i = 0; i < nb_samples; i++) {
+                        float t;
+                        if (i < fade_count)
+                            t = (float)(fade_done + i) / vol->fade_total;
+                        else
+                            t = 1.0f;
+                        float v = vol->volume_src + (vol->volume - vol->volume_src) * t;
+                        for (j = 0; j < chs; j++, k++)
+                            dst[k] = src[k] * v;
+                    }
+                }
+                vol->fade_remaining -= fade_count;
+            } else {
+                for (p = 0; p < vol->planes; p++) {
+                    vol->fdsp->vector_fmul_scalar((float *)out_buf->extended_data[p],
+                                                 (const float *)buf->extended_data[p],
+                                                 vol->volume, plane_samples);
+                }
             }
         } else {
-            for (p = 0; p < vol->planes; p++) {
-                vol->fdsp->vector_dmul_scalar((double *)out_buf->extended_data[p],
-                                             (const double *)buf->extended_data[p],
-                                             vol->volume, plane_samples);
+            if (vol->fade_remaining > 0 && vol->fade_total > 0) {
+                int fade_done  = vol->fade_total - vol->fade_remaining;
+                int fade_count = FFMIN(nb_samples, vol->fade_remaining);
+                int chs = planar ? 1 : vol->channels;
+                for (p = 0; p < vol->planes; p++) {
+                    double *dst = (double *)out_buf->extended_data[p];
+                    const double *src = (const double *)buf->extended_data[p];
+                    int i, j, k = 0;
+                    for (i = 0; i < nb_samples; i++) {
+                        double t;
+                        if (i < fade_count)
+                            t = (double)(fade_done + i) / vol->fade_total;
+                        else
+                            t = 1.0;
+                        double v = vol->volume_src + (vol->volume - vol->volume_src) * t;
+                        for (j = 0; j < chs; j++, k++)
+                            dst[k] = src[k] * v;
+                    }
+                }
+                vol->fade_remaining -= fade_count;
+            } else {
+                for (p = 0; p < vol->planes; p++) {
+                    vol->fdsp->vector_dmul_scalar((double *)out_buf->extended_data[p],
+                                                 (const double *)buf->extended_data[p],
+                                                 vol->volume, plane_samples);
+                }
             }
         }
     }
diff --git a/libavfilter/af_volume.h b/libavfilter/af_volume.h
index e9527eea8a..b172bd3388 100644
--- a/libavfilter/af_volume.h
+++ b/libavfilter/af_volume.h
@@ -84,7 +84,16 @@ typedef struct VolumeContext {
 
     void (*scale_samples)(uint8_t *dst, const uint8_t *src, int nb_samples,
                           int volume);
+
+    void (*fade_samples)(uint8_t *dst, const uint8_t *src, int nb_samples, int chs,
+                         int dst_volume, int src_volume);
     int samples_align;
+
+    double transition;       ///< transition time in seconds when volume changes (0 = instant)
+    int    fade_total;       ///< total number of samples for the fade transition
+    int    fade_remaining;   ///< remaining samples to fade
+    int    volume_isrc;      ///< source volume level at the start of fade
+    double volume_src;       ///< source volume (float/double) at the start of fade
 } VolumeContext;
 
 void ff_volume_init_x86(VolumeContext *vol);
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2026-02-10  4:19 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-10  4:18 [FFmpeg-devel] [PR] libavfilter/volume: do fade when adjusting the volume. (PR #21709) cenzhanquan1 via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git