[FFmpeg-devel] [PATCH v5 2/2] avfilter/af_volumedetect.c: Add 32bit float audio support

From: Yigithan Yigit <yigithanyigitdevel@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Cc: thilo.borgmann@mail.de, yigithanyigitdevel@gmail.com
Subject: [FFmpeg-devel] [PATCH v5 2/2] avfilter/af_volumedetect.c: Add 32bit float audio support
Date: Mon, 15 Jul 2024 01:38:05 +0300
Message-ID: <20240714223805.9760-3-yigithanyigitdevel@gmail.com> (raw)
In-Reply-To: <20240714223805.9760-1-yigithanyigitdevel@gmail.com>

---
 libavfilter/af_volumedetect.c | 222 +++++++++++++++++++++++++---------
 1 file changed, 166 insertions(+), 56 deletions(-)

diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
index 327801a7f9..ce68c56962 100644
--- a/libavfilter/af_volumedetect.c
+++ b/libavfilter/af_volumedetect.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012 Nicolas George
+ * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support
  *
  * This file is part of FFmpeg.
  *
@@ -20,98 +21,196 @@
 
 #include "libavutil/channel_layout.h"
 #include "libavutil/avassert.h"
+#include "libavutil/mem.h"
 #include "audio.h"
 #include "avfilter.h"
 #include "internal.h"
 
+#define MAX_DB_FLT 1024
 #define MAX_DB 91
+#define HISTOGRAM_SIZE 0x10000
+#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2)
 
 typedef struct VolDetectContext {
-    /**
-     * Number of samples at each PCM value.
-     * histogram[0x8000 + i] is the number of samples at value i.
-     * The extra element is there for symmetry.
-     */
-    uint64_t histogram[0x10001];
+    uint64_t* histogram;            ///< for integer number of samples at each PCM value, for float number of samples at each dB
+    uint64_t nb_samples;            ///< number of samples
+    size_t histogram_size;          ///< size of the histogram
+    double sum2;                    ///< sum of the squares of the samples
+    double max;                     ///< maximum sample value
+    enum AVSampleFormat sample_fmt; ///< sample format
+    void (*process_samples)(struct VolDetectContext *vd, AVFrame *samples);
 } VolDetectContext;
 
-static inline double logdb(uint64_t v)
+static inline double logdb(double v, const int max_db)
 {
-    double d = v / (double)(0x8000 * 0x8000);
     if (!v)
-        return MAX_DB;
-    return -log10(d) * 10;
+        return max_db;
+
+    return -log10(v) * 10;
+}
+
+#define PROCESS_SAMPLES(name, type, update_func)                                  \
+static void process_samples_##name(VolDetectContext *vd, AVFrame *samples)        \
+{                                                                                 \
+    const type *p = (const type *)samples->extended_data[0];                      \
+    const int nb_samples = samples->nb_samples * samples->ch_layout.nb_channels;  \
+    int i;                                                                        \
+    for (i = 0; i < nb_samples; i++)                                              \
+        update_func(vd, p, i);                                                    \
 }
 
+#define PROCESS_SAMPLES_PLANAR(name, type, update_func )                          \
+static void process_samples_planar_##name(VolDetectContext *vd, AVFrame *samples) \
+{                                                                                 \
+    const int channels = samples->ch_layout.nb_channels;                          \
+    const int nb_samples = samples->nb_samples;                                   \
+    int ch, i;                                                                    \
+    for (ch = 0; ch < channels; ch++) {                                           \
+        const type *p = (const type *)samples->extended_data[ch];                 \
+            for (i = 0; i < nb_samples; i++)                                      \
+                update_func(vd, p, i);                                            \
+    }                                                                             \
+}
+
+#define UPDATE_FLOAT_STATS(vd,p, i )                                              \
+do {                                                                              \
+    double sample, power;                                                         \
+    int idx;                                                                      \
+    if(!isfinite(p[i]))                                                           \
+        continue;                                                                 \
+    sample = FFABS(p[i]);                                                         \
+    vd->max = FFMAX(vd->max, sample);                                             \
+    power = sample * sample;                                                      \
+    vd->sum2 += power;                                                            \
+    idx = (int)logdb(power, MAX_DB_FLT) + MAX_DB_FLT;                             \
+    vd->histogram[idx]++;                                                         \
+    vd->nb_samples++;                                                             \
+} while(0)
+
+#define UPDATE_INT_STATS(vd, p, i)                                                \
+do {                                                                              \
+    vd->histogram[p[i] + 0x8000]++;                                               \
+    vd->nb_samples++;                                                             \
+} while(0)
+
+PROCESS_SAMPLES(flt, float, UPDATE_FLOAT_STATS)
+PROCESS_SAMPLES(s16, int16_t, UPDATE_INT_STATS)
+
+PROCESS_SAMPLES_PLANAR(flt, float, UPDATE_FLOAT_STATS)
+PROCESS_SAMPLES_PLANAR(s16, int16_t,UPDATE_INT_STATS)
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
 {
     AVFilterContext *ctx = inlink->dst;
     VolDetectContext *vd = ctx->priv;
-    int nb_samples  = samples->nb_samples;
-    int nb_channels = samples->ch_layout.nb_channels;
-    int nb_planes   = nb_channels;
-    int plane, i;
-    int16_t *pcm;
-
-    if (!av_sample_fmt_is_planar(samples->format)) {
-        nb_samples *= nb_channels;
-        nb_planes = 1;
-    }
-    for (plane = 0; plane < nb_planes; plane++) {
-        pcm = (int16_t *)samples->extended_data[plane];
-        for (i = 0; i < nb_samples; i++)
-            vd->histogram[pcm[i] + 0x8000]++;
-    }
+
+    vd->process_samples(vd, samples);
 
     return ff_filter_frame(inlink->dst->outputs[0], samples);
 }
 
-static void print_stats(AVFilterContext *ctx)
+static av_cold void print_stats(AVFilterContext *ctx)
 {
     VolDetectContext *vd = ctx->priv;
     int i, max_volume, shift;
     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
     uint64_t histdb[MAX_DB + 1] = { 0 };
 
-    for (i = 0; i < 0x10000; i++)
+    for (i = 0; i < vd->histogram_size; i++)
         nb_samples += vd->histogram[i];
-    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
+
     if (!nb_samples)
         return;
 
-    /* If nb_samples > 1<<34, there is a risk of overflow in the
-       multiplication or the sum: shift all histogram values to avoid that.
-       The total number of samples must be recomputed to avoid rounding
-       errors. */
-    shift = av_log2(nb_samples >> 33);
-    for (i = 0; i < 0x10000; i++) {
-        nb_samples_shift += vd->histogram[i] >> shift;
-        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
+    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
+    switch(vd->sample_fmt) {
+        case AV_SAMPLE_FMT_FLTP:
+        case AV_SAMPLE_FMT_FLT:
+            av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 / vd->nb_samples, MAX_DB_FLT));
+            av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(vd->max * vd->max, MAX_DB_FLT));
+            for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++);
+            for (; i < HISTOGRAM_SIZE_FLT && sum < vd->nb_samples / 1000; i++) {
+                if (!vd->histogram[i])
+                    continue;
+                av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]);
+                sum += vd->histogram[i];
+            }
+            break;
+        case AV_SAMPLE_FMT_S16P:
+        case AV_SAMPLE_FMT_S16:
+            /* If nb_samples > 1<<34, there is a risk of overflow in the
+            multiplication or the sum: shift all histogram values to avoid that.
+            The total number of samples must be recomputed to avoid rounding
+            errors. */
+            shift = av_log2(nb_samples >> 33);
+            for (i = 0; i < vd->histogram_size; i++) {
+                nb_samples_shift += vd->histogram[i] >> shift;
+                power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
+            }
+            if (!nb_samples_shift)
+                return;
+            power = (power + nb_samples_shift / 2) / nb_samples_shift;
+            av_assert0(power <= 0x8000 * 0x8000);
+            av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(ldexp((double)power, -av_log2(HISTOGRAM_SIZE >> 1) * 2), MAX_DB));
+            max_volume = 0x8000;
+            while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
+                                    !vd->histogram[0x8000 - max_volume])
+                max_volume--;
+            av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(ldexp((double)max_volume * max_volume, -av_log2(HISTOGRAM_SIZE >> 1) * 2), MAX_DB));
+            for (i = 0; i < vd->histogram_size; i++)
+                histdb[(int)logdb(ldexp((double)(i - 0x8000) * (i - 0x8000), -av_log2(HISTOGRAM_SIZE >> 1) * 2), MAX_DB)] += vd->histogram[i];
+            for (i = 0; i <= MAX_DB && !histdb[i]; i++);
+            for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
+                av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, histdb[i]);
+                sum += histdb[i];
+            }
+            break;
     }
-    if (!nb_samples_shift)
-        return;
-    power = (power + nb_samples_shift / 2) / nb_samples_shift;
-    av_assert0(power <= 0x8000 * 0x8000);
-    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
-
-    max_volume = 0x8000;
-    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
-                             !vd->histogram[0x8000 - max_volume])
-        max_volume--;
-    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
-
-    for (i = 0; i < 0x10000; i++)
-        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
-    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
-    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
-        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
-        sum += histdb[i];
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    VolDetectContext *vd = ctx->priv;
+
+    switch (outlink->format) {
+        case AV_SAMPLE_FMT_S16P:
+        case AV_SAMPLE_FMT_S16:
+           /*
+          * Number of samples at each PCM value.
+          * Only used for integer formats.
+          * For 16 bit signed PCM there are 65536.
+          * histogram[0x8000 + i] is the number of samples at value i.
+          * The extra element is there for symmetry.
+          */
+          vd->histogram_size  = HISTOGRAM_SIZE + 1;
+          vd->process_samples = av_sample_fmt_is_planar(outlink->format)
+                              ? process_samples_planar_s16 : process_samples_s16;
+          break;
+        case AV_SAMPLE_FMT_FLT:
+        case AV_SAMPLE_FMT_FLTP:
+          /*
+          * The histogram is used to store the number of samples at each dB
+          * instead of the number of samples at each PCM value.
+          */
+          vd->histogram_size  = HISTOGRAM_SIZE_FLT + 1;
+          vd->process_samples = av_sample_fmt_is_planar(outlink->format)
+                              ? process_samples_planar_flt : process_samples_flt;
+          break;
     }
+    vd->sample_fmt = outlink->format;
+    vd->histogram = av_calloc(vd->histogram_size, sizeof(uint64_t));
+    if (!vd->histogram)
+        return AVERROR(ENOMEM);
+    return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
+    VolDetectContext *vd = ctx->priv;
     print_stats(ctx);
+    if (vd->histogram)
+        av_freep(&vd->histogram);
 }
 
 static const AVFilterPad volumedetect_inputs[] = {
@@ -122,6 +221,14 @@ static const AVFilterPad volumedetect_inputs[] = {
     },
 };
 
+static const AVFilterPad volumedetect_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output,
+    },
+};
+
 const AVFilter ff_af_volumedetect = {
     .name          = "volumedetect",
     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
@@ -129,6 +236,9 @@ const AVFilter ff_af_volumedetect = {
     .uninit        = uninit,
     .flags         = AVFILTER_FLAG_METADATA_ONLY,
     FILTER_INPUTS(volumedetect_inputs),
-    FILTER_OUTPUTS(ff_audio_default_filterpad),
-    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
+    FILTER_OUTPUTS(volumedetect_outputs),
+    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16,
+                      AV_SAMPLE_FMT_S16P,
+                      AV_SAMPLE_FMT_FLT,
+                      AV_SAMPLE_FMT_FLTP),
 };
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".