Re: [FFmpeg-devel] [PATCH 2/3] avfilter/af_volumedetect.c: Add 32bit float audio support

From: Paul B Mahol <onemda@gmail.com>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH 2/3] avfilter/af_volumedetect.c: Add 32bit float audio support
Date: Mon, 17 Jun 2024 19:52:10 +0200
Message-ID: <CAPYw7P55RP1O+nB-Z38x5_HG4OkTAG+-gjy+bQJ2eE-oiKGp2g@mail.gmail.com> (raw)
In-Reply-To: <92092055-E0A7-45C5-A479-E43D41DF2BE6@remlab.net>

On Mon, Jun 17, 2024 at 4:52 PM Rémi Denis-Courmont <remi@remlab.net> wrote:

>
>
> Le 17 juin 2024 13:18:11 GMT+02:00, Yigithan Yigit <
> yigithanyigitdevel@gmail.com> a écrit :
> >---
> > libavfilter/af_volumedetect.c | 159 ++++++++++++++++++++++++++++------
> > 1 file changed, 133 insertions(+), 26 deletions(-)
> >
> >diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
> >index 327801a7f9..dbbcd037a5 100644
> >--- a/libavfilter/af_volumedetect.c
> >+++ b/libavfilter/af_volumedetect.c
> >@@ -20,27 +20,51 @@
> >
> > #include "libavutil/channel_layout.h"
> > #include "libavutil/avassert.h"
> >+#include "libavutil/mem.h"
> > #include "audio.h"
> > #include "avfilter.h"
> > #include "internal.h"
> >
> >+#define MAX_DB_FLT 1024
> > #define MAX_DB 91
> >+#define HISTOGRAM_SIZE 0x10000
> >+#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2)
> >
> > typedef struct VolDetectContext {
> >-    /**
> >-     * Number of samples at each PCM value.
> >-     * histogram[0x8000 + i] is the number of samples at value i.
> >-     * The extra element is there for symmetry.
> >-     */
> >-    uint64_t histogram[0x10001];
> >+    uint64_t* histogram; ///< for integer number of samples at each PCM
> value, for float number of samples at each dB
> >+    uint64_t nb_samples; ///< number of samples
> >+    double sum2;         ///< sum of the squares of the samples
> >+    double max;          ///< maximum sample value
> >+    int is_float;        ///< true if the input is in floating point
> > } VolDetectContext;
> >
> >-static inline double logdb(uint64_t v)
> >+static inline double logdb(double v, enum AVSampleFormat sample_fmt)
> > {
> >-    double d = v / (double)(0x8000 * 0x8000);
> >-    if (!v)
> >-        return MAX_DB;
> >-    return -log10(d) * 10;
> >+    if (sample_fmt == AV_SAMPLE_FMT_FLT) {
> >+        if (!v)
> >+            return MAX_DB_FLT;
> >+        return -log10(v) * 10;
> >+    } else {
> >+        double d = v / (double)(0x8000 * 0x8000);
> >+        if (!v)
> >+            return MAX_DB;
> >+        return -log10(d) * 10;
> >+    }
> >+}
> >+
> >+static void update_float_stats(VolDetectContext *vd, float *audio_data)
> >+{
> >+    double sample;
> >+    int idx;
> >+    if(!isnormal(*audio_data))
> >+        return;
>
> Do we really need to classify floats here? That's probably going to hurt
> perfs badly, and makes an otherwise very vectorisable function not so
> easily vectored.
>

This is fast, it should translate to checking few bits of memory.

>
> >+    sample = fabsf(*audio_data);
> >+    if (sample > vd->max)
> >+        vd->max = sample;
> >+    vd->sum2 += sample * sample;
> >+    idx = lrintf(floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT))) +
> MAX_DB_FLT;
>
> You're recomputing the same value again, and you seem to be rounding twice
> in a row?
>
> >+    vd->histogram[idx]++;
> >+    vd->nb_samples++;
> > }
> >
> > static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
> >@@ -51,18 +75,41 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> *samples)
> >     int nb_channels = samples->ch_layout.nb_channels;
> >     int nb_planes   = nb_channels;
> >     int plane, i;
> >-    int16_t *pcm;
> >+    int planar = 0;
> >
> >-    if (!av_sample_fmt_is_planar(samples->format)) {
> >-        nb_samples *= nb_channels;
> >+    planar = av_sample_fmt_is_planar(samples->format);
> >+    if (!planar)
> >         nb_planes = 1;
> >+    if (vd->is_float) {
> >+        float *audio_data;
> >+        for (plane = 0; plane < nb_planes; plane++) {
> >+            audio_data = (float *)samples->extended_data[plane];
> >+            for (i = 0; i < nb_samples; i++) {
> >+                if (planar) {
> >+                    update_float_stats(vd, &audio_data[i]);
> >+                } else {
> >+                    for (int j = 0; j < nb_channels; j++)
> >+                        update_float_stats(vd, &audio_data[i *
> nb_channels + j]);
> >+                }
> >+            }
> >+        }
> >+    } else {
> >+        int16_t *pcm;
> >+        for (plane = 0; plane < nb_planes; plane++) {
> >+            pcm = (int16_t *)samples->extended_data[plane];
> >+            for (i = 0; i < nb_samples; i++) {
> >+                if (planar) {
> >+                    vd->histogram[pcm[i] + 0x8000]++;
> >+                    vd->nb_samples++;
> >+                } else {
> >+                    for (int j = 0; j < nb_channels; j++) {
> >+                        vd->histogram[pcm[i * nb_channels + j] +
> 0x8000]++;
> >+                        vd->nb_samples++;
> >+                    }
> >+                }
> >+            }
> >+        }
>
> Can't we pick the correct implementation (planar/packed and float/int)
> once and for all whilst configuring the filter?
>
> >     }
> >-    for (plane = 0; plane < nb_planes; plane++) {
> >-        pcm = (int16_t *)samples->extended_data[plane];
> >-        for (i = 0; i < nb_samples; i++)
> >-            vd->histogram[pcm[i] + 0x8000]++;
> >-    }
> >-
> >     return ff_filter_frame(inlink->dst->outputs[0], samples);
> > }
> >
> >@@ -73,6 +120,20 @@ static void print_stats(AVFilterContext *ctx)
> >     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
> >     uint64_t histdb[MAX_DB + 1] = { 0 };
> >
> >+    if (!vd->nb_samples)
> >+        return;
> >+    if (vd->is_float) {
> >+        av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n",
> vd->nb_samples);
> >+        av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n",
> -logdb(vd->sum2 / vd->nb_samples, AV_SAMPLE_FMT_FLT));
> >+        av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n",
> -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT));
> >+        for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++);
> >+        for (; i >= 0 && sum < vd->nb_samples / 1000; i++) {
> >+            if (!vd->histogram[i])
> >+                continue;
> >+            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n",
> MAX_DB_FLT - i, vd->histogram[i]);
> >+            sum += vd->histogram[i];
> >+        }
> >+    } else {
> >     for (i = 0; i < 0x10000; i++)
> >         nb_samples += vd->histogram[i];
> >     av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
> >@@ -92,26 +153,61 @@ static void print_stats(AVFilterContext *ctx)
> >         return;
> >     power = (power + nb_samples_shift / 2) / nb_samples_shift;
> >     av_assert0(power <= 0x8000 * 0x8000);
> >-    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
> >+    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n",
> -logdb((double)power, AV_SAMPLE_FMT_S16));
> >
> >     max_volume = 0x8000;
> >     while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
> >                              !vd->histogram[0x8000 - max_volume])
> >         max_volume--;
> >-    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume
> * max_volume));
> >+    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n",
> -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16));
> >
> >     for (i = 0; i < 0x10000; i++)
> >-        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] +=
> vd->histogram[i];
> >+        histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000),
> AV_SAMPLE_FMT_S16)] += vd->histogram[i];
> >     for (i = 0; i <= MAX_DB && !histdb[i]; i++);
> >     for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
> >-        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i,
> histdb[i]);
> >+        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i,
> histdb[i]);
> >         sum += histdb[i];
> >     }
> >+    }
> >+}
> >+
> >+static int config_output(AVFilterLink *outlink)
> >+{
> >+    AVFilterContext *ctx = outlink->src;
> >+    VolDetectContext *vd = ctx->priv;
> >+    size_t histogram_size;
> >+
> >+    vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT ||
> >+                   outlink->format == AV_SAMPLE_FMT_FLTP;
> >+
> >+    if (!vd->is_float) {
> >+        /*
> >+        * Number of samples at each PCM value.
> >+        * Only used for integer formats.
> >+        * For 16 bit signed PCM there are 65536.
> >+        * histogram[0x8000 + i] is the number of samples at value i.
> >+        * The extra element is there for symmetry.
> >+        */
> >+        histogram_size = HISTOGRAM_SIZE + 1;
> >+    } else {
> >+        /*
> >+        * The histogram is used to store the number of samples at each dB
> >+        * instead of the number of samples at each PCM value.
> >+        */
> >+        histogram_size = HISTOGRAM_SIZE_FLT + 1;
> >+    }
> >+    vd->histogram = av_calloc(histogram_size, sizeof(uint64_t));
> >+    if (!vd->histogram)
> >+        return AVERROR(ENOMEM);
> >+    return 0;
> > }
> >
> > static av_cold void uninit(AVFilterContext *ctx)
> > {
> >+    VolDetectContext *vd = ctx->priv;
> >     print_stats(ctx);
> >+    if (vd->histogram)
> >+        av_freep(&vd->histogram);
> > }
> >
> > static const AVFilterPad volumedetect_inputs[] = {
> >@@ -122,6 +218,14 @@ static const AVFilterPad volumedetect_inputs[] = {
> >     },
> > };
> >
> >+static const AVFilterPad volumedetect_outputs[] = {
> >+    {
> >+        .name         = "default",
> >+        .type         = AVMEDIA_TYPE_AUDIO,
> >+        .config_props = config_output,
> >+    },
> >+};
> >+
> > const AVFilter ff_af_volumedetect = {
> >     .name          = "volumedetect",
> >     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
> >@@ -129,6 +233,9 @@ const AVFilter ff_af_volumedetect = {
> >     .uninit        = uninit,
> >     .flags         = AVFILTER_FLAG_METADATA_ONLY,
> >     FILTER_INPUTS(volumedetect_inputs),
> >-    FILTER_OUTPUTS(ff_audio_default_filterpad),
> >-    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
> >+    FILTER_OUTPUTS(volumedetect_outputs),
> >+    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16,
> >+                      AV_SAMPLE_FMT_S16P,
> >+                      AV_SAMPLE_FMT_FLT,
> >+                      AV_SAMPLE_FMT_FLTP),
> > };
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".