From: Paul B Mahol <onemda@gmail.com> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: Re: [FFmpeg-devel] [PATCH 2/3] avfilter/af_volumedetect.c: Add 32bit float audio support Date: Mon, 17 Jun 2024 19:52:10 +0200 Message-ID: <CAPYw7P55RP1O+nB-Z38x5_HG4OkTAG+-gjy+bQJ2eE-oiKGp2g@mail.gmail.com> (raw) In-Reply-To: <92092055-E0A7-45C5-A479-E43D41DF2BE6@remlab.net> On Mon, Jun 17, 2024 at 4:52 PM Rémi Denis-Courmont <remi@remlab.net> wrote: > > > Le 17 juin 2024 13:18:11 GMT+02:00, Yigithan Yigit < > yigithanyigitdevel@gmail.com> a écrit : > >--- > > libavfilter/af_volumedetect.c | 159 ++++++++++++++++++++++++++++------ > > 1 file changed, 133 insertions(+), 26 deletions(-) > > > >diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c > >index 327801a7f9..dbbcd037a5 100644 > >--- a/libavfilter/af_volumedetect.c > >+++ b/libavfilter/af_volumedetect.c > >@@ -20,27 +20,51 @@ > > > > #include "libavutil/channel_layout.h" > > #include "libavutil/avassert.h" > >+#include "libavutil/mem.h" > > #include "audio.h" > > #include "avfilter.h" > > #include "internal.h" > > > >+#define MAX_DB_FLT 1024 > > #define MAX_DB 91 > >+#define HISTOGRAM_SIZE 0x10000 > >+#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) > > > > typedef struct VolDetectContext { > >- /** > >- * Number of samples at each PCM value. > >- * histogram[0x8000 + i] is the number of samples at value i. > >- * The extra element is there for symmetry. > >- */ > >- uint64_t histogram[0x10001]; > >+ uint64_t* histogram; ///< for integer number of samples at each PCM > value, for float number of samples at each dB > >+ uint64_t nb_samples; ///< number of samples > >+ double sum2; ///< sum of the squares of the samples > >+ double max; ///< maximum sample value > >+ int is_float; ///< true if the input is in floating point > > } VolDetectContext; > > > >-static inline double logdb(uint64_t v) > >+static inline double logdb(double v, enum AVSampleFormat sample_fmt) > > { > >- double d = v / (double)(0x8000 * 0x8000); > >- if (!v) > >- return MAX_DB; > >- return -log10(d) * 10; > >+ if (sample_fmt == AV_SAMPLE_FMT_FLT) { > >+ if (!v) > >+ return MAX_DB_FLT; > >+ return -log10(v) * 10; > >+ } else { > >+ double d = v / (double)(0x8000 * 0x8000); > >+ if (!v) > >+ return MAX_DB; > >+ return -log10(d) * 10; > >+ } > >+} > >+ > >+static void update_float_stats(VolDetectContext *vd, float *audio_data) > >+{ > >+ double sample; > >+ int idx; > >+ if(!isnormal(*audio_data)) > >+ return; > > Do we really need to classify floats here? That's probably going to hurt > perfs badly, and makes an otherwise very vectorisable function not so > easily vectored. > This is fast, it should translate to checking few bits of memory. > > >+ sample = fabsf(*audio_data); > >+ if (sample > vd->max) > >+ vd->max = sample; > >+ vd->sum2 += sample * sample; > >+ idx = lrintf(floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT))) + > MAX_DB_FLT; > > You're recomputing the same value again, and you seem to be rounding twice > in a row? > > >+ vd->histogram[idx]++; > >+ vd->nb_samples++; > > } > > > > static int filter_frame(AVFilterLink *inlink, AVFrame *samples) > >@@ -51,18 +75,41 @@ static int filter_frame(AVFilterLink *inlink, AVFrame > *samples) > > int nb_channels = samples->ch_layout.nb_channels; > > int nb_planes = nb_channels; > > int plane, i; > >- int16_t *pcm; > >+ int planar = 0; > > > >- if (!av_sample_fmt_is_planar(samples->format)) { > >- nb_samples *= nb_channels; > >+ planar = av_sample_fmt_is_planar(samples->format); > >+ if (!planar) > > nb_planes = 1; > >+ if (vd->is_float) { > >+ float *audio_data; > >+ for (plane = 0; plane < nb_planes; plane++) { > >+ audio_data = (float *)samples->extended_data[plane]; > >+ for (i = 0; i < nb_samples; i++) { > >+ if (planar) { > >+ update_float_stats(vd, &audio_data[i]); > >+ } else { > >+ for (int j = 0; j < nb_channels; j++) > >+ update_float_stats(vd, &audio_data[i * > nb_channels + j]); > >+ } > >+ } > >+ } > >+ } else { > >+ int16_t *pcm; > >+ for (plane = 0; plane < nb_planes; plane++) { > >+ pcm = (int16_t *)samples->extended_data[plane]; > >+ for (i = 0; i < nb_samples; i++) { > >+ if (planar) { > >+ vd->histogram[pcm[i] + 0x8000]++; > >+ vd->nb_samples++; > >+ } else { > >+ for (int j = 0; j < nb_channels; j++) { > >+ vd->histogram[pcm[i * nb_channels + j] + > 0x8000]++; > >+ vd->nb_samples++; > >+ } > >+ } > >+ } > >+ } > > Can't we pick the correct implementation (planar/packed and float/int) > once and for all whilst configuring the filter? > > > } > >- for (plane = 0; plane < nb_planes; plane++) { > >- pcm = (int16_t *)samples->extended_data[plane]; > >- for (i = 0; i < nb_samples; i++) > >- vd->histogram[pcm[i] + 0x8000]++; > >- } > >- > > return ff_filter_frame(inlink->dst->outputs[0], samples); > > } > > > >@@ -73,6 +120,20 @@ static void print_stats(AVFilterContext *ctx) > > uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; > > uint64_t histdb[MAX_DB + 1] = { 0 }; > > > >+ if (!vd->nb_samples) > >+ return; > >+ if (vd->is_float) { > >+ av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", > vd->nb_samples); > >+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", > -logdb(vd->sum2 / vd->nb_samples, AV_SAMPLE_FMT_FLT)); > >+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", > -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT)); > >+ for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++); > >+ for (; i >= 0 && sum < vd->nb_samples / 1000; i++) { > >+ if (!vd->histogram[i]) > >+ continue; > >+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", > MAX_DB_FLT - i, vd->histogram[i]); > >+ sum += vd->histogram[i]; > >+ } > >+ } else { > > for (i = 0; i < 0x10000; i++) > > nb_samples += vd->histogram[i]; > > av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); > >@@ -92,26 +153,61 @@ static void print_stats(AVFilterContext *ctx) > > return; > > power = (power + nb_samples_shift / 2) / nb_samples_shift; > > av_assert0(power <= 0x8000 * 0x8000); > >- av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); > >+ av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", > -logdb((double)power, AV_SAMPLE_FMT_S16)); > > > > max_volume = 0x8000; > > while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && > > !vd->histogram[0x8000 - max_volume]) > > max_volume--; > >- av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume > * max_volume)); > >+ av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", > -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16)); > > > > for (i = 0; i < 0x10000; i++) > >- histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += > vd->histogram[i]; > >+ histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), > AV_SAMPLE_FMT_S16)] += vd->histogram[i]; > > for (i = 0; i <= MAX_DB && !histdb[i]; i++); > > for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { > >- av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, > histdb[i]); > >+ av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, > histdb[i]); > > sum += histdb[i]; > > } > >+ } > >+} > >+ > >+static int config_output(AVFilterLink *outlink) > >+{ > >+ AVFilterContext *ctx = outlink->src; > >+ VolDetectContext *vd = ctx->priv; > >+ size_t histogram_size; > >+ > >+ vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT || > >+ outlink->format == AV_SAMPLE_FMT_FLTP; > >+ > >+ if (!vd->is_float) { > >+ /* > >+ * Number of samples at each PCM value. > >+ * Only used for integer formats. > >+ * For 16 bit signed PCM there are 65536. > >+ * histogram[0x8000 + i] is the number of samples at value i. > >+ * The extra element is there for symmetry. > >+ */ > >+ histogram_size = HISTOGRAM_SIZE + 1; > >+ } else { > >+ /* > >+ * The histogram is used to store the number of samples at each dB > >+ * instead of the number of samples at each PCM value. > >+ */ > >+ histogram_size = HISTOGRAM_SIZE_FLT + 1; > >+ } > >+ vd->histogram = av_calloc(histogram_size, sizeof(uint64_t)); > >+ if (!vd->histogram) > >+ return AVERROR(ENOMEM); > >+ return 0; > > } > > > > static av_cold void uninit(AVFilterContext *ctx) > > { > >+ VolDetectContext *vd = ctx->priv; > > print_stats(ctx); > >+ if (vd->histogram) > >+ av_freep(&vd->histogram); > > } > > > > static const AVFilterPad volumedetect_inputs[] = { > >@@ -122,6 +218,14 @@ static const AVFilterPad volumedetect_inputs[] = { > > }, > > }; > > > >+static const AVFilterPad volumedetect_outputs[] = { > >+ { > >+ .name = "default", > >+ .type = AVMEDIA_TYPE_AUDIO, > >+ .config_props = config_output, > >+ }, > >+}; > >+ > > const AVFilter ff_af_volumedetect = { > > .name = "volumedetect", > > .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), > >@@ -129,6 +233,9 @@ const AVFilter ff_af_volumedetect = { > > .uninit = uninit, > > .flags = AVFILTER_FLAG_METADATA_ONLY, > > FILTER_INPUTS(volumedetect_inputs), > >- FILTER_OUTPUTS(ff_audio_default_filterpad), > >- FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), > >+ FILTER_OUTPUTS(volumedetect_outputs), > >+ FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, > >+ AV_SAMPLE_FMT_S16P, > >+ AV_SAMPLE_FMT_FLT, > >+ AV_SAMPLE_FMT_FLTP), > > }; > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-06-17 17:52 UTC|newest] Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-06-17 11:18 [FFmpeg-devel] [PATCH 0/3] " Yigithan Yigit 2024-06-17 11:18 ` [FFmpeg-devel] [PATCH 1/3] avfilter/af_volumedetect.c: Move logdb function Yigithan Yigit 2024-06-17 11:18 ` [FFmpeg-devel] [PATCH 2/3] avfilter/af_volumedetect.c: Add 32bit float audio support Yigithan Yigit 2024-06-17 14:52 ` Rémi Denis-Courmont 2024-06-17 17:52 ` Paul B Mahol [this message] 2024-06-18 6:55 ` Rémi Denis-Courmont 2024-06-18 7:48 ` Paul B Mahol 2024-06-18 9:16 ` Yigithan Yigit 2024-06-17 11:18 ` [FFmpeg-devel] [PATCH 3/3] avfilter/af_volumedetect.c: reindent after last commit Yigithan Yigit
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=CAPYw7P55RP1O+nB-Z38x5_HG4OkTAG+-gjy+bQJ2eE-oiKGp2g@mail.gmail.com \ --to=onemda@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git