From: Yigithan Yigit <yigithanyigitdevel@gmail.com> To: ffmpeg-devel@ffmpeg.org Cc: thilo.borgmann@mail.de, yigithanyigitdevel@gmail.com Subject: [FFmpeg-devel] [PATCH v5 2/2] avfilter/af_volumedetect.c: Add 32bit float audio support Date: Mon, 15 Jul 2024 01:38:05 +0300 Message-ID: <20240714223805.9760-3-yigithanyigitdevel@gmail.com> (raw) In-Reply-To: <20240714223805.9760-1-yigithanyigitdevel@gmail.com> --- libavfilter/af_volumedetect.c | 222 +++++++++++++++++++++++++--------- 1 file changed, 166 insertions(+), 56 deletions(-) diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c index 327801a7f9..ce68c56962 100644 --- a/libavfilter/af_volumedetect.c +++ b/libavfilter/af_volumedetect.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012 Nicolas George + * Copyright (c) 2024 Yigithan Yigit - 32 Bit Float Audio Support * * This file is part of FFmpeg. * @@ -20,98 +21,196 @@ #include "libavutil/channel_layout.h" #include "libavutil/avassert.h" +#include "libavutil/mem.h" #include "audio.h" #include "avfilter.h" #include "internal.h" +#define MAX_DB_FLT 1024 #define MAX_DB 91 +#define HISTOGRAM_SIZE 0x10000 +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) typedef struct VolDetectContext { - /** - * Number of samples at each PCM value. - * histogram[0x8000 + i] is the number of samples at value i. - * The extra element is there for symmetry. 
- */ - uint64_t histogram[0x10001]; + uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB + uint64_t nb_samples; ///< number of samples + size_t histogram_size; ///< size of the histogram + double sum2; ///< sum of the squares of the samples + double max; ///< maximum sample value + enum AVSampleFormat sample_fmt; ///< sample format + void (*process_samples)(struct VolDetectContext *vd, AVFrame *samples); } VolDetectContext; -static inline double logdb(uint64_t v) +static inline double logdb(double v, const int max_db) { - double d = v / (double)(0x8000 * 0x8000); if (!v) - return MAX_DB; - return -log10(d) * 10; + return max_db; + + return -log10(v) * 10; +} + +#define PROCESS_SAMPLES(name, type, update_func) \ +static void process_samples_##name(VolDetectContext *vd, AVFrame *samples) \ +{ \ + const type *p = (const type *)samples->extended_data[0]; \ + const int nb_samples = samples->nb_samples * samples->ch_layout.nb_channels; \ + int i; \ + for (i = 0; i < nb_samples; i++) \ + update_func(vd, p, i); \ } +#define PROCESS_SAMPLES_PLANAR(name, type, update_func ) \ +static void process_samples_planar_##name(VolDetectContext *vd, AVFrame *samples) \ +{ \ + const int channels = samples->ch_layout.nb_channels; \ + const int nb_samples = samples->nb_samples; \ + int ch, i; \ + for (ch = 0; ch < channels; ch++) { \ + const type *p = (const type *)samples->extended_data[ch]; \ + for (i = 0; i < nb_samples; i++) \ + update_func(vd, p, i); \ + } \ +} + +#define UPDATE_FLOAT_STATS(vd,p, i ) \ +do { \ + double sample, power; \ + int idx; \ + if(!isfinite(p[i])) \ + continue; \ + sample = FFABS(p[i]); \ + vd->max = FFMAX(vd->max, sample); \ + power = sample * sample; \ + vd->sum2 += power; \ + idx = (int)logdb(power, MAX_DB_FLT) + MAX_DB_FLT; \ + vd->histogram[idx]++; \ + vd->nb_samples++; \ +} while(0) + +#define UPDATE_INT_STATS(vd, p, i) \ +do { \ + vd->histogram[p[i] + 0x8000]++; \ + vd->nb_samples++; \ +} 
while(0) + +PROCESS_SAMPLES(flt, float, UPDATE_FLOAT_STATS) +PROCESS_SAMPLES(s16, int16_t, UPDATE_INT_STATS) + +PROCESS_SAMPLES_PLANAR(flt, float, UPDATE_FLOAT_STATS) +PROCESS_SAMPLES_PLANAR(s16, int16_t,UPDATE_INT_STATS) + static int filter_frame(AVFilterLink *inlink, AVFrame *samples) { AVFilterContext *ctx = inlink->dst; VolDetectContext *vd = ctx->priv; - int nb_samples = samples->nb_samples; - int nb_channels = samples->ch_layout.nb_channels; - int nb_planes = nb_channels; - int plane, i; - int16_t *pcm; - - if (!av_sample_fmt_is_planar(samples->format)) { - nb_samples *= nb_channels; - nb_planes = 1; - } - for (plane = 0; plane < nb_planes; plane++) { - pcm = (int16_t *)samples->extended_data[plane]; - for (i = 0; i < nb_samples; i++) - vd->histogram[pcm[i] + 0x8000]++; - } + + vd->process_samples(vd, samples); return ff_filter_frame(inlink->dst->outputs[0], samples); } -static void print_stats(AVFilterContext *ctx) +static av_cold void print_stats(AVFilterContext *ctx) { VolDetectContext *vd = ctx->priv; int i, max_volume, shift; uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; uint64_t histdb[MAX_DB + 1] = { 0 }; - for (i = 0; i < 0x10000; i++) + for (i = 0; i < vd->histogram_size; i++) nb_samples += vd->histogram[i]; - av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); + if (!nb_samples) return; - /* If nb_samples > 1<<34, there is a risk of overflow in the - multiplication or the sum: shift all histogram values to avoid that. - The total number of samples must be recomputed to avoid rounding - errors. 
*/ - shift = av_log2(nb_samples >> 33); - for (i = 0; i < 0x10000; i++) { - nb_samples_shift += vd->histogram[i] >> shift; - power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); + av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); + switch(vd->sample_fmt) { + case AV_SAMPLE_FMT_FLTP: + case AV_SAMPLE_FMT_FLT: + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 / vd->nb_samples, MAX_DB_FLT)); + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(vd->max * vd->max, MAX_DB_FLT)); + for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++); + for (; i < HISTOGRAM_SIZE_FLT && sum < vd->nb_samples / 1000; i++) { + if (!vd->histogram[i]) + continue; + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]); + sum += vd->histogram[i]; + } + break; + case AV_SAMPLE_FMT_S16P: + case AV_SAMPLE_FMT_S16: + /* If nb_samples > 1<<34, there is a risk of overflow in the + multiplication or the sum: shift all histogram values to avoid that. + The total number of samples must be recomputed to avoid rounding + errors. 
*/ + shift = av_log2(nb_samples >> 33); + for (i = 0; i < vd->histogram_size; i++) { + nb_samples_shift += vd->histogram[i] >> shift; + power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); + } + if (!nb_samples_shift) + return; + power = (power + nb_samples_shift / 2) / nb_samples_shift; + av_assert0(power <= 0x8000 * 0x8000); + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(ldexp((double)power, -av_log2(HISTOGRAM_SIZE >> 1) * 2), MAX_DB)); + max_volume = 0x8000; + while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && + !vd->histogram[0x8000 - max_volume]) + max_volume--; + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(ldexp((double)max_volume * max_volume, -av_log2(HISTOGRAM_SIZE >> 1) * 2), MAX_DB)); + for (i = 0; i < vd->histogram_size; i++) + histdb[(int)logdb(ldexp((double)(i - 0x8000) * (i - 0x8000), -av_log2(HISTOGRAM_SIZE >> 1) * 2), MAX_DB)] += vd->histogram[i]; + for (i = 0; i <= MAX_DB && !histdb[i]; i++); + for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, histdb[i]); + sum += histdb[i]; + } + break; } - if (!nb_samples_shift) - return; - power = (power + nb_samples_shift / 2) / nb_samples_shift; - av_assert0(power <= 0x8000 * 0x8000); - av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); - - max_volume = 0x8000; - while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && - !vd->histogram[0x8000 - max_volume]) - max_volume--; - av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); - - for (i = 0; i < 0x10000; i++) - histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; - for (i = 0; i <= MAX_DB && !histdb[i]; i++); - for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { - av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); - sum += histdb[i]; +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + VolDetectContext *vd 
= ctx->priv; + + switch (outlink->format) { + case AV_SAMPLE_FMT_S16P: + case AV_SAMPLE_FMT_S16: + /* + * Number of samples at each PCM value. + * Only used for integer formats. + * For 16 bit signed PCM there are 65536. + * histogram[0x8000 + i] is the number of samples at value i. + * The extra element is there for symmetry. + */ + vd->histogram_size = HISTOGRAM_SIZE + 1; + vd->process_samples = av_sample_fmt_is_planar(outlink->format) + ? process_samples_planar_s16 : process_samples_s16; + break; + case AV_SAMPLE_FMT_FLT: + case AV_SAMPLE_FMT_FLTP: + /* + * The histogram is used to store the number of samples at each dB + * instead of the number of samples at each PCM value. + */ + vd->histogram_size = HISTOGRAM_SIZE_FLT + 1; + vd->process_samples = av_sample_fmt_is_planar(outlink->format) + ? process_samples_planar_flt : process_samples_flt; + break; } + vd->sample_fmt = outlink->format; + vd->histogram = av_calloc(vd->histogram_size, sizeof(uint64_t)); + if (!vd->histogram) + return AVERROR(ENOMEM); + return 0; } static av_cold void uninit(AVFilterContext *ctx) { + VolDetectContext *vd = ctx->priv; print_stats(ctx); + if (vd->histogram) + av_freep(&vd->histogram); } static const AVFilterPad volumedetect_inputs[] = { @@ -122,6 +221,14 @@ static const AVFilterPad volumedetect_inputs[] = { }, }; +static const AVFilterPad volumedetect_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .config_props = config_output, + }, +}; + const AVFilter ff_af_volumedetect = { .name = "volumedetect", .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), @@ -129,6 +236,9 @@ const AVFilter ff_af_volumedetect = { .uninit = uninit, .flags = AVFILTER_FLAG_METADATA_ONLY, FILTER_INPUTS(volumedetect_inputs), - FILTER_OUTPUTS(ff_audio_default_filterpad), - FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), + FILTER_OUTPUTS(volumedetect_outputs), + FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_FLTP), }; 
-- 2.45.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-07-14 22:38 UTC|newest] Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-07-14 22:38 [FFmpeg-devel] [PATCH v5 0/2] " Yigithan Yigit 2024-07-14 22:38 ` [FFmpeg-devel] [PATCH v5 1/2] avfilter/af_volumedetect.c: Move logdb function Yigithan Yigit 2024-07-14 22:38 ` Yigithan Yigit [this message] 2024-07-21 22:49 ` [FFmpeg-devel] [PATCH v5 2/2] avfilter/af_volumedetect.c: Add 32bit float audio support Yigithan Yigit
Reply instructions:

You may reply publicly to this message via plain-text email using any one of the following methods:

* Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240714223805.9760-3-yigithanyigitdevel@gmail.com \
    --to=yigithanyigitdevel@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    --cc=thilo.borgmann@mail.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

  git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

  # If you have public-inbox 1.1+ installed, you may
  # initialize and index your mirror using the following commands:
  public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
    ffmpegdev@gitmailbox.com
  public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git