commit df4e283d7b2aa4b4de6e405e5dcbbae38d053b9f Author: Marton Balint Date: Sun Oct 16 20:45:51 2016 +0200 lavfi/af_dynaudnorm: add support for momentary loudness based normalization Signed-off-by: Marton Balint diff --git a/doc/filters.texi b/doc/filters.texi index 604e44d569..9d05d7db94 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -3212,6 +3212,22 @@ factor is defined as the factor that would result in exactly that RMS value. Note, however, that the maximum local gain factor is still restricted by the frame's highest magnitude sample, in order to prevent clipping. +@item l +Set the target loudness in LUFS. Range is from -70.0 to 0.0. Default is 0.0 +(disabled). By default, the Dynamic Audio Normalizer performs "peak" +normalization. This means that the maximum local gain factor for each frame is +defined (only) by the frame's highest magnitude sample. This way, the samples +can be amplified as much as possible without exceeding the maximum signal +level, i.e. without clipping. Optionally, however, the Dynamic Audio Normalizer +can also take into account the frame's perceived momentary loudness, which is +measured based on the EBU R128 recommendation. Consequently, by adjusting all +frames to a constant loudness value, a uniform "perceived loudness" can be +established. Note, however, that loudness is measured without any kind of +gating; therefore the integrated loudness as defined by EBU R128 will +usually be lower than the target level, depending on your content. Also note +that the maximum local gain factor is still restricted by the frame's highest +magnitude sample, in order to prevent clipping. + @item n Enable channels coupling. By default is enabled. 
By default, the Dynamic Audio Normalizer will amplify all channels by the same diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 455c809b15..7c3238edd3 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -103,7 +103,7 @@ OBJS-$(CONFIG_CRYSTALIZER_FILTER) += af_crystalizer.o OBJS-$(CONFIG_DCSHIFT_FILTER) += af_dcshift.o OBJS-$(CONFIG_DEESSER_FILTER) += af_deesser.o OBJS-$(CONFIG_DRMETER_FILTER) += af_drmeter.o -OBJS-$(CONFIG_DYNAUDNORM_FILTER) += af_dynaudnorm.o +OBJS-$(CONFIG_DYNAUDNORM_FILTER) += af_dynaudnorm.o ebur128.o OBJS-$(CONFIG_EARWAX_FILTER) += af_earwax.o OBJS-$(CONFIG_EBUR128_FILTER) += f_ebur128.o OBJS-$(CONFIG_EQUALIZER_FILTER) += af_biquads.o diff --git a/libavfilter/af_dynaudnorm.c b/libavfilter/af_dynaudnorm.c index fd430884d7..67db1dcfc2 100644 --- a/libavfilter/af_dynaudnorm.c +++ b/libavfilter/af_dynaudnorm.c @@ -37,6 +37,8 @@ #include "filters.h" #include "internal.h" +#include "ebur128.h" + typedef struct cqueue { double *elements; int size; @@ -59,6 +61,7 @@ typedef struct DynamicAudioNormalizerContext { double peak_value; double max_amplification; double target_rms; + double target_lufs; double compress_factor; double *prev_amplification_factor; double *dc_correction_value; @@ -76,6 +79,8 @@ typedef struct DynamicAudioNormalizerContext { cqueue **gain_history_smoothed; cqueue *is_enabled; + FFEBUR128State **r128; + int nb_r128; } DynamicAudioNormalizerContext; #define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x) @@ -87,6 +92,7 @@ static const AVOption dynaudnorm_options[] = { { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0, 1.0, FLAGS }, { "m", "set the max amplification", OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS }, { "r", "set the target RMS", OFFSET(target_rms), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS }, + { "l", "set the target LUFS", OFFSET(target_lufs), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},-70.0, 0.0, FLAGS }, { "n", 
"set channel coupling", OFFSET(channels_coupled), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS }, { "c", "set DC correction", OFFSET(dc_correction), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS }, { "b", "set alternative boundary mode", OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS }, @@ -290,6 +296,10 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&s->weights); ff_bufqueue_discard_all(&s->queue); + /* config_input() can fail partway through, leaving trailing NULL slots; stop at the first one instead of passing NULL to ff_ebur128_destroy() */ + for (c = 0; c < s->nb_r128 && s->r128[c]; c++) + ff_ebur128_destroy(&s->r128[c]); + av_freep(&s->r128); } static int config_input(AVFilterLink *inlink) @@ -338,6 +348,20 @@ static int config_input(AVFilterLink *inlink) s->channels = inlink->channels; s->delay = s->filter_size; + if (s->target_lufs < -DBL_EPSILON) { + s->nb_r128 = s->channels_coupled ? 1 : inlink->channels; + s->r128 = av_mallocz_array(s->nb_r128, sizeof(*s->r128)); + if (!s->r128) { + s->nb_r128 = 0; + return AVERROR(ENOMEM); + } + for (c = 0; c < s->nb_r128; c++) { + s->r128[c] = ff_ebur128_init(s->channels_coupled ? inlink->channels : 1, inlink->sample_rate, s->frame_len_msec, FF_EBUR128_MODE_M); + if (!s->r128[c]) + return AVERROR(ENOMEM); + } + } + return 0; } @@ -380,6 +404,17 @@ static double find_peak_magnitude(AVFrame *frame, int channel) return max; } +static double compute_frame_lufs_gain(DynamicAudioNormalizerContext *s, AVFrame *frame, int channel) +{ + double lufs; + /* a negative channel index (presumably the coupled-channels case) uses state 0 */ + channel = FFMAX(0, channel); + ff_ebur128_add_frames_planar_double(s->r128[channel], (const double **)frame->extended_data + channel, frame->nb_samples, 1); + ff_ebur128_loudness_window(s->r128[channel], s->frame_len_msec, &lufs); + + return pow(10.0, (s->target_lufs - lufs) / 20.0); +} + static double compute_frame_rms(AVFrame *frame, int channel) { double rms_value = 0.0; @@ -412,7 +447,8 @@ static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *fram { const double maximum_gain = s->peak_value / find_peak_magnitude(frame, channel); const double rms_gain = s->target_rms > DBL_EPSILON ? 
(s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX; - return bound(s->max_amplification, FFMIN(maximum_gain, rms_gain)); + const double lufs_gain = s->target_lufs < -DBL_EPSILON ? compute_frame_lufs_gain(s, frame, channel) : DBL_MAX; + return bound(s->max_amplification, FFMIN(maximum_gain, FFMIN(lufs_gain, rms_gain))); } static double minimum_filter(cqueue *q)