* [FFmpeg-devel] [PATCH] avfilter: add aspectraldn audio filter
@ 2022-03-03 17:30 Paul B Mahol
0 siblings, 0 replies; only message in thread
From: Paul B Mahol @ 2022-03-03 17:30 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavfilter/Makefile | 1 +
libavfilter/af_aspectraldn.c | 440 +++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
3 files changed, 442 insertions(+)
create mode 100644 libavfilter/af_aspectraldn.c
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 9f0e5de532..35e7977741 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -96,6 +96,7 @@ OBJS-$(CONFIG_ASETTB_FILTER) += settb.o
OBJS-$(CONFIG_ASHOWINFO_FILTER) += af_ashowinfo.o
OBJS-$(CONFIG_ASIDEDATA_FILTER) += f_sidedata.o
OBJS-$(CONFIG_ASOFTCLIP_FILTER) += af_asoftclip.o
+OBJS-$(CONFIG_ASPECTRALDN_FILTER) += af_aspectraldn.o
OBJS-$(CONFIG_ASPECTRALSTATS_FILTER) += af_aspectralstats.o
OBJS-$(CONFIG_ASPLIT_FILTER) += split.o
OBJS-$(CONFIG_ASR_FILTER) += af_asr.o
diff --git a/libavfilter/af_aspectraldn.c b/libavfilter/af_aspectraldn.c
new file mode 100644
index 0000000000..0a9c98deb6
--- /dev/null
+++ b/libavfilter/af_aspectraldn.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2022 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "libavutil/tx.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+#include "filters.h"
+#include "window_func.h"
+
+/* Selects which signal output_frame() writes into the outgoing frame. */
+enum OutModes {
+ IN_MODE, /* copy the buffered input samples */
+ OUT_MODE, /* emit the processed (mask-weighted) signal */
+ NOISE_MODE, /* emit the buffered input minus the processed signal */
+ NB_MODES /* number of modes; NB_MODES-1 is the upper bound of the "output" option */
+};
+
+/* Private state of the aspectraldn filter: user options, derived sizes and
+ * the per-channel work buffers and transform contexts. */
+typedef struct AudioSpectralDenoiseContext
+{
+ const AVClass *class;
+
+ float std_thresh; /* "stdthr" option: stddev multiplier added to the mean dB to form the noise threshold */
+ float reduction; /* "reduction" option: passed to get_mask() as the mask weight */
+ float overlap; /* "overlap" option: window overlap fraction */
+ float smooth_freq; /* "smoothf" option: used to derive the mask smoothing coefficients */
+ int stationary; /* "stationary" option; NOTE(review): not read anywhere in the visible code */
+ int fft_factor; /* "fft_factor" option: multiplier applied to win_size */
+ int fft_size; /* win_size * fft_factor, set in config_input() */
+ int win_size; /* "win_size" option: analysis window length in samples */
+ int win_func; /* "win_func" option: window function passed to generate_window_func() */
+ int output_mode; /* "output" option: one of enum OutModes */
+ int sample_advance; /* win_size * (1 - overlap): samples consumed per processing step */
+
+ /* Work buffers; indexed per channel through extended_data[ch]. */
+ AVFrame *fft_in; /* windowed input of the forward transform / output of the inverse one */
+ AVFrame *fft_out; /* forward transform output, scaled and masked in place */
+ AVFrame *sig_db; /* per-bin dB values */
+ AVFrame *sig_mask; /* per-bin gain mask */
+ AVFrame *temp; /* overlap-add accumulator for the processed signal */
+ AVFrame *winframe; /* sliding buffer of input samples */
+
+ AVTXContext **fft, **ifft; /* one forward and one inverse transform context per channel */
+ av_tx_fn tx_fn, itx_fn; /* transform functions returned by av_tx_init() */
+
+ float *window; /* win_size window coefficients */
+ float smooth_freq_a[3]; /* smoothing filter feedback coefficients, normalized by a[0] */
+ float smooth_freq_b[3]; /* smoothing filter feedforward coefficients, normalized by a[0] */
+
+ int channels; /* channel count captured in config_input() */
+} AudioSpectralDenoiseContext;
+
+/* OFFSET() locates an option's storage inside the private context.
+ * AF marks an audio filtering option; AFR additionally flags it as a
+ * runtime parameter. */
+#define OFFSET(x) offsetof(AudioSpectralDenoiseContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
+
+/* User-visible options. The entries in the "mode" unit are the named
+ * constants accepted by the "output" option (enum OutModes). */
+static const AVOption aspectraldn_options[] = {
+ { "stdthr", "set the standard threshold", OFFSET(std_thresh), AV_OPT_TYPE_FLOAT, {.dbl = 0.0}, -1, 30, AFR },
+ { "reduction", "set the noise reduction", OFFSET(reduction), AV_OPT_TYPE_FLOAT, {.dbl = 0.9}, 0, 1, AFR },
+ { "smoothf", "set the freq smooth for mask",OFFSET(smooth_freq), AV_OPT_TYPE_FLOAT, {.dbl = 0.01}, 0, 1, AF },
+ { "stationary", "use stationary reduction", OFFSET(stationary), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AFR },
+ { "fft_factor", "set the fft factor", OFFSET(fft_factor), AV_OPT_TYPE_INT, {.i64 = 1}, 1, 8, AF },
+ { "win_size", "set the window size", OFFSET(win_size), AV_OPT_TYPE_INT, {.i64 = 2048}, 128,8192,AF },
+ WIN_FUNC_OPTION("win_func", OFFSET(win_func), AF, WFUNC_HANNING),
+ { "overlap", "set the window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl = .75}, 0, 1, AF },
+ { "output", "set the output mode", OFFSET(output_mode), AV_OPT_TYPE_INT, {.i64 = OUT_MODE}, 0, NB_MODES-1, AFR, "mode" },
+ { "input", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, "mode" },
+ { "i", "input", 0, AV_OPT_TYPE_CONST, {.i64 = IN_MODE}, 0, 0, AFR, "mode" },
+ { "output", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, "mode" },
+ { "o", "output", 0, AV_OPT_TYPE_CONST, {.i64 = OUT_MODE}, 0, 0, AFR, "mode" },
+ { "noise", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE},0, 0, AFR, "mode" },
+ { "n", "noise", 0, AV_OPT_TYPE_CONST, {.i64 = NOISE_MODE},0, 0, AFR, "mode" },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(aspectraldn);
+
+/**
+ * Configure the filter for the negotiated input link.
+ *
+ * Derives the hop size and FFT size from the user options, computes the
+ * normalized coefficients of the mask smoothing filter, allocates the
+ * per-channel work buffers and the analysis window, and creates one forward
+ * and one inverse transform context per channel.
+ *
+ * @param inlink input link being configured
+ * @return 0 on success, AVERROR(EINVAL) if the overlap leaves no samples to
+ *         advance per step, AVERROR(ENOMEM) on allocation failure, or the
+ *         error returned by av_tx_init()
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioSpectralDenoiseContext *s = ctx->priv;
+    float overlap, w0, alpha;
+    int ret;
+
+    s->sample_advance = s->win_size * (1.f - s->overlap);
+    /* An overlap of 1 (or close to it) truncates the hop size to zero;
+     * activate() would then ask for zero samples per step. */
+    if (s->sample_advance <= 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Window overlap %g leaves no samples to advance per step.\n",
+               s->overlap);
+        return AVERROR(EINVAL);
+    }
+
+    s->fft_size = s->win_size * s->fft_factor;
+    w0 = M_PI * s->smooth_freq;
+    alpha = sinf(w0) * 0.5f;
+
+    s->smooth_freq_a[0] = 1.f + alpha;
+    s->smooth_freq_a[1] = -2.f * cosf(w0);
+    s->smooth_freq_a[2] = 1.f - alpha;
+    s->smooth_freq_b[0] = (1.f - cosf(w0)) / 2.f;
+    s->smooth_freq_b[1] = 1.f - cosf(w0);
+    s->smooth_freq_b[2] = (1.f - cosf(w0)) / 2.f;
+    /* Normalize every coefficient by a[0]; a[0] itself is left as is. */
+    s->smooth_freq_a[1] /= s->smooth_freq_a[0];
+    s->smooth_freq_a[2] /= s->smooth_freq_a[0];
+    s->smooth_freq_b[0] /= s->smooth_freq_a[0];
+    s->smooth_freq_b[1] /= s->smooth_freq_a[0];
+    s->smooth_freq_b[2] /= s->smooth_freq_a[0];
+
+    /* Buffers holding complex data need two floats per FFT bin. */
+    s->winframe = ff_get_audio_buffer(inlink, s->fft_size * 2);
+    s->fft_in   = ff_get_audio_buffer(inlink, s->fft_size * 2);
+    s->fft_out  = ff_get_audio_buffer(inlink, s->fft_size * 2);
+    s->temp     = ff_get_audio_buffer(inlink, s->fft_size * 2);
+    s->sig_db   = ff_get_audio_buffer(inlink, s->fft_size);
+    s->sig_mask = ff_get_audio_buffer(inlink, s->fft_size);
+    s->window   = av_calloc(s->win_size, sizeof(*s->window));
+    if (!s->fft_in || !s->winframe || !s->fft_out || !s->temp || !s->sig_db ||
+        !s->sig_mask || !s->window)
+        return AVERROR(ENOMEM);
+
+    generate_window_func(s->window, s->win_size, s->win_func, &overlap);
+
+    /* channels is stored before the arrays are allocated so that uninit()
+     * knows how many contexts to release after a partial failure. */
+    s->channels = inlink->channels;
+    s->fft  = av_calloc(s->channels, sizeof(*s->fft));
+    s->ifft = av_calloc(s->channels, sizeof(*s->ifft));
+    if (!s->fft || !s->ifft)
+        return AVERROR(ENOMEM);
+
+    for (int ch = 0; ch < s->channels; ch++) {
+        float scale = 1.f, iscale = 1.f;
+
+        ret = av_tx_init(&s->fft[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->fft_size, &scale, 0);
+        if (ret < 0)
+            return ret;
+
+        ret = av_tx_init(&s->ifft[ch], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->fft_size, &iscale, 0);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Build a per-bin gain mask from a dB spectrum.
+ *
+ * Bins strictly above db_thresh get reduction + (1 - reduction); all other
+ * bins get 1 - reduction.
+ *
+ * @param out       destination mask, size entries
+ * @param in        dB values, size entries
+ * @param size      number of bins
+ * @param db_thresh threshold in dB
+ * @param reduction weight applied to bins above the threshold
+ */
+static void get_mask(float *out, const float *in, int size, float db_thresh, float reduction)
+{
+    const float floor_gain = 1.f - reduction;
+    int k = 0;
+
+    while (k < size) {
+        const float gate = (in[k] > db_thresh) ? 1.f : 0.f;
+
+        out[k] = gate * reduction + floor_gain;
+        k++;
+    }
+}
+
+/**
+ * Convert a power spectrum to decibels in place, relative to ref, and limit
+ * the dynamic range to top_db below the loudest bin.
+ *
+ * @param S       power values on input, dB values on output
+ * @param size    number of bins
+ * @param ref     reference power; clamped to at least amin
+ * @param amin    lower clamp applied before taking the logarithm
+ * @param top_db  maximum distance, in dB, allowed below the peak bin
+ * @param max_mag unused; kept for interface compatibility
+ */
+static void power_to_db(float *S, int size, float ref, float amin, float top_db, float max_mag)
+{
+    /* The reference term does not depend on the bin, so compute it once. */
+    const float ref_db = 10.f * log10f(fmaxf(amin, ref));
+    /* Start from the most negative float: dB values are usually negative,
+     * and FLT_MIN is the smallest *positive* float, which would pin the
+     * peak near 0 dB and clamp the whole spectrum to -top_db. */
+    float log_max = -FLT_MAX;
+
+    for (int n = 0; n < size; n++) {
+        S[n] = 10.f * log10f(fmaxf(amin, S[n])) - ref_db;
+        log_max = fmaxf(log_max, S[n]);
+    }
+
+    for (int n = 0; n < size; n++)
+        S[n] = fmaxf(log_max - top_db, S[n]);
+}
+
+/**
+ * Compute the squared magnitude of every complex bin and convert the result
+ * to decibels through power_to_db().
+ *
+ * @param S      destination, receives size dB values
+ * @param in     complex spectrum, size bins
+ * @param size   number of bins
+ * @param ref    reference power forwarded to power_to_db()
+ * @param amin   lower clamp forwarded to power_to_db()
+ * @param top_db dynamic range limit forwarded to power_to_db()
+ */
+static void amplitude_to_db(float *S, AVComplexFloat *in, int size, float ref, float amin, float top_db)
+{
+    float peak = 0.f;
+    int k = 0;
+
+    while (k < size) {
+        const AVComplexFloat *bin = &in[k];
+        const float power = bin->re * bin->re + bin->im * bin->im;
+
+        S[k] = power;
+        peak = fmaxf(power, peak);
+        k++;
+    }
+
+    power_to_db(S, size, ref, amin, top_db, peak);
+}
+
+/**
+ * Arithmetic mean of size float values, accumulated in double precision.
+ *
+ * @param S    input values
+ * @param size number of values
+ * @return the mean, converted to float
+ */
+static float get_mean(const float *S, int size)
+{
+    const float *const end = S + size;
+    double total = 0.0;
+
+    for (const float *p = S; p < end; p++)
+        total += *p;
+
+    return total / size;
+}
+
+/**
+ * Population standard deviation of size float values around a given mean.
+ *
+ * Each squared deviation is formed in single precision and accumulated in
+ * double precision.
+ *
+ * @param S    input values
+ * @param size number of values
+ * @param mean mean of the values
+ * @return sqrt(sum((S[n] - mean)^2) / size), converted to float
+ */
+static float get_stddev(const float *S, int size, float mean)
+{
+    double acc = 0.0;
+    int k = 0;
+
+    while (k < size) {
+        const float dev = S[k++] - mean;
+
+        acc += dev * dev;
+    }
+
+    return sqrt(acc / size);
+}
+
+/**
+ * Smooth the mask in place with a second-order recursive filter, run once
+ * from the first bin to the last and then once from the last bin back to
+ * the first. The filter state is reset to zero before each pass.
+ *
+ * @param m    mask values, filtered in place
+ * @param size number of values
+ * @param a    feedback coefficients; a[1] and a[2] are used
+ * @param b    feedforward coefficients b[0..2]
+ */
+static void smooth_mask(float *m, int size, float *a, float *b)
+{
+    const float ff0 = b[0], ff1 = b[1], ff2 = b[2];
+    const float fb1 = -a[1], fb2 = -a[2];
+
+    for (int pass = 0; pass < 2; pass++) {
+        const int step = pass ? -1 : 1;
+        int idx = pass ? size - 1 : 0;
+        float s1 = 0.f, s2 = 0.f;
+
+        for (int k = 0; k < size; k++, idx += step) {
+            const float x = m[idx];
+            const float y = ff0 * x + s1;
+
+            s1 = ff1 * x + s2 + fb1 * y;
+            s2 = ff2 * x + fb2 * y;
+            m[idx] = y;
+        }
+    }
+}
+
+/**
+ * Slice-threaded worker: process one hop of input for a range of channels.
+ *
+ * For every channel assigned to this job the new samples are appended to
+ * the sliding input buffer, the windowed block is transformed, a smoothed
+ * gain mask is derived from its dB spectrum and applied, and the inverse
+ * transform is windowed and accumulated into the overlap-add buffer.
+ *
+ * @param ctx     filter context
+ * @param arg     input AVFrame holding up to sample_advance new samples
+ * @param jobnr   index of this job
+ * @param nb_jobs total number of jobs
+ * @return 0
+ */
+static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    AudioSpectralDenoiseContext *s = ctx->priv;
+    AVFrame *in = arg;
+    const int fft_size = s->fft_size;
+    const int win_size = s->win_size;
+    const float wscale = 1.f / fft_size;
+    const float *window = s->window;
+    const int start = (in->channels * jobnr) / nb_jobs;
+    const int end = (in->channels * (jobnr+1)) / nb_jobs;
+    const int offset = win_size - s->sample_advance;
+    /* The last frame before EOF may carry fewer samples than a full hop;
+     * never read past what the frame actually holds. */
+    const int nb_samples = FFMIN(in->nb_samples, s->sample_advance);
+
+    for (int ch = start; ch < end; ch++) {
+        float *src = (float *)s->winframe->extended_data[ch];
+        float *dst = (float *)s->temp->extended_data[ch];
+        AVComplexFloat *fft_out = (AVComplexFloat *)s->fft_out->extended_data[ch];
+        AVComplexFloat *fft_in = (AVComplexFloat *)s->fft_in->extended_data[ch];
+        float *sig_mask = (float *)s->sig_mask->extended_data[ch];
+        float *sig_db = (float *)s->sig_db->extended_data[ch];
+        float mean, stddev, noise_thresh;
+
+        /* Slide the input window by one hop and append the new samples,
+         * padding a short final frame with silence. */
+        memmove(src, &src[s->sample_advance], offset * sizeof(float));
+        memcpy(&src[offset], in->extended_data[ch], nb_samples * sizeof(float));
+        memset(&src[offset + nb_samples], 0,
+               (s->sample_advance - nb_samples) * sizeof(float));
+
+        for (int n = 0; n < win_size; n++) {
+            fft_in[n].re = window[n] * src[n];
+            fft_in[n].im = 0;
+        }
+
+        /* Zero-pad up to the FFT size when fft_factor > 1. */
+        for (int n = win_size; n < fft_size; n++) {
+            fft_in[n].re = 0;
+            fft_in[n].im = 0;
+        }
+
+        s->tx_fn(s->fft[ch], fft_out, fft_in, sizeof(float));
+        /* Scale here so the unscaled inverse transform has unity gain. */
+        for (int n = 0; n < fft_size; n++) {
+            fft_out[n].re *= wscale;
+            fft_out[n].im *= wscale;
+        }
+
+        amplitude_to_db(sig_db, fft_out, fft_size, 1.f, 1e-20f, 80.f);
+        mean = get_mean(sig_db, fft_size);
+        stddev = get_stddev(sig_db, fft_size, mean);
+
+        noise_thresh = mean + stddev * s->std_thresh;
+
+        get_mask(sig_mask, sig_db, fft_size, noise_thresh, s->reduction);
+        smooth_mask(sig_mask, fft_size, s->smooth_freq_a, s->smooth_freq_b);
+
+        for (int n = 0; n < fft_size; n++) {
+            fft_out[n].re *= sig_mask[n];
+            fft_out[n].im *= sig_mask[n];
+        }
+
+        s->itx_fn(s->ifft[ch], fft_in, fft_out, sizeof(float));
+
+        /* Overlap-add the windowed result into the accumulator. */
+        for (int n = 0; n < win_size; n++)
+            dst[n] += window[n] * fft_in[n].re;
+    }
+
+    return 0;
+}
+
+/**
+ * Process one hop of input and send the resulting frame downstream.
+ *
+ * Runs filter_channel() over all channels, then fills the output frame
+ * according to the selected output mode (the input is reused when it is
+ * writable) and advances the overlap-add buffer by one hop.
+ *
+ * @param inlink input link the frame arrived on
+ * @param in     input frame; ownership is taken
+ * @return the result of ff_filter_frame(), AVERROR(ENOMEM) if the output
+ *         frame cannot be allocated, or AVERROR_BUG on an unknown mode
+ */
+static int output_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioSpectralDenoiseContext *s = ctx->priv;
+    const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
+    const float scale = sqrtf(1.f - s->overlap);
+    /* The last frame before EOF may be shorter than a full hop; the output
+     * frame has the same length, so never write more than that. */
+    const int nb_samples = FFMIN(in->nb_samples, s->sample_advance);
+    AVFrame *out;
+
+    ff_filter_execute(ctx, filter_channel, in, NULL,
+                      FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(outlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+
+        out->pts = in->pts;
+    }
+
+    for (int ch = 0; ch < inlink->channels; ch++) {
+        float *src = (float *)s->temp->extended_data[ch];
+        float *orig = (float *)s->winframe->extended_data[ch];
+        float *dst = (float *)out->extended_data[ch];
+
+        switch (output_mode) {
+        case IN_MODE:
+            for (int n = 0; n < nb_samples; n++)
+                dst[n] = orig[n];
+            break;
+        case OUT_MODE:
+            for (int n = 0; n < nb_samples; n++)
+                dst[n] = src[n] * scale;
+            break;
+        case NOISE_MODE:
+            for (int n = 0; n < nb_samples; n++)
+                dst[n] = orig[n] - src[n] * scale;
+            break;
+        default:
+            if (out != in)
+                av_frame_free(&out);
+            av_frame_free(&in);
+            return AVERROR_BUG;
+        }
+
+        /* Drop the hop that was just emitted from the accumulator. */
+        memmove(src, src + s->sample_advance, (s->fft_size * 2 - s->sample_advance) * sizeof(*src));
+    }
+
+    if (out != in)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/**
+ * Activation callback: consume exactly one hop of samples when available
+ * and process it, otherwise forward status and frame requests between the
+ * links.
+ *
+ * @param ctx filter context
+ * @return the result of output_frame(), a negative error code from
+ *         ff_inlink_consume_samples(), a value returned by one of the
+ *         forwarding macros, or FFERROR_NOT_READY
+ */
+static int activate(AVFilterContext *ctx)
+{
+    AudioSpectralDenoiseContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFrame *frame = NULL;
+    int got;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    got = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &frame);
+    if (got > 0)
+        return output_frame(inlink, frame);
+    if (got < 0)
+        return got;
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
+/**
+ * Release everything config_input() allocated: the work frames, the
+ * window, the per-channel transform contexts and the arrays holding them.
+ *
+ * @param ctx filter context
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioSpectralDenoiseContext *s = ctx->priv;
+    AVFrame **frames[] = {
+        &s->fft_in, &s->fft_out, &s->temp,
+        &s->sig_db, &s->sig_mask, &s->winframe,
+    };
+
+    av_freep(&s->window);
+
+    for (size_t i = 0; i < sizeof(frames) / sizeof(frames[0]); i++)
+        av_frame_free(frames[i]);
+
+    /* Either array may be missing if configuration failed part-way. */
+    if (s->fft) {
+        for (int ch = 0; ch < s->channels; ch++)
+            av_tx_uninit(&s->fft[ch]);
+    }
+    if (s->ifft) {
+        for (int ch = 0; ch < s->channels; ch++)
+            av_tx_uninit(&s->ifft[ch]);
+    }
+
+    av_freep(&s->fft);
+    av_freep(&s->ifft);
+}
+
+/* Single audio input pad; config_input() runs when the link is configured. */
+static const AVFilterPad inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .config_props = config_input,
+ },
+};
+
+/* Single audio output pad; no pad-specific callbacks are set. */
+static const AVFilterPad outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_AUDIO,
+ },
+};
+
+/* Filter definition: planar float audio only, slice-threaded, with
+ * internally handled timeline support (a disabled filter outputs the
+ * buffered input). */
+const AVFilter ff_af_aspectraldn = {
+    .name            = "aspectraldn",
+    .description     = NULL_IF_CONFIG_SMALL("Denoise audio samples using Spectral Gating."),
+    .priv_size       = sizeof(AudioSpectralDenoiseContext),
+    .priv_class      = &aspectraldn_class,
+    .activate        = activate,
+    .uninit          = uninit,
+    FILTER_INPUTS(inputs),
+    FILTER_OUTPUTS(outputs),
+    FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
+    /* Several options carry AV_OPT_FLAG_RUNTIME_PARAM; without a command
+     * handler they could never be changed while the filter is running. */
+    .process_command = ff_filter_process_command,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
+                       AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 7ee8402dcc..73df2dc51a 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -87,6 +87,7 @@ extern const AVFilter ff_af_asettb;
extern const AVFilter ff_af_ashowinfo;
extern const AVFilter ff_af_asidedata;
extern const AVFilter ff_af_asoftclip;
+extern const AVFilter ff_af_aspectraldn;
extern const AVFilter ff_af_aspectralstats;
extern const AVFilter ff_af_asplit;
extern const AVFilter ff_af_asr;
--
2.33.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-03-03 17:28 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-03 17:30 [FFmpeg-devel] [PATCH] avfilter: add aspectraldn audio filter Paul B Mahol
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git