* [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx
@ 2022-02-06 11:25 Paul B Mahol
2022-02-06 17:15 ` Pavel Koshevoy
0 siblings, 1 reply; 5+ messages in thread
From: Paul B Mahol @ 2022-02-06 11:25 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
configure | 3 -
libavfilter/af_atempo.c | 126 ++++++++++++++++++++--------------------
2 files changed, 64 insertions(+), 65 deletions(-)
diff --git a/configure b/configure
index 5a8b52c77d..6ec25dd622 100755
--- a/configure
+++ b/configure
@@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat"
aresample_filter_deps="swresample"
asr_filter_deps="pocketsphinx"
ass_filter_deps="libass"
-atempo_filter_deps="avcodec"
-atempo_filter_select="rdft"
avgblur_opencl_filter_deps="opencl"
avgblur_vulkan_filter_deps="vulkan spirv_compiler"
azmq_filter_deps="libzmq"
@@ -7387,7 +7385,6 @@ enabled zlib && add_cppflags -DZLIB_CONST
# conditional library dependencies, in any order
enabled amovie_filter && prepend avfilter_deps "avformat avcodec"
enabled aresample_filter && prepend avfilter_deps "swresample"
-enabled atempo_filter && prepend avfilter_deps "avcodec"
enabled bm3d_filter && prepend avfilter_deps "avcodec"
enabled cover_rect_filter && prepend avfilter_deps "avformat avcodec"
enabled ebur128_filter && enabled swresample && prepend avfilter_deps "swresample"
diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
index e9a6da7970..27f2f6daa0 100644
--- a/libavfilter/af_atempo.c
+++ b/libavfilter/af_atempo.c
@@ -39,13 +39,13 @@
*/
#include <float.h>
-#include "libavcodec/avfft.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/eval.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
+#include "libavutil/tx.h"
#include "avfilter.h"
#include "audio.h"
#include "internal.h"
@@ -67,7 +67,8 @@ typedef struct AudioFragment {
// rDFT transform of the down-mixed mono fragment, used for
// fast waveform alignment via correlation in frequency domain:
- FFTSample *xdat;
+ float *xdat_in;
+ float *xdat;
} AudioFragment;
/**
@@ -140,9 +141,11 @@ typedef struct ATempoContext {
FilterState state;
// for fast correlation calculation in frequency domain:
- RDFTContext *real_to_complex;
- RDFTContext *complex_to_real;
- FFTSample *correlation;
+ AVTXContext *real_to_complex;
+ AVTXContext *complex_to_real;
+ av_tx_fn r2c_fn, c2r_fn;
+ float *correlation_in;
+ float *correlation;
// for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
AVFrame *dst_buffer;
@@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext *atempo)
av_freep(&atempo->frag[0].data);
av_freep(&atempo->frag[1].data);
+ av_freep(&atempo->frag[0].xdat_in);
+ av_freep(&atempo->frag[1].xdat_in);
av_freep(&atempo->frag[0].xdat);
av_freep(&atempo->frag[1].xdat);
av_freep(&atempo->buffer);
av_freep(&atempo->hann);
+ av_freep(&atempo->correlation_in);
av_freep(&atempo->correlation);
- av_rdft_end(atempo->real_to_complex);
- atempo->real_to_complex = NULL;
-
- av_rdft_end(atempo->complex_to_real);
- atempo->complex_to_real = NULL;
+ av_tx_uninit(&atempo->real_to_complex);
+ av_tx_uninit(&atempo->complex_to_real);
}
/* av_realloc is not aligned enough; fortunately, the data does not need to
@@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext *atempo)
#define RE_MALLOC_OR_FAIL(field, field_size) \
do { \
av_freep(&field); \
- field = av_malloc(field_size); \
+ field = av_calloc(field_size, 1); \
if (!field) { \
yae_release_buffers(atempo); \
return AVERROR(ENOMEM); \
@@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo,
{
const int sample_size = av_get_bytes_per_sample(format);
uint32_t nlevels = 0;
+ float scale = 1.f, iscale = 1.f;
uint32_t pot;
int i;
@@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo,
// initialize audio fragment buffers:
RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
- RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
- RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
+ RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+ RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+ RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
+ RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
// initialize rDFT contexts:
- av_rdft_end(atempo->real_to_complex);
- atempo->real_to_complex = NULL;
-
- av_rdft_end(atempo->complex_to_real);
- atempo->complex_to_real = NULL;
+ av_tx_uninit(&atempo->real_to_complex);
+ av_tx_uninit(&atempo->complex_to_real);
- atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
+ av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
if (!atempo->real_to_complex) {
yae_release_buffers(atempo);
return AVERROR(ENOMEM);
}
- atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
+ av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
if (!atempo->complex_to_real) {
yae_release_buffers(atempo);
return AVERROR(ENOMEM);
}
- RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));
+ RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+ RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(AVComplexFloat));
atempo->ring = atempo->window * 3;
RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
@@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx)
const uint8_t *src_end = src + \
frag->nsamples * atempo->channels * sizeof(scalar_type); \
\
- FFTSample *xdat = frag->xdat; \
+ float *xdat = frag->xdat_in; \
scalar_type tmp; \
\
if (atempo->channels == 1) { \
@@ -356,27 +360,27 @@ static int yae_update(AVFilterContext *ctx)
tmp = *(const scalar_type *)src; \
src += sizeof(scalar_type); \
\
- *xdat = (FFTSample)tmp; \
+ *xdat = (float)tmp; \
} \
} else { \
- FFTSample s, max, ti, si; \
+ float s, max, ti, si; \
int i; \
\
for (; src < src_end; xdat++) { \
tmp = *(const scalar_type *)src; \
src += sizeof(scalar_type); \
\
- max = (FFTSample)tmp; \
- s = FFMIN((FFTSample)scalar_max, \
- (FFTSample)fabsf(max)); \
+ max = (float)tmp; \
+ s = FFMIN((float)scalar_max, \
+ (float)fabsf(max)); \
\
for (i = 1; i < atempo->channels; i++) { \
tmp = *(const scalar_type *)src; \
src += sizeof(scalar_type); \
\
- ti = (FFTSample)tmp; \
- si = FFMIN((FFTSample)scalar_max, \
- (FFTSample)fabsf(ti)); \
+ ti = (float)tmp; \
+ si = FFMIN((float)scalar_max, \
+ (float)fabsf(ti)); \
\
if (s < si) { \
s = si; \
@@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
const uint8_t *src = frag->data;
// init complex data buffer used for FFT and Correlation:
- memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
+ memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window);
if (atempo->format == AV_SAMPLE_FMT_U8) {
yae_init_xdat(uint8_t, 127);
@@ -598,32 +602,24 @@ static void yae_advance_to_next_frag(ATempoContext *atempo)
* Multiply two vectors of complex numbers (result of real_to_complex rDFT)
* and transform back via complex_to_real rDFT.
*/
-static void yae_xcorr_via_rdft(FFTSample *xcorr,
- RDFTContext *complex_to_real,
- const FFTComplex *xa,
- const FFTComplex *xb,
+static void yae_xcorr_via_rdft(float *xcorr_in,
+ float *xcorr,
+ AVTXContext *complex_to_real,
+ av_tx_fn c2r_fn,
+ const AVComplexFloat *xa,
+ const AVComplexFloat *xb,
const int window)
{
- FFTComplex *xc = (FFTComplex *)xcorr;
+ AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
int i;
- // NOTE: first element requires special care -- Given Y = rDFT(X),
- // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
- // stores Re(Y[N/2]) in place of Im(Y[0]).
-
- xc->re = xa->re * xb->re;
- xc->im = xa->im * xb->im;
- xa++;
- xb++;
- xc++;
-
- for (i = 1; i < window; i++, xa++, xb++, xc++) {
+ for (i = 0; i <= window; i++, xa++, xb++, xc++) {
xc->re = (xa->re * xb->re + xa->im * xb->im);
xc->im = (xa->im * xb->re - xa->re * xb->im);
}
// apply inverse rDFT:
- av_rdft_calc(complex_to_real, xcorr);
+ c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
}
/**
@@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag,
const int window,
const int delta_max,
const int drift,
- FFTSample *correlation,
- RDFTContext *complex_to_real)
+ float *correlation_in,
+ float *correlation,
+ AVTXContext *complex_to_real,
+ av_tx_fn c2r_fn)
{
int best_offset = -drift;
- FFTSample best_metric = -FLT_MAX;
- FFTSample *xcorr;
+ float best_metric = -FLT_MAX;
+ float *xcorr;
int i0;
int i1;
int i;
- yae_xcorr_via_rdft(correlation,
+ yae_xcorr_via_rdft(correlation_in,
+ correlation,
complex_to_real,
- (const FFTComplex *)prev->xdat,
- (const FFTComplex *)frag->xdat,
+ c2r_fn,
+ (const AVComplexFloat *)prev->xdat,
+ (const AVComplexFloat *)frag->xdat,
window);
// identify search window boundaries:
@@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag,
xcorr = correlation + i0;
for (i = i0; i < i1; i++, xcorr++) {
- FFTSample metric = *xcorr;
+ float metric = *xcorr;
// normalize:
- FFTSample drifti = (FFTSample)(drift + i);
- metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
+ float drifti = (float)(drift + i);
+ metric *= drifti * (float)(i - i0) * (float)(i1 - i);
if (metric > best_metric) {
best_metric = metric;
@@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext *atempo)
atempo->window,
delta_max,
drift,
+ atempo->correlation_in,
atempo->correlation,
- atempo->complex_to_real);
+ atempo->complex_to_real,
+ atempo->c2r_fn);
if (correction) {
// adjust fragment position:
@@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo,
yae_downmix(atempo, yae_curr_frag(atempo));
// apply rDFT:
- av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+ atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
// must load the second fragment before alignment can start:
if (!atempo->nfrag) {
@@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo,
yae_downmix(atempo, yae_curr_frag(atempo));
// apply rDFT:
- av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+ atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
atempo->state = YAE_OUTPUT_OVERLAP_ADD;
}
@@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo,
yae_downmix(atempo, frag);
// apply rDFT:
- av_rdft_calc(atempo->real_to_complex, frag->xdat);
+ atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));
// align current fragment to previous fragment:
if (yae_adjust_position(atempo)) {
--
2.33.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
* Re: [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx
2022-02-06 11:25 [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx Paul B Mahol
@ 2022-02-06 17:15 ` Pavel Koshevoy
2022-02-06 17:24 ` Paul B Mahol
0 siblings, 1 reply; 5+ messages in thread
From: Pavel Koshevoy @ 2022-02-06 17:15 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
[...]
> // rDFT transform of the down-mixed mono fragment, used for
> // fast waveform alignment via correlation in frequency domain:
> - FFTSample *xdat;
> + float *xdat_in;
> + float *xdat;
> } AudioFragment;
>
>
Is the old API being removed or deprecated?
Just wondering why this change is necessary.
[...]
> - // NOTE: first element requires special care -- Given Y = rDFT(X),
> - // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
> - // stores Re(Y[N/2]) in place of Im(Y[0]).
> -
> - xc->re = xa->re * xb->re;
> - xc->im = xa->im * xb->im;
> - xa++;
> - xb++;
> - xc++;
> -
> - for (i = 1; i < window; i++, xa++, xb++, xc++) {
> + for (i = 0; i <= window; i++, xa++, xb++, xc++) {
>
This used to iterate over [1, window - 1] elements.
Now it iterates over [0, window] elements.
Is this correct? That's 2 additional elements.
> xc->re = (xa->re * xb->re + xa->im * xb->im);
> xc->im = (xa->im * xb->re - xa->re * xb->im);
> }
>
> // apply inverse rDFT:
> - av_rdft_calc(complex_to_real, xcorr);
> + c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
> }
[...]
* Re: [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx
2022-02-06 17:15 ` Pavel Koshevoy
@ 2022-02-06 17:24 ` Paul B Mahol
2022-02-06 18:04 ` Pavel Koshevoy
0 siblings, 1 reply; 5+ messages in thread
From: Paul B Mahol @ 2022-02-06 17:24 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote:
> On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
>
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
[...]
> > // rDFT transform of the down-mixed mono fragment, used for
> > // fast waveform alignment via correlation in frequency domain:
> > - FFTSample *xdat;
> > + float *xdat_in;
> > + float *xdat;
> > } AudioFragment;
> >
> >
> Is the old API being removed or deprecated?
> Just wondering why this change is necessary.
>
The new API is faster.
[...]
> > - // NOTE: first element requires special care -- Given Y = rDFT(X),
> > - // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
> > - // stores Re(Y[N/2]) in place of Im(Y[0]).
> > -
> > - xc->re = xa->re * xb->re;
> > - xc->im = xa->im * xb->im;
> > - xa++;
> > - xb++;
> > - xc++;
> > -
> > - for (i = 1; i < window; i++, xa++, xb++, xc++) {
> > + for (i = 0; i <= window; i++, xa++, xb++, xc++) {
> >
>
> This used to iterate over [1, window - 1] elements.
> Now it iterates over [0, window] elements.
> Is this correct? That's 2 additional elements.
>
Yes, the newer API does not use the previous API's packing hack; it
produces n/2 + 1 complex numbers instead of n/2.
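To make that concrete, here is a minimal sketch of the layout difference and
the resulting uniform loop (the function name is illustrative, not from the
patch; it only assumes libavutil/tx.h):

#include "libavutil/tx.h"

/* avfft's av_rdft_calc() packed its N/2 complex output bins in place:
 * Re(Y[N/2]) was stored in the Im(Y[0]) slot (both imaginary parts are
 * zero for real input), hence the old special case for the first bin.
 * AV_TX_FLOAT_RDFT writes N/2 + 1 unpacked AVComplexFloat bins
 * (DC .. Nyquist), so a single uniform loop over window + 1 bins is
 * enough, with window == N/2 as in af_atempo. */
static void xcorr_spectrum(AVComplexFloat *xc,
                           const AVComplexFloat *xa,
                           const AVComplexFloat *xb,
                           int window)
{
    for (int i = 0; i <= window; i++) {
        /* cross-spectrum: one spectrum times the conjugate of the other */
        xc[i].re = xa[i].re * xb[i].re + xa[i].im * xb[i].im;
        xc[i].im = xa[i].im * xb[i].re - xa[i].re * xb[i].im;
    }
}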
* Re: [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx
2022-02-06 17:24 ` Paul B Mahol
@ 2022-02-06 18:04 ` Pavel Koshevoy
2022-02-06 19:02 ` Lynne
0 siblings, 1 reply; 5+ messages in thread
From: Pavel Koshevoy @ 2022-02-06 18:04 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, Feb 6, 2022 at 10:24 AM Paul B Mahol <onemda@gmail.com> wrote:
> On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote:
>
> > On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
> >
> > > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > > ---
[...]
> > > // rDFT transform of the down-mixed mono fragment, used for
> > > // fast waveform alignment via correlation in frequency domain:
> > > - FFTSample *xdat;
> > > + float *xdat_in;
> > > + float *xdat;
> > > } AudioFragment;
> > >
> > >
> > Is the old API being removed or deprecated?
> > Just wondering why this change is necessary.
> >
>
> The new API is faster.
>
[...]
> > > - // NOTE: first element requires special care -- Given Y = rDFT(X),
> > > - // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
> > > - // stores Re(Y[N/2]) in place of Im(Y[0]).
> > > -
> > > - xc->re = xa->re * xb->re;
> > > - xc->im = xa->im * xb->im;
> > > - xa++;
> > > - xb++;
> > > - xc++;
> > > -
> > > - for (i = 1; i < window; i++, xa++, xb++, xc++) {
> > > + for (i = 0; i <= window; i++, xa++, xb++, xc++) {
> > >
> >
> > This used to iterate over [1, window - 1] elements.
> > Now it iterates over [0, window] elements.
> > Is this correct? That's 2 additional elements.
> >
>
> Yes, the newer API does not use the previous API's packing hack; it
> produces n/2 + 1 complex numbers instead of n/2.
>
cool, thanks ... lgtm if it still works
* Re: [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx
2022-02-06 18:04 ` Pavel Koshevoy
@ 2022-02-06 19:02 ` Lynne
0 siblings, 0 replies; 5+ messages in thread
From: Lynne @ 2022-02-06 19:02 UTC (permalink / raw)
To: FFmpeg development discussions and patches
6 Feb 2022, 19:04 by pkoshevoy@gmail.com:
> On Sun, Feb 6, 2022 at 10:24 AM Paul B Mahol <onemda@gmail.com> wrote:
>
>> On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote:
>>
>> > On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
>>
>> > >
>> > >
>> > Is the old API being removed or deprecated?
>> > Just wondering why this change is necessary.
>> >
>>
>> The new API is faster.
>>
The old API will be deprecated soon, but we have to replace its uses
in our code first.
The new API is faster, and supports non-power-of-two lengths.
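For reference, the replacement pattern looks roughly like the sketch below.
It mirrors what the patch does; the identifiers and error handling are
illustrative, not taken from af_atempo:

#include "libavutil/error.h"
#include "libavutil/mem.h"
#include "libavutil/tx.h"

/* Forward real-to-complex RDFT with the lavu/tx API: out-of-place,
 * n real floats in, n/2 + 1 AVComplexFloat bins out. */
static int rdft_example(int n)
{
    AVTXContext *r2c = NULL;
    av_tx_fn r2c_fn = NULL;
    float scale = 1.f;
    float *in = NULL;
    AVComplexFloat *out = NULL;
    int ret;

    ret = av_tx_init(&r2c, &r2c_fn, AV_TX_FLOAT_RDFT, 0 /* forward */, n, &scale, 0);
    if (ret < 0)
        return ret;

    in  = av_calloc(n, sizeof(*in));
    out = av_calloc(n / 2 + 1, sizeof(*out));
    if (!in || !out) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    /* fill in[] with n real samples, then transform out of place;
     * the call shape matches the patch: fn(ctx, dst, src, stride) */
    r2c_fn(r2c, out, in, sizeof(float));

end:
    av_freep(&in);
    av_freep(&out);
    av_tx_uninit(&r2c);
    return ret;
}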