From: Pavel Koshevoy <pkoshevoy@gmail.com> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: Re: [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from lavu/tx Date: Sun, 6 Feb 2022 11:04:09 -0700 Message-ID: <CAJgjuoxT2vVD=QS8DiS_1Twgcx7JVHoyXfSiG+yLs+iJ5vF+iA@mail.gmail.com> (raw) In-Reply-To: <CAPYw7P5wykpQjnszfGHePmH45LUb9WzA=PLThvp+Fu3mkSnmJg@mail.gmail.com> On Sun, Feb 6, 2022 at 10:24 AM Paul B Mahol <onemda@gmail.com> wrote: > On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote: > > > On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote: > > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > > --- > > > configure | 3 - > > > libavfilter/af_atempo.c | 126 ++++++++++++++++++++-------------------- > > > 2 files changed, 64 insertions(+), 65 deletions(-) > > > > > > diff --git a/configure b/configure > > > index 5a8b52c77d..6ec25dd622 100755 > > > --- a/configure > > > +++ b/configure > > > @@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat" > > > aresample_filter_deps="swresample" > > > asr_filter_deps="pocketsphinx" > > > ass_filter_deps="libass" > > > -atempo_filter_deps="avcodec" > > > -atempo_filter_select="rdft" > > > avgblur_opencl_filter_deps="opencl" > > > avgblur_vulkan_filter_deps="vulkan spirv_compiler" > > > azmq_filter_deps="libzmq" > > > @@ -7387,7 +7385,6 @@ enabled zlib && add_cppflags -DZLIB_CONST > > > # conditional library dependencies, in any order > > > enabled amovie_filter && prepend avfilter_deps "avformat > avcodec" > > > enabled aresample_filter && prepend avfilter_deps "swresample" > > > -enabled atempo_filter && prepend avfilter_deps "avcodec" > > > enabled bm3d_filter && prepend avfilter_deps "avcodec" > > > enabled cover_rect_filter && prepend avfilter_deps "avformat > avcodec" > > > enabled ebur128_filter && enabled swresample && prepend avfilter_deps > > > "swresample" > > > diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c > 
> > index e9a6da7970..27f2f6daa0 100644 > > > --- a/libavfilter/af_atempo.c > > > +++ b/libavfilter/af_atempo.c > > > @@ -39,13 +39,13 @@ > > > */ > > > > > > #include <float.h> > > > -#include "libavcodec/avfft.h" > > > #include "libavutil/avassert.h" > > > #include "libavutil/avstring.h" > > > #include "libavutil/channel_layout.h" > > > #include "libavutil/eval.h" > > > #include "libavutil/opt.h" > > > #include "libavutil/samplefmt.h" > > > +#include "libavutil/tx.h" > > > #include "avfilter.h" > > > #include "audio.h" > > > #include "internal.h" > > > @@ -67,7 +67,8 @@ typedef struct AudioFragment { > > > > > > // rDFT transform of the down-mixed mono fragment, used for > > > // fast waveform alignment via correlation in frequency domain: > > > - FFTSample *xdat; > > > + float *xdat_in; > > > + float *xdat; > > > } AudioFragment; > > > > > > > > Is the old API being removed or deprecated? > > Just wondering why this change is necessary. > > > > New api is faster. > > > > > > > > > > > > /** > > > @@ -140,9 +141,11 @@ typedef struct ATempoContext { > > > FilterState state; > > > > > > // for fast correlation calculation in frequency domain: > > > - RDFTContext *real_to_complex; > > > - RDFTContext *complex_to_real; > > > - FFTSample *correlation; > > > + AVTXContext *real_to_complex; > > > + AVTXContext *complex_to_real; > > > + av_tx_fn r2c_fn, c2r_fn; > > > + float *correlation_in; > > > + float *correlation; > > > > > > // for managing AVFilterPad.request_frame and > > AVFilterPad.filter_frame > > > AVFrame *dst_buffer; > > > @@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext > > > *atempo) > > > > > > av_freep(&atempo->frag[0].data); > > > av_freep(&atempo->frag[1].data); > > > + av_freep(&atempo->frag[0].xdat_in); > > > + av_freep(&atempo->frag[1].xdat_in); > > > av_freep(&atempo->frag[0].xdat); > > > av_freep(&atempo->frag[1].xdat); > > > > > > av_freep(&atempo->buffer); > > > av_freep(&atempo->hann); > > > + 
av_freep(&atempo->correlation_in); > > > av_freep(&atempo->correlation); > > > > > > - av_rdft_end(atempo->real_to_complex); > > > - atempo->real_to_complex = NULL; > > > - > > > - av_rdft_end(atempo->complex_to_real); > > > - atempo->complex_to_real = NULL; > > > + av_tx_uninit(&atempo->real_to_complex); > > > + av_tx_uninit(&atempo->complex_to_real); > > > } > > > > > > /* av_realloc is not aligned enough; fortunately, the data does not > need > > > to > > > @@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext > > *atempo) > > > #define RE_MALLOC_OR_FAIL(field, field_size) \ > > > do { \ > > > av_freep(&field); \ > > > - field = av_malloc(field_size); \ > > > + field = av_calloc(field_size, 1); \ > > > if (!field) { \ > > > yae_release_buffers(atempo); \ > > > return AVERROR(ENOMEM); \ > > > @@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo, > > > { > > > const int sample_size = av_get_bytes_per_sample(format); > > > uint32_t nlevels = 0; > > > + float scale = 1.f, iscale = 1.f; > > > uint32_t pot; > > > int i; > > > > > > @@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo, > > > // initialize audio fragment buffers: > > > RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * > > > atempo->stride); > > > RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * > > > atempo->stride); > > > - RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * > > > sizeof(FFTComplex)); > > > - RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * > > > sizeof(FFTComplex)); > > > + RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) * > > > sizeof(AVComplexFloat)); > > > + RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) * > > > sizeof(AVComplexFloat)); > > > + RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) * > > > sizeof(AVComplexFloat)); > > > + RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) * > > > sizeof(AVComplexFloat)); > > > > > > // initialize rDFT contexts: > > > - 
av_rdft_end(atempo->real_to_complex); > > > - atempo->real_to_complex = NULL; > > > - > > > - av_rdft_end(atempo->complex_to_real); > > > - atempo->complex_to_real = NULL; > > > + av_tx_uninit(&atempo->real_to_complex); > > > + av_tx_uninit(&atempo->complex_to_real); > > > > > > - atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C); > > > + av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, > > > AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0); > > > if (!atempo->real_to_complex) { > > > yae_release_buffers(atempo); > > > return AVERROR(ENOMEM); > > > } > > > > > > - atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R); > > > + av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, > > > AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0); > > > if (!atempo->complex_to_real) { > > > yae_release_buffers(atempo); > > > return AVERROR(ENOMEM); > > > } > > > > > > - RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * > > > sizeof(FFTComplex)); > > > + RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) * > > > sizeof(AVComplexFloat)); > > > + RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * > > > sizeof(AVComplexFloat)); > > > > > > atempo->ring = atempo->window * 3; > > > RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride); > > > @@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx) > > > const uint8_t *src_end = src + > > \ > > > frag->nsamples * atempo->channels * sizeof(scalar_type); > > \ > > > > > \ > > > - FFTSample *xdat = frag->xdat; > > \ > > > + float *xdat = frag->xdat_in; > > \ > > > scalar_type tmp; > > \ > > > > > \ > > > if (atempo->channels == 1) { > > \ > > > @@ -356,27 +360,27 @@ static int yae_update(AVFilterContext *ctx) > > > tmp = *(const scalar_type *)src; > > \ > > > src += sizeof(scalar_type); > > \ > > > > > \ > > > - *xdat = (FFTSample)tmp; > > \ > > > + *xdat = (float)tmp; > > \ > > > } > > \ > > > } else { > > \ > > > - FFTSample s, max, ti, si; > > \ > > > + float s, 
max, ti, si; > > \ > > > int i; > > \ > > > > > \ > > > for (; src < src_end; xdat++) { > > \ > > > tmp = *(const scalar_type *)src; > > \ > > > src += sizeof(scalar_type); > > \ > > > > > \ > > > - max = (FFTSample)tmp; > > \ > > > - s = FFMIN((FFTSample)scalar_max, > > \ > > > - (FFTSample)fabsf(max)); > > \ > > > + max = (float)tmp; > > \ > > > + s = FFMIN((float)scalar_max, > > \ > > > + (float)fabsf(max)); > > \ > > > > > \ > > > for (i = 1; i < atempo->channels; i++) { > > \ > > > tmp = *(const scalar_type *)src; > > \ > > > src += sizeof(scalar_type); > > \ > > > > > \ > > > - ti = (FFTSample)tmp; > > \ > > > - si = FFMIN((FFTSample)scalar_max, > > \ > > > - (FFTSample)fabsf(ti)); > > \ > > > + ti = (float)tmp; > > \ > > > + si = FFMIN((float)scalar_max, > > \ > > > + (float)fabsf(ti)); > > \ > > > > > \ > > > if (s < si) { > > \ > > > s = si; > > \ > > > @@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo, > > > AudioFragment *frag) > > > const uint8_t *src = frag->data; > > > > > > // init complex data buffer used for FFT and Correlation: > > > - memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window); > > > + memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window); > > > > > > if (atempo->format == AV_SAMPLE_FMT_U8) { > > > yae_init_xdat(uint8_t, 127); > > > @@ -598,32 +602,24 @@ static void > yae_advance_to_next_frag(ATempoContext > > > *atempo) > > > * Multiply two vectors of complex numbers (result of real_to_complex > > > rDFT) > > > * and transform back via complex_to_real rDFT. 
> > > */ > > > -static void yae_xcorr_via_rdft(FFTSample *xcorr, > > > - RDFTContext *complex_to_real, > > > - const FFTComplex *xa, > > > - const FFTComplex *xb, > > > +static void yae_xcorr_via_rdft(float *xcorr_in, > > > + float *xcorr, > > > + AVTXContext *complex_to_real, > > > + av_tx_fn c2r_fn, > > > + const AVComplexFloat *xa, > > > + const AVComplexFloat *xb, > > > const int window) > > > { > > > - FFTComplex *xc = (FFTComplex *)xcorr; > > > + AVComplexFloat *xc = (AVComplexFloat *)xcorr_in; > > > int i; > > > > > > - // NOTE: first element requires special care -- Given Y = rDFT(X), > > > - // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc > > > - // stores Re(Y[N/2]) in place of Im(Y[0]). > > > - > > > - xc->re = xa->re * xb->re; > > > - xc->im = xa->im * xb->im; > > > - xa++; > > > - xb++; > > > - xc++; > > > - > > > - for (i = 1; i < window; i++, xa++, xb++, xc++) { > > > + for (i = 0; i <= window; i++, xa++, xb++, xc++) { > > > > > > > This used to iterate over [1, window - 1] elements. > > Now it iterates over [0, window] elements. > > Is this correct? That's 2 additional elements. > > > > Yes, newer api does not use previous api hack of producing n/2 complex > numbers, but n/2 +1. > cool, thanks ... 
lgtm if it still works > > > > > > > > > > xc->re = (xa->re * xb->re + xa->im * xb->im); > > > xc->im = (xa->im * xb->re - xa->re * xb->im); > > > } > > > > > > // apply inverse rDFT: > > > - av_rdft_calc(complex_to_real, xcorr); > > > + c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float)); > > > } > > > > > > /** > > > @@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag, > > > const int window, > > > const int delta_max, > > > const int drift, > > > - FFTSample *correlation, > > > - RDFTContext *complex_to_real) > > > + float *correlation_in, > > > + float *correlation, > > > + AVTXContext *complex_to_real, > > > + av_tx_fn c2r_fn) > > > { > > > int best_offset = -drift; > > > - FFTSample best_metric = -FLT_MAX; > > > - FFTSample *xcorr; > > > + float best_metric = -FLT_MAX; > > > + float *xcorr; > > > > > > int i0; > > > int i1; > > > int i; > > > > > > - yae_xcorr_via_rdft(correlation, > > > + yae_xcorr_via_rdft(correlation_in, > > > + correlation, > > > complex_to_real, > > > - (const FFTComplex *)prev->xdat, > > > - (const FFTComplex *)frag->xdat, > > > + c2r_fn, > > > + (const AVComplexFloat *)prev->xdat, > > > + (const AVComplexFloat *)frag->xdat, > > > window); > > > > > > // identify search window boundaries: > > > @@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag, > > > xcorr = correlation + i0; > > > > > > for (i = i0; i < i1; i++, xcorr++) { > > > - FFTSample metric = *xcorr; > > > + float metric = *xcorr; > > > > > > // normalize: > > > - FFTSample drifti = (FFTSample)(drift + i); > > > - metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i); > > > + float drifti = (float)(drift + i); > > > + metric *= drifti * (float)(i - i0) * (float)(i1 - i); > > > > > > if (metric > best_metric) { > > > best_metric = metric; > > > @@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext > > *atempo) > > > atempo->window, > > > delta_max, > > > drift, > > > + atempo->correlation_in, > > > atempo->correlation, > > > - 
atempo->complex_to_real); > > > + atempo->complex_to_real, > > > + atempo->c2r_fn); > > > > > > if (correction) { > > > // adjust fragment position: > > > @@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo, > > > yae_downmix(atempo, yae_curr_frag(atempo)); > > > > > > // apply rDFT: > > > - av_rdft_calc(atempo->real_to_complex, > > > yae_curr_frag(atempo)->xdat); > > > + atempo->r2c_fn(atempo->real_to_complex, > > > yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, > > sizeof(float)); > > > > > > // must load the second fragment before alignment can > start: > > > if (!atempo->nfrag) { > > > @@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo, > > > yae_downmix(atempo, yae_curr_frag(atempo)); > > > > > > // apply rDFT: > > > - av_rdft_calc(atempo->real_to_complex, > > > yae_curr_frag(atempo)->xdat); > > > + atempo->r2c_fn(atempo->real_to_complex, > > > yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, > > sizeof(float)); > > > > > > atempo->state = YAE_OUTPUT_OVERLAP_ADD; > > > } > > > @@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo, > > > yae_downmix(atempo, frag); > > > > > > // apply rDFT: > > > - av_rdft_calc(atempo->real_to_complex, frag->xdat); > > > + atempo->r2c_fn(atempo->real_to_complex, frag->xdat, > > > frag->xdat_in, sizeof(float)); > > > > > > // align current fragment to previous fragment: > > > if (yae_adjust_position(atempo)) { > > > -- > > > 2.33.0 > > > > > > _______________________________________________ > > > ffmpeg-devel mailing list > > > ffmpeg-devel@ffmpeg.org > > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > > > To unsubscribe, visit link above, or email > > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". 
> > > > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-02-06 18:04 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-02-06 11:25 Paul B Mahol 2022-02-06 17:15 ` Pavel Koshevoy 2022-02-06 17:24 ` Paul B Mahol 2022-02-06 18:04 ` Pavel Koshevoy [this message] 2022-02-06 19:02 ` Lynne
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAJgjuoxT2vVD=QS8DiS_1Twgcx7JVHoyXfSiG+yLs+iJ5vF+iA@mail.gmail.com' \
    --to=pkoshevoy@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git