From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by master.gitmailbox.com (Postfix) with ESMTP id 2EC8840C84
	for <ffmpegdev@gitmailbox.com>; Sun,  6 Feb 2022 11:24:50 +0000 (UTC)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 909E368B162;
	Sun,  6 Feb 2022 13:24:49 +0200 (EET)
Received: from mail-ej1-f47.google.com (mail-ej1-f47.google.com
 [209.85.218.47])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 1CCEE68AEE4
 for <ffmpeg-devel@ffmpeg.org>; Sun,  6 Feb 2022 13:24:43 +0200 (EET)
Received: by mail-ej1-f47.google.com with SMTP id s21so5736904ejx.12
 for <ffmpeg-devel@ffmpeg.org>; Sun, 06 Feb 2022 03:24:43 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112;
 h=from:to:subject:date:message-id:mime-version
 :content-transfer-encoding;
 bh=bCpDK5o3HPw5veWtGgYo//a2Oc4cslZAFxr+WgxbPt0=;
 b=gwroKwCB8nnNNfiXA5RoKYVnlprrzyf/dDZXZw7sL5Fn8OWrZReBV5OHTyigMtKnzB
 ghU0pk1hhcc2WkfYaPPFdMnFCqnCObP+wVLgLckTsEMekWkbD1vviKAik0xxh+UgiebM
 dpsZ7XTScJuefGfWZDnT1jF0YhPDmyDu+bmtJQzOR7piKNjQUjRivhhe4vh0vUeAqzZ2
 3xfU5Acnw8R17XlQWYYiU4acP+xTFb1L8imGX+L1PtG0kMDQupfEJ4CXHs8MxJFTkL4K
 CmpANcqDt0tL6Z56U0hPlOO4FRIwoR/GXuRh9KDY7hW0s5/LFPcO5CC4rhxg7ty6EOXW
 tmag==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20210112;
 h=x-gm-message-state:from:to:subject:date:message-id:mime-version
 :content-transfer-encoding;
 bh=bCpDK5o3HPw5veWtGgYo//a2Oc4cslZAFxr+WgxbPt0=;
 b=C0fzZR+5hFgb3ED12m602q3H4NiFlMn5yfNGNW5iFmO3CoG95KqXqkwjXalspiNd3g
 36J+tMnSuNuuTQNOwf4Wp8bM1pVFzHio6Q+s+qdjCJWYciTFTZhYiQdGLXWUvZSLVzlA
 xDpR58iaLr7SsmiU0J10mP0UQgP96eXY/e6dN4n5CbAjpQElRHQf4RKJUgT/2xDo/221
 iErhDYRmML9viR3yzzzf89Bc3ko12w50CmiijfaV1MqmGIWQV3lAEqXFItFJK+dadU6J
 pw7Put4OyzJemVqIAUUevoKMqqar//qX7tnxowUiI4CJODv3VebiMxvRELVwhK7o69W5
 halQ==
X-Gm-Message-State: AOAM531QcR9Ef6LDqeySTj5sXmE987JhtvcKxBIg7g1o8dBALNBftewD
 HXPflUlkSNsI1NNVR32yjuHq1ynT+iU=
X-Google-Smtp-Source: ABdhPJxmYAOa+PXJRnZnQBDElgXLt09pPKO1Ur3x6VOMydJc+ez2xyBDkLXR0KjTe/AjJkFtR4iAgw==
X-Received: by 2002:a17:906:a87:: with SMTP id
 y7mr1142271ejf.429.1644146682498; 
 Sun, 06 Feb 2022 03:24:42 -0800 (PST)
Received: from localhost.localdomain ([95.168.120.2])
 by smtp.gmail.com with ESMTPSA id co26sm902406edb.60.2022.02.06.03.24.41
 for <ffmpeg-devel@ffmpeg.org>
 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
 Sun, 06 Feb 2022 03:24:42 -0800 (PST)
From: Paul B Mahol <onemda@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Date: Sun,  6 Feb 2022 12:25:15 +0100
Message-Id: <20220206112515.1421701-1-onemda@gmail.com>
X-Mailer: git-send-email 2.33.0
MIME-Version: 1.0
Subject: [FFmpeg-devel] [PATCH] avfilter/af_atempo: switch to rdft from
 lavu/tx
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Archived-At: <https://master.gitmailbox.com/ffmpegdev/20220206112515.1421701-1-onemda@gmail.com/>
List-Archive: <https://master.gitmailbox.com/ffmpegdev/>
List-Post: <mailto:ffmpegdev@gitmailbox.com>

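Replace the libavcodec RDFT (libavcodec/avfft.h) with the RDFT provided
by libavutil/tx. The filter no longer needs libavcodec, so its entries
are removed from configure.

The transforms are now called with separate input and output buffers,
so xdat_in and correlation_in are added. Spectra are stored as
window + 1 complex values, which lets the cross-correlation loop treat
every bin uniformly instead of special-casing the first element.

Signed-off-by: Paul B Mahol <onemda@gmail.com>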
---
 configure               |   3 -
 libavfilter/af_atempo.c | 126 ++++++++++++++++++++--------------------
 2 files changed, 64 insertions(+), 65 deletions(-)

diff --git a/configure b/configure
index 5a8b52c77d..6ec25dd622 100755
--- a/configure
+++ b/configure
@@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat"
 aresample_filter_deps="swresample"
 asr_filter_deps="pocketsphinx"
 ass_filter_deps="libass"
-atempo_filter_deps="avcodec"
-atempo_filter_select="rdft"
 avgblur_opencl_filter_deps="opencl"
 avgblur_vulkan_filter_deps="vulkan spirv_compiler"
 azmq_filter_deps="libzmq"
@@ -7387,7 +7385,6 @@ enabled zlib && add_cppflags -DZLIB_CONST
 # conditional library dependencies, in any order
 enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
 enabled aresample_filter    && prepend avfilter_deps "swresample"
-enabled atempo_filter       && prepend avfilter_deps "avcodec"
 enabled bm3d_filter         && prepend avfilter_deps "avcodec"
 enabled cover_rect_filter   && prepend avfilter_deps "avformat avcodec"
 enabled ebur128_filter && enabled swresample && prepend avfilter_deps "swresample"
diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
index e9a6da7970..27f2f6daa0 100644
--- a/libavfilter/af_atempo.c
+++ b/libavfilter/af_atempo.c
@@ -39,13 +39,13 @@
  */
 
 #include <float.h>
-#include "libavcodec/avfft.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/eval.h"
 #include "libavutil/opt.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/tx.h"
 #include "avfilter.h"
 #include "audio.h"
 #include "internal.h"
@@ -67,7 +67,8 @@ typedef struct AudioFragment {
 
     // rDFT transform of the down-mixed mono fragment, used for
     // fast waveform alignment via correlation in frequency domain:
-    FFTSample *xdat;
+    float *xdat_in;
+    float *xdat;
 } AudioFragment;
 
 /**
@@ -140,9 +141,11 @@ typedef struct ATempoContext {
     FilterState state;
 
     // for fast correlation calculation in frequency domain:
-    RDFTContext *real_to_complex;
-    RDFTContext *complex_to_real;
-    FFTSample *correlation;
+    AVTXContext *real_to_complex;
+    AVTXContext *complex_to_real;
+    av_tx_fn r2c_fn, c2r_fn;
+    float *correlation_in;
+    float *correlation;
 
     // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
     AVFrame *dst_buffer;
@@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext *atempo)
 
     av_freep(&atempo->frag[0].data);
     av_freep(&atempo->frag[1].data);
+    av_freep(&atempo->frag[0].xdat_in);
+    av_freep(&atempo->frag[1].xdat_in);
     av_freep(&atempo->frag[0].xdat);
     av_freep(&atempo->frag[1].xdat);
 
     av_freep(&atempo->buffer);
     av_freep(&atempo->hann);
+    av_freep(&atempo->correlation_in);
     av_freep(&atempo->correlation);
 
-    av_rdft_end(atempo->real_to_complex);
-    atempo->real_to_complex = NULL;
-
-    av_rdft_end(atempo->complex_to_real);
-    atempo->complex_to_real = NULL;
+    av_tx_uninit(&atempo->real_to_complex);
+    av_tx_uninit(&atempo->complex_to_real);
 }
 
 /* av_realloc is not aligned enough; fortunately, the data does not need to
@@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext *atempo)
 #define RE_MALLOC_OR_FAIL(field, field_size)                    \
     do {                                                        \
         av_freep(&field);                                       \
-        field = av_malloc(field_size);                          \
+        field = av_calloc(field_size, 1);                       \
         if (!field) {                                           \
             yae_release_buffers(atempo);                        \
             return AVERROR(ENOMEM);                             \
@@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo,
 {
     const int sample_size = av_get_bytes_per_sample(format);
     uint32_t nlevels  = 0;
+    float scale = 1.f, iscale = 1.f;
     uint32_t pot;
     int i;
 
@@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo,
     // initialize audio fragment buffers:
     RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
     RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
-    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
-    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
+    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
 
     // initialize rDFT contexts:
-    av_rdft_end(atempo->real_to_complex);
-    atempo->real_to_complex = NULL;
-
-    av_rdft_end(atempo->complex_to_real);
-    atempo->complex_to_real = NULL;
+    av_tx_uninit(&atempo->real_to_complex);
+    av_tx_uninit(&atempo->complex_to_real);
 
-    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
+    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
     if (!atempo->real_to_complex) {
         yae_release_buffers(atempo);
         return AVERROR(ENOMEM);
     }
 
-    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
+    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
     if (!atempo->complex_to_real) {
         yae_release_buffers(atempo);
         return AVERROR(ENOMEM);
     }
 
-    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));
+    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(AVComplexFloat));
 
     atempo->ring = atempo->window * 3;
     RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
@@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx)
         const uint8_t *src_end = src +                                  \
             frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                         \
-        FFTSample *xdat = frag->xdat;                                   \
+        float *xdat = frag->xdat_in;                                    \
         scalar_type tmp;                                                \
                                                                         \
         if (atempo->channels == 1) {                                    \
@@ -356,27 +360,27 @@ static int yae_update(AVFilterContext *ctx)
                 tmp = *(const scalar_type *)src;                        \
                 src += sizeof(scalar_type);                             \
                                                                         \
-                *xdat = (FFTSample)tmp;                                 \
+                *xdat = (float)tmp;                                     \
             }                                                           \
         } else {                                                        \
-            FFTSample s, max, ti, si;                                   \
+            float s, max, ti, si;                                       \
             int i;                                                      \
                                                                         \
             for (; src < src_end; xdat++) {                             \
                 tmp = *(const scalar_type *)src;                        \
                 src += sizeof(scalar_type);                             \
                                                                         \
-                max = (FFTSample)tmp;                                   \
-                s = FFMIN((FFTSample)scalar_max,                        \
-                          (FFTSample)fabsf(max));                       \
+                max = (float)tmp;                                       \
+                s = FFMIN((float)scalar_max,                            \
+                          (float)fabsf(max));                           \
                                                                         \
                 for (i = 1; i < atempo->channels; i++) {                \
                     tmp = *(const scalar_type *)src;                    \
                     src += sizeof(scalar_type);                         \
                                                                         \
-                    ti = (FFTSample)tmp;                                \
-                    si = FFMIN((FFTSample)scalar_max,                   \
-                               (FFTSample)fabsf(ti));                   \
+                    ti = (float)tmp;                                    \
+                    si = FFMIN((float)scalar_max,                       \
+                               (float)fabsf(ti));                       \
                                                                         \
                     if (s < si) {                                       \
                         s   = si;                                       \
@@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
     const uint8_t *src = frag->data;
 
     // init complex data buffer used for FFT and Correlation:
-    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
+    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window);
 
     if (atempo->format == AV_SAMPLE_FMT_U8) {
         yae_init_xdat(uint8_t, 127);
@@ -598,32 +602,24 @@ static void yae_advance_to_next_frag(ATempoContext *atempo)
  * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
  * and transform back via complex_to_real rDFT.
  */
-static void yae_xcorr_via_rdft(FFTSample *xcorr,
-                               RDFTContext *complex_to_real,
-                               const FFTComplex *xa,
-                               const FFTComplex *xb,
+static void yae_xcorr_via_rdft(float *xcorr_in,
+                               float *xcorr,
+                               AVTXContext *complex_to_real,
+                               av_tx_fn c2r_fn,
+                               const AVComplexFloat *xa,
+                               const AVComplexFloat *xb,
                                const int window)
 {
-    FFTComplex *xc = (FFTComplex *)xcorr;
+    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
     int i;
 
-    // NOTE: first element requires special care -- Given Y = rDFT(X),
-    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
-    // stores Re(Y[N/2]) in place of Im(Y[0]).
-
-    xc->re = xa->re * xb->re;
-    xc->im = xa->im * xb->im;
-    xa++;
-    xb++;
-    xc++;
-
-    for (i = 1; i < window; i++, xa++, xb++, xc++) {
+    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
         xc->re = (xa->re * xb->re + xa->im * xb->im);
         xc->im = (xa->im * xb->re - xa->re * xb->im);
     }
 
     // apply inverse rDFT:
-    av_rdft_calc(complex_to_real, xcorr);
+    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
 }
 
 /**
@@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag,
                      const int window,
                      const int delta_max,
                      const int drift,
-                     FFTSample *correlation,
-                     RDFTContext *complex_to_real)
+                     float *correlation_in,
+                     float *correlation,
+                     AVTXContext *complex_to_real,
+                     av_tx_fn c2r_fn)
 {
     int       best_offset = -drift;
-    FFTSample best_metric = -FLT_MAX;
-    FFTSample *xcorr;
+    float     best_metric = -FLT_MAX;
+    float    *xcorr;
 
     int i0;
     int i1;
     int i;
 
-    yae_xcorr_via_rdft(correlation,
+    yae_xcorr_via_rdft(correlation_in,
+                       correlation,
                        complex_to_real,
-                       (const FFTComplex *)prev->xdat,
-                       (const FFTComplex *)frag->xdat,
+                       c2r_fn,
+                       (const AVComplexFloat *)prev->xdat,
+                       (const AVComplexFloat *)frag->xdat,
                        window);
 
     // identify search window boundaries:
@@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag,
     xcorr = correlation + i0;
 
     for (i = i0; i < i1; i++, xcorr++) {
-        FFTSample metric = *xcorr;
+        float metric = *xcorr;
 
         // normalize:
-        FFTSample drifti = (FFTSample)(drift + i);
-        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
+        float drifti = (float)(drift + i);
+        metric *= drifti * (float)(i - i0) * (float)(i1 - i);
 
         if (metric > best_metric) {
             best_metric = metric;
@@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext *atempo)
                                      atempo->window,
                                      delta_max,
                                      drift,
+                                     atempo->correlation_in,
                                      atempo->correlation,
-                                     atempo->complex_to_real);
+                                     atempo->complex_to_real,
+                                     atempo->c2r_fn);
 
     if (correction) {
         // adjust fragment position:
@@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo,
             yae_downmix(atempo, yae_curr_frag(atempo));
 
             // apply rDFT:
-            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+            atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
 
             // must load the second fragment before alignment can start:
             if (!atempo->nfrag) {
@@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo,
             yae_downmix(atempo, yae_curr_frag(atempo));
 
             // apply rDFT:
-            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+            atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
 
             atempo->state = YAE_OUTPUT_OVERLAP_ADD;
         }
@@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo,
             yae_downmix(atempo, frag);
 
             // apply rDFT:
-            av_rdft_calc(atempo->real_to_complex, frag->xdat);
+            atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));
 
             // align current fragment to previous fragment:
             if (yae_adjust_position(atempo)) {
-- 
2.33.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".