* [FFmpeg-devel] [PATCH v3] wmavoice: convert DCT-I/DST-I to lavu/tx
[not found] ` <NbELai4--3-9@lynne.ee-NbELelX----9>
@ 2023-09-01 4:39 ` Lynne
[not found] ` <NdE1Sht--3-9@lynne.ee-NdE1WqH----9>
1 sibling, 0 replies; 6+ messages in thread
From: Lynne @ 2023-09-01 4:39 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 210 bytes --]
Added a patch to fix scaling of R2R transforms and
improved table generation precision slightly.
Planning to push this at the end of today, as it has been
on the mailing list for over a month with two LGTMs.
[-- Attachment #2: 0001-lavu-tx-add-real-to-real-and-real-to-imaginary-RDFT-.patch --]
[-- Type: text/x-diff, Size: 18619 bytes --]
From 145b5ca2be45e07fe16b700aa2b42cbeaa6d1a28 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 3 Aug 2023 18:21:23 +0200
Subject: [PATCH 01/11] lavu/tx: add real to real and real to imaginary RDFT
transforms
These are in-place transforms, required for DCT-I and DST-I.
Templated as the mod2 variant requires minor modifications, and is
required specifically for DCT-I/DST-I.
---
doc/APIchanges | 3 +
libavutil/tx.c | 18 ++++-
libavutil/tx.h | 10 +++
libavutil/tx_template.c | 175 +++++++++++++++++++++++++++++++---------
libavutil/version.h | 2 +-
5 files changed, 167 insertions(+), 41 deletions(-)
diff --git a/doc/APIchanges b/doc/APIchanges
index ad1efe708d..db3242b667 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,9 @@ The last version increases of all libraries were on 2023-02-09
API changes, most recent first:
+2023-09-xx - xxxxxxxxxx - lavu 58.18.100 - tx.h
+ Add AV_TX_REAL_TO_REAL and AV_TX_REAL_TO_IMAGINARY
+
2023-08-18 - xxxxxxxxxx - lavu 58.17.100 - channel_layout.h
All AV_CHANNEL_LAYOUT_* macros are now compatible with C++ 17 and older.
diff --git a/libavutil/tx.c b/libavutil/tx.c
index e25abf998f..e9826e6107 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -437,7 +437,9 @@ int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
- ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
continue;
/* Check if the CPU supports the required ISA */
@@ -560,6 +562,10 @@ static void print_flags(AVBPrint *bp, uint64_t f)
av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
if ((f & AV_TX_FULL_IMDCT) && ++prev)
av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
+ if ((f & AV_TX_REAL_TO_REAL) && ++prev)
+ av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
+ if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
+ av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
if ((f & FF_TX_ASM_CALL) && ++prev)
av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
av_bprintf(bp, "]");
@@ -717,7 +723,11 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
uint64_t req_flags = flags;
/* Flags the codelet may require to be present */
- uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL;
+ uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
+ AV_TX_REAL_TO_REAL |
+ AV_TX_REAL_TO_IMAGINARY |
+ FF_TX_PRESHUFFLE |
+ FF_TX_ASM_CALL;
/* Unaligned codelets are compatible with the aligned flag */
if (req_flags & FF_TX_ALIGNED)
@@ -742,7 +752,9 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
- ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
continue;
/* Check if the requested flags match from both sides */
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 064edbc097..d178e8ee9d 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -149,6 +149,16 @@ enum AVTXFlags {
* Ignored for all transforms but inverse MDCTs.
*/
AV_TX_FULL_IMDCT = 1ULL << 2,
+
+ /**
+ * Perform a real to half-complex RDFT.
+ * Only the real, or imaginary coefficients will
+ * be output, depending on the flag used. Only available for forward RDFTs.
+ * Output array must have enough space to hold N complex values
+ * (regular size for a real to complex transform).
+ */
+ AV_TX_REAL_TO_REAL = 1ULL << 3,
+ AV_TX_REAL_TO_IMAGINARY = 1ULL << 4,
};
/**
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c4ec9502e0..c56dcf0826 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1613,14 +1613,17 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
int ret;
double f, m;
TXSample *tab;
+ int len4 = FFALIGN(len, 4) / 4;
s->scale_d = *((SCALE_TYPE *)scale);
s->scale_f = s->scale_d;
+ flags &= ~(AV_TX_REAL_TO_REAL | AV_TX_REAL_TO_IMAGINARY);
+
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
return ret;
- if (!(s->exp = av_mallocz((8 + (len >> 2) - 1)*sizeof(*s->exp))))
+ if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
return AVERROR(ENOMEM);
tab = (TXSample *)s->exp;
@@ -1639,17 +1642,20 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
*tab++ = RESCALE( (0.5 - inv) * m);
*tab++ = RESCALE(-(0.5 - inv) * m);
- for (int i = 0; i < len >> 2; i++)
+ for (int i = 0; i < len4; i++)
*tab++ = RESCALE(cos(i*f));
- for (int i = len >> 2; i >= 0; i--)
- *tab++ = RESCALE(cos(i*f) * (inv ? +1.0 : -1.0));
+
+ tab = ((TXSample *)s->exp) + len4 + 8;
+
+ for (int i = 0; i < len4; i++)
+ *tab++ = RESCALE(cos(((float)len/4.0 - (float)i + 0)*f) * (inv ? +1.0 : -1.0));
return 0;
}
-#define DECL_RDFT(name, inv) \
-static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
- void *_src, ptrdiff_t stride) \
+#define DECL_RDFT(n, inv) \
+static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
+ void *_src, ptrdiff_t stride) \
{ \
const int len2 = s->len >> 1; \
const int len4 = s->len >> 2; \
@@ -1698,40 +1704,131 @@ static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
data[len2].re = data[0].im; \
data[ 0].im = data[len2].im = 0; \
} \
-}
+} \
+ \
+static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
+ .name = TX_NAME_STR("rdft_" #n), \
+ .function = TX_NAME(ff_tx_rdft_ ##n), \
+ .type = TX_TYPE(RDFT), \
+ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
+ (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY), \
+ .factors = { 4, TX_FACTOR_ANY }, \
+ .nb_factors = 2, \
+ .min_len = 4, \
+ .max_len = TX_LEN_UNLIMITED, \
+ .init = TX_NAME(ff_tx_rdft_init), \
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
+ .prio = FF_TX_PRIO_BASE, \
+};
-DECL_RDFT(r2c, 0)
-DECL_RDFT(c2r, 1)
+DECL_RDFT(r2c, 0)
+DECL_RDFT(c2r, 1)
-static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
- .name = TX_NAME_STR("rdft_r2c"),
- .function = TX_NAME(ff_tx_rdft_r2c),
- .type = TX_TYPE(RDFT),
- .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
- FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
- .factors = { 2, TX_FACTOR_ANY },
- .nb_factors = 2,
- .min_len = 2,
- .max_len = TX_LEN_UNLIMITED,
- .init = TX_NAME(ff_tx_rdft_init),
- .cpu_flags = FF_TX_CPU_FLAGS_ALL,
- .prio = FF_TX_PRIO_BASE,
+#define DECL_RDFT_HALF(n, mode, mod2) \
+static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
+ void *_src, ptrdiff_t stride) \
+{ \
+ const int len = s->len; \
+ const int len2 = len >> 1; \
+ const int len4 = len >> 2; \
+ const int aligned_len4 = FFALIGN(len, 4)/4; \
+ const TXSample *fact = (void *)s->exp; \
+ const TXSample *tcos = fact + 8; \
+ const TXSample *tsin = tcos + aligned_len4; \
+ TXComplex *data = _dst; \
+ TXSample *out = _dst; /* Half-complex is forward-only */ \
+ TXSample tmp_dc; \
+ av_unused TXSample tmp_mid; \
+ TXSample tmp[4]; \
+ TXComplex sf, sl; \
+ \
+ s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex)); \
+ \
+ tmp_dc = data[0].re; \
+ data[ 0].re = tmp_dc + data[0].im; \
+ tmp_dc = tmp_dc - data[0].im; \
+ \
+ data[ 0].re = MULT(fact[0], data[ 0].re); \
+ tmp_dc = MULT(fact[1], tmp_dc); \
+ data[len4].re = MULT(fact[2], data[len4].re); \
+ \
+ if (!mod2) { \
+ data[len4].im = MULT(fact[3], data[len4].im); \
+ } else { \
+ sf = data[len4]; \
+ sl = data[len4 + 1]; \
+ if (mode == AV_TX_REAL_TO_REAL) \
+ tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
+ else \
+ tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
+ tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
+ tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) { \
+ tmp[3] = tmp[1]*tcos[len4] - tmp[2]*tsin[len4]; \
+ tmp_mid = (tmp[0] - tmp[3]); \
+ } else { \
+ tmp[3] = tmp[1]*tsin[len4] + tmp[2]*tcos[len4]; \
+ tmp_mid = (tmp[0] + tmp[3]); \
+ } \
+ } \
+ \
+ /* NOTE: unrolling this breaks non-mod8 lengths */ \
+ for (int i = 1; i <= len4; i++) { \
+ TXSample tmp[4]; \
+ TXComplex sf = data[i]; \
+ TXComplex sl = data[len2 - i]; \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) \
+ tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
+ else \
+ tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
+ \
+ tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
+ tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) { \
+ tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i]; \
+ out[i] = (tmp[0] + tmp[3]); \
+ out[len - i] = (tmp[0] - tmp[3]); \
+ } else { \
+ tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i]; \
+ out[i - 1] = (tmp[3] - tmp[0]); \
+ out[len - i - 1] = (tmp[0] + tmp[3]); \
+ } \
+ } \
+ \
+ for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) \
+ out[len2 - i] = out[len - i]; \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) { \
+ out[len2] = tmp_dc; \
+ if (mod2) \
+ out[len4 + 1] = tmp_mid; \
+ } else if (mod2) { \
+ out[len4] = tmp_mid; \
+ } \
+} \
+ \
+static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
+ .name = TX_NAME_STR("rdft_" #n), \
+ .function = TX_NAME(ff_tx_rdft_ ##n), \
+ .type = TX_TYPE(RDFT), \
+ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | mode | \
+ FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
+ .factors = { 2 + 2*(!mod2), TX_FACTOR_ANY }, \
+ .nb_factors = 2, \
+ .min_len = 2 + 2*(!mod2), \
+ .max_len = TX_LEN_UNLIMITED, \
+ .init = TX_NAME(ff_tx_rdft_init), \
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
+ .prio = FF_TX_PRIO_BASE, \
};
-static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
- .name = TX_NAME_STR("rdft_c2r"),
- .function = TX_NAME(ff_tx_rdft_c2r),
- .type = TX_TYPE(RDFT),
- .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
- FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
- .factors = { 2, TX_FACTOR_ANY },
- .nb_factors = 2,
- .min_len = 2,
- .max_len = TX_LEN_UNLIMITED,
- .init = TX_NAME(ff_tx_rdft_init),
- .cpu_flags = FF_TX_CPU_FLAGS_ALL,
- .prio = FF_TX_PRIO_BASE,
-};
+DECL_RDFT_HALF(r2r, AV_TX_REAL_TO_REAL, 0)
+DECL_RDFT_HALF(r2r_mod2, AV_TX_REAL_TO_REAL, 1)
+DECL_RDFT_HALF(r2i, AV_TX_REAL_TO_IMAGINARY, 0)
+DECL_RDFT_HALF(r2i_mod2, AV_TX_REAL_TO_IMAGINARY, 1)
static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
const FFTXCodelet *cd,
@@ -1997,6 +2094,10 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_mdct_naive_inv_def),
&TX_NAME(ff_tx_mdct_inv_full_def),
&TX_NAME(ff_tx_rdft_r2c_def),
+ &TX_NAME(ff_tx_rdft_r2r_def),
+ &TX_NAME(ff_tx_rdft_r2r_mod2_def),
+ &TX_NAME(ff_tx_rdft_r2i_def),
+ &TX_NAME(ff_tx_rdft_r2i_mod2_def),
&TX_NAME(ff_tx_rdft_c2r_def),
&TX_NAME(ff_tx_dctII_def),
&TX_NAME(ff_tx_dctIII_def),
diff --git a/libavutil/version.h b/libavutil/version.h
index bc43baca9f..6d27f91cce 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 58
-#define LIBAVUTIL_VERSION_MINOR 17
+#define LIBAVUTIL_VERSION_MINOR 18
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
--
2.40.1
[-- Attachment #3: 0002-lavu-tx-add-DCT-I-and-DST-I-transforms.patch --]
[-- Type: text/x-diff, Size: 5090 bytes --]
From 3eb6c3982e99ccc8fb60d086c2ee593367369ed4 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 3 Aug 2023 18:23:02 +0200
Subject: [PATCH 02/11] lavu/tx: add DCT-I and DST-I transforms
These are true, actual DCT-I and DST-I transforms, unlike the
libavcodec versions, which are plainly not.
---
libavutil/tx.h | 24 ++++++++++
libavutil/tx_template.c | 103 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 127 insertions(+)
diff --git a/libavutil/tx.h b/libavutil/tx.h
index d178e8ee9d..4696988cae 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -105,6 +105,30 @@ enum AVTXType {
AV_TX_DOUBLE_DCT = 10,
AV_TX_INT32_DCT = 11,
+ /**
+ * Discrete Cosine Transform I
+ *
+ * The forward transform is a DCT-I.
+ * The inverse transform is a DCT-I multiplied by 2/(N + 1).
+ *
+ * The input array is always overwritten.
+ */
+ AV_TX_FLOAT_DCT_I = 12,
+ AV_TX_DOUBLE_DCT_I = 13,
+ AV_TX_INT32_DCT_I = 14,
+
+ /**
+ * Discrete Sine Transform I
+ *
+ * The forward transform is a DST-I.
+ * The inverse transform is a DST-I multiplied by 2/(N + 1).
+ *
+ * The input array is always overwritten.
+ */
+ AV_TX_FLOAT_DST_I = 15,
+ AV_TX_DOUBLE_DST_I = 16,
+ AV_TX_INT32_DST_I = 17,
+
/* Not part of the API, do not use */
AV_TX_NB,
};
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c56dcf0826..c026cb40c4 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -2004,6 +2004,107 @@ static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
.prio = FF_TX_PRIO_BASE,
};
+static av_cold int TX_NAME(ff_tx_dcstI_init)(AVTXContext *s,
+ const FFTXCodelet *cd,
+ uint64_t flags,
+ FFTXCodeletOptions *opts,
+ int len, int inv,
+ const void *scale)
+{
+ int ret;
+ SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
+
+ if (inv) {
+ len *= 2;
+ s->len *= 2;
+ rsc *= 0.5;
+ }
+
+ /* We want a half-complex RDFT */
+ flags |= cd->type == TX_TYPE(DCT_I) ? AV_TX_REAL_TO_REAL :
+ AV_TX_REAL_TO_IMAGINARY;
+
+ if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL,
+ (len - 1 + 2*(cd->type == TX_TYPE(DST_I)))*2,
+ 0, &rsc)))
+ return ret;
+
+ s->tmp = av_mallocz((len + 1)*2*sizeof(TXSample));
+ if (!s->tmp)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static void TX_NAME(ff_tx_dctI)(AVTXContext *s, void *_dst,
+ void *_src, ptrdiff_t stride)
+{
+ TXSample *dst = _dst;
+ TXSample *src = _src;
+ const int len = s->len - 1;
+ TXSample *tmp = (TXSample *)s->tmp;
+
+ stride /= sizeof(TXSample);
+
+ for (int i = 0; i < len; i++)
+ tmp[i] = tmp[2*len - i] = src[i * stride];
+
+ tmp[len] = src[len * stride]; /* Middle */
+
+ s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
+}
+
+static void TX_NAME(ff_tx_dstI)(AVTXContext *s, void *_dst,
+                                void *_src, ptrdiff_t stride)
+{
+    TXSample *dst = _dst;
+    TXSample *src = _src;
+    const int len = s->len + 1;
+    TXSample *tmp = (void *)s->tmp;
+
+    stride /= sizeof(TXSample); /* byte stride -> stride in samples */
+
+    tmp[0] = 0;
+
+    for (int i = 1; i < len; i++) { /* negated copy in front, plain copy mirrored behind */
+        TXSample a = src[(i - 1) * stride];
+        tmp[i]         = -a;
+        tmp[2*len - i] =  a;
+    }
+
+    tmp[len] = 0; /* i == n, Nyquist */
+    /* Templated code: use the sample type's size, as ff_tx_dctI does, not sizeof(float) */
+    s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
+}
+
+static const FFTXCodelet TX_NAME(ff_tx_dctI_def) = {
+ .name = TX_NAME_STR("dctI"),
+ .function = TX_NAME(ff_tx_dctI),
+ .type = TX_TYPE(DCT_I),
+ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
+ .factors = { 2, TX_FACTOR_ANY },
+ .nb_factors = 2,
+ .min_len = 2,
+ .max_len = TX_LEN_UNLIMITED,
+ .init = TX_NAME(ff_tx_dcstI_init),
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL,
+ .prio = FF_TX_PRIO_BASE,
+};
+
+static const FFTXCodelet TX_NAME(ff_tx_dstI_def) = {
+ .name = TX_NAME_STR("dstI"),
+ .function = TX_NAME(ff_tx_dstI),
+ .type = TX_TYPE(DST_I),
+ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
+ .factors = { 2, TX_FACTOR_ANY },
+ .nb_factors = 2,
+ .min_len = 2,
+ .max_len = TX_LEN_UNLIMITED,
+ .init = TX_NAME(ff_tx_dcstI_init),
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL,
+ .prio = FF_TX_PRIO_BASE,
+};
+
int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
{
int off = 0;
@@ -2101,6 +2202,8 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_rdft_c2r_def),
&TX_NAME(ff_tx_dctII_def),
&TX_NAME(ff_tx_dctIII_def),
+ &TX_NAME(ff_tx_dctI_def),
+ &TX_NAME(ff_tx_dstI_def),
NULL,
};
--
2.40.1
[-- Attachment #4: 0003-wmavoice-convert-RDFT-to-lavu-tx.patch --]
[-- Type: text/x-diff, Size: 7977 bytes --]
From d7d41f9c3e526be8e610a33446e140a066d882ef Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 4 Aug 2023 20:20:10 +0200
Subject: [PATCH 03/11] wmavoice: convert RDFT to lavu/tx
---
libavcodec/wmavoice.c | 75 ++++++++++++++++++++++++++-----------------
1 file changed, 45 insertions(+), 30 deletions(-)
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 44fda0e2d6..5ae92e2dbc 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -31,6 +31,7 @@
#include "libavutil/float_dsp.h"
#include "libavutil/mem_internal.h"
#include "libavutil/thread.h"
+#include "libavutil/tx.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "decode.h"
@@ -263,8 +264,8 @@ typedef struct WMAVoiceContext {
* smoothing and so on, and context variables for FFT/iFFT.
* @{
*/
- RDFTContext rdft, irdft; ///< contexts for FFT-calculation in the
- ///< postfilter (for denoise filter)
+ AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the
+ av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)
DCTContext dct, dst; ///< contexts for phase shift (in Hilbert
///< transform, part of postfilter)
float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
@@ -277,9 +278,9 @@ typedef struct WMAVoiceContext {
///< by postfilter
float denoise_filter_cache[MAX_FRAMESIZE];
int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
- DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
+ DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];
///< aligned buffer for LPC tilting
- DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
+ DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x82];
///< aligned buffer for denoise coefficients
DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
///< aligned buffer for postfilter speech
@@ -388,12 +389,20 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
s->do_apf = flags & 0x1;
if (s->do_apf) {
- if ((ret = ff_rdft_init(&s->rdft, 7, DFT_R2C)) < 0 ||
- (ret = ff_rdft_init(&s->irdft, 7, IDFT_C2R)) < 0 ||
- (ret = ff_dct_init (&s->dct, 6, DCT_I)) < 0 ||
+ float scale = 1.0f;
+
+ if ((ret = ff_dct_init (&s->dct, 6, DCT_I)) < 0 ||
(ret = ff_dct_init (&s->dst, 6, DST_I)) < 0)
return ret;
+ ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, &scale, 0);
+ if (ret < 0)
+ return ret;
+
ff_sine_window_init(s->cos, 256);
memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
for (n = 0; n < 255; n++) {
@@ -596,20 +605,24 @@ static float tilt_factor(const float *lpcs, int n_lpcs)
/**
* Derive denoise filter coefficients (in real domain) from the LPCs.
*/
-static void calc_input_response(WMAVoiceContext *s, float *lpcs,
- int fcb_type, float *coeffs, int remainder)
+static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
+ int fcb_type, float *coeffs_dst, int remainder)
{
float last_coeff, min = 15.0, max = -15.0;
float irange, angle_mul, gain_mul, range, sq;
+ LOCAL_ALIGNED_32(float, coeffs, [0x82]);
+ LOCAL_ALIGNED_32(float, lpcs, [0x82]);
int n, idx;
+ memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
+
/* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
- s->rdft.rdft_calc(&s->rdft, lpcs);
+ s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));
#define log_range(var, assign) do { \
float tmp = log10f(assign); var = tmp; \
max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
} while (0)
- log_range(last_coeff, lpcs[1] * lpcs[1]);
+ log_range(last_coeff, lpcs[64] * lpcs[64]);
for (n = 1; n < 64; n++)
log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
@@ -668,25 +681,25 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
coeffs[n * 2] = coeffs[n] * s->cos[idx];
}
- coeffs[1] = last_coeff;
+ coeffs[64] = last_coeff;
/* move into real domain */
- s->irdft.rdft_calc(&s->irdft, coeffs);
+ s->irdft_fn(s->irdft, coeffs_dst, coeffs, sizeof(AVComplexFloat));
/* tilt correction and normalize scale */
- memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
+ memset(&coeffs_dst[remainder], 0, sizeof(coeffs_dst[0]) * (128 - remainder));
if (s->denoise_tilt_corr) {
float tilt_mem = 0;
- coeffs[remainder - 1] = 0;
+ coeffs_dst[remainder - 1] = 0;
ff_tilt_compensation(&tilt_mem,
- -1.8 * tilt_factor(coeffs, remainder - 1),
- coeffs, remainder);
+ -1.8 * tilt_factor(coeffs_dst, remainder - 1),
+ coeffs_dst, remainder);
}
- sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
+ sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs_dst, coeffs_dst,
remainder));
for (n = 0; n < remainder; n++)
- coeffs[n] *= sq;
+ coeffs_dst[n] *= sq;
}
/**
@@ -722,6 +735,8 @@ static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
int remainder, lim, n;
if (fcb_type != FCB_TYPE_SILENCE) {
+ LOCAL_ALIGNED_32(float, coeffs_f, [0x82]);
+ LOCAL_ALIGNED_32(float, synth_f, [0x82]);
float *tilted_lpcs = s->tilted_lpcs_pf,
*coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
@@ -742,16 +757,16 @@ static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
/* apply coefficients (in frequency spectrum domain), i.e. complex
* number multiplication */
memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
- s->rdft.rdft_calc(&s->rdft, synth_pf);
- s->rdft.rdft_calc(&s->rdft, coeffs);
- synth_pf[0] *= coeffs[0];
- synth_pf[1] *= coeffs[1];
- for (n = 1; n < 64; n++) {
- float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
- synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
- synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
+ s->rdft_fn(s->rdft, synth_f, synth_pf, sizeof(float));
+ s->rdft_fn(s->rdft, coeffs_f, coeffs, sizeof(float));
+ synth_f[0] *= coeffs_f[0];
+ synth_f[1] *= coeffs_f[1];
+ for (n = 1; n <= 64; n++) {
+ float v1 = synth_f[n * 2], v2 = synth_f[n * 2 + 1];
+ synth_f[n * 2] = v1 * coeffs_f[n * 2] - v2 * coeffs_f[n * 2 + 1];
+ synth_f[n * 2 + 1] = v2 * coeffs_f[n * 2] + v1 * coeffs_f[n * 2 + 1];
}
- s->irdft.rdft_calc(&s->irdft, synth_pf);
+ s->irdft_fn(s->irdft, synth_pf, synth_f, sizeof(AVComplexFloat));
}
/* merge filter output with the history of previous runs */
@@ -1986,8 +2001,8 @@ static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
WMAVoiceContext *s = ctx->priv_data;
if (s->do_apf) {
- ff_rdft_end(&s->rdft);
- ff_rdft_end(&s->irdft);
+ av_tx_uninit(&s->rdft);
+ av_tx_uninit(&s->irdft);
ff_dct_end(&s->dct);
ff_dct_end(&s->dst);
}
--
2.40.1
[-- Attachment #5: 0004-wmavoice-convert-DCT-I-DST-I-to-lavu-tx.patch --]
[-- Type: text/x-diff, Size: 5304 bytes --]
From 9f3a21397e25c91ba2053df298d527b1366214c9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 4 Aug 2023 21:16:30 +0200
Subject: [PATCH 04/11] wmavoice: convert DCT-I/DST-I to lavu/tx
This is the very last user of any lavc transform code.
This also *corrects* wmavoice decoding, as the previous DCT/DST
transforms were incorrect, bringing it closer to Microsoft's
own wmavoice decoder.
---
libavcodec/wmavoice.c | 29 +++++++++++++++++------------
tests/fate/wma.mak | 11 +++++++----
2 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 5ae92e2dbc..915315cb8a 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -42,8 +42,6 @@
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
-#include "dct.h"
-#include "rdft.h"
#include "sinewin.h"
#define MAX_BLOCKS 8 ///< maximum number of blocks per frame
@@ -266,8 +264,8 @@ typedef struct WMAVoiceContext {
*/
AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the
av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)
- DCTContext dct, dst; ///< contexts for phase shift (in Hilbert
- ///< transform, part of postfilter)
+ AVTXContext *dct, *dst; ///< contexts for phase shift (in Hilbert
+ av_tx_fn dct_fn, dst_fn; ///< transform, part of postfilter)
float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
///< range
float postfilter_agc; ///< gain control memory, used in
@@ -391,10 +389,6 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
if (s->do_apf) {
float scale = 1.0f;
- if ((ret = ff_dct_init (&s->dct, 6, DCT_I)) < 0 ||
- (ret = ff_dct_init (&s->dst, 6, DST_I)) < 0)
- return ret;
-
ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
if (ret < 0)
return ret;
@@ -403,6 +397,16 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
if (ret < 0)
return ret;
+ scale = 1.0 / (1 << 6);
+ ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);
+ if (ret < 0)
+ return ret;
+
+ scale = 1.0 / (1 << 6);
+ ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);
+ if (ret < 0)
+ return ret;
+
ff_sine_window_init(s->cos, 256);
memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
for (n = 0; n < 255; n++) {
@@ -612,6 +616,7 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
float irange, angle_mul, gain_mul, range, sq;
LOCAL_ALIGNED_32(float, coeffs, [0x82]);
LOCAL_ALIGNED_32(float, lpcs, [0x82]);
+ LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
int n, idx;
memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
@@ -662,8 +667,8 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
* is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
* Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
* "moment" of the LPCs in this filter. */
- s->dct.dct_calc(&s->dct, lpcs);
- s->dst.dct_calc(&s->dst, lpcs);
+ s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
+ s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
/* Split out the coefficient indexes into phase/magnitude pairs */
idx = 255 + av_clip(lpcs[64], -255, 255);
@@ -2003,8 +2008,8 @@ static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
if (s->do_apf) {
av_tx_uninit(&s->rdft);
av_tx_uninit(&s->irdft);
- ff_dct_end(&s->dct);
- ff_dct_end(&s->dst);
+ av_tx_uninit(&s->dct);
+ av_tx_uninit(&s->dst);
}
return 0;
diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak
index c13874ebfc..ed2ac24c65 100644
--- a/tests/fate/wma.mak
+++ b/tests/fate/wma.mak
@@ -20,18 +20,21 @@ fate-wmapro: $(FATE_WMAPRO-yes)
FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-7k
fate-wmavoice-7k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-7K.wma
-fate-wmavoice-7k: REF = $(SAMPLES)/wmavoice/streaming_CBR-7K.pcm
+fate-wmavoice-7k: REF = $(SAMPLES)/wmavoice/streaming_CBR-7K_ref.pcm
+fate-wmavoice-7k: CMP_TARGET = 1368.61
fate-wmavoice-7k: FUZZ = 3
FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-11k
fate-wmavoice-11k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-11K.wma
-fate-wmavoice-11k: REF = $(SAMPLES)/wmavoice/streaming_CBR-11K.pcm
+fate-wmavoice-11k: REF = $(SAMPLES)/wmavoice/streaming_CBR-11K_ref.pcm
+fate-wmavoice-11k: CMP_TARGET = 965.24
fate-wmavoice-11k: FUZZ = 3
FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-19k
fate-wmavoice-19k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-19K.wma
-fate-wmavoice-19k: REF = $(SAMPLES)/wmavoice/streaming_CBR-19K.pcm
-fate-wmavoice-19k: FUZZ = 3
+fate-wmavoice-19k: REF = $(SAMPLES)/wmavoice/streaming_CBR-19K_ref.pcm
+fate-wmavoice-19k: CMP_TARGET = 689.33
+fate-wmavoice-19k: FUZZ = 3
$(FATE_WMAVOICE-yes): CMP = stddev
--
2.40.1
[-- Attachment #6: 0005-ffplay-port-to-lavu-tx.patch --]
[-- Type: text/x-diff, Size: 5058 bytes --]
From 218f595d9097da785bd5915cc318879be64a0132 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sat, 18 Feb 2023 13:14:31 +0100
Subject: [PATCH 05/11] ffplay: port to lavu/tx
---
fftools/ffplay.c | 42 +++++++++++++++++++++++++++---------------
1 file changed, 27 insertions(+), 15 deletions(-)
diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index 5212ad053e..006da7ab57 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c
@@ -47,7 +47,7 @@
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libavutil/opt.h"
-#include "libavcodec/avfft.h"
+#include "libavutil/tx.h"
#include "libswresample/swresample.h"
#include "libavfilter/avfilter.h"
@@ -262,9 +262,11 @@ typedef struct VideoState {
int16_t sample_array[SAMPLE_ARRAY_SIZE];
int sample_array_index;
int last_i_start;
- RDFTContext *rdft;
+ AVTXContext *rdft;
+ av_tx_fn rdft_fn;
int rdft_bits;
- FFTSample *rdft_data;
+ float *real_data;
+ AVComplexFloat *rdft_data;
int xpos;
double last_vis_time;
SDL_Texture *vis_texture;
@@ -1120,6 +1122,7 @@ static void video_audio_display(VideoState *s)
fill_rectangle(s->xleft, y, s->width, 1);
}
} else {
+ int err = 0;
if (realloc_texture(&s->vis_texture, SDL_PIXELFORMAT_ARGB8888, s->width, s->height, SDL_BLENDMODE_NONE, 1) < 0)
return;
@@ -1127,31 +1130,39 @@ static void video_audio_display(VideoState *s)
s->xpos = 0;
nb_display_channels= FFMIN(nb_display_channels, 2);
if (rdft_bits != s->rdft_bits) {
- av_rdft_end(s->rdft);
- av_free(s->rdft_data);
- s->rdft = av_rdft_init(rdft_bits, DFT_R2C);
+ const float rdft_scale = 1.0;
+ av_tx_uninit(&s->rdft);
+ av_freep(&s->real_data);
+ av_freep(&s->rdft_data);
s->rdft_bits = rdft_bits;
- s->rdft_data = av_malloc_array(nb_freq, 4 *sizeof(*s->rdft_data));
+ s->real_data = av_malloc_array(nb_freq, 4 *sizeof(*s->real_data));
+ s->rdft_data = av_malloc_array(nb_freq + 1, 2 *sizeof(*s->rdft_data));
+ err = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT,
+ 0, 1 << rdft_bits, &rdft_scale, 0);
}
- if (!s->rdft || !s->rdft_data){
+ if (err < 0 || !s->rdft_data) {
av_log(NULL, AV_LOG_ERROR, "Failed to allocate buffers for RDFT, switching to waves display\n");
s->show_mode = SHOW_MODE_WAVES;
} else {
- FFTSample *data[2];
+ float *data_in[2];
+ AVComplexFloat *data[2];
SDL_Rect rect = {.x = s->xpos, .y = 0, .w = 1, .h = s->height};
uint32_t *pixels;
int pitch;
for (ch = 0; ch < nb_display_channels; ch++) {
- data[ch] = s->rdft_data + 2 * nb_freq * ch;
+ data_in[ch] = s->real_data + 2 * nb_freq * ch;
+ data[ch] = s->rdft_data + nb_freq * ch;
i = i_start + ch;
for (x = 0; x < 2 * nb_freq; x++) {
double w = (x-nb_freq) * (1.0 / nb_freq);
- data[ch][x] = s->sample_array[i] * (1.0 - w * w);
+ data_in[ch][x] = s->sample_array[i] * (1.0 - w * w);
i += channels;
if (i >= SAMPLE_ARRAY_SIZE)
i -= SAMPLE_ARRAY_SIZE;
}
- av_rdft_calc(s->rdft, data[ch]);
+ s->rdft_fn(s->rdft, data[ch], data_in[ch], sizeof(float));
+ data[ch][0].im = data[ch][nb_freq].re;
+ data[ch][nb_freq].re = 0;
}
/* Least efficient way to do this, we should of course
* directly access it but it is more than fast enough. */
@@ -1160,8 +1171,8 @@ static void video_audio_display(VideoState *s)
pixels += pitch * s->height;
for (y = 0; y < s->height; y++) {
double w = 1 / sqrt(nb_freq);
- int a = sqrt(w * sqrt(data[0][2 * y + 0] * data[0][2 * y + 0] + data[0][2 * y + 1] * data[0][2 * y + 1]));
- int b = (nb_display_channels == 2 ) ? sqrt(w * hypot(data[1][2 * y + 0], data[1][2 * y + 1]))
+ int a = sqrt(w * sqrt(data[0][y].re * data[0][y].re + data[0][y].im * data[0][y].im));
+ int b = (nb_display_channels == 2 ) ? sqrt(w * hypot(data[1][y].re, data[1][y].im))
: a;
a = FFMIN(a, 255);
b = FFMIN(b, 255);
@@ -1197,7 +1208,8 @@ static void stream_component_close(VideoState *is, int stream_index)
is->audio_buf = NULL;
if (is->rdft) {
- av_rdft_end(is->rdft);
+ av_tx_uninit(&is->rdft);
+ av_freep(&is->real_data);
av_freep(&is->rdft_data);
is->rdft = NULL;
is->rdft_bits = 0;
--
2.40.1
[-- Attachment #7: 0006-avfft-wrap-lavu-tx-instead-of-ff_fft.patch --]
[-- Type: text/x-diff, Size: 1889 bytes --]
From 0d506a5903536f7b45c503ea89cc5ff1e756db83 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Nov 2022 11:23:38 +0100
Subject: [PATCH 06/11] avfft: wrap lavu/tx instead of ff_fft
---
libavcodec/avfft.c | 40 +++++++++++++++++++++++++++++++---------
1 file changed, 31 insertions(+), 9 deletions(-)
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 2200f37708..e4b19af272 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -18,38 +18,60 @@
#include "libavutil/attributes.h"
#include "libavutil/mem.h"
+#include "libavutil/tx.h"
#include "avfft.h"
#include "fft.h"
#include "rdft.h"
#include "dct.h"
+typedef struct AVTXWrapper { /* state behind the opaque handles returned by the av_*_init() wrappers */
+ AVTXContext *ctx; /* primary lavu/tx context, filled in by av_tx_init() */
+ av_tx_fn fn; /* transform function paired with ctx */
+
+ AVTXContext *ctx2; /* optional second context; av_fft_end() uninits it alongside ctx */
+ av_tx_fn fn2;
+
+ ptrdiff_t stride;
+} AVTXWrapper;
+
/* FFT */
FFTContext *av_fft_init(int nbits, int inverse)
{
- FFTContext *s = av_mallocz(sizeof(*s));
-
- if (s && ff_fft_init(s, nbits, inverse))
- av_freep(&s);
+ int ret;
+ float scale = 1.0f;
+ /* Zero-initialised on purpose: av_fft_end() calls av_tx_uninit() on ctx2
+ * as well, and this function never sets ctx2, so it must start as NULL. */
+ AVTXWrapper *s = av_mallocz(sizeof(*s));
+ if (!s)
+ return NULL;
+
+ /* In-place complex FFT of 1 << nbits points, unscaled. */
+ ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_FFT, inverse, 1 << nbits,
+ &scale, AV_TX_INPLACE);
+ if (ret < 0) {
+ av_free(s);
+ return NULL;
+ }
- return s;
+ /* The wrapper is handed out behind the legacy opaque handle type. */
+ return (FFTContext *)s;
}
void av_fft_permute(FFTContext *s, FFTComplex *z)
{
- s->fft_permute(s, z);
+ /* No-op; presumably kept only so existing callers of the public API still work. */
}
void av_fft_calc(FFTContext *s, FFTComplex *z)
{
- s->fft_calc(s, z);
+ AVTXWrapper *w = (AVTXWrapper *)s; /* s is the AVTXWrapper that av_fft_init() returned */
+ w->fn(w->ctx, z, (void *)z, sizeof(AVComplexFloat)); /* in-place: z is both output and input */
}
av_cold void av_fft_end(FFTContext *s)
{
if (s) {
- ff_fft_end(s);
- av_free(s);
+ AVTXWrapper *w = (AVTXWrapper *)s; /* undo the cast done at init time */
+ av_tx_uninit(&w->ctx);
+ av_tx_uninit(&w->ctx2); /* relies on ctx2 being NULL when no second transform was created */
+ av_free(w);
}
}
--
2.40.1
[-- Attachment #8: 0007-avfft-wrap-lavu-tx-instead-of-ff_mdct.patch --]
[-- Type: text/x-diff, Size: 2385 bytes --]
From d47cf87783e9d01f85f05e8dabe94fa325433805 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Nov 2022 11:26:33 +0100
Subject: [PATCH 07/11] avfft: wrap lavu/tx instead of ff_mdct
---
libavcodec/avfft.c | 43 ++++++++++++++++++++++++++++++-------------
1 file changed, 30 insertions(+), 13 deletions(-)
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index e4b19af272..107b510828 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -75,43 +75,60 @@ av_cold void av_fft_end(FFTContext *s)
}
}
-#if CONFIG_MDCT
-
FFTContext *av_mdct_init(int nbits, int inverse, double scale)
{
- FFTContext *s = av_malloc(sizeof(*s));
+ int ret;
+ float scale_f = scale;
+ /* Zero-initialised so that ctx2 stays NULL for forward transforms, which
+ * lets av_mdct_end() uninit it unconditionally. */
+ AVTXWrapper *s = av_mallocz(sizeof(*s));
+ if (!s)
+ return NULL;
- if (s && ff_mdct_init(s, nbits, inverse, scale))
- av_freep(&s);
+ /* Primary transform: forward MDCT, or the half-length IMDCT when inverse. */
+ ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_MDCT, inverse, 1 << (nbits - 1), &scale_f, 0);
+ if (ret < 0) {
+ av_free(s);
+ return NULL;
+ }
- return s;
+ /* Inverse contexts additionally get a full IMDCT, used by av_imdct_calc(). */
+ if (inverse) {
+ ret = av_tx_init(&s->ctx2, &s->fn2, AV_TX_FLOAT_MDCT, inverse, 1 << (nbits - 1),
+ &scale_f, AV_TX_FULL_IMDCT);
+ if (ret < 0) {
+ av_tx_uninit(&s->ctx);
+ av_free(s);
+ return NULL;
+ }
+ }
+
+ return (FFTContext *)s;
}
void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
{
- s->imdct_calc(s, output, input);
+ AVTXWrapper *w = (AVTXWrapper *)s;
+ /* fn2/ctx2 only exist when av_mdct_init() was called with inverse != 0. */
+ w->fn2(w->ctx2, output, (void *)input, sizeof(float));
}
void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
{
- s->imdct_half(s, output, input);
+ AVTXWrapper *w = (AVTXWrapper *)s;
+ w->fn(w->ctx, output, (void *)input, sizeof(float));
}
void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
{
- s->mdct_calc(s, output, input);
+ AVTXWrapper *w = (AVTXWrapper *)s;
+ w->fn(w->ctx, output, (void *)input, sizeof(float));
}
av_cold void av_mdct_end(FFTContext *s)
{
if (s) {
- ff_mdct_end(s);
- av_free(s);
+ AVTXWrapper *w = (AVTXWrapper *)s;
+ av_tx_uninit(&w->ctx);
+ /* Also release the full-IMDCT context created for inverse transforms;
+ * it is NULL (and this is a no-op) for forward ones. */
+ av_tx_uninit(&w->ctx2);
+ av_free(w);
}
}
-#endif /* CONFIG_MDCT */
-
#if CONFIG_RDFT
RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
--
2.40.1
[-- Attachment #9: 0008-avfft-wrap-lavu-tx-instead-of-ff_rdft.patch --]
[-- Type: text/x-diff, Size: 2311 bytes --]
From 4212330a8945bc16cb7e8d2a0f9ce09aeff71111 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Nov 2022 11:26:59 +0100
Subject: [PATCH 08/11] avfft: wrap lavu/tx instead of ff_rdft
---
libavcodec/avfft.c | 45 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 34 insertions(+), 11 deletions(-)
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 107b510828..9f5d256773 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -32,6 +32,8 @@ typedef struct AVTXWrapper {
av_tx_fn fn2;
ptrdiff_t stride;
+ int len;
+ int inv;
} AVTXWrapper;
/* FFT */
@@ -129,33 +131,54 @@ av_cold void av_mdct_end(FFTContext *s)
}
}
-#if CONFIG_RDFT
-
RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
{
- RDFTContext *s = av_malloc(sizeof(*s));
+ int ret;
+ float scale = trans == IDFT_C2R ? 0.5f : 1.0f;
+ AVTXWrapper *s;
- if (s && ff_rdft_init(s, nbits, trans))
- av_freep(&s);
+ /* The other 2 modes are unconventional, do not form an orthogonal
+ * transform, have never been useful, and so they're not implemented. */
+ if (trans != IDFT_C2R && trans != DFT_R2C)
+ return NULL;
- return s;
+ s = av_malloc(sizeof(*s));
+ if (!s)
+ return NULL;
+
+ ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
+ 1 << nbits, &scale, AV_TX_INPLACE);
+ if (ret < 0) {
+ av_free(s);
+ return NULL;
+ }
+
+ /* Only DFT_R2C and IDFT_C2R reach this point, so the selector has to
+ * test DFT_R2C: the forward transform steps over real samples, the
+ * inverse one over complex values. */
+ s->stride = (trans == DFT_R2C) ? sizeof(float) : sizeof(AVComplexFloat);
+ s->len = 1 << nbits;
+ s->inv = trans == IDFT_C2R;
+
+ return (RDFTContext *)s;
}
void av_rdft_calc(RDFTContext *s, FFTSample *data)
{
- s->rdft_calc(s, data);
+ AVTXWrapper *w = (AVTXWrapper *)s; /* NOTE(review): data[w->len] is touched below; confirm callers allocate more than len floats */
+ if (w->inv)
+ FFSWAP(float, data[1], data[w->len]); /* inverse: exchange slots 1 and len before transforming */
+ w->fn(w->ctx, data, (void *)data, w->stride); /* in-place: data is both output and input */
+ if (!w->inv)
+ FFSWAP(float, data[1], data[w->len]); /* forward: exchange slots 1 and len after transforming */
}
av_cold void av_rdft_end(RDFTContext *s)
{
if (s) {
- ff_rdft_end(s);
- av_free(s);
+ AVTXWrapper *w = (AVTXWrapper *)s; /* undo the cast done in av_rdft_init() */
+ av_tx_uninit(&w->ctx); /* av_rdft_init() only ever creates ctx */
+ av_free(w);
}
}
-#endif /* CONFIG_RDFT */
-
#if CONFIG_DCT
DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
--
2.40.1
[-- Attachment #10: 0009-avfft-wrap-lavu-tx-instead-of-ff_dct.patch --]
[-- Type: text/x-diff, Size: 2651 bytes --]
From a1e76768f4c862ef0ec705f9c4f4ba5705647b52 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 7 Aug 2023 12:07:10 +0200
Subject: [PATCH 09/11] avfft: wrap lavu/tx instead of ff_dct
---
libavcodec/avfft.c | 62 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 51 insertions(+), 11 deletions(-)
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 9f5d256773..d07c495022 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -34,6 +34,9 @@ typedef struct AVTXWrapper {
ptrdiff_t stride;
int len;
int inv;
+
+ float *tmp;
+ int out_of_place;
} AVTXWrapper;
/* FFT */
@@ -179,29 +182,66 @@ av_cold void av_rdft_end(RDFTContext *s)
}
}
-#if CONFIG_DCT
-
DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
{
- DCTContext *s = av_malloc(sizeof(*s));
+ int ret;
+ /* Per-type scale handed to av_tx_init(); not static because the DCT-III
+ * entry depends on nbits. */
+ const float scale_map[] = {
+ [DCT_II] = 0.5f,
+ [DCT_III] = 1.0f / (1 << nbits),
+ [DCT_I] = 0.5f,
+ [DST_I] = 2.0f,
+ };
+ static const enum AVTXType type_map[] = {
+ [DCT_II] = AV_TX_FLOAT_DCT,
+ [DCT_III] = AV_TX_FLOAT_DCT,
+ [DCT_I] = AV_TX_FLOAT_DCT_I,
+ [DST_I] = AV_TX_FLOAT_DST_I,
+ };
+
+ /* Zero-initialised: tmp is only allocated for the out-of-place types,
+ * but av_dct_end() always passes it to av_free(), so it must be NULL
+ * for the in-place ones. */
+ AVTXWrapper *s = av_mallocz(sizeof(*s));
+ if (!s)
+ return NULL;
- if (s && ff_dct_init(s, nbits, inverse))
- av_freep(&s);
+ s->len = (1 << nbits);
+ /* DCT-I and DST-I are run out-of-place through the tmp buffer. */
+ s->out_of_place = (inverse == DCT_I) || (inverse == DST_I);
- return s;
+ ret = av_tx_init(&s->ctx, &s->fn, type_map[inverse],
+ (inverse == DCT_III), 1 << (nbits - (inverse == DCT_III)),
+ &scale_map[inverse], s->out_of_place ? 0 : AV_TX_INPLACE);
+ if (ret < 0) {
+ av_free(s);
+ return NULL;
+ }
+
+ if (s->out_of_place) {
+ s->tmp = av_malloc((1 << (nbits + 1))*sizeof(float));
+ if (!s->tmp) {
+ av_tx_uninit(&s->ctx);
+ av_free(s);
+ return NULL;
+ }
+ }
+
+ return (DCTContext *)s;
}
void av_dct_calc(DCTContext *s, FFTSample *data)
{
- s->dct_calc(s, data);
+ AVTXWrapper *w = (AVTXWrapper *)s; /* s is the AVTXWrapper that av_dct_init() returned */
+ if (w->out_of_place) {
+ memcpy(w->tmp, data, w->len*sizeof(float)); /* stage the input so data can receive the output */
+ w->fn(w->ctx, (void *)data, w->tmp, sizeof(float));
+ } else {
+ w->fn(w->ctx, data, (void *)data, sizeof(float)); /* in-place: data is both output and input */
+ }
}
av_cold void av_dct_end(DCTContext *s)
{
if (s) {
- ff_dct_end(s);
- av_free(s);
+ AVTXWrapper *w = (AVTXWrapper *)s; /* undo the cast done in av_dct_init() */
+ av_tx_uninit(&w->ctx);
+ av_free(w->tmp); /* tmp is only allocated for out-of-place transforms; relies on it being NULL otherwise */
+ av_free(w);
}
}
-
-#endif /* CONFIG_DCT */
--
2.40.1
[-- Attachment #11: 0010-lavu-tx-improve-rdft-table-generation-precision-slig.patch --]
[-- Type: text/x-diff, Size: 816 bytes --]
From acdc1594e58547e67455a5aab963c525bb273964 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 1 Sep 2023 06:06:44 +0200
Subject: [PATCH 10/11] lavu/tx: improve rdft table generation precision
slightly
---
libavutil/tx_template.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c026cb40c4..4745b56700 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1648,7 +1648,7 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
tab = ((TXSample *)s->exp) + len4 + 8;
for (int i = 0; i < len4; i++)
- *tab++ = RESCALE(cos(((float)len/4.0 - (float)i + 0)*f) * (inv ? +1.0 : -1.0));
+ *tab++ = RESCALE(cos(((len - i*4)/4.0)*f)) * (inv ? 1 : -1);
return 0;
}
--
2.40.1
[-- Attachment #12: 0011-lavu-tx-fix-scaling-of-R2R-transforms.patch --]
[-- Type: text/x-diff, Size: 1917 bytes --]
From 30003aa435f2dc21722b344598ae5087e0b6f8ef Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 1 Sep 2023 06:13:51 +0200
Subject: [PATCH 11/11] lavu/tx: fix scaling of R2R transforms
Still slightly inaccurate, but it's good enough now.
---
libavutil/tx_template.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 4745b56700..6e3b3dad33 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1613,6 +1613,7 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
int ret;
double f, m;
TXSample *tab;
+ uint64_t r2r = flags & AV_TX_REAL_TO_REAL;
int len4 = FFALIGN(len, 4) / 4;
s->scale_d = *((SCALE_TYPE *)scale);
@@ -1638,7 +1639,10 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
*tab++ = RESCALE(-m);
*tab++ = RESCALE( (0.5 - 0.0) * m);
- *tab++ = RESCALE( (0.0 - 0.5) * m);
+ if (r2r)
+ *tab++ = 1 / s->scale_f;
+ else
+ *tab++ = RESCALE( (0.0 - 0.5) * m);
*tab++ = RESCALE( (0.5 - inv) * m);
*tab++ = RESCALE(-(0.5 - inv) * m);
@@ -1804,7 +1808,7 @@ static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
if (mode == AV_TX_REAL_TO_REAL) { \
out[len2] = tmp_dc; \
if (mod2) \
- out[len4 + 1] = tmp_mid; \
+ out[len4 + 1] = tmp_mid * fact[5]; \
} else if (mod2) { \
out[len4] = tmp_mid; \
} \
--
2.40.1
[-- Attachment #13: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 6+ messages in thread