* [FFmpeg-devel] [PATCH 2/2] lavu/tx: add a half-complex R2C transform
[not found] ` <NaBQCTh--3-9@lynne.ee-NaBQFrM----9>
@ 2023-07-25 9:43 ` Lynne
2023-07-27 9:59 ` [FFmpeg-devel] [PATCH 1/2] lavu/tx: disable odd-length RDFTs Lynne
1 sibling, 0 replies; 3+ messages in thread
From: Lynne @ 2023-07-25 9:43 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 48 bytes --]
Required for DCT-I and DST-I.
Patch attached.
[-- Attachment #2: 0002-lavu-tx-add-a-half-complex-R2C-transform.patch --]
[-- Type: text/x-diff, Size: 7571 bytes --]
From 2e95c3a7128b7f826db8394eb238c5405c47864b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 25 Jul 2023 10:57:21 +0200
Subject: [PATCH 2/2] lavu/tx: add a half-complex R2C transform
Required for DCT-I and DST-I.
---
doc/APIchanges | 3 ++
libavutil/tx.c | 13 +++++++--
libavutil/tx.h | 9 ++++++
libavutil/tx_template.c | 65 +++++++++++++++++++++++++++++++++++++++--
libavutil/version.h | 2 +-
5 files changed, 86 insertions(+), 6 deletions(-)
diff --git a/doc/APIchanges b/doc/APIchanges
index 5afe8bcb75..0797dcc372 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,9 @@ The last version increases of all libraries were on 2023-02-09
API changes, most recent first:
+2023-07-xx - xxxxxxxxxx - lavu 58.15.100 - tx.h
+ Add AV_TX_HALF_COMPLEX
+
2023-07-xx - xxxxxxxxxx - lavc 60 - avcodec.h
Deprecate AV_CODEC_FLAG_DROPCHANGED without replacement.
diff --git a/libavutil/tx.c b/libavutil/tx.c
index e25abf998f..d6ab8ee485 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -437,7 +437,8 @@ int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
- ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_HALF_COMPLEX)) && inv))
continue;
/* Check if the CPU supports the required ISA */
@@ -560,6 +561,8 @@ static void print_flags(AVBPrint *bp, uint64_t f)
av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
if ((f & AV_TX_FULL_IMDCT) && ++prev)
av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
+ if ((f & AV_TX_HALF_COMPLEX) && ++prev)
+ av_bprintf(bp, "%shalf_complex", prev > 1 ? sep : "");
if ((f & FF_TX_ASM_CALL) && ++prev)
av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
av_bprintf(bp, "]");
@@ -717,7 +720,10 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
uint64_t req_flags = flags;
/* Flags the codelet may require to be present */
- uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL;
+ uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
+ AV_TX_HALF_COMPLEX |
+ FF_TX_PRESHUFFLE |
+ FF_TX_ASM_CALL;
/* Unaligned codelets are compatible with the aligned flag */
if (req_flags & FF_TX_ALIGNED)
@@ -742,7 +748,8 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
- ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_HALF_COMPLEX)) && inv))
continue;
/* Check if the requested flags match from both sides */
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 064edbc097..63fba8e8b4 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -149,6 +149,15 @@ enum AVTXFlags {
* Ignored for all transforms but inverse MDCTs.
*/
AV_TX_FULL_IMDCT = 1ULL << 2,
+
+ /**
+ * Perform a real to half-complex RDFT.
+ * Only the real coefficients will
+ * be output. Only available for forward RDFTs.
+ * Output array must have enough space to hold N complex values
+ * (regular size for a real to complex transform).
+ */
+ AV_TX_HALF_COMPLEX = 1ULL << 3,
};
/**
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index a4040454db..bdec53f335 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1617,6 +1617,8 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
s->scale_d = *((SCALE_TYPE *)scale);
s->scale_f = s->scale_d;
+ flags &= ~AV_TX_HALF_COMPLEX;
+
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
return ret;
@@ -1700,8 +1702,51 @@ static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
} \
}
-DECL_RDFT(r2c, 0)
-DECL_RDFT(c2r, 1)
+DECL_RDFT(r2c, 0)
+DECL_RDFT(c2r, 1)
+
+static void TX_NAME(ff_tx_rdft_r2hc)(AVTXContext *s, void *_dst,
+ void *_src, ptrdiff_t stride)
+{
+ const int len = s->len;
+ const int len2 = len >> 1;
+ const int len4 = len >> 2;
+ const TXSample *fact = (void *)s->exp;
+ const TXSample *tcos = fact + 8;
+ const TXSample *tsin = tcos + len4;
+ TXComplex *data = _dst;
+ TXSample *out = _dst; /* Half-complex is forward-only */
+ TXSample tmp_dc;
+
+ s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex));
+
+ tmp_dc = data[0].re;
+
+ /* NOTE: we can avoid all multiplies here as they're all 1.0, except
+ * for one, -1.0, which is not needed */
+ out[0] = tmp_dc + data[0].im;
+ tmp_dc -= data[0].im;
+
+ /* NOTE: unrolling this breaks non-mod8 lengths */
+ for (int i = 1; i <= len4; i++) {
+ TXSample tmp[4];
+ TXComplex sf = data[i];
+ TXComplex sl = data[len2 - i];
+
+ tmp[0] = MULT(fact[4], (sf.re + sl.re));
+ tmp[1] = MULT(fact[6], (sf.im + sl.im));
+ tmp[2] = MULT(fact[7], (sf.re - sl.re));
+ tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i];
+
+ out[i] = tmp[0] + tmp[3];
+ out[len - i] = tmp[0] - tmp[3];
+ }
+
+ for (int i = 1; i < len4; i++)
+ out[len2 - i - 0] = out[len - i];
+
+ out[len2] = tmp_dc;
+}
static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
.name = TX_NAME_STR("rdft_r2c"),
@@ -1733,6 +1778,21 @@ static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
.prio = FF_TX_PRIO_BASE,
};
+static const FFTXCodelet TX_NAME(ff_tx_rdft_r2hc_def) = {
+ .name = TX_NAME_STR("rdft_r2hc"),
+ .function = TX_NAME(ff_tx_rdft_r2hc),
+ .type = TX_TYPE(RDFT),
+ .flags = AV_TX_UNALIGNED | AV_TX_HALF_COMPLEX | AV_TX_INPLACE |
+ FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
+ .factors[0] = 2,
+ .nb_factors = 1,
+ .min_len = 2,
+ .max_len = TX_LEN_UNLIMITED,
+ .init = TX_NAME(ff_tx_rdft_init),
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL,
+ .prio = FF_TX_PRIO_BASE,
+};
+
static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
const FFTXCodelet *cd,
uint64_t flags,
@@ -1997,6 +2057,7 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_mdct_naive_inv_def),
&TX_NAME(ff_tx_mdct_inv_full_def),
&TX_NAME(ff_tx_rdft_r2c_def),
+ &TX_NAME(ff_tx_rdft_r2hc_def),
&TX_NAME(ff_tx_rdft_c2r_def),
&TX_NAME(ff_tx_dctII_def),
&TX_NAME(ff_tx_dctIII_def),
diff --git a/libavutil/version.h b/libavutil/version.h
index 24af520e08..9e798b0e3f 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 58
-#define LIBAVUTIL_VERSION_MINOR 14
+#define LIBAVUTIL_VERSION_MINOR 15
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
--
2.40.1
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread