From: Lynne <dev@lynne.ee>
To: Ffmpeg Devel <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH 1/2] lavu/tx: add real to real and real to imaginary RDFT transforms
Date: Thu, 3 Aug 2023 18:26:16 +0200 (CEST)
Message-ID: <NawD9Lq--3-9@lynne.ee> (raw)
[-- Attachment #1: Type: text/plain, Size: 241 bytes --]
These are in-place transforms, required for DCT-I and DST-I.
They are templated because the mod2 variant requires minor modifications;
that variant is required specifically for DCT-I/DST-I.
They are quite optimized, as there's no need for any additional buffer storage.
[-- Attachment #2: 0001-lavu-tx-add-real-to-real-and-real-to-imaginary-RDFT-.patch --]
[-- Type: text/x-diff, Size: 18587 bytes --]
From 2ea5e2541c2551bf1b56e967d35946289a85aa49 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 3 Aug 2023 18:21:23 +0200
Subject: [PATCH 1/2] lavu/tx: add real to real and real to imaginary RDFT
transforms
These are in-place transforms, required for DCT-I and DST-I.
Templated as the mod2 variant requires minor modifications, and is
required specifically for DCT-I/DST-I.
---
doc/APIchanges | 3 +
libavutil/tx.c | 18 ++++-
libavutil/tx.h | 10 +++
libavutil/tx_template.c | 175 +++++++++++++++++++++++++++++++---------
libavutil/version.h | 2 +-
5 files changed, 167 insertions(+), 41 deletions(-)
diff --git a/doc/APIchanges b/doc/APIchanges
index 5afe8bcb75..edd178be4f 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,9 @@ The last version increases of all libraries were on 2023-02-09
API changes, most recent first:
+2023-07-xx - xxxxxxxxxx - lavu 58.15.100 - tx.h
+ Add AV_TX_REAL_TO_REAL and AV_TX_REAL_TO_IMAGINARY
+
2023-07-xx - xxxxxxxxxx - lavc 60 - avcodec.h
Deprecate AV_CODEC_FLAG_DROPCHANGED without replacement.
diff --git a/libavutil/tx.c b/libavutil/tx.c
index e25abf998f..e9826e6107 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -437,7 +437,9 @@ int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
- ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
continue;
/* Check if the CPU supports the required ISA */
@@ -560,6 +562,10 @@ static void print_flags(AVBPrint *bp, uint64_t f)
av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
if ((f & AV_TX_FULL_IMDCT) && ++prev)
av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
+ if ((f & AV_TX_REAL_TO_REAL) && ++prev)
+ av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
+ if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
+ av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
if ((f & FF_TX_ASM_CALL) && ++prev)
av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
av_bprintf(bp, "]");
@@ -717,7 +723,11 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
uint64_t req_flags = flags;
/* Flags the codelet may require to be present */
- uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL;
+ uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
+ AV_TX_REAL_TO_REAL |
+ AV_TX_REAL_TO_IMAGINARY |
+ FF_TX_PRESHUFFLE |
+ FF_TX_ASM_CALL;
/* Unaligned codelets are compatible with the aligned flag */
if (req_flags & FF_TX_ALIGNED)
@@ -742,7 +752,9 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
- ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
+ ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
+ ((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
continue;
/* Check if the requested flags match from both sides */
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 064edbc097..d178e8ee9d 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -149,6 +149,16 @@ enum AVTXFlags {
* Ignored for all transforms but inverse MDCTs.
*/
AV_TX_FULL_IMDCT = 1ULL << 2,
+
+ /**
+ * Perform a real to half-complex RDFT.
+ * Only the real, or imaginary coefficients will
+ * be output, depending on the flag used. Only available for forward RDFTs.
+ * Output array must have enough space to hold N complex values
+ * (regular size for a real to complex transform).
+ */
+ AV_TX_REAL_TO_REAL = 1ULL << 3,
+ AV_TX_REAL_TO_IMAGINARY = 1ULL << 4,
};
/**
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c4ec9502e0..50c65d00b5 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1613,14 +1613,17 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
int ret;
double f, m;
TXSample *tab;
+ int len4 = FFALIGN(len, 4) / 4;
s->scale_d = *((SCALE_TYPE *)scale);
s->scale_f = s->scale_d;
+ flags &= ~(AV_TX_REAL_TO_REAL | AV_TX_REAL_TO_IMAGINARY);
+
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
return ret;
- if (!(s->exp = av_mallocz((8 + (len >> 2) - 1)*sizeof(*s->exp))))
+ if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
return AVERROR(ENOMEM);
tab = (TXSample *)s->exp;
@@ -1639,17 +1642,20 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
*tab++ = RESCALE( (0.5 - inv) * m);
*tab++ = RESCALE(-(0.5 - inv) * m);
- for (int i = 0; i < len >> 2; i++)
+ for (int i = 0; i < len4; i++)
*tab++ = RESCALE(cos(i*f));
- for (int i = len >> 2; i >= 0; i--)
- *tab++ = RESCALE(cos(i*f) * (inv ? +1.0 : -1.0));
+
+ tab = ((TXSample *)s->exp) + len4 + 8;
+
+ for (int i = 0; i < len4; i++)
+ *tab++ = RESCALE(cos(((float)len/4.0 - (float)i + 0)*f) * (inv ? +1.0 : -1.0));
return 0;
}
-#define DECL_RDFT(name, inv) \
-static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
- void *_src, ptrdiff_t stride) \
+#define DECL_RDFT(n, inv) \
+static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
+ void *_src, ptrdiff_t stride) \
{ \
const int len2 = s->len >> 1; \
const int len4 = s->len >> 2; \
@@ -1698,40 +1704,131 @@ static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
data[len2].re = data[0].im; \
data[ 0].im = data[len2].im = 0; \
} \
-}
+} \
+ \
+static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { /* codelet descriptor for the ff_tx_rdft_<n> function emitted by this macro */ \
+ .name = TX_NAME_STR("rdft_" #n), \
+ .function = TX_NAME(ff_tx_rdft_ ##n), \
+ .type = TX_TYPE(RDFT), \
+ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
+ (inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY), /* parentheses required: ?: binds looser than |, otherwise the whole OR chain becomes the condition and only the direction flag survives */ \
+ .factors = { 4, TX_FACTOR_ANY }, \
+ .nb_factors = 2, \
+ .min_len = 4, \
+ .max_len = TX_LEN_UNLIMITED, \
+ .init = TX_NAME(ff_tx_rdft_init), \
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
+ .prio = FF_TX_PRIO_BASE, \
+};
-DECL_RDFT(r2c, 0)
-DECL_RDFT(c2r, 1)
+DECL_RDFT(r2c, 0)
+DECL_RDFT(c2r, 1)
-static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
- .name = TX_NAME_STR("rdft_r2c"),
- .function = TX_NAME(ff_tx_rdft_r2c),
- .type = TX_TYPE(RDFT),
- .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
- FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
- .factors = { 2, TX_FACTOR_ANY },
- .nb_factors = 2,
- .min_len = 2,
- .max_len = TX_LEN_UNLIMITED,
- .init = TX_NAME(ff_tx_rdft_init),
- .cpu_flags = FF_TX_CPU_FLAGS_ALL,
- .prio = FF_TX_PRIO_BASE,
+#define DECL_RDFT_HALF(n, mode, mod2) /* Emits the ff_tx_rdft_<n> transform function and its ff_tx_rdft_<n>_def codelet. mode is AV_TX_REAL_TO_REAL or AV_TX_REAL_TO_IMAGINARY; mod2 != 0 selects the variant registered with factor/min_len 2 instead of 4. */ \
+static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
+ void *_src, ptrdiff_t stride) \
+{ \
+ const int len = s->len; \
+ const int len2 = len >> 1; \
+ const int len4 = len >> 2; \
+ const int aligned_len4 = FFALIGN(len, 4)/4; /* same expression ff_tx_rdft_init uses for its table length */ \
+ const TXSample *fact = (void *)s->exp; /* s->exp viewed as a flat TXSample table */ \
+ const TXSample *tcos = fact + 8; /* skips the 8 leading entries read below as fact[0..7] */ \
+ const TXSample *tsin = tcos + aligned_len4; /* second table starts aligned_len4 entries after tcos */ \
+ TXComplex *data = _dst; /* data and out alias the same buffer: the function works in place on _dst */ \
+ TXSample *out = _dst; /* Half-complex is forward-only */ \
+ TXSample tmp_dc; \
+ av_unused TXSample tmp_mid; /* only assigned and read on the mod2 paths */ \
+ TXSample tmp[4]; \
+ TXComplex sf, sl; \
+ \
+ s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex)); /* sub-transform from _src into _dst; presumably the len/2 FFT set up in ff_tx_rdft_init - confirm */ \
+ \
+ tmp_dc = data[0].re; \
+ data[ 0].re = tmp_dc + data[0].im; /* data[0].re now holds re + im of entry 0 */ \
+ tmp_dc = tmp_dc - data[0].im; /* tmp_dc holds re - im of entry 0 */ \
+ \
+ data[ 0].re = MULT(fact[0], data[ 0].re); \
+ tmp_dc = MULT(fact[1], tmp_dc); \
+ data[len4].re = MULT(fact[2], data[len4].re); \
+ \
+ if (!mod2) { \
+ data[len4].im = MULT(fact[3], data[len4].im); \
+ } else { /* mod2: derive the middle value from data[len4] and data[len4 + 1] now, keep it in tmp_mid and store it at the very end */ \
+ sf = data[len4]; \
+ sl = data[len4 + 1]; \
+ if (mode == AV_TX_REAL_TO_REAL) \
+ tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
+ else \
+ tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
+ tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
+ tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) { \
+ tmp[3] = tmp[1]*tcos[len4] - tmp[2]*tsin[len4]; \
+ tmp_mid = (tmp[0] - tmp[3]); \
+ } else { \
+ tmp[3] = tmp[1]*tsin[len4] + tmp[2]*tcos[len4]; \
+ tmp_mid = (tmp[0] + tmp[3]); \
+ } \
+ } \
+ \
+ /* NOTE: unrolling this breaks non-mod8 lengths */ \
+ for (int i = 1; i <= len4; i++) { /* combines entry i with its mirror entry len2 - i */ \
+ TXSample tmp[4]; /* tmp, sf and sl here shadow the function-scope variables used by the mod2 branch above */ \
+ TXComplex sf = data[i]; \
+ TXComplex sl = data[len2 - i]; \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) \
+ tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
+ else \
+ tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
+ \
+ tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
+ tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) { \
+ tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i]; \
+ out[i] = (tmp[0] + tmp[3]); \
+ out[len - i] = (tmp[0] - tmp[3]); \
+ } else { /* the imaginary variant stores its results one slot lower (i - 1, len - i - 1) */ \
+ tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i]; \
+ out[i - 1] = (tmp[3] - tmp[0]); \
+ out[len - i - 1] = (tmp[0] + tmp[3]); \
+ } \
+ } \
+ \
+ for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) /* one extra iteration in the imaginary mode */ \
+ out[len2 - i] = out[len - i]; /* copy the values written at out[len - i] down to out[len2 - i] */ \
+ \
+ if (mode == AV_TX_REAL_TO_REAL) { \
+ out[len2] = tmp_dc; /* the re - im term saved from entry 0 goes to index len2 */ \
+ if (mod2) \
+ out[len4 + 1] = tmp_mid; \
+ } else if (mod2) { \
+ out[len4] = tmp_mid; \
+ } \
+} \
+ \
+static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { /* codelet descriptor for the function above */ \
+ .name = TX_NAME_STR("rdft_" #n), \
+ .function = TX_NAME(ff_tx_rdft_ ##n), \
+ .type = TX_TYPE(RDFT), \
+ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | mode | /* mode contributes the REAL_TO_REAL / REAL_TO_IMAGINARY flag */ \
+ FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
+ .factors = { 2 + 2*(!mod2), TX_FACTOR_ANY }, /* 2 for the mod2 variants, 4 otherwise */ \
+ .nb_factors = 2, \
+ .min_len = 2 + 2*(!mod2), /* 2 for the mod2 variants, 4 otherwise */ \
+ .max_len = TX_LEN_UNLIMITED, \
+ .init = TX_NAME(ff_tx_rdft_init), \
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL, \
+ .prio = FF_TX_PRIO_BASE, \
 };
-static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
- .name = TX_NAME_STR("rdft_c2r"),
- .function = TX_NAME(ff_tx_rdft_c2r),
- .type = TX_TYPE(RDFT),
- .flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
- FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
- .factors = { 2, TX_FACTOR_ANY },
- .nb_factors = 2,
- .min_len = 2,
- .max_len = TX_LEN_UNLIMITED,
- .init = TX_NAME(ff_tx_rdft_init),
- .cpu_flags = FF_TX_CPU_FLAGS_ALL,
- .prio = FF_TX_PRIO_BASE,
-};
+DECL_RDFT_HALF(r2r, AV_TX_REAL_TO_REAL, 0)
+DECL_RDFT_HALF(r2r_mod2, AV_TX_REAL_TO_REAL, 1)
+DECL_RDFT_HALF(r2i, AV_TX_REAL_TO_IMAGINARY, 0)
+DECL_RDFT_HALF(r2i_mod2, AV_TX_REAL_TO_IMAGINARY, 1)
static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
const FFTXCodelet *cd,
@@ -1997,6 +2094,10 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_mdct_naive_inv_def),
&TX_NAME(ff_tx_mdct_inv_full_def),
&TX_NAME(ff_tx_rdft_r2c_def),
+ &TX_NAME(ff_tx_rdft_r2r_def),
+ &TX_NAME(ff_tx_rdft_r2r_mod2_def),
+ &TX_NAME(ff_tx_rdft_r2i_def),
+ &TX_NAME(ff_tx_rdft_r2i_mod2_def),
&TX_NAME(ff_tx_rdft_c2r_def),
&TX_NAME(ff_tx_dctII_def),
&TX_NAME(ff_tx_dctIII_def),
diff --git a/libavutil/version.h b/libavutil/version.h
index 24af520e08..9e798b0e3f 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 58
-#define LIBAVUTIL_VERSION_MINOR 14
+#define LIBAVUTIL_VERSION_MINOR 15
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
--
2.40.1
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2023-08-03 16:26 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-03 16:26 Lynne [this message]
[not found] ` <NawD9Lq--3-9@lynne.ee-NawDDOJ----9>
2023-08-03 16:31 ` [FFmpeg-devel] [PATCH 2/2] lavu/tx: add DCT-I and DST-I transforms Lynne
2023-08-03 16:42 ` [FFmpeg-devel] [PATCH 1/2] lavu/tx: add real to real and real to imaginary RDFT transforms Lynne
[not found] ` <NawGxLe--3-9@lynne.ee-NawH0-d----9>
2023-08-04 2:05 ` [FFmpeg-devel] [PATCH v2 " Lynne
2023-08-03 20:32 ` [FFmpeg-devel] [PATCH " Michael Niedermayer
2023-08-03 20:39 ` Andreas Rheinhardt
2023-08-04 2:04 ` Lynne
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=NawD9Lq--3-9@lynne.ee \
--to=dev@lynne.ee \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git