Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] lavu/tx: make 32-bit fixed-point transforms more bitexact
@ 2023-06-20  0:57 Lynne
  2023-06-20  8:53 ` Martin Storsjö
  0 siblings, 1 reply; 2+ messages in thread
From: Lynne @ 2023-06-20  0:57 UTC (permalink / raw)
  To: Ffmpeg Devel

[-- Attachment #1: Type: text/plain, Size: 155 bytes --]

Using the sqrt/cos/sin approximations we have, the only parts left
which may be inexact are multiplies and divisions in some transforms.

Patch attached.


[-- Attachment #2: 0001-lavu-tx-make-32-bit-fixed-point-transforms-more-bite.patch --]
[-- Type: text/x-diff, Size: 5187 bytes --]

From b2fd8fde86d421109d7922ded7b4691384af2214 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 20 Jun 2023 02:47:17 +0200
Subject: [PATCH] lavu/tx: make 32-bit fixed-point transforms more bitexact

Using the sqrt/cos/sin approximations we have, the only parts left
which may be inexact are multiplies and divisions in some transforms.
---
 libavutil/tx_priv.h               |  2 ++
 libavutil/tx_template.c           | 38 +++++++++++++++++++++++++++----
 tests/fate/ac3.mak                |  2 +-
 tests/ref/fate/unknown_layout-ac3 |  2 +-
 tests/ref/lavf/rm                 |  2 +-
 5 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index d5ff8e1421..2f056a777c 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -110,6 +110,8 @@ typedef void TXComplex;
 
 #elif defined(TX_INT32)
 
+#include "softfloat.h"
+
 /* Properly rounds the result */
 #define CMUL(dre, dim, are, aim, bre, bim)             \
     do {                                               \
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 983de75a47..719dae2440 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -60,6 +60,17 @@ typedef struct FFTabInitData {
     int factors[TX_MAX_SUB]; /* Must be sorted high -> low */
 } FFTabInitData;
 
+#if defined(TX_INT32)
+static TXSample COS_GEN(double freq)
+{
+    int c_f, s_f;
+    av_sincos_sf(llrintf(freq * (1 << 30) / M_PI), &s_f, &c_f);
+    return av_clip64(((int64_t)c_f) << 1, INT32_MIN, INT32_MAX);
+}
+#else
+#define COS_GEN cos
+#endif
+
 #define SR_TABLE(len)                                              \
 static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void)            \
 {                                                                  \
@@ -67,7 +78,7 @@ static av_cold void TX_TAB(ff_tx_init_tab_ ##len)(void)            \
     TXSample *tab = TX_TAB(ff_tx_tab_ ##len);                      \
                                                                    \
     for (int i = 0; i < len/4; i++)                                \
-        *tab++ = RESCALE(cos(i*freq));                             \
+        *tab++ = COS_GEN(i*freq);                                  \
                                                                    \
     *tab = 0;                                                      \
 }
@@ -1903,22 +1914,39 @@ int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
 {
     int off = 0;
     int len4 = s->len >> 1;
-    double scale = s->scale_d;
-    const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
+    const double theta = (s->scale_d < 0 ? len4 : 0) + 1.0/8.0;
     size_t alloc = pre_tab ? 2*len4 : len4;
 
+#if defined(TX_INT32)
+    int scale = llrintf(fabs(s->scale_d) * (1 << 30));
+    SoftFloat scale_sf = av_int2sf(scale, 30);
+    scale_sf = av_sqrt_sf(scale_sf);
+#else
+    double scale = sqrt(fabs(s->scale_d));
+#endif
+
     if (!(s->exp = av_malloc_array(alloc, sizeof(*s->exp))))
         return AVERROR(ENOMEM);
 
-    scale = sqrt(fabs(scale));
-
     if (pre_tab)
         off = len4;
 
     for (int i = 0; i < len4; i++) {
         const double alpha = M_PI_2 * (i + theta) / len4;
+
+#if defined(TX_INT32)
+        int c_f, s_f;
+        SoftFloat cos_sf, sin_sf;
+        av_sincos_sf(llrintf(alpha * (1 << 30) / M_PI), &s_f, &c_f);
+        cos_sf = av_int2sf(c_f, 30);
+        sin_sf = av_int2sf(s_f, 30);
+        cos_sf = av_mul_sf(cos_sf, scale_sf);
+        sin_sf = av_mul_sf(sin_sf, scale_sf);
+        s->exp[off + i] = (TXComplex){ av_sf2int(cos_sf, 30) << 1, av_sf2int(sin_sf, 30) << 1 };
+#else
         s->exp[off + i] = (TXComplex){ RESCALE(cos(alpha) * scale),
                                        RESCALE(sin(alpha) * scale) };
+#endif
     }
 
     if (pre_tab)
diff --git a/tests/fate/ac3.mak b/tests/fate/ac3.mak
index 2dfd59dfb1..85766e82c7 100644
--- a/tests/fate/ac3.mak
+++ b/tests/fate/ac3.mak
@@ -89,7 +89,7 @@ fate-ac3-fixed-encode: tests/data/asynth-44100-2.wav
 fate-ac3-fixed-encode: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
 fate-ac3-fixed-encode: CMD = md5 -i $(SRC) -c ac3_fixed -ab 128k -f ac3 -flags +bitexact -af aresample
 fate-ac3-fixed-encode: CMP = oneline
-fate-ac3-fixed-encode: REF = e9d78bca187b4bbafc4512bcea8efd3e
+fate-ac3-fixed-encode: REF = 3c1781a78ba3ea653c145798511644eb
 
 FATE_EAC3-$(call ALLYES, EAC3_DEMUXER EAC3_MUXER EAC3_CORE_BSF) += fate-eac3-core-bsf
 fate-eac3-core-bsf: CMD = md5pipe -i $(TARGET_SAMPLES)/eac3/the_great_wall_7.1.eac3 -c:a copy -bsf:a eac3_core -fflags +bitexact -f eac3
diff --git a/tests/ref/fate/unknown_layout-ac3 b/tests/ref/fate/unknown_layout-ac3
index a694c52899..c535c4ff05 100644
--- a/tests/ref/fate/unknown_layout-ac3
+++ b/tests/ref/fate/unknown_layout-ac3
@@ -1 +1 @@
-ff7e25844b3cb6abb571ef7e226cbafa
+c40992cfc42a620b592e03153a74ff68
diff --git a/tests/ref/lavf/rm b/tests/ref/lavf/rm
index 62251380cf..dc7b9ed57b 100644
--- a/tests/ref/lavf/rm
+++ b/tests/ref/lavf/rm
@@ -1,2 +1,2 @@
-a7b0ac6e5131bbf662a07ccc82ab8618 *tests/data/lavf/lavf.rm
+b471964c3f313ed33245ef3e56f144c0 *tests/data/lavf/lavf.rm
 346424 tests/data/lavf/lavf.rm
-- 
2.40.1


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [FFmpeg-devel] [PATCH] lavu/tx: make 32-bit fixed-point transforms more bitexact
  2023-06-20  0:57 [FFmpeg-devel] [PATCH] lavu/tx: make 32-bit fixed-point transforms more bitexact Lynne
@ 2023-06-20  8:53 ` Martin Storsjö
  0 siblings, 0 replies; 2+ messages in thread
From: Martin Storsjö @ 2023-06-20  8:53 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Tue, 20 Jun 2023, Lynne wrote:

> Using the sqrt/cos/sin approximations we have, the only parts left
> which may be inexact are multiplies and divisions in some transforms.

This seems to help somewhat, but there are still cases of inexactness
somewhere.

The contents of the tables that are initialized here do become bitexact
(at least across some of the configs whose output otherwise disagrees
with the reference), but despite that, the output still differs.

With the test references generated on linux/x86_64 compiled with GCC, run 
on an Intel CPU, I get the following set of machines that either agree or 
disagree with the reference:

matching
- linux x86_64 gcc11 Intel
- linux aarch64 gcc12 on Apple M1
- linux aarch64 clang10 Neoverse N1
- linux aarch64 gcc9 Neoverse N1
- linux armv7 gcc9 Neoverse N1

disagreeing
- macos x86_64 clang Xcode14 Intel
- mingw x86_64 clang trunk Dragonboard
- macos aarch64 clang Xcode12 Apple M1
- macos aarch64 clang Xcode14 Apple M1
- linux i686 gcc11 Intel
- mingw aarch64 clang trunk Dragonboard
- linux aarch64 gcc7 Dragonboard
- mingw armv7 clang trunk Dragonboard
- mingw i686 clang trunk Intel
- mingw i686 clang trunk -march=i686 Intel


The configs that are easiest to reproduce are probably the ones on macOS 
on Apple M1, or macOS on x86_64 if you happen to have access to that, or 
GCC/i686 on Linux (just configure with --extra-cflags=-m32 
--extra-ldflags=-m32).

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-06-20  8:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-20  0:57 [FFmpeg-devel] [PATCH] lavu/tx: make 32-bit fixed-point transforms more bitexact Lynne
2023-06-20  8:53 ` Martin Storsjö

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git