Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24

From: flow gg <hlefthleft@gmail.com>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
Date: Wed, 22 Nov 2023 22:30:44 +0800
Message-ID: <CAEa-L+tkx5FDob+ppJ0jgJuoyobtbHfatCAygZGXReym+zQ0QA@mail.gmail.com> (raw)
In-Reply-To: <502DD795-A076-42CF-94EA-11B198219668@remlab.net>

[-- Attachment #1: Type: text/plain, Size: 1628 bytes --]

> How did you test it?

I wrote a test, but it was a bit rough, so I want to modify it before
submitting. I've added it to this reply.

> This does not seem according to the C ABI. AFAIK `unsigned` is
sign-extended.

I'm a bit confused... because this passed in the tests I wrote in qemu.
Maybe there's a problem with my test?

> ALU right before dependent conditional branch should be avoided.

Should the sub be moved forward? I've modified it.

> SHxADD can be used advantageously.

Okay, I've made the modification.

Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 21:41写道：

> Hi,
>
> How did you test it? As per
> http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still
> don't have a FATE instance set up with the RISC-V Vector extension. The
> only testing consists of my manual runs of checkasm on a K230 board. (We
> *do* have Zba and Zbb now though, hence the existing extract_exponents()).
>
> Also:
> - This does not seem according to the C ABI. AFAIK `unsigned` is
> sign-extended.
> - ALU right before dependent conditional branch should be avoided.
> - SHxADD can be used advantageously.
>
>
> Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com> a
> écrit :
> >c910
> >    float_to_fixed24_c: 208.2
> >    float_to_fixed24_rvv_f32: 71.5
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24_2.patch --]
[-- Type: text/x-patch, Size: 3521 bytes --]

From 3e790fdccd780257f464aa8f8a56a37321ddd429 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 208.2
    float_to_fixed24_rvv_f32: 71.5
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  5 +++++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
                          riscv/ac3dsp_rvb.o
+RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..de82d1c7a7 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -35,4 +36,8 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
     }
+#if HAVE_RVV /* ff_float_to_fixed24_rvv also needs Zba (it uses sh2add) */
+    if ((flags & AV_CPU_FLAG_RVV_F32) && (flags & AV_CPU_FLAG_RVB_ADDR))
+        c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+#endif
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..c0e2880e28
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+// (a0)[0..a2-1] = to_int32((a1)[0..a2-1] * 2^24); a0 = dst, a1 = src, a2 = len
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24
+        fcvt.s.w      f0, t1              // f0 = 2^24 as float (scale factor)
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma // t0 = elements handled this pass
+        sub           a2, a2, t0          // done early, away from the branch
+        vle32.v       v0, (a1)
+        vfmul.vf      v0, v0, f0
+        vfcvt.x.f.v   v0, v0              // float -> signed 32-bit integer
+        sh2add        a1, t0, a1          // src += 4 * t0 (needs Zba)
+        vse32.v       v0, (a0)
+        sh2add        a0, t0, a0          // dst += 4 * t0 (needs Zba)
+        bnez          a2, 1b              // len is unsigned and hits exactly 0
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: 0001-add-ac3dsp-test.patch --]
[-- Type: text/x-patch, Size: 4930 bytes --]

From 08a012d86db51275fd2cda8dd7ad47cc1f1481ce Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/ac3dsp.c   | 88 +++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 4 files changed, 93 insertions(+)
 create mode 100644 tests/checkasm/ac3dsp.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 8bc241d29b..8c714c2a07 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -5,6 +5,7 @@ AVCODECOBJS-$(CONFIG_BLOCKDSP)          += blockdsp.o
 AVCODECOBJS-$(CONFIG_BSWAPDSP)          += bswapdsp.o
 AVCODECOBJS-$(CONFIG_FMTCONVERT)        += fmtconvert.o
 AVCODECOBJS-$(CONFIG_G722DSP)           += g722dsp.o
+AVCODECOBJS-$(CONFIG_AC3DSP)            += ac3dsp.o
 AVCODECOBJS-$(CONFIG_H264CHROMA)        += h264chroma.o
 AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
 AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
new file mode 100644
index 0000000000..ebebe06990
--- /dev/null
+++ b/tests/checkasm/ac3dsp.c
@@ -0,0 +1,88 @@
+#include "checkasm.h"
+#include <stdio.h>
+
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/ac3dsp.h"
+
+/**
+ * Convert an array of float in range [-1.0,1.0] to int32_t with range
+ * [-(1<<24),(1<<24)]
+ *
+ * @param dst destination array of int32_t.
+ *            constraints: 16-byte aligned
+ * @param src source array of float.
+ *            constraints: 16-byte aligned
+ * @param len number of elements to convert.
+ *            constraints: multiple of 32 greater than zero
+ */
+// void (*float_to_fixed24)(int32_t *dst, const float *src, unsigned int len);
+
+
+/* Fill buf[0..len) with random floats; roughly [-16, 16] if rnd() spans 32 bits. */
+#define randomize_float(buf, len)                               \
+    do {                                                        \
+        for (int i = 0; i < (len); i++) {                       \
+            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
+            (buf)[i] = f;                                       \
+        }                                                       \
+    } while (0)
+
+/* Fill buf[0..len) with random values in [-2^(bits-1), 2^(bits-1)), size bits wide. */
+#define randomize_int(buf, len, size, bits)                            \
+    do {                                                               \
+        for (int i = 0; i < (len); i++) {                              \
+            uint ## size ## _t r = rnd() & ((1LL << (bits)) - 1);      \
+            AV_WN ## size ## A((buf) + i, -(1LL << ((bits) - 1)) + r); \
+        }                                                              \
+    } while (0)
+
+/**
+ * Check the selected float_to_fixed24() implementation against the
+ * reference on random input, then benchmark it.
+ *
+ * @param c initialised AC3 DSP context providing float_to_fixed24
+ */
+static void check_float_to_fixed24(AC3DSPContext *c) {
+#define BUF_SIZE 800
+/* float_to_fixed24() requires len to be a non-zero multiple of 32. */
+#define TEST_LEN 96
+    LOCAL_ALIGNED_32(int32_t, v1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(float, v2, [BUF_SIZE]);
+
+    declare_func(void, int32_t *, const float *, unsigned int);
+
+    randomize_int(v1, BUF_SIZE, 32, 10);
+    randomize_float(v2, BUF_SIZE);
+
+    if (check_func(c->float_to_fixed24, "float_to_fixed24")) {
+        LOCAL_ALIGNED_32(int32_t, dst, [BUF_SIZE]);
+        LOCAL_ALIGNED_32(int32_t, dst2, [BUF_SIZE]);
+
+        call_ref(dst, v2, TEST_LEN);
+        call_new(dst2, v2, TEST_LEN);
+
+        /* Compare every converted element, not just a short prefix. */
+        if (memcmp(dst, dst2, sizeof(*dst) * TEST_LEN) != 0)
+            fail();
+
+        /* v1 only serves as scratch output for the benchmark run. */
+        bench_new(v1, v2, TEST_LEN);
+    }
+
+    report("float_to_fixed24");
+}
+
+/* checkasm entry point: set up an AC3 DSP context and run the ac3dsp checks. */
+void checkasm_check_ac3dsp(void)
+{
+    AC3DSPContext c;
+    ff_ac3dsp_init(&c);
+    check_float_to_fixed24(&c);
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 708119e7c6..9502e372a1 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -105,6 +105,9 @@ static const struct {
     #if CONFIG_G722DSP
         { "g722dsp", checkasm_check_g722dsp },
     #endif
+    #if CONFIG_AC3DSP
+        { "ac3dsp", checkasm_check_ac3dsp },
+    #endif
     #if CONFIG_H264CHROMA
         { "h264chroma", checkasm_check_h264chroma },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index cfea868ff1..4c73589606 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -96,6 +96,7 @@ void checkasm_check_vp8dsp(void);
 void checkasm_check_vp9dsp(void);
 void checkasm_check_videodsp(void);
 void checkasm_check_vorbisdsp(void);
+void checkasm_check_ac3dsp(void);
 
 struct CheckasmPerf;
 
-- 
2.43.0


[-- Attachment #4: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".