Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
@ 2023-11-22 12:00 flow gg
  2023-11-22 13:40 ` Rémi Denis-Courmont
  2023-11-23  7:11 ` flow gg
  0 siblings, 2 replies; 17+ messages in thread
From: flow gg @ 2023-11-22 12:00 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 70 bytes --]

c910
    float_to_fixed24_c: 208.2
    float_to_fixed24_rvv_f32: 71.5

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3554 bytes --]

From 69da974fd0febaa74db4dd551b05172caeefb846 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 208.2
    float_to_fixed24_rvv_f32: 71.5
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  5 +++++
 libavcodec/riscv/ac3dsp_rvv.S  | 40 ++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..de82d1c7a7 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -35,4 +36,8 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
     }
+
+    if (flags & AV_CPU_FLAG_RVV_F32) {
+        c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+    }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..4d8ab060e7
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+/* Scale floats by 2^24 and convert them to int32_t; a0 = dst, a1 = src, a2 = len (per the C prototype). */
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24               // integer scale factor 2^24
+        fcvt.s.w      f0, t1                    // f0 = (float)t1
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma   // t0 = 32-bit elements taken this pass, out of a2 left
+        vle32.v       v0, (a1)                  // load t0 floats from a1
+        slli          t3, t0, 2                 // t3 = t0 * 4 = bytes handled this pass
+        vfmul.vf      v0, v0, f0                // scale every element by f0
+        vfcvt.x.f.v   v0, v0                    // float -> signed 32-bit integer
+        add           a1, a1, t3                // advance the load pointer
+        vse32.v       v0, (a0)                  // store t0 results to a0
+        add           a0, a0, t3                // advance the store pointer
+        sub           a2, a2, t0                // elements still to do
+        bgtz          a2, 1b                    // loop while a2 > 0 (signed compare of the whole register)
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 12:00 [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24 flow gg
@ 2023-11-22 13:40 ` Rémi Denis-Courmont
  2023-11-22 14:30   ` flow gg
  2023-11-23  7:11 ` flow gg
  1 sibling, 1 reply; 17+ messages in thread
From: Rémi Denis-Courmont @ 2023-11-22 13:40 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi,

How did you test it? As per http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still don't have a FATE instance set up with the RISC-V Vector extension. The only testing consists of my manual runs of checkasm on a K230 board. (We *do* have Zba and Zbb now though, hence the existing extract_exponents()).

Also:
- This does not seem according to the C ABI. AFAIK `unsigned` is sign-extended.
- ALU right before dependent conditional branch should be avoided.
- SHxADD can be used advantageously.


Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com> a écrit :
>c910
>    float_to_fixed24_c: 208.2
>    float_to_fixed24_rvv_f32: 71.5
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 13:40 ` Rémi Denis-Courmont
@ 2023-11-22 14:30   ` flow gg
  2023-11-22 14:35     ` flow gg
                       ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: flow gg @ 2023-11-22 14:30 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1628 bytes --]

> How did you test it?

I wrote a test, but it was a bit rough, so I want to modify it before
submitting. I've added it to this reply.

> This does not seem according to the C ABI. AFAIK `unsigned` is
sign-extended.

I'm a bit confused... because this passed in the tests I wrote in qemu.
Maybe there's a problem with my test?

> ALU right before dependent conditional branch should be avoided.

Should the sub be moved forward? I've modified it.

> SHxADD can be used advantageously.

Okay, I've made the modification

Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 21:41写道:

> Hi,
>
> How did you test it? As per
> http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still
> don't have a FATE instance set up with the RISC-V Vector extension. The
> only testing consists of my manual runs of checkasm on a K230 board. (We
> *do* have Zba and Zbb now though, hence the existing extract_exponents()).
>
> Also:
> - This does not seem according to the C ABI. AFAIK `unsigned` is
> sign-extended.
> - ALU right before dependent conditional branch should be avoided.
> - SHxADD can be used advantageously.
>
>
> Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com> a
> écrit :
> >c910
> >    float_to_fixed24_c: 208.2
> >    float_to_fixed24_rvv_f32: 71.5
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24_2.patch --]
[-- Type: text/x-patch, Size: 3521 bytes --]

From 3e790fdccd780257f464aa8f8a56a37321ddd429 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 208.2
    float_to_fixed24_rvv_f32: 71.5
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  5 +++++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..de82d1c7a7 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -35,4 +36,8 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
     }
+
+    if (flags & AV_CPU_FLAG_RVV_F32) {
+        c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+    }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..c0e2880e28
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+/* Scale floats by 2^24 and convert them to int32_t; a0 = dst, a1 = src, a2 = len (per the C prototype). */
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24               // integer scale factor 2^24
+        fcvt.s.w      f0, t1                    // f0 = (float)t1
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma   // t0 = 32-bit elements taken this pass, out of a2 left
+        sub           a2, a2, t0                // update the remaining count early, well before the branch
+        vle32.v       v0, (a1)                  // load t0 floats from a1
+        vfmul.vf      v0, v0, f0                // scale every element by f0
+        vfcvt.x.f.v   v0, v0                    // float -> signed 32-bit integer
+        sh2add        a1, t0, a1                // a1 += 4 * t0; NOTE(review): sh2add needs Zba, confirm it is enabled
+        vse32.v       v0, (a0)                  // store t0 results to a0
+        sh2add        a0, t0, a0                // a0 += 4 * t0
+        bgtz          a2, 1b                    // loop while a2 > 0 (signed compare of the whole register)
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: 0001-add-ac3dsp-test.patch --]
[-- Type: text/x-patch, Size: 4930 bytes --]

From 08a012d86db51275fd2cda8dd7ad47cc1f1481ce Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/ac3dsp.c   | 88 +++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 4 files changed, 93 insertions(+)
 create mode 100644 tests/checkasm/ac3dsp.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 8bc241d29b..8c714c2a07 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -5,6 +5,7 @@ AVCODECOBJS-$(CONFIG_BLOCKDSP)          += blockdsp.o
 AVCODECOBJS-$(CONFIG_BSWAPDSP)          += bswapdsp.o
 AVCODECOBJS-$(CONFIG_FMTCONVERT)        += fmtconvert.o
 AVCODECOBJS-$(CONFIG_G722DSP)           += g722dsp.o
+AVCODECOBJS-$(CONFIG_AC3DSP)            += ac3dsp.o
 AVCODECOBJS-$(CONFIG_H264CHROMA)        += h264chroma.o
 AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
 AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
new file mode 100644
index 0000000000..ebebe06990
--- /dev/null
+++ b/tests/checkasm/ac3dsp.c
@@ -0,0 +1,88 @@
+#include "checkasm.h"
+#include <stdio.h>
+
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+#include "libavutil/mem_internal.h"
+
+#include "libavcodec/ac3dsp.h"
+
+/**
+ * Convert an array of float in range [-1.0,1.0] to int32_t with range
+ * [-(1<<24),(1<<24)]
+ *
+ * @param dst destination array of int32_t.
+ *            constraints: 16-byte aligned
+ * @param src source array of float.
+ *            constraints: 16-byte aligned
+ * @param len number of elements to convert.
+ *            constraints: multiple of 32 greater than zero
+ */
+// void (*float_to_fixed24)(int32_t *dst, const float *src, unsigned int len);
+
+/* Fill buf[0..len) with (float)rnd() / (UINT_MAX >> 5) - 16.0f; presumably ~[-16, 16] for a 32-bit rnd() - confirm. */
+#define randomize_float(buf, len)                               \
+    do {                                                        \
+        int i;                                                  \
+        for (i = 0; i < len; i++) {                             \
+            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
+            buf[i] = f;                                         \
+        }                                                       \
+    } while (0)
+/* Store len random `bits`-bit values, each offset by -(1 << (bits - 1)), into buf through AV_WN<size>A. */
+#define randomize_int(buf, len, size, bits)                         \
+    do {                                                            \
+        int i;                                                      \
+        for (i = 0; i < len; i++) {                                 \
+            uint ## size ## _t r = rnd() & ((1LL << bits) - 1);     \
+            AV_WN ## size ## A(buf + i, -(1LL << (bits - 1)) + r);  \
+        }                                                           \
+    } while (0)
+
+/**
+ * Check c->float_to_fixed24 against the reference, then benchmark it.
+ *
+ * @param c DSP context whose float_to_fixed24 pointer is exercised
+ */
+static void check_float_to_fixed24(AC3DSPContext *c) {
+#define BUF_SIZE 800
+    LOCAL_ALIGNED_32(int32_t, v1, [BUF_SIZE]);
+    LOCAL_ALIGNED_32(float, v2, [BUF_SIZE]);
+
+    declare_func(void, int32_t *, const float *, unsigned int);
+
+    randomize_int(v1, BUF_SIZE, 32, 10);
+    randomize_float(v2, BUF_SIZE);
+
+    if (check_func(c->float_to_fixed24, "float_to_fixed24")) {
+        LOCAL_ALIGNED_32(int32_t, dst, [BUF_SIZE]);
+        LOCAL_ALIGNED_32(int32_t, dst2, [BUF_SIZE]);
+
+        /* len must be a multiple of 32 per the documented contract on
+         * float_to_fixed24; BUF_SIZE (800 = 25 * 32) qualifies. */
+        call_ref(dst, v2, BUF_SIZE);
+        call_new(dst2, v2, BUF_SIZE);
+
+        /* Compare every converted element so that errors in any loop
+         * iteration are caught, not only those in the first few outputs. */
+        if (memcmp(dst, dst2, sizeof(*dst) * BUF_SIZE) != 0)
+            fail();
+
+        /* Benchmark into the scratch buffer v1. */
+        bench_new(v1, v2, BUF_SIZE);
+    }
+
+    report("float_to_fixed24");
+}
+
+/* Entry point registered with checkasm for the "ac3dsp" test group. */
+void checkasm_check_ac3dsp(void)
+{
+    AC3DSPContext ctx;
+    ff_ac3dsp_init(&ctx);
+    check_float_to_fixed24(&ctx);
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 708119e7c6..9502e372a1 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -105,6 +105,9 @@ static const struct {
     #if CONFIG_G722DSP
         { "g722dsp", checkasm_check_g722dsp },
     #endif
+    #if CONFIG_AC3DSP
+        { "ac3dsp", checkasm_check_ac3dsp },
+    #endif
     #if CONFIG_H264CHROMA
         { "h264chroma", checkasm_check_h264chroma },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index cfea868ff1..4c73589606 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -96,6 +96,7 @@ void checkasm_check_vp8dsp(void);
 void checkasm_check_vp9dsp(void);
 void checkasm_check_videodsp(void);
 void checkasm_check_vorbisdsp(void);
+void checkasm_check_ac3dsp(void);
 
 struct CheckasmPerf;
 
-- 
2.43.0


[-- Attachment #4: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 14:30   ` flow gg
@ 2023-11-22 14:35     ` flow gg
  2023-11-22 14:51     ` Rémi Denis-Courmont
  2023-11-22 17:18     ` James Almer
  2 siblings, 0 replies; 17+ messages in thread
From: flow gg @ 2023-11-22 14:35 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

qemu-riscv64 -cpu rv64,v=true,g=true,c=true,zba=true,vlen=128 checkasm
--test=ac3dsp

flow gg <hlefthleft@gmail.com> 于2023年11月22日周三 22:30写道:

> > How did you test it?
>
> I wrote a test, but it was a bit rough, so I want to modify it before
> submitting. I've added it to this reply.
>
> > This does not seem according to the C ABI. AFAIK `unsigned` is
> sign-extended.
>
> I'm a bit confused... because this passed in the tests I wrote in qemu.
> Maybe there's a problem with my test?
>
> > ALU right before dependent conditional branch should be avoided.
>
> Should the sub be moved forward? I've modified it.
>
> > SHxADD can be used advantageously.
>
> Okay, I've made the modification
>
> Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 21:41写道:
>
>> Hi,
>>
>> How did you test it? As per
>> http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still
>> don't have a FATE instance set up with the RISC-V Vector extension. The
>> only testing consists of my manual runs of checkasm on a K230 board. (We
>> *do* have Zba and Zbb now though, hence the existing extract_exponents()).
>>
>> Also:
>> - This does not seem according to the C ABI. AFAIK `unsigned` is
>> sign-extended.
>> - ALU right before dependent conditional branch should be avoided.
>> - SHxADD can be used advantageously.
>>
>>
>> Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com> a
>> écrit :
>> >c910
>> >    float_to_fixed24_c: 208.2
>> >    float_to_fixed24_rvv_f32: 71.5
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 14:30   ` flow gg
  2023-11-22 14:35     ` flow gg
@ 2023-11-22 14:51     ` Rémi Denis-Courmont
  2023-11-22 16:37       ` flow gg
  2023-11-22 17:18     ` James Almer
  2 siblings, 1 reply; 17+ messages in thread
From: Rémi Denis-Courmont @ 2023-11-22 14:51 UTC (permalink / raw)
  To: FFmpeg development discussions and patches



Le 22 novembre 2023 16:30:44 GMT+02:00, flow gg <hlefthleft@gmail.com> a écrit :
>> How did you test it?
>
>I wrote a test, but it was a bit rough, so I want to modify it before
>submitting. I've added it to this reply.
>
>> This does not seem according to the C ABI. AFAIK `unsigned` is
>sign-extended.
>
>I'm a bit confused... because this passed in the tests I wrote in qemu.
>Maybe there's a problem with my test?

You probably didn't test sizes between 2^31 and 2^32-1. This might not even be feasible in QEMU.

Ideally the prototype would use size_t, then the problem wouldn't exist.

>
>> ALU right before dependent conditional branch should be avoided.
>
>Should the sub be moved forward? I've modified it.
>
>> SHxADD can be used advantageously.
>
>Okay, I've made the modification
>
>Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 21:41写道:
>
>> Hi,
>>
>> How did you test it? As per
>> http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still
>> don't have a FATE instance set up with the RISC-V Vector extension. The
>> only testing consists of my manual runs of checkasm on a K230 board. (We
>> *do* have Zba and Zbb now though, hence the existing extract_exponents()).
>>
>> Also:
>> - This does not seem according to the C ABI. AFAIK `unsigned` is
>> sign-extended.
>> - ALU right before dependent conditional branch should be avoided.
>> - SHxADD can be used advantageously.
>>
>>
>> Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com> a
>> écrit :
>> >c910
>> >    float_to_fixed24_c: 208.2
>> >    float_to_fixed24_rvv_f32: 71.5
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 14:51     ` Rémi Denis-Courmont
@ 2023-11-22 16:37       ` flow gg
  2023-11-22 16:49         ` James Almer
  2023-11-22 23:17         ` flow gg
  0 siblings, 2 replies; 17+ messages in thread
From: flow gg @ 2023-11-22 16:37 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 2637 bytes --]

Thank you for your guidance, I finally understand..  How about choosing
manual zero-extension for rv64? I modified the patch.

#if (__riscv_xlen == 64)
        slli a2, a2, 32
        srli a2, a2, 32
#endif

Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 22:51写道:

>
>
> Le 22 novembre 2023 16:30:44 GMT+02:00, flow gg <hlefthleft@gmail.com> a
> écrit :
> >> How did you test it?
> >
> >I wrote a test, but it was a bit rough, so I want to modify it before
> >submitting. I've added it to this reply.
> >
> >> This does not seem according to the C ABI. AFAIK `unsigned` is
> >sign-extended.
> >
> >I'm a bit confused... because this passed in the tests I wrote in qemu.
> >Maybe there's a problem with my test?
>
> You probably didn't test sizes between 2^31 and 2^32-1. This might not
> even be feasible in QEMU.
>
> Ideally the prototype would use size_t, then the problem wouldn't exist.
>
> >
> >> ALU right before dependent conditional branch should be avoided.
> >
> >Should the sub be moved forward? I've modified it.
> >
> >> SHxADD can be used advantageously.
> >
> >Okay, I've made the modification
> >
> >Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 21:41写道:
> >
> >> Hi,
> >>
> >> How did you test it? As per
> >> http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still
> >> don't have a FATE instance set up with the RISC-V Vector extension. The
> >> only testing consists of my manual runs of checkasm on a K230 board. (We
> >> *do* have Zba and Zbb now though, hence the existing
> extract_exponents()).
> >>
> >> Also:
> >> - This does not seem according to the C ABI. AFAIK `unsigned` is
> >> sign-extended.
> >> - ALU right before dependent conditional branch should be avoided.
> >> - SHxADD can be used advantageously.
> >>
> >>
> >> Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com>
> a
> >> écrit :
> >> >c910
> >> >    float_to_fixed24_c: 208.2
> >> >    float_to_fixed24_rvv_f32: 71.5
> >> _______________________________________________
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >>
> >> To unsubscribe, visit link above, or email
> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3608 bytes --]

From d709519219138b746ff622b15bb004b27eed7333 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 208.2
    float_to_fixed24_rvv_f32: 71.5
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  5 ++++
 libavcodec/riscv/ac3dsp_rvv.S  | 45 ++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..de82d1c7a7 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -35,4 +36,8 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
     }
+
+    if (flags & AV_CPU_FLAG_RVV_F32) {
+        c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+    }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..4d4a566659
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+/* Scale floats by 2^24 and convert them to int32_t; a0 = dst, a1 = src, a2 = len (per the C prototype). */
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24               // integer scale factor 2^24
+        fcvt.s.w      f0, t1                    // f0 = (float)t1
+        // On 64-bit targets, clear the upper 32 bits of a2 so only the 32-bit len value is used.
+#if (__riscv_xlen == 64)
+        slli a2, a2, 32
+        srli a2, a2, 32
+#endif
+
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma   // t0 = 32-bit elements taken this pass, out of a2 left
+        sub           a2, a2, t0                // update the remaining count early, well before the branch
+        vle32.v       v0, (a1)                  // load t0 floats from a1
+        vfmul.vf      v0, v0, f0                // scale every element by f0
+        vfcvt.x.f.v   v0, v0                    // float -> signed 32-bit integer
+        sh2add        a1, t0, a1                // a1 += 4 * t0; NOTE(review): sh2add needs Zba, confirm it is enabled
+        vse32.v       v0, (a0)                  // store t0 results to a0
+        sh2add        a0, t0, a0                // a0 += 4 * t0
+        bgtz          a2, 1b                    // loop while a2 > 0
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 16:37       ` flow gg
@ 2023-11-22 16:49         ` James Almer
  2023-11-22 23:17         ` flow gg
  1 sibling, 0 replies; 17+ messages in thread
From: James Almer @ 2023-11-22 16:49 UTC (permalink / raw)
  To: ffmpeg-devel

On 11/22/2023 1:37 PM, flow gg wrote:
> Thank you for your guidance, I finally understand..  How about choosing
> manual zero-extension for rv64? I modified the patch.
> 
> #if (__riscv_xlen == 64)
>          slli a2, a2, 32
>          srli a2, a2, 32
> #endif

Please, don't top post.

I think it will be better to change the prototype to use ptrdiff_t for 
len, as it's done in other dsp functions.

> 
> Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 22:51写道:
> 
>>
>>
>> Le 22 novembre 2023 16:30:44 GMT+02:00, flow gg <hlefthleft@gmail.com> a
>> écrit :
>>>> How did you test it?
>>>
>>> I wrote a test, but it was a bit rough, so I want to modify it before
>>> submitting. I've added it to this reply.
>>>
>>>> This does not seem according to the C ABI. AFAIK `unsigned` is
>>> sign-extended.
>>>
>>> I'm a bit confused... because this passed in the tests I wrote in qemu.
>>> Maybe there's a problem with my test?
>>
>> You probably didn't test sizes between 2^31 and 2^32-1. This might not
>> even be feasible in QEMU.
>>
>> Ideally the prototype would use size_t, then the problem wouldn't exist.
>>
>>>
>>>> ALU right before dependent conditional branch should be avoided.
>>>
>>> Should the sub be moved forward? I've modified it.
>>>
>>>> SHxADD can be used advantageously.
>>>
>>> Okay, I've made the modification
>>>
>>> Rémi Denis-Courmont <remi@remlab.net> 于2023年11月22日周三 21:41写道:
>>>
>>>> Hi,
>>>>
>>>> How did you test it? As per
>>>> http://ffmpeg.org/pipermail/ffmpeg-devel/2023-June/310720.html we still
>>>> don't have a FATE instance set up with the RISC-V Vector extension. The
>>>> only testing consists of my manual runs of checkasm on a K230 board. (We
>>>> *do* have Zba and Zbb now though, hence the existing
>> extract_exponents()).
>>>>
>>>> Also:
>>>> - This does not seem according to the C ABI. AFAIK `unsigned` is
>>>> sign-extended.
>>>> - ALU right before dependent conditional branch should be avoided.
>>>> - SHxADD can be used advantageously.
>>>>
>>>>
>>>> Le 22 novembre 2023 14:00:07 GMT+02:00, flow gg <hlefthleft@gmail.com>
>> a
>>>> écrit :
>>>>> c910
>>>>>     float_to_fixed24_c: 208.2
>>>>>     float_to_fixed24_rvv_f32: 71.5
>>>> _______________________________________________
>>>> ffmpeg-devel mailing list
>>>> ffmpeg-devel@ffmpeg.org
>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>
>>>> To unsubscribe, visit link above, or email
>>>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 14:30   ` flow gg
  2023-11-22 14:35     ` flow gg
  2023-11-22 14:51     ` Rémi Denis-Courmont
@ 2023-11-22 17:18     ` James Almer
  2023-11-22 17:34       ` flow gg
  2 siblings, 1 reply; 17+ messages in thread
From: James Almer @ 2023-11-22 17:18 UTC (permalink / raw)
  To: ffmpeg-devel

On 11/22/2023 11:30 AM, flow gg wrote:
>> How did you test it?
> 
> I wrote a test, but it was a bit rough, so I want to modify it before
> submitting. I've added it to this reply.


> From 08a012d86db51275fd2cda8dd7ad47cc1f1481ce Mon Sep 17 00:00:00 2001
> From: sunyuechi <sunyuechi@iscas.ac.cn>
> Date: Wed, 22 Nov 2023 14:57:29 +0800
> Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24
> 
> ---
>  tests/checkasm/Makefile   |  1 +
>  tests/checkasm/ac3dsp.c   | 88 +++++++++++++++++++++++++++++++++++++++
>  tests/checkasm/checkasm.c |  3 ++
>  tests/checkasm/checkasm.h |  1 +
>  4 files changed, 93 insertions(+)
>  create mode 100644 tests/checkasm/ac3dsp.c
> 
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index 8bc241d29b..8c714c2a07 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -5,6 +5,7 @@ AVCODECOBJS-$(CONFIG_BLOCKDSP)          += blockdsp.o
>  AVCODECOBJS-$(CONFIG_BSWAPDSP)          += bswapdsp.o
>  AVCODECOBJS-$(CONFIG_FMTCONVERT)        += fmtconvert.o
>  AVCODECOBJS-$(CONFIG_G722DSP)           += g722dsp.o
> +AVCODECOBJS-$(CONFIG_AC3DSP)            += ac3dsp.o
>  AVCODECOBJS-$(CONFIG_H264CHROMA)        += h264chroma.o
>  AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
>  AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
> diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
> new file mode 100644
> index 0000000000..ebebe06990
> --- /dev/null
> +++ b/tests/checkasm/ac3dsp.c
> @@ -0,0 +1,88 @@
> +#include "checkasm.h"
> +#include <stdio.h>
> +
> +
> +#include <string.h>
> +
> +#include "libavutil/common.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/mem_internal.h"
> +
> +#include "libavcodec/ac3dsp.h"
> +
> +/**
> + * Convert an array of float in range [-1.0,1.0] to int32_t with range
> + * [-(1<<24),(1<<24)]
> + *
> + * @param dst destination array of int32_t.
> + *            constraints: 16-byte aligned
> + * @param src source array of float.
> + *            constraints: 16-byte aligned
> + * @param len number of elements to convert.
> + *            constraints: multiple of 32 greater than zero
> + */
> +// void (*float_to_fixed24)(int32_t *dst, const float *src, unsigned int len);
> +
> +
> +#define randomize_float(buf, len)                               \
> +    do {                                                        \
> +        int i;                                                  \
> +        for (i = 0; i < len; i++) {                             \
> +            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f;   \
> +            buf[i] = f;                                         \
> +        }                                                       \
> +    } while (0)
> +
> +#define randomize_int(buf, len, size, bits)                         \
> +    do {                                                            \
> +        int i;                                                      \
> +        for (i = 0; i < len; i++) {                                 \
> +            uint ## size ## _t r = rnd() & ((1LL << bits) - 1);     \
> +            AV_WN ## size ## A(buf + i, -(1LL << (bits - 1)) + r);  \
> +        }                                                           \
> +    } while (0)
> +
> +static void check_float_to_fixed24(AC3DSPContext *c) {
> +#define BUF_SIZE 800

800 is a multiple of 32 (25 * 32), so it would be valid if this is meant to be used as len.

> +    LOCAL_ALIGNED_32(int32_t, v1, [BUF_SIZE]);
> +    LOCAL_ALIGNED_32(float, v2, [BUF_SIZE]);
> +
> +    declare_func(void, int32_t *, const float *, unsigned int);
> +
> +    randomize_int(v1, BUF_SIZE, 32, 10);

This is not really used at all. The input is floats, and the output is 
write only.

> +    randomize_float(v2, BUF_SIZE);
> +
> +    if (check_func(c->float_to_fixed24, "float_to_fixed24")) {
> +        LOCAL_ALIGNED_32(int32_t, dst, [BUF_SIZE]);
> +        LOCAL_ALIGNED_32(int32_t, dst2, [BUF_SIZE]);

The requirement is 16 byte alignment.

> +
> +        call_ref(dst, v2, 80);

This should be BUF_SIZE. And 80 is also not a multiple of 32.

> +        call_new(dst2, v2, 80);
> +
> +				if (memcmp(dst, dst2, sizeof(*dst) * 10) != 0){

memcmp(dst, dst2, sizeof(dst))

> +						puts(">>>>>>>>>>>>>> fail --------------------");

No puts(), please. This line is also not needed.

> +						for(int i = 0 ; i < 10; i++){
> +							printf("dst[%d] = %d, dst2[%d] = %d\n", i, dst[i], i, dst2[i]);

fprintf(stderr, ...);

> +						}
> +						puts("");
> +
> +            fail();
> +				} else {
> +					puts(">>>>>>>>>>>>>> ok --------------------");

Same.

> +				}
> +
> +        bench_new(v1, v2, 80);

bench_new(dst2, v2...

> +    }
> +
> +
> +	report("float_to_fixed24");
> +}
> +
> +void checkasm_check_ac3dsp(void)
> +{
> +	AC3DSPContext c;
> +	ff_ac3dsp_init(&c);
> +
> +	check_float_to_fixed24(&c);
> +}
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 708119e7c6..9502e372a1 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -105,6 +105,9 @@ static const struct {
>      #if CONFIG_G722DSP
>          { "g722dsp", checkasm_check_g722dsp },
>      #endif
> +    #if CONFIG_AC3DSP
> +        { "ac3dsp", checkasm_check_ac3dsp },
> +    #endif
>      #if CONFIG_H264CHROMA
>          { "h264chroma", checkasm_check_h264chroma },
>      #endif
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index cfea868ff1..4c73589606 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -96,6 +96,7 @@ void checkasm_check_vp8dsp(void);
>  void checkasm_check_vp9dsp(void);
>  void checkasm_check_videodsp(void);
>  void checkasm_check_vorbisdsp(void);
> +void checkasm_check_ac3dsp(void);
>  
>  struct CheckasmPerf;
>  
> -- 
> 2.43.0
> 
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 17:18     ` James Almer
@ 2023-11-22 17:34       ` flow gg
  0 siblings, 0 replies; 17+ messages in thread
From: flow gg @ 2023-11-22 17:34 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Wow, thank you for reviewing this. I just wanted to see if the function was
working properly. There are so many bugs in the test code ...
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 16:37       ` flow gg
  2023-11-22 16:49         ` James Almer
@ 2023-11-22 23:17         ` flow gg
  2023-11-23 17:08           ` Rémi Denis-Courmont
  1 sibling, 1 reply; 17+ messages in thread
From: flow gg @ 2023-11-22 23:17 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 157 bytes --]

Hello, I saw the new commit "avcodec/ac3dsp: make len a size_t in
float_to_fixed24."

So I removed the part #if (__riscv_xlen == 64) and restored the patch.

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3521 bytes --]

From 3e790fdccd780257f464aa8f8a56a37321ddd429 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 208.2
    float_to_fixed24_rvv_f32: 71.5
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  5 +++++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..de82d1c7a7 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -35,4 +36,8 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
     }
+
+    if (flags & AV_CPU_FLAG_RVV_F32) {
+        c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+    }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..c0e2880e28
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24
+        fcvt.s.w      f0, t1
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma
+        sub           a2, a2, t0
+        vle32.v       v0, (a1)
+        vfmul.vf      v0, v0, f0
+        vfcvt.x.f.v   v0, v0
+        sh2add        a1, t0, a1
+        vse32.v       v0, (a0)
+        sh2add        a0, t0, a0
+        bgtz          a2, 1b
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 12:00 [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24 flow gg
  2023-11-22 13:40 ` Rémi Denis-Courmont
@ 2023-11-23  7:11 ` flow gg
  1 sibling, 0 replies; 17+ messages in thread
From: flow gg @ 2023-11-23  7:11 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 424 bytes --]

I modified the temporary test and sent it in "[FFmpeg-devel] [PATCH]
checkasm/ac3dsp: add float_to_fixed24 test".

As a result, the benchmark timings have changed, and I have updated them in the patch.

c910
  float_to_fixed24_c: 2207.2
  float_to_fixed24_rvv_f32: 696.2

flow gg <hlefthleft@gmail.com> 于2023年11月22日周三 20:00写道:

> c910
>     float_to_fixed24_c: 208.2
>     float_to_fixed24_rvv_f32: 71.5
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3519 bytes --]

From 3e790fdccd780257f464aa8f8a56a37321ddd429 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
  float_to_fixed24_c: 2207.2
  float_to_fixed24_rvv_f32: 696.2
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  5 +++++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..de82d1c7a7 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -35,4 +36,8 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
     }
+
+    if (flags & AV_CPU_FLAG_RVV_F32) {
+        c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+    }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..c0e2880e28
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24
+        fcvt.s.w      f0, t1
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma
+        sub           a2, a2, t0
+        vle32.v       v0, (a1)
+        vfmul.vf      v0, v0, f0
+        vfcvt.x.f.v   v0, v0
+        sh2add        a1, t0, a1
+        vse32.v       v0, (a0)
+        sh2add        a0, t0, a0
+        bgtz          a2, 1b
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-22 23:17         ` flow gg
@ 2023-11-23 17:08           ` Rémi Denis-Courmont
  2023-11-23 22:39             ` flow gg
  0 siblings, 1 reply; 17+ messages in thread
From: Rémi Denis-Courmont @ 2023-11-23 17:08 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le torstaina 23. marraskuuta 2023, 1.17.03 EET flow gg a écrit :
> Hello, I saw the new commit "avcodec/ac3dsp: make len a size_t in
> float_to_fixed24."
> 
> So I removed the part #if (__riscv_xlen == 64) and restored the patch.

You're not checking for Zba, which the sh2add instructions require. Also,
'bnez' would be more logical than 'bgtz' for an unsigned counter.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-23 17:08           ` Rémi Denis-Courmont
@ 2023-11-23 22:39             ` flow gg
  2023-12-01 18:35               ` Rémi Denis-Courmont
  0 siblings, 1 reply; 17+ messages in thread
From: flow gg @ 2023-11-23 22:39 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 812 bytes --]

Okay, changed

Rémi Denis-Courmont <remi@remlab.net> 于2023年11月24日周五 01:09写道:

> Le torstaina 23. marraskuuta 2023, 1.17.03 EET flow gg a écrit :
> > Hello, I saw the new commit "avcodec/ac3dsp: make len a size_t in
> > float_to_fixed24."
> >
> > So I removed the part #if (__riscv_xlen == 64) and restored the patch.
>
> You're not checking for Zba. Also 'bnez'  would be more logical than
> 'bgtz'
> for an unsigned counter.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3559 bytes --]

From af221d659ebc1e97b6d274681061fa8331d0b147 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 2207.2
    float_to_fixed24_rvv_f32: 696.2
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  3 +++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 8f2a519827..ac7b7c2929 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..25244943cb 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -34,5 +35,7 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
     if (flags & AV_CPU_FLAG_RVB_ADDR) {
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
+        if (flags & AV_CPU_FLAG_RVV_F32)
+            c->float_to_fixed24 = ff_float_to_fixed24_rvv;
     }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..82c14ea275
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24
+        fcvt.s.w      f0, t1
+1:
+        vsetvli       t0, a2, e32, m4, ta, ma
+        sub           a2, a2, t0
+        vle32.v       v0, (a1)
+        vfmul.vf      v0, v0, f0
+        vfcvt.x.f.v   v0, v0
+        sh2add        a1, t0, a1
+        vse32.v       v0, (a0)
+        sh2add        a0, t0, a0
+        bnez          a2, 1b
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-11-23 22:39             ` flow gg
@ 2023-12-01 18:35               ` Rémi Denis-Courmont
  2023-12-01 18:38                 ` Rémi Denis-Courmont
  0 siblings, 1 reply; 17+ messages in thread
From: Rémi Denis-Courmont @ 2023-12-01 18:35 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le perjantaina 24. marraskuuta 2023, 0.39.39 EET flow gg a écrit :
> Okay, changed

src/libavcodec/riscv/ac3dsp_init.c: In function ‘ff_ac3dsp_init_riscv’:
src/libavcodec/riscv/ac3dsp_init.c:39:33: warning: assignment to ‘void (*)
(int32_t *, const float *, size_t)’ {aka ‘void (*)(int *, const float *, long 
unsigned int)’} from incompatible pointer type ‘void (*)(int32_t *, const float 
*, unsigned int)’ {aka ‘void (*)(int *, const float *, unsigned int)’} [-
Wincompatible-pointer-types]
   39 |             c->float_to_fixed24 = ff_float_to_fixed24_rvv;
      |                                 ^

Also the Makefile precondition is inaccurate.

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-12-01 18:35               ` Rémi Denis-Courmont
@ 2023-12-01 18:38                 ` Rémi Denis-Courmont
  2023-12-01 19:50                   ` flow gg
  0 siblings, 1 reply; 17+ messages in thread
From: Rémi Denis-Courmont @ 2023-12-01 18:38 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le perjantaina 1. joulukuuta 2023, 20.35.10 EET Rémi Denis-Courmont a écrit :
> Le perjantaina 24. marraskuuta 2023, 0.39.39 EET flow gg a écrit :
> > Okay, changed
> 
> src/libavcodec/riscv/ac3dsp_init.c: In function ‘ff_ac3dsp_init_riscv’:
> src/libavcodec/riscv/ac3dsp_init.c:39:33: warning: assignment to ‘void (*)
> (int32_t *, const float *, size_t)’ {aka ‘void (*)(int *, const float *,
> long unsigned int)’} from incompatible pointer type ‘void (*)(int32_t *,
> const float *, unsigned int)’ {aka ‘void (*)(int *, const float *, unsigned
> int)’} [- Wincompatible-pointer-types]
>    39 |             c->float_to_fixed24 = ff_float_to_fixed24_rvv;
> 
>       |                                 ^
> 
> Also the Makefile precondition is inaccurate.

Oh, and on C908, LMUL=8 is actually faster than LMUL=4. Generally speaking, 
you should maximise the LMUL unless there is a *specific* reason not to.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-12-01 18:38                 ` Rémi Denis-Courmont
@ 2023-12-01 19:50                   ` flow gg
  2023-12-01 20:16                     ` flow gg
  0 siblings, 1 reply; 17+ messages in thread
From: flow gg @ 2023-12-01 19:50 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1456 bytes --]

Okay, changed and attached

Rémi Denis-Courmont <remi@remlab.net> 于2023年12月2日周六 02:38写道:

> Le perjantaina 1. joulukuuta 2023, 20.35.10 EET Rémi Denis-Courmont a
> écrit :
> > Le perjantaina 24. marraskuuta 2023, 0.39.39 EET flow gg a écrit :
> > > Okay, changed
> >
> > src/libavcodec/riscv/ac3dsp_init.c: In function ‘ff_ac3dsp_init_riscv’:
> > src/libavcodec/riscv/ac3dsp_init.c:39:33: warning: assignment to ‘void
> (*)
> > (int32_t *, const float *, size_t)’ {aka ‘void (*)(int *, const float *,
> > long unsigned int)’} from incompatible pointer type ‘void (*)(int32_t *,
> > const float *, unsigned int)’ {aka ‘void (*)(int *, const float *,
> unsigned
> > int)’} [- Wincompatible-pointer-types]
> >    39 |             c->float_to_fixed24 = ff_float_to_fixed24_rvv;
> >
> >       |                                 ^
> >
> > Also the Makefile precondition is inaccurate.
>
> Oh, and on C908, LMUL=8 is actually faster than LMUL=4. Generally
> speaking,
> you should maximise the LMUL unless there is a *specific* reason not to.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3725 bytes --]

From db03232a9ef1caab333b2fb5a1b684a68c7b0114 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 2207.2
    float_to_fixed24_rvv_f32: 696.2
---
 libavcodec/riscv/Makefile      |  3 ++-
 libavcodec/riscv/ac3dsp_init.c |  4 ++++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 2d0e6c19c8..b00db279c1 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
-                         riscv/ac3dsp_rvb.o
+                         riscv/ac3dsp_rvb.o \
+                         riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..118b2955ca 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stddef.h>
 #include <stdint.h>
 
 #include "config.h"
@@ -26,6 +27,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, size_t len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -34,5 +36,7 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
     if (flags & AV_CPU_FLAG_RVB_ADDR) {
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
+        if (flags & AV_CPU_FLAG_RVV_F32)
+            c->float_to_fixed24 = ff_float_to_fixed24_rvv;
     }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..b8d32c4677
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_float_to_fixed24_rvv, zve32f
+        li            t1, 1 << 24
+        fcvt.s.w      f0, t1
+1:
+        vsetvli       t0, a2, e32, m8, ta, ma
+        sub           a2, a2, t0
+        vle32.v       v0, (a1)
+        vfmul.vf      v0, v0, f0
+        vfcvt.x.f.v   v0, v0
+        sh2add        a1, t0, a1
+        vse32.v       v0, (a0)
+        sh2add        a0, t0, a0
+        bnez          a2, 1b
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24
  2023-12-01 19:50                   ` flow gg
@ 2023-12-01 20:16                     ` flow gg
  0 siblings, 0 replies; 17+ messages in thread
From: flow gg @ 2023-12-01 20:16 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1650 bytes --]

I forgot to modify the Makefile; I've made the changes in this reply.

flow gg <hlefthleft@gmail.com> 于2023年12月2日周六 03:50写道:

> Okay, changed and attached
>
> Rémi Denis-Courmont <remi@remlab.net> 于2023年12月2日周六 02:38写道:
>
>> Le perjantaina 1. joulukuuta 2023, 20.35.10 EET Rémi Denis-Courmont a
>> écrit :
>> > Le perjantaina 24. marraskuuta 2023, 0.39.39 EET flow gg a écrit :
>> > > Okay, changed
>> >
>> > src/libavcodec/riscv/ac3dsp_init.c: In function ‘ff_ac3dsp_init_riscv’:
>> > src/libavcodec/riscv/ac3dsp_init.c:39:33: warning: assignment to ‘void
>> (*)
>> > (int32_t *, const float *, size_t)’ {aka ‘void (*)(int *, const float *,
>> > long unsigned int)’} from incompatible pointer type ‘void (*)(int32_t *,
>> > const float *, unsigned int)’ {aka ‘void (*)(int *, const float *,
>> unsigned
>> > int)’} [-Wincompatible-pointer-types]
>> >    39 |             c->float_to_fixed24 = ff_float_to_fixed24_rvv;
>> >
>> >       |                                 ^
>> >
>> > Also the Makefile precondition is inaccurate.
>>
>> Oh, and on C908, LMUL=8 is actually faster than LMUL=4. Generally
>> speaking,
>> you should maximise the LMUL unless there is a *specific* reason not to.
>>
>> --
>> レミ・デニ-クールモン
>> http://www.remlab.net/
>>
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>

[-- Attachment #2: lavc-ac3dsp-R-V-V-float_to_fixed24.patch --]
[-- Type: text/x-patch, Size: 3664 bytes --]

From 2aa06d9d8d4853ac089a13ed6a758f9ecb0aa5a9 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Wed, 22 Nov 2023 14:57:29 +0800
Subject: [PATCH] lavc/ac3dsp: R-V V float_to_fixed24

c910
    float_to_fixed24_c: 2207.2
    float_to_fixed24_rvv_f32: 696.2
---
 libavcodec/riscv/Makefile      |  1 +
 libavcodec/riscv/ac3dsp_init.c |  4 ++++
 libavcodec/riscv/ac3dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)
 create mode 100644 libavcodec/riscv/ac3dsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 2d0e6c19c8..29a7fec455 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -2,6 +2,7 @@ OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o riscv/sbrdsp_init.o
 RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o riscv/sbrdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o \
                          riscv/ac3dsp_rvb.o
+RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 20f294f1de..118b2955ca 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stddef.h>
 #include <stdint.h>
 
 #include "config.h"
@@ -26,6 +27,7 @@
 #include "libavcodec/ac3dsp.h"
 
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, size_t len);
 
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
@@ -34,5 +36,7 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
     if (flags & AV_CPU_FLAG_RVB_ADDR) {
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
+        if (flags & AV_CPU_FLAG_RVV_F32)
+            c->float_to_fixed24 = ff_float_to_fixed24_rvv;
     }
 }
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
new file mode 100644
index 0000000000..b8d32c4677
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_float_to_fixed24_rvv, zve32f /* dst[i] = src[i] * 2^24 converted to int32; a0 = dst, a1 = src, a2 = len (see the C prototype in ac3dsp_init.c) */
+        li            t1, 1 << 24 /* integer constant 2^24 */
+        fcvt.s.w      f0, t1 /* f0 = 2^24 as a single-precision float (the scale factor) */
+1: /* strip-mined loop: each pass handles t0 elements */
+        vsetvli       t0, a2, e32, m8, ta, ma /* t0 = elements this pass; 32-bit elements, LMUL=8 (v0 names the group v0-v7) */
+        sub           a2, a2, t0 /* a2 = elements still to process after this pass */
+        vle32.v       v0, (a1) /* load t0 floats from src */
+        vfmul.vf      v0, v0, f0 /* multiply every element by 2^24 */
+        vfcvt.x.f.v   v0, v0 /* float -> signed 32-bit integer, using the dynamic rounding mode (frm) */
+        sh2add        a1, t0, a1 /* a1 += t0 * 4: advance src (Zba shift-and-add) */
+        vse32.v       v0, (a0) /* store t0 converted integers to dst */
+        sh2add        a0, t0, a0 /* a0 += t0 * 4: advance dst */
+        bnez          a2, 1b /* repeat while elements remain; the test is at the bottom, so the body runs at least once */
+
+        ret
+endfunc
-- 
2.43.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2023-12-01 20:16 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-22 12:00 [FFmpeg-devel] [PATCH] ac3dsp: RISC-V V float_to_fixed24 flow gg
2023-11-22 13:40 ` Rémi Denis-Courmont
2023-11-22 14:30   ` flow gg
2023-11-22 14:35     ` flow gg
2023-11-22 14:51     ` Rémi Denis-Courmont
2023-11-22 16:37       ` flow gg
2023-11-22 16:49         ` James Almer
2023-11-22 23:17         ` flow gg
2023-11-23 17:08           ` Rémi Denis-Courmont
2023-11-23 22:39             ` flow gg
2023-12-01 18:35               ` Rémi Denis-Courmont
2023-12-01 18:38                 ` Rémi Denis-Courmont
2023-12-01 19:50                   ` flow gg
2023-12-01 20:16                     ` flow gg
2023-11-22 17:18     ` James Almer
2023-11-22 17:34       ` flow gg
2023-11-23  7:11 ` flow gg

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git