Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 01/31] lavu/cpu: detect RISC-V base extensions
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 02/31] lavu/riscv: initial common header for assembler macros remi
                   ` (30 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

This introduces compile-time and run-time CPU detection on RISC-V. In
practice, I doubt that FFmpeg will ever see a RISC-V CPU without all of
I, F and D extensions, and if it does, it probably won't have run-time
detection. So the flags are essentially always set.

But as things stand, checkasm wants them that way. Compare the ARMV8
flag on AArch64. We are nowhere near running short on CPU flag bits.
---
 libavutil/cpu.c           |  9 ++++++
 libavutil/cpu.h           |  5 +++
 libavutil/cpu_internal.h  |  3 ++
 libavutil/riscv/Makefile  |  1 +
 libavutil/riscv/cpu.c     | 66 +++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c |  4 +++
 6 files changed, 88 insertions(+)
 create mode 100644 libavutil/riscv/Makefile
 create mode 100644 libavutil/riscv/cpu.c

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 0035e927a5..78e92a1bf6 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -62,6 +62,8 @@ static int get_cpu_flags(void)
     return ff_get_cpu_flags_arm();
 #elif ARCH_PPC
     return ff_get_cpu_flags_ppc();
+#elif ARCH_RISCV
+    return ff_get_cpu_flags_riscv();
 #elif ARCH_X86
     return ff_get_cpu_flags_x86();
 #elif ARCH_LOONGARCH
@@ -95,6 +97,9 @@ void av_force_cpu_flags(int arg){
         arg |= AV_CPU_FLAG_MMX;
     }
 
+#if ARCH_RISCV
+    arg = ff_force_cpu_flags_riscv(arg);
+#endif
     atomic_store_explicit(&cpu_flags, arg, memory_order_relaxed);
 }
 
@@ -178,6 +183,10 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
 #elif ARCH_LOONGARCH
         { "lsx",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX      },    .unit = "flags" },
         { "lasx",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX     },    .unit = "flags" },
+#elif ARCH_RISCV
+        { "rvi",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVI      },    .unit = "flags" },
+        { "rvf",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVF      },    .unit = "flags" },
+        { "rvd",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVD      },    .unit = "flags" },
 #endif
         { NULL },
     };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9711e574c5..9aae2ccc7a 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -78,6 +78,11 @@
 #define AV_CPU_FLAG_LSX          (1 << 0)
 #define AV_CPU_FLAG_LASX         (1 << 1)
 
+// RISC-V extensions
+#define AV_CPU_FLAG_RVI          (1 << 0) ///< I (full GPR bank)
+#define AV_CPU_FLAG_RVF          (1 << 1) ///< F (single precision FP)
+#define AV_CPU_FLAG_RVD          (1 << 2) ///< D (double precision FP)
+
 /**
  * Return the flags which specify extensions supported by the CPU.
  * The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 650d47fc96..9ddf11488b 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -48,9 +48,12 @@ int ff_get_cpu_flags_mips(void);
 int ff_get_cpu_flags_aarch64(void);
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_riscv(void);
 int ff_get_cpu_flags_x86(void);
 int ff_get_cpu_flags_loongarch(void);
 
+int ff_force_cpu_flags_riscv(int flags);
+
 size_t ff_get_cpu_max_align_mips(void);
 size_t ff_get_cpu_max_align_aarch64(void);
 size_t ff_get_cpu_max_align_arm(void);
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
new file mode 100644
index 0000000000..1f818043dc
--- /dev/null
+++ b/libavutil/riscv/Makefile
@@ -0,0 +1 @@
+OBJS += riscv/cpu.o
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
new file mode 100644
index 0000000000..fec1f7822a
--- /dev/null
+++ b/libavutil/riscv/cpu.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "libavutil/log.h"
+#include "config.h"
+
+#if HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
+#endif
+
+int ff_force_cpu_flags_riscv(int flags)
+{
+    if ((flags & AV_CPU_FLAG_RVD) && !(flags & AV_CPU_FLAG_RVF)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "F");
+        flags |= AV_CPU_FLAG_RVF;
+    }
+
+    return flags;
+}
+
+int ff_get_cpu_flags_riscv(void)
+{
+    int ret = 0;
+#if HAVE_GETAUXVAL
+    const unsigned long hwcap = getauxval(AT_HWCAP);
+
+    if (hwcap & HWCAP_RV('I'))
+        ret |= AV_CPU_FLAG_RVI;
+    if (hwcap & HWCAP_RV('F'))
+        ret |= AV_CPU_FLAG_RVF;
+    if (hwcap & HWCAP_RV('D'))
+        ret |= AV_CPU_FLAG_RVD;
+#endif
+
+#ifdef __riscv_i
+    ret |= AV_CPU_FLAG_RVI;
+#endif
+#if defined (__riscv_flen) && (__riscv_flen >= 32)
+    ret |= AV_CPU_FLAG_RVF;
+#if (__riscv_flen >= 64)
+    ret |= AV_CPU_FLAG_RVD;
+#endif
+#endif
+
+    return ret;
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 8fd9bba0b0..e1135a84ac 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -232,6 +232,10 @@ static const struct {
     { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
     { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
     { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
+#elif ARCH_RISCV
+    { "RVI",      "rvi",      AV_CPU_FLAG_RVI },
+    { "RVF",      "rvf",      AV_CPU_FLAG_RVF },
+    { "RVD",      "rvd",      AV_CPU_FLAG_RVD },
 #elif ARCH_MIPS
     { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
     { "MSA",      "msa",      AV_CPU_FLAG_MSA },
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 02/31] lavu/riscv: initial common header for assembler macros
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 01/31] lavu/cpu: detect RISC-V base extensions remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 03/31] lavc/audiodsp: RISC-V F vector_clipf remi
                   ` (29 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/asm.S | 77 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 libavutil/riscv/asm.S

diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
new file mode 100644
index 0000000000..dbd97f40a4
--- /dev/null
+++ b/libavutil/riscv/asm.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ * Loosely based on earlier work copyrighted by Måns Rullgård, 2008.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#if defined (__riscv_float_abi_soft)
+#define NOHWF
+#define NOHWD
+#define HWF   #
+#define HWD   #
+#elif defined (__riscv_float_abi_single)
+#define NOHWF #
+#define NOHWD
+#define HWF
+#define HWD   #
+#else
+#define NOHWF #
+#define NOHWD #
+#define HWF
+#define HWD
+#endif
+
+        .macro func sym, ext=
+            .text
+            .align 2
+
+            .option push
+            .ifnb \ext
+            .option arch, +\ext
+            .endif
+
+            .global \sym
+            .hidden \sym
+            .type   \sym, %function
+            \sym:
+
+            .macro endfunc
+                .size   \sym, . - \sym
+                .option pop
+                .previous
+                .purgem endfunc
+            .endm
+        .endm
+
+        .macro const sym, align=3, relocate=0
+            .if \relocate
+                .pushsection .data.rel.ro
+            .else
+                .pushsection .rodata
+            .endif
+            .align \align
+            \sym:
+
+            .macro endconst
+                .size  \sym, . - \sym
+                .popsection
+                .purgem endconst
+            .endm
+        .endm
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 03/31] lavc/audiodsp: RISC-V F vector_clipf
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 01/31] lavu/cpu: detect RISC-V base extensions remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 02/31] lavu/riscv: initial common header for assembler macros remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 04/31] lavc/pixblockdsp: RISC-V I get_pixels remi
                   ` (28 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

RV64G supports MIN & MAX instructions natively only on floating point
registers, not general purpose ones. The later would require the Zbb
extension. Due to that, it is actually faster to perform the clipping
"properly" in FPU.

Benchmarks on SiFive U74-MC (courtesy of Shanghai StarFive Tech):
audiodsp.vector_clipf_c: 29551.5
audiodsp.vector_clipf_rvf: 17871.0

Also tried unrolling with 2 or 8 elements but it gets worse either way.
---
 libavcodec/audiodsp.c            |  2 ++
 libavcodec/audiodsp.h            |  1 +
 libavcodec/riscv/Makefile        |  2 ++
 libavcodec/riscv/audiodsp_init.c | 33 +++++++++++++++++++++
 libavcodec/riscv/audiodsp_rvf.S  | 49 ++++++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+)
 create mode 100644 libavcodec/riscv/Makefile
 create mode 100644 libavcodec/riscv/audiodsp_init.c
 create mode 100644 libavcodec/riscv/audiodsp_rvf.S

diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c
index ff43e87dce..eba6e809fd 100644
--- a/libavcodec/audiodsp.c
+++ b/libavcodec/audiodsp.c
@@ -113,6 +113,8 @@ av_cold void ff_audiodsp_init(AudioDSPContext *c)
     ff_audiodsp_init_arm(c);
 #elif ARCH_PPC
     ff_audiodsp_init_ppc(c);
+#elif ARCH_RISCV
+    ff_audiodsp_init_riscv(c);
 #elif ARCH_X86
     ff_audiodsp_init_x86(c);
 #endif
diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h
index aa6fa7898b..485b512839 100644
--- a/libavcodec/audiodsp.h
+++ b/libavcodec/audiodsp.h
@@ -55,6 +55,7 @@ typedef struct AudioDSPContext {
 void ff_audiodsp_init(AudioDSPContext *c);
 void ff_audiodsp_init_arm(AudioDSPContext *c);
 void ff_audiodsp_init_ppc(AudioDSPContext *c);
+void ff_audiodsp_init_riscv(AudioDSPContext *c);
 void ff_audiodsp_init_x86(AudioDSPContext *c);
 
 #endif /* AVCODEC_AUDIODSP_H */
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
new file mode 100644
index 0000000000..414a9e9bd8
--- /dev/null
+++ b/libavcodec/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
+                           riscv/audiodsp_rvf.o
diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
new file mode 100644
index 0000000000..c5842815d6
--- /dev/null
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/audiodsp.h"
+
+void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max);
+
+av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
+{
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_RVF)
+        c->vector_clipf = ff_vector_clipf_rvf;
+}
diff --git a/libavcodec/riscv/audiodsp_rvf.S b/libavcodec/riscv/audiodsp_rvf.S
new file mode 100644
index 0000000000..2ec8a11691
--- /dev/null
+++ b/libavcodec/riscv/audiodsp_rvf.S
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_vector_clipf_rvf, f
+NOHWF   fmv.w.x fa0, a3
+NOHWF   fmv.w.x fa1, a4
+1:
+        flw     ft0,   (a1)
+        flw     ft1,  4(a1)
+        fmax.s  ft0, ft0, fa0
+        flw     ft2,  8(a1)
+        fmax.s  ft1, ft1, fa0
+        flw     ft3, 12(a1)
+        fmax.s  ft2, ft2, fa0
+        addi    a2, a2, -4
+        fmax.s  ft3, ft3, fa0
+        addi    a1, a1, 16
+        fmin.s  ft0, ft0, fa1
+        fmin.s  ft1, ft1, fa1
+        fsw     ft0,   (a0)
+        fmin.s  ft2, ft2, fa1
+        fsw     ft1,  4(a0)
+        fmin.s  ft3, ft3, fa1
+        fsw     ft2,  8(a0)
+        fsw     ft3, 12(a0)
+        addi    a0, a0, 16
+        bnez    a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 04/31] lavc/pixblockdsp: RISC-V I get_pixels
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (2 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 03/31] lavc/audiodsp: RISC-V F vector_clipf remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension remi
                   ` (27 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

Benchmarks on SiFive U74-MC (courtesy of Shanghai StarFive Tech):
get_pixels_c: 180.0
get_pixels_rvi: 136.7
---
 libavcodec/pixblockdsp.c            |  2 +
 libavcodec/pixblockdsp.h            |  2 +
 libavcodec/riscv/Makefile           |  2 +
 libavcodec/riscv/pixblockdsp_init.c | 45 ++++++++++++++++++++++
 libavcodec/riscv/pixblockdsp_rvi.S  | 59 +++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+)
 create mode 100644 libavcodec/riscv/pixblockdsp_init.c
 create mode 100644 libavcodec/riscv/pixblockdsp_rvi.S

diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 17c487da1e..4294075cee 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -109,6 +109,8 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
     ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
 #elif ARCH_PPC
     ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
+#elif ARCH_RISCV
+    ff_pixblockdsp_init_riscv(c, avctx, high_bit_depth);
 #elif ARCH_X86
     ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
 #elif ARCH_MIPS
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 07c2ec4f40..9b002aa3d6 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -52,6 +52,8 @@ void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
                              unsigned high_bit_depth);
 void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
                              unsigned high_bit_depth);
+void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, AVCodecContext *avctx,
+                               unsigned high_bit_depth);
 void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
                              unsigned high_bit_depth);
 void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 414a9e9bd8..da07f1fe96 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,2 +1,4 @@
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
                            riscv/audiodsp_rvf.o
+OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
+                              riscv/pixblockdsp_rvi.o
diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c
new file mode 100644
index 0000000000..04bf52649f
--- /dev/null
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/pixblockdsp.h"
+
+void ff_get_pixels_8_rvi(int16_t *block, const uint8_t *pixels,
+                         ptrdiff_t stride);
+void ff_get_pixels_16_rvi(int16_t *block, const uint8_t *pixels,
+                          ptrdiff_t stride);
+
+av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
+                                       AVCodecContext *avctx,
+                                       unsigned high_bit_depth)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (cpu_flags & AV_CPU_FLAG_RVI) {
+        if (high_bit_depth)
+            c->get_pixels = ff_get_pixels_16_rvi;
+        else
+            c->get_pixels = ff_get_pixels_8_rvi;
+    }
+}
diff --git a/libavcodec/riscv/pixblockdsp_rvi.S b/libavcodec/riscv/pixblockdsp_rvi.S
new file mode 100644
index 0000000000..93ece4405e
--- /dev/null
+++ b/libavcodec/riscv/pixblockdsp_rvi.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "../libavutil/riscv/asm.S"
+
+func ff_get_pixels_8_rvi
+.irp row, 0, 1, 2, 3, 4, 5, 6, 7
+        ld      t0,    (a1)
+        add     a1, a1, a2
+        sd      zero, ((\row * 16) + 0)(a0)
+        addi    t6, t6, -1
+        sd      zero, ((\row * 16) + 8)(a0)
+        srli    t1, t0, 8
+        sb      t0, ((\row * 16) + 0)(a0)
+        srli    t2, t0, 16
+        sb      t1, ((\row * 16) + 2)(a0)
+        srli    t3, t0, 24
+        sb      t2, ((\row * 16) + 4)(a0)
+        srli    t4, t0, 32
+        sb      t3, ((\row * 16) + 6)(a0)
+        srli    t1, t0, 40
+        sb      t4, ((\row * 16) + 8)(a0)
+        srli    t2, t0, 48
+        sb      t1, ((\row * 16) + 10)(a0)
+        srli    t3, t0, 56
+        sb      t2, ((\row * 16) + 12)(a0)
+        sb      t3, ((\row * 16) + 14)(a0)
+.endr
+        ret
+endfunc
+
+func ff_get_pixels_16_rvi
+.irp row, 0, 1, 2, 3, 4, 5, 6, 7
+        ld      t0, 0(a1)
+        ld      t1, 8(a1)
+        add     a1, a1, a2
+        sd      t0, ((\row * 16) + 0)(a0)
+        sd      t1, ((\row * 16) + 8)(a0)
+.endr
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (3 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 04/31] lavc/pixblockdsp: RISC-V I get_pixels remi
@ 2022-09-25 14:25 ` remi
  2022-09-26  6:51   ` Lynne
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 06/31] configure: probe " remi
                   ` (26 subsequent siblings)
  31 siblings, 1 reply; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

RVV defines a total of 12 different extensions, including:

- 5 different instruction subsets:
  - Zve32x: 8-, 16- and 32-bit integers,
  - Zve32f: Zve32x plus single precision floats,
  - Zve64x: Zve32x plus 64-bit integers,
  - Zve64f: Zve32f plus Zve64x,
  - Zve64d: Zve64f plus double precision floats.

- 6 different vector lengths:
  - Zvl32b (embedded only),
  - Zvl64b (embedded only),
  - Zvl128b,
  - Zvl256b,
  - Zvl512b,
  - Zvl1024b,

- and the V extension proper: equivalent to Zve64f and Zvl128b.

In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type sets: up-to-32-bit ints (ZVE32X), floats (ZV32F),
64-bit ints (ZV64X) and doubles (ZVE64D).

Whence the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
---
 libavutil/cpu.c           |  4 ++++
 libavutil/cpu.h           |  4 ++++
 libavutil/riscv/cpu.c     | 46 ++++++++++++++++++++++++++++++++++++++-
 tests/checkasm/checkasm.c | 10 ++++++---
 4 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 78e92a1bf6..58ae4858b4 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -187,6 +187,10 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "rvi",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVI      },    .unit = "flags" },
         { "rvf",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVF      },    .unit = "flags" },
         { "rvd",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVD      },    .unit = "flags" },
+        { "rvve32",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE32X},    .unit = "flags" },
+        { "rvvf",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE32F},    .unit = "flags" },
+        { "rvve64",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE64X},    .unit = "flags" },
+        { "rvv",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE64D},    .unit = "flags" },
 #endif
         { NULL },
     };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9aae2ccc7a..00698e30ef 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -82,6 +82,10 @@
 #define AV_CPU_FLAG_RVI          (1 << 0) ///< I (full GPR bank)
 #define AV_CPU_FLAG_RVF          (1 << 1) ///< F (single precision FP)
 #define AV_CPU_FLAG_RVD          (1 << 2) ///< D (double precision FP)
+#define AV_CPU_FLAG_RV_ZVE32X    (1 << 3) ///< Vectors of 8/16/32-bit int's */
+#define AV_CPU_FLAG_RV_ZVE32F    (1 << 4) ///< Vectors of float's */
+#define AV_CPU_FLAG_RV_ZVE64X    (1 << 5) ///< Vectors of 64-bit int's */
+#define AV_CPU_FLAG_RV_ZVE64D    (1 << 6) ///< Vectors of double's
 
 /**
  * Return the flags which specify extensions supported by the CPU.
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
index fec1f7822a..6f862635b3 100644
--- a/libavutil/riscv/cpu.c
+++ b/libavutil/riscv/cpu.c
@@ -30,7 +30,32 @@
 
 int ff_force_cpu_flags_riscv(int flags)
 {
-    if ((flags & AV_CPU_FLAG_RVD) && !(flags & AV_CPU_FLAG_RVF)) {
+    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE64X)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
+               "_ZVE64X");
+        flags |= AV_CPU_FLAG_RV_ZVE64X;
+    }
+
+    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE32F)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
+               "_ZVE32F");
+        flags |= AV_CPU_FLAG_RV_ZVE32F;
+    }
+
+    if ((flags & (AV_CPU_FLAG_RV_ZVE64X | AV_CPU_FLAG_RV_ZVE32F))
+        && !(flags & AV_CPU_FLAG_RV_ZVE32X)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
+               "_ZVE32X");
+        flags |= AV_CPU_FLAG_RV_ZVE32X;
+    }
+
+    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RVD)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "D");
+        flags |= AV_CPU_FLAG_RVD;
+    }
+
+    if ((flags & (AV_CPU_FLAG_RVD | AV_CPU_FLAG_RV_ZVE32F))
+        && !(flags & AV_CPU_FLAG_RVF)) {
         av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "F");
         flags |= AV_CPU_FLAG_RVF;
     }
@@ -50,6 +75,11 @@ int ff_get_cpu_flags_riscv(void)
         ret |= AV_CPU_FLAG_RVF;
     if (hwcap & HWCAP_RV('D'))
         ret |= AV_CPU_FLAG_RVD;
+
+    /* The V extension implies all Zve* functional subsets */
+    if (hwcap & HWCAP_RV('V'))
+        ret |= AV_CPU_FLAG_RV_ZVE32X | AV_CPU_FLAG_RV_ZVE64X
+             | AV_CPU_FLAG_RV_ZVE32F | AV_CPU_FLAG_RV_ZVE64D;
 #endif
 
 #ifdef __riscv_i
@@ -60,6 +90,20 @@ int ff_get_cpu_flags_riscv(void)
 #if (__riscv_flen >= 64)
     ret |= AV_CPU_FLAG_RVD;
 #endif
+#endif
+
+    /* If RV-V is enabled statically at compile-time, check the details. */
+#ifdef __riscv_vectors
+    ret |= AV_CPU_FLAG_RV_ZVE32X;
+#if __riscv_v_elen >= 64
+    ret |= AV_CPU_FLAG_RV_ZVE64X;
+#endif
+#if __riscv_v_elen_fp >= 32
+    ret |= AV_CPU_FLAG_RV_ZVE32F;
+#if __riscv_v_elen_fp >= 64
+    ret |= AV_CPU_FLAG_RV_ZVE64F;
+#endif
+#endif
 #endif
 
     return ret;
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e1135a84ac..f7d108e8ea 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -233,9 +233,13 @@ static const struct {
     { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
     { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
 #elif ARCH_RISCV
-    { "RVI",      "rvi",      AV_CPU_FLAG_RVI },
-    { "RVF",      "rvf",      AV_CPU_FLAG_RVF },
-    { "RVD",      "rvd",      AV_CPU_FLAG_RVD },
+    { "RVI",        "rvi",       AV_CPU_FLAG_RVI },
+    { "RVF",        "rvf",       AV_CPU_FLAG_RVF },
+    { "RVD",        "rvd",       AV_CPU_FLAG_RVD },
+    { "RV_Zve32x",  "rv_zve32x", AV_CPU_FLAG_RV_ZVE32X },
+    { "RV_Zve32f",  "rv_zve32f", AV_CPU_FLAG_RV_ZVE32F },
+    { "RV_Zve64x",  "rv_zve64x", AV_CPU_FLAG_RV_ZVE64X },
+    { "RV_Zve64d",  "rv_zve64d", AV_CPU_FLAG_RV_ZVE64D },
 #elif ARCH_MIPS
     { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
     { "MSA",      "msa",      AV_CPU_FLAG_MSA },
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 06/31] configure: probe RISC-V Vector extension
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (4 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 07/31] lavu/riscv: fallback macros for SH{1, 2, 3}ADD remi
                   ` (25 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 Makefile         |  2 +-
 configure        | 15 +++++++++++++++
 ffbuild/arch.mak |  2 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 61f79e27ae..1fb742f390 100644
--- a/Makefile
+++ b/Makefile
@@ -91,7 +91,7 @@ ffbuild/.config: $(CONFIGURABLE_COMPONENTS)
 SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS               \
                HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS            \
                ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS     \
-               ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS                \
+               ALTIVEC-OBJS VSX-OBJS RVV-OBJS MMX-OBJS X86ASM-OBJS       \
                MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS         \
                MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS SHLIBOBJS       \
                STLIBOBJS HOSTOBJS TESTOBJS
diff --git a/configure b/configure
index c157338b1f..a41ebda6d4 100755
--- a/configure
+++ b/configure
@@ -462,6 +462,7 @@ Optimization options (experts only):
   --disable-mmi            disable Loongson MMI optimizations
   --disable-lsx            disable Loongson LSX optimizations
   --disable-lasx           disable Loongson LASX optimizations
+  --disable-rvv            disable RISC-V Vector optimizations
   --disable-fast-unaligned consider unaligned accesses slow
 
 Developer options (useful when working on FFmpeg itself):
@@ -2126,6 +2127,10 @@ ARCH_EXT_LIST_PPC="
     vsx
 "
 
+ARCH_EXT_LIST_RISCV="
+    rvv
+"
+
 ARCH_EXT_LIST_X86="
     $ARCH_EXT_LIST_X86_SIMD
     cpunop
@@ -2135,6 +2140,7 @@ ARCH_EXT_LIST_X86="
 ARCH_EXT_LIST="
     $ARCH_EXT_LIST_ARM
     $ARCH_EXT_LIST_PPC
+    $ARCH_EXT_LIST_RISCV
     $ARCH_EXT_LIST_X86
     $ARCH_EXT_LIST_MIPS
     $ARCH_EXT_LIST_LOONGSON
@@ -2642,6 +2648,8 @@ ppc4xx_deps="ppc"
 vsx_deps="altivec"
 power8_deps="vsx"
 
+rvv_deps="riscv"
+
 loongson2_deps="mips"
 loongson3_deps="mips"
 mmi_deps_any="loongson2 loongson3"
@@ -6110,6 +6118,10 @@ elif enabled ppc; then
         check_cpp_condition power8 "altivec.h" "defined(_ARCH_PWR8)"
     fi
 
+elif enabled riscv; then
+
+    enabled rvv && check_inline_asm rvv '".option arch, +v\nvsetivli zero, 0, e8, m1, ta, ma"'
+
 elif enabled x86; then
 
     check_builtin rdtsc    intrin.h   "__rdtsc()"
@@ -7596,6 +7608,9 @@ if enabled loongarch; then
     echo "LSX enabled               ${lsx-no}"
     echo "LASX enabled              ${lasx-no}"
 fi
+if enabled riscv; then
+    echo "RISC-V Vector enabled     ${rvv-no}"
+fi
 echo "debug symbols             ${debug-no}"
 echo "strip symbols             ${stripping-no}"
 echo "optimize for size         ${small-no}"
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index 997e31e85e..39d76ee152 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -15,5 +15,7 @@ OBJS-$(HAVE_LASX)      += $(LASX-OBJS)       $(LASX-OBJS-yes)
 OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
 OBJS-$(HAVE_VSX)     += $(VSX-OBJS) $(VSX-OBJS-yes)
 
+OBJS-$(HAVE_RVV)     += $(RVV-OBJS)     $(RVV-OBJS-yes)
+
 OBJS-$(HAVE_MMX)     += $(MMX-OBJS)     $(MMX-OBJS-yes)
 OBJS-$(HAVE_X86ASM)  += $(X86ASM-OBJS)  $(X86ASM-OBJS-yes)
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions
@ 2022-09-25 14:25 Rémi Denis-Courmont
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 01/31] lavu/cpu: detect RISC-V base extensions remi
                   ` (31 more replies)
  0 siblings, 32 replies; 40+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

   Hello,

Changes since version version 5:
- Use shifted-add instructions where applicable (pointer arithmetic) to
   minimise scalar operations to the absolute minimum.
- Add AAC PS DSP stereo interpolation [0].

The following changes since commit 7cd252ee41ddc693fa140c5b5eb472b6d6f27f9e:

  avcodec/parser: Remove declaration of inexistent function (2022-09-24 20:25:19 +0200)

are available in the Git repository at:

  https://git.remlab.net/git/ffmpeg.git rv-cpu

for you to fetch changes up to 2f7afca1e160b3bb1bc761ac3eb5e08a0852120b:

  lavc/aacpsdsp: RISC-V V stereo_interpolate[0] (2022-09-25 17:19:09 +0300)

----------------------------------------------------------------
Rémi Denis-Courmont (31):
      lavu/cpu: detect RISC-V base extensions
      lavu/riscv: initial common header for assembler macros
      lavc/audiodsp: RISC-V F vector_clipf
      lavc/pixblockdsp: RISC-V I get_pixels
      lavu/cpu: CPU flags for the RISC-V Vector extension
      configure: probe RISC-V Vector extension
      lavu/riscv: fallback macros for SH{1,2,3}ADD
      lavu/floatdsp: RISC-V V vector_fmul_scalar
      lavu/floatdsp: RISC-V V vector_dmul_scalar
      lavu/floatdsp: RISC-V V vector_fmul
      lavu/floatdsp: RISC-V V vector_dmul
      lavu/floatdsp: RISC-V V vector_fmac_scalar
      lavu/floatdsp: RISC-V V vector_dmac_scalar
      lavu/floatdsp: RISC-V V vector_fmul_add
      lavu/floatdsp: RISC-V V butterflies_float
      lavu/floatdsp: RISC-V V vector_fmul_reverse
      lavu/floatdsp: RISC-V V vector_fmul_window
      lavu/floatdsp: RISC-V V scalarproduct_float
      lavu/fixeddsp: RISC-V V butterflies_fixed
      lavc/audiodsp: RISC-V V vector_clip_int32
      lavc/audiodsp: RISC-V V vector_clipf
      lavc/audiodsp: RISC-V V scalarproduct_int16
      lavc/fmtconvert: RISC-V V int32_to_float_fmul_scalar
      lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8
      lavc/vorbisdsp: RISC-V V inverse_coupling
      lavc/aacpsdsp: RISC-V V add_squares
      lavc/aacpsdsp: RISC-V V mul_pair_single
      lavc/aacpsdsp: RISC-V V hybrid_analysis
      lavc/aacpsdsp: RISC-V V hybrid_analysis_ileave
      lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint
      lavc/aacpsdsp: RISC-V V stereo_interpolate[0]

 Makefile                            |   2 +-
 configure                           |  15 ++
 ffbuild/arch.mak                    |   2 +
 libavcodec/aacpsdsp.h               |   1 +
 libavcodec/aacpsdsp_template.c      |   2 +
 libavcodec/audiodsp.c               |   2 +
 libavcodec/audiodsp.h               |   1 +
 libavcodec/fmtconvert.c             |   2 +
 libavcodec/fmtconvert.h             |   1 +
 libavcodec/pixblockdsp.c            |   2 +
 libavcodec/pixblockdsp.h            |   2 +
 libavcodec/riscv/Makefile           |  11 ++
 libavcodec/riscv/aacpsdsp_init.c    |  57 +++++++
 libavcodec/riscv/aacpsdsp_rvv.S     | 286 ++++++++++++++++++++++++++++++++++++
 libavcodec/riscv/audiodsp_init.c    |  49 ++++++
 libavcodec/riscv/audiodsp_rvf.S     |  49 ++++++
 libavcodec/riscv/audiodsp_rvv.S     |  72 +++++++++
 libavcodec/riscv/fmtconvert_init.c  |  44 ++++++
 libavcodec/riscv/fmtconvert_rvv.S   |  67 +++++++++
 libavcodec/riscv/pixblockdsp_init.c |  45 ++++++
 libavcodec/riscv/pixblockdsp_rvi.S  |  59 ++++++++
 libavcodec/riscv/vorbisdsp_init.c   |  37 +++++
 libavcodec/riscv/vorbisdsp_rvv.S    |  44 ++++++
 libavcodec/vorbisdsp.c              |   2 +
 libavcodec/vorbisdsp.h              |   1 +
 libavutil/cpu.c                     |  13 ++
 libavutil/cpu.h                     |   9 ++
 libavutil/cpu_internal.h            |   3 +
 libavutil/fixed_dsp.c               |   4 +-
 libavutil/fixed_dsp.h               |   1 +
 libavutil/float_dsp.c               |   2 +
 libavutil/float_dsp.h               |   1 +
 libavutil/riscv/Makefile            |   5 +
 libavutil/riscv/asm.S               |  96 ++++++++++++
 libavutil/riscv/cpu.c               | 110 ++++++++++++++
 libavutil/riscv/fixed_dsp_init.c    |  38 +++++
 libavutil/riscv/fixed_dsp_rvv.S     |  40 +++++
 libavutil/riscv/float_dsp_init.c    |  72 +++++++++
 libavutil/riscv/float_dsp_rvv.S     | 238 ++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c           |   8 +
 40 files changed, 1493 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/riscv/Makefile
 create mode 100644 libavcodec/riscv/aacpsdsp_init.c
 create mode 100644 libavcodec/riscv/aacpsdsp_rvv.S
 create mode 100644 libavcodec/riscv/audiodsp_init.c
 create mode 100644 libavcodec/riscv/audiodsp_rvf.S
 create mode 100644 libavcodec/riscv/audiodsp_rvv.S
 create mode 100644 libavcodec/riscv/fmtconvert_init.c
 create mode 100644 libavcodec/riscv/fmtconvert_rvv.S
 create mode 100644 libavcodec/riscv/pixblockdsp_init.c
 create mode 100644 libavcodec/riscv/pixblockdsp_rvi.S
 create mode 100644 libavcodec/riscv/vorbisdsp_init.c
 create mode 100644 libavcodec/riscv/vorbisdsp_rvv.S
 create mode 100644 libavutil/riscv/Makefile
 create mode 100644 libavutil/riscv/asm.S
 create mode 100644 libavutil/riscv/cpu.c
 create mode 100644 libavutil/riscv/fixed_dsp_init.c
 create mode 100644 libavutil/riscv/fixed_dsp_rvv.S
 create mode 100644 libavutil/riscv/float_dsp_init.c
 create mode 100644 libavutil/riscv/float_dsp_rvv.S

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 07/31] lavu/riscv: fallback macros for SH{1, 2, 3}ADD
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (5 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 06/31] configure: probe " remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 08/31] lavu/floatdsp: RISC-V V vector_fmul_scalar remi
                   ` (24 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

Those mnemonics require the very latest binutils release at the time of
writing. These macros provide seamless backward compatibility.
---
 libavutil/riscv/asm.S | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index dbd97f40a4..de5e1ad0a6 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -75,3 +75,22 @@
                 .purgem endconst
             .endm
         .endm
+
+#if !defined (__riscv_zba)
+        /* SH{1,2,3}ADD definitions for pre-Zba assemblers */
+        .macro  shnadd n, rd, rs1, rs2
+        .insn r OP, 2 * \n, 16, \rd, \rs1, \rs2
+        .endm
+
+        .macro  sh1add rd, rs1, rs2
+        shnadd  1, \rd, \rs1, \rs2
+        .endm
+
+        .macro  sh2add rd, rs1, rs2
+        shnadd  2, \rd, \rs1, \rs2
+        .endm
+
+        .macro  sh3add rd, rs1, rs2
+        shnadd  3, \rd, \rs1, \rs2
+        .endm
+#endif
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 08/31] lavu/floatdsp: RISC-V V vector_fmul_scalar
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (6 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 07/31] lavu/riscv: fallback macros for SH{1, 2, 3}ADD remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar remi
                   ` (23 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

This is based on existing code from the VLC git tree with two minor
changes to account for the different function prototypes.
---
 libavutil/float_dsp.c            |  2 ++
 libavutil/float_dsp.h            |  1 +
 libavutil/riscv/Makefile         |  4 +++-
 libavutil/riscv/float_dsp_init.c | 39 ++++++++++++++++++++++++++++++++
 libavutil/riscv/float_dsp_rvv.S  | 39 ++++++++++++++++++++++++++++++++
 5 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 libavutil/riscv/float_dsp_init.c
 create mode 100644 libavutil/riscv/float_dsp_rvv.S

diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 8676c8b0f8..742dd679d2 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -156,6 +156,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
     ff_float_dsp_init_arm(fdsp);
 #elif ARCH_PPC
     ff_float_dsp_init_ppc(fdsp, bit_exact);
+#elif ARCH_RISCV
+    ff_float_dsp_init_riscv(fdsp);
 #elif ARCH_X86
     ff_float_dsp_init_x86(fdsp);
 #elif ARCH_MIPS
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 9c664592bd..7cad9fc622 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -205,6 +205,7 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
+void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
 
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 1f818043dc..89a8d0d990 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1 +1,3 @@
-OBJS += riscv/cpu.o
+OBJS +=     riscv/float_dsp_init.o \
+            riscv/cpu.o
+RVV-OBJS += riscv/float_dsp_rvv.o
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
new file mode 100644
index 0000000000..de567c50d2
--- /dev/null
+++ b/libavutil/riscv/float_dsp_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/float_dsp.h"
+
+void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
+                                int len);
+
+av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
+{
+#if HAVE_RVV
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+#endif
+}
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
new file mode 100644
index 0000000000..50cb1fa90f
--- /dev/null
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+// (a0) = (a1) * fa0 [0..a2-1]
+func ff_vector_fmul_scalar_rvv, zve32f
+NOHWF   fmv.w.x  fa0, a2
+NOHWF   mv       a2, a3
+1:
+        vsetvli  t0, a2, e32, m1, ta, ma
+        vle32.v  v16, (a1)
+        sub      a2, a2, t0
+        vfmul.vf v16, v16, fa0
+        sh2add   a1, t0, a1
+        vse32.v  v16, (a0)
+        sh2add   a0, t0, a0
+        bnez     a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (7 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 08/31] lavu/floatdsp: RISC-V V vector_fmul_scalar remi
@ 2022-09-25 14:25 ` remi
  2022-09-26  6:53   ` Lynne
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 10/31] lavu/floatdsp: RISC-V V vector_fmul remi
                   ` (22 subsequent siblings)
  31 siblings, 1 reply; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  9 ++++++++-
 libavutil/riscv/float_dsp_rvv.S  | 17 +++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index de567c50d2..b829c0f736 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -28,12 +28,19 @@
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 
+void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
+                                int len);
+
 av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+
+        if (flags & AV_CPU_FLAG_RV_ZVE64D)
+            fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+    }
 #endif
 }
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 50cb1fa90f..17dda471b4 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -37,3 +37,20 @@ NOHWF   mv       a2, a3
 
         ret
 endfunc
+
+// (a0) = (a1) * fa0 [0..a2-1]
+func ff_vector_dmul_scalar_rvv, zve64d
+NOHWD   fmv.d.x  fa0, a2
+NOHWD   mv       a2, a3
+1:
+        vsetvli  t0, a2, e64, m1, ta, ma
+        vle64.v  v16, (a1)
+        sub      a2, a2, t0
+        vfmul.vf v16, v16, fa0
+        sh3add   a1, t0, a1
+        vse64.v  v16, (a0)
+        sh3add   a0, t0, a0
+        bnez     a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 10/31] lavu/floatdsp: RISC-V V vector_fmul
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (8 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 11/31] lavu/floatdsp: RISC-V V vector_dmul remi
                   ` (21 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index b829c0f736..60b79bd59e 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -25,6 +25,8 @@
 #include "libavutil/cpu.h"
 #include "libavutil/float_dsp.h"
 
+void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
+                         int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 
@@ -37,6 +39,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
     int flags = av_get_cpu_flags();
 
     if (flags & AV_CPU_FLAG_RV_ZVE32F) {
+        fdsp->vector_fmul = ff_vector_fmul_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D)
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 17dda471b4..00fb7354bb 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -21,6 +21,23 @@
 #include "config.h"
 #include "asm.S"
 
+// (a0) = (a1) * (a2) [0..a3-1]
+func ff_vector_fmul_rvv, zve32f
+1:
+        vsetvli  t0, a3, e32, m1, ta, ma
+        vle32.v  v16, (a1)
+        sub      a3, a3, t0
+        vle32.v  v24, (a2)
+        sh2add   a1, t0, a1
+        vfmul.vv v16, v16, v24
+        sh2add   a2, t0, a2
+        vse32.v  v16, (a0)
+        sh2add   a0, t0, a0
+        bnez     a3, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_fmul_scalar_rvv, zve32f
 NOHWF   fmv.w.x  fa0, a2
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 11/31] lavu/floatdsp: RISC-V V vector_dmul
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (9 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 10/31] lavu/floatdsp: RISC-V V vector_fmul remi
@ 2022-09-25 14:25 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 12/31] lavu/floatdsp: RISC-V V vector_fmac_scalar remi
                   ` (20 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:25 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  6 +++++-
 libavutil/riscv/float_dsp_rvv.S  | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 60b79bd59e..6027a67b46 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -30,6 +30,8 @@ void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 
+void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
+                         int len);
 void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
                                 int len);
 
@@ -42,8 +44,10 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul = ff_vector_fmul_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 
-        if (flags & AV_CPU_FLAG_RV_ZVE64D)
+        if (flags & AV_CPU_FLAG_RV_ZVE64D) {
+            fdsp->vector_dmul = ff_vector_dmul_rvv;
             fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+        }
     }
 #endif
 }
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 00fb7354bb..710e122444 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -55,6 +55,23 @@ NOHWF   mv       a2, a3
         ret
 endfunc
 
+// (a0) = (a1) * (a2) [0..a3-1]
+func ff_vector_dmul_rvv, zve64d
+1:
+        vsetvli  t0, a3, e64, m1, ta, ma
+        vle64.v  v16, (a1)
+        sub      a3, a3, t0
+        vle64.v  v24, (a2)
+        sh3add   a1, t0, a1
+        vfmul.vv v16, v16, v24
+        sh3add   a2, t0, a2
+        vse64.v  v16, (a0)
+        sh3add   a0, t0, a0
+        bnez     a3, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_dmul_scalar_rvv, zve64d
 NOHWD   fmv.d.x  fa0, a2
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 12/31] lavu/floatdsp: RISC-V V vector_fmac_scalar
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (10 preceding siblings ...)
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 11/31] lavu/floatdsp: RISC-V V vector_dmul remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 13/31] lavu/floatdsp: RISC-V V vector_dmac_scalar remi
                   ` (19 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 6027a67b46..c2d93e0cd7 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -27,6 +27,8 @@
 
 void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
                          int len);
+void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
+                                int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 
@@ -42,6 +44,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 
     if (flags & AV_CPU_FLAG_RV_ZVE32F) {
         fdsp->vector_fmul = ff_vector_fmul_rvv;
+        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D) {
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 710e122444..4c325db9fd 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -38,6 +38,25 @@ func ff_vector_fmul_rvv, zve32f
         ret
 endfunc
 
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_fmac_scalar_rvv, zve32f
+NOHWF   fmv.w.x   fa0, a2
+NOHWF   mv        a2, a3
+1:
+        vsetvli   t0, a2, e32, m1, ta, ma
+        slli      t1, t0, 2
+        vle32.v   v24, (a1)
+        sub       a2, a2, t0
+        vle32.v   v16, (a0)
+        sh2add    a1, t0, a1
+        vfmacc.vf v16, fa0, v24
+        vse32.v   v16, (a0)
+        sh2add    a0, t0, a0
+        bnez      a2, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_fmul_scalar_rvv, zve32f
 NOHWF   fmv.w.x  fa0, a2
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 13/31] lavu/floatdsp: RISC-V V vector_dmac_scalar
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (11 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 12/31] lavu/floatdsp: RISC-V V vector_fmac_scalar remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 14/31] lavu/floatdsp: RISC-V V vector_fmul_add remi
                   ` (18 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index c2d93e0cd7..d17d0f66c5 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -34,6 +34,8 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
                          int len);
+void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
+                                int len);
 void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
                                 int len);
 
@@ -49,6 +51,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D) {
             fdsp->vector_dmul = ff_vector_dmul_rvv;
+            fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
             fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
         }
     }
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 4c325db9fd..048ec0bc40 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -91,6 +91,24 @@ func ff_vector_dmul_rvv, zve64d
         ret
 endfunc
 
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_dmac_scalar_rvv, zve64d
+NOHWD   fmv.d.x   fa0, a2
+NOHWD   mv        a2, a3
+1:
+        vsetvli   t0, a2, e64, m1, ta, ma
+        vle64.v   v24, (a1)
+        sub       a2, a2, t0
+        vle64.v   v16, (a0)
+        sh3add    a1, t0, a1
+        vfmacc.vf v16, fa0, v24
+        vse64.v   v16, (a0)
+        sh3add    a0, t0, a0
+        bnez      a2, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_dmul_scalar_rvv, zve64d
 NOHWD   fmv.d.x  fa0, a2
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 14/31] lavu/floatdsp: RISC-V V vector_fmul_add
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (12 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 13/31] lavu/floatdsp: RISC-V V vector_dmac_scalar remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 15/31] lavu/floatdsp: RISC-V V butterflies_float remi
                   ` (17 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index d17d0f66c5..2ddd2050f7 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -31,6 +31,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
+void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
+                             const float *src2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
                          int len);
@@ -48,6 +50,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul = ff_vector_fmul_rvv;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+        fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D) {
             fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 048ec0bc40..db62402878 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -74,6 +74,25 @@ NOHWF   mv       a2, a3
         ret
 endfunc
 
+// (a0) = (a1) * (a2) + (a3) [0..a4-1]
+func ff_vector_fmul_add_rvv, zve32f
+1:
+        vsetvli   t0, a4, e32, m1, ta, ma
+        vle32.v   v8, (a1)
+        sub       a4, a4, t0
+        vle32.v   v16, (a2)
+        sh2add    a1, t0, a1
+        vle32.v   v24, (a3)
+        sh2add    a2, t0, a2
+        vfmadd.vv v8, v16, v24
+        sh2add    a3, t0, a3
+        vse32.v   v8, (a0)
+        sh2add    a0, t0, a0
+        bnez      a4, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * (a2) [0..a3-1]
 func ff_vector_dmul_rvv, zve64d
 1:
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 15/31] lavu/floatdsp: RISC-V V butterflies_float
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (13 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 14/31] lavu/floatdsp: RISC-V V vector_fmul_add remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 16/31] lavu/floatdsp: RISC-V V vector_fmul_reverse remi
                   ` (16 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  2 ++
 libavutil/riscv/float_dsp_rvv.S  | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 2ddd2050f7..f164b1308f 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -33,6 +33,7 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
                              const float *src2, int len);
+void ff_butterflies_float_rvv(float *v1, float *v2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
                          int len);
@@ -51,6 +52,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
         fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
+        fdsp->butterflies_float = ff_butterflies_float_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D) {
             fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index db62402878..a721c44667 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -93,6 +93,24 @@ func ff_vector_fmul_add_rvv, zve32f
         ret
 endfunc
 
+// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
+func ff_butterflies_float_rvv, zve32f
+1:
+        vsetvli  t0, a2, e32, m1, ta, ma
+        vle32.v  v16, (a0)
+        sub      a2, a2, t0
+        vle32.v  v24, (a1)
+        vfadd.vv v0, v16, v24
+        vfsub.vv v8, v16, v24
+        vse32.v  v0, (a0)
+        sh2add   a0, t0, a0
+        vse32.v  v8, (a1)
+        sh2add   a1, t0, a1
+        bnez     a2, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * (a2) [0..a3-1]
 func ff_vector_dmul_rvv, zve64d
 1:
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 16/31] lavu/floatdsp: RISC-V V vector_fmul_reverse
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (14 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 15/31] lavu/floatdsp: RISC-V V butterflies_float remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 17/31] lavu/floatdsp: RISC-V V vector_fmul_window remi
                   ` (15 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index f164b1308f..9b8fd9942b 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -33,6 +33,8 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
                              const float *src2, int len);
+void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
+                                 const float *src1, int len);
 void ff_butterflies_float_rvv(float *v1, float *v2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
@@ -52,6 +54,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
         fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
+        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
         fdsp->butterflies_float = ff_butterflies_float_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D) {
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index a721c44667..fbd2777463 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -93,6 +93,27 @@ func ff_vector_fmul_add_rvv, zve32f
         ret
 endfunc
 
+// (a0) = (a1) * reverse(a2) [0..a3-1]
+func ff_vector_fmul_reverse_rvv, zve32f
+        sh2add   a2, a3, a2
+        li       t2, -4 // byte stride
+        addi     a2, a2, -4
+1:
+        vsetvli  t0, a3, e32, m1, ta, ma
+        slli     t1, t0, 2
+        vle32.v  v16, (a1)
+        sub      a3, a3, t0
+        vlse32.v v24, (a2), t2
+        add      a1, a1, t1
+        vfmul.vv v16, v16, v24
+        sub      a2, a2, t1
+        vse32.v  v16, (a0)
+        add      a0, a0, t1
+        bnez     a3, 1b
+
+        ret
+endfunc
+
 // (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
 func ff_butterflies_float_rvv, zve32f
 1:
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 17/31] lavu/floatdsp: RISC-V V vector_fmul_window
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (15 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 16/31] lavu/floatdsp: RISC-V V vector_fmul_reverse remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 18/31] lavu/floatdsp: RISC-V V scalarproduct_float remi
                   ` (14 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 33 ++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 9b8fd9942b..dacd81c08b 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -31,6 +31,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                 int len);
+void ff_vector_fmul_window_rvv(float *dst, const float *src0,
+                                const float *src1, const float *win, int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
                              const float *src2, int len);
 void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
@@ -53,6 +55,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul = ff_vector_fmul_rvv;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+        fdsp->vector_fmul_window = ff_vector_fmul_window_rvv;
         fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
         fdsp->butterflies_float = ff_butterflies_float_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index fbd2777463..ce530f6108 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -74,6 +74,39 @@ NOHWF   mv       a2, a3
         ret
 endfunc
 
+func ff_vector_fmul_window_rvv, zve32f
+        // a0: dst, a1: src0, a2: src1, a3: window, a4: length
+        addi       t0, a4, -1
+        add        t1, t0, a4
+        sh2add     a2, t0, a2
+        sh2add     t0, t1, a0
+        sh2add     t3, t1, a3
+        li         t1, -4 // byte stride
+1:
+        vsetvli    t2, a4, e32, m1, ta, ma
+        vle32.v    v16, (a1)
+        slli       t4, t2, 2
+        vlse32.v   v20, (a2), t1
+        sub        a4, a4, t2
+        vle32.v    v24, (a3)
+        add        a1, a1, t4
+        vlse32.v   v28, (t3), t1
+        sub        a2, a2, t4
+        vfmul.vv   v0, v16, v28
+        add        a3, a3, t4
+        vfmul.vv   v8, v16, v24
+        sub        t3, t3, t4
+        vfnmsac.vv v0, v20, v24
+        vfmacc.vv  v8, v20, v28
+        vse32.v    v0, (a0)
+        add        a0, a0, t4
+        vsse32.v   v8, (t0), t1
+        sub        t0, t0, t4
+        bnez       a4, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * (a2) + (a3) [0..a4-1]
 func ff_vector_fmul_add_rvv, zve32f
 1:
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 18/31] lavu/floatdsp: RISC-V V scalarproduct_float
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (16 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 17/31] lavu/floatdsp: RISC-V V vector_fmul_window remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 19/31] lavu/fixeddsp: RISC-V V butterflies_fixed remi
                   ` (13 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  2 ++
 libavutil/riscv/float_dsp_rvv.S  | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index dacd81c08b..cc9b7e83dc 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -38,6 +38,7 @@ void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
 void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
                                  const float *src1, int len);
 void ff_butterflies_float_rvv(float *v1, float *v2, int len);
+float ff_scalarproduct_float_rvv(const float *v1, const float *v2, int len);
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
                          int len);
@@ -59,6 +60,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
         fdsp->butterflies_float = ff_butterflies_float_rvv;
+        fdsp->scalarproduct_float = ff_scalarproduct_float_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE64D) {
             fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index ce530f6108..ab2e0c42d7 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -165,6 +165,26 @@ func ff_butterflies_float_rvv, zve32f
         ret
 endfunc
 
+// a0 = (a0).(a1) [0..a2-1]
+func ff_scalarproduct_float_rvv, zve32f
+        vsetvli      zero, zero, e32, m1, ta, ma
+        vmv.s.x      v8, zero
+1:
+        vsetvli      t0, a2, e32, m1, ta, ma
+        vle32.v      v16, (a0)
+        sub          a2, a2, t0
+        vle32.v      v24, (a1)
+        sh2add       a0, t0, a0
+        vfmul.vv     v16, v16, v24
+        sh2add       a1, t0, a1
+        vfredusum.vs v8, v16, v8
+        bnez         a2, 1b
+
+        vfmv.f.s fa0, v8
+NOHWF   fmv.x.w  a0, fa0
+        ret
+endfunc
+
 // (a0) = (a1) * (a2) [0..a3-1]
 func ff_vector_dmul_rvv, zve64d
 1:
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 19/31] lavu/fixeddsp: RISC-V V butterflies_fixed
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (17 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 18/31] lavu/floatdsp: RISC-V V scalarproduct_float remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 20/31] lavc/audiodsp: RISC-V V vector_clip_int32 remi
                   ` (12 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/fixed_dsp.c            |  4 +++-
 libavutil/fixed_dsp.h            |  1 +
 libavutil/riscv/Makefile         |  4 +++-
 libavutil/riscv/fixed_dsp_init.c | 38 ++++++++++++++++++++++++++++++
 libavutil/riscv/fixed_dsp_rvv.S  | 40 ++++++++++++++++++++++++++++++++
 5 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 libavutil/riscv/fixed_dsp_init.c
 create mode 100644 libavutil/riscv/fixed_dsp_rvv.S

diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c
index 154f3bc2d3..bc847949dc 100644
--- a/libavutil/fixed_dsp.c
+++ b/libavutil/fixed_dsp.c
@@ -162,7 +162,9 @@ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact)
     fdsp->butterflies_fixed = butterflies_fixed_c;
     fdsp->scalarproduct_fixed = scalarproduct_fixed_c;
 
-#if ARCH_X86
+#if ARCH_RISCV
+    ff_fixed_dsp_init_riscv(fdsp);
+#elif ARCH_X86
     ff_fixed_dsp_init_x86(fdsp);
 #endif
 
diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
index fec806ff2d..1217d3a53b 100644
--- a/libavutil/fixed_dsp.h
+++ b/libavutil/fixed_dsp.h
@@ -161,6 +161,7 @@ typedef struct AVFixedDSPContext {
  */
 AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict);
 
+void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp);
 void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp);
 
 /**
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 89a8d0d990..1597154ba5 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1,3 +1,5 @@
 OBJS +=     riscv/float_dsp_init.o \
+            riscv/fixed_dsp_init.o \
             riscv/cpu.o
-RVV-OBJS += riscv/float_dsp_rvv.o
+RVV-OBJS += riscv/float_dsp_rvv.o \
+            riscv/fixed_dsp_rvv.o
diff --git a/libavutil/riscv/fixed_dsp_init.c b/libavutil/riscv/fixed_dsp_init.c
new file mode 100644
index 0000000000..4075e521f2
--- /dev/null
+++ b/libavutil/riscv/fixed_dsp_init.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/fixed_dsp.h"
+
+void ff_butterflies_fixed_rvv(int *v1, int *v2, int len);
+
+av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
+{
+#if HAVE_RVV
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_RV_ZVE32X)
+        fdsp->butterflies_fixed = ff_butterflies_fixed_rvv;
+#endif
+}
diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S
new file mode 100644
index 0000000000..0e78734b4c
--- /dev/null
+++ b/libavutil/riscv/fixed_dsp_rvv.S
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
+func ff_butterflies_fixed_rvv, zve32x
+1:
+        vsetvli t0, a2, e32, m1, ta, ma
+        vle32.v v16, (a0)
+        sub     a2, a2, t0
+        vle32.v v24, (a1)
+        vadd.vv v0, v16, v24
+        vsub.vv v8, v16, v24
+        vse32.v v0, (a0)
+        sh2add  a0, t0, a0
+        vse32.v v8, (a1)
+        sh2add  a1, t0, a1
+        bnez    a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 20/31] lavc/audiodsp: RISC-V V vector_clip_int32
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (18 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 19/31] lavu/fixeddsp: RISC-V V butterflies_fixed remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 21/31] lavc/audiodsp: RISC-V V vector_clipf remi
                   ` (11 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/Makefile        |  1 +
 libavcodec/riscv/audiodsp_init.c |  9 ++++++++
 libavcodec/riscv/audiodsp_rvv.S  | 36 ++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+)
 create mode 100644 libavcodec/riscv/audiodsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index da07f1fe96..99541b075e 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,4 +1,5 @@
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
                            riscv/audiodsp_rvf.o
+RVV-OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_rvv.o
 OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
                               riscv/pixblockdsp_rvi.o
diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
index c5842815d6..ce8b60ee52 100644
--- a/libavcodec/riscv/audiodsp_init.c
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -18,16 +18,25 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavcodec/audiodsp.h"
 
 void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max);
 
+void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min,
+                              int32_t max, unsigned int len);
+
 av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
 {
     int flags = av_get_cpu_flags();
 
     if (flags & AV_CPU_FLAG_RVF)
         c->vector_clipf = ff_vector_clipf_rvf;
+#if HAVE_RVV
+    if (flags & AV_CPU_FLAG_RV_ZVE32X)
+        c->vector_clip_int32 = ff_vector_clip_int32_rvv;
+#endif
 }
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
new file mode 100644
index 0000000000..49546ee3c4
--- /dev/null
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_vector_clip_int32_rvv, zve32x
+1:
+        vsetvli t0, a4, e32, m1, ta, ma
+        vle32.v v8, (a1)
+        sub     a4, a4, t0
+        vmax.vx v8, v8, a2
+        sh2add  a1, t0, a1
+        vmin.vx v8, v8, a3
+        vse32.v v8, (a0)
+        sh2add  a0, t0, a0
+        bnez    a4, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 21/31] lavc/audiodsp: RISC-V V vector_clipf
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (19 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 20/31] lavc/audiodsp: RISC-V V vector_clip_int32 remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 22/31] lavc/audiodsp: RISC-V V scalarproduct_int16 remi
                   ` (10 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/audiodsp_init.c |  7 ++++++-
 libavcodec/riscv/audiodsp_rvv.S  | 17 +++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
index ce8b60ee52..ddd561484f 100644
--- a/libavcodec/riscv/audiodsp_init.c
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -26,6 +26,7 @@
 
 void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max);
 
+void ff_vector_clipf_rvv(float *dst, const float *src, int len, float min, float max);
 void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);
 
@@ -36,7 +37,11 @@ av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
     if (flags & AV_CPU_FLAG_RVF)
         c->vector_clipf = ff_vector_clipf_rvf;
 #if HAVE_RVV
-    if (flags & AV_CPU_FLAG_RV_ZVE32X)
+    if (flags & AV_CPU_FLAG_RV_ZVE32X) {
         c->vector_clip_int32 = ff_vector_clip_int32_rvv;
+
+        if (flags & AV_CPU_FLAG_RV_ZVE32F)
+            c->vector_clipf = ff_vector_clipf_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index 49546ee3c4..427b424cb9 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -34,3 +34,20 @@ func ff_vector_clip_int32_rvv, zve32x
 
         ret
 endfunc
+
+func ff_vector_clipf_rvv, zve32f
+NOHWF   fmv.w.x  fa0, a3
+NOHWF   fmv.w.x  fa1, a4
+1:
+        vsetvli  t0, a2, e32, m1, ta, ma
+        vle32.v  v8, (a1)
+        sub      a2, a2, t0
+        vfmax.vf v8, v8, fa0
+        sh2add   a1, t0, a1
+        vfmin.vf v8, v8, fa1
+        vse32.v  v8, (a0)
+        sh2add   a0, t0, a0
+        bnez     a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 22/31] lavc/audiodsp: RISC-V V scalarproduct_int16
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (20 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 21/31] lavc/audiodsp: RISC-V V vector_clipf remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 23/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_scalar remi
                   ` (9 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/audiodsp_init.c |  2 ++
 libavcodec/riscv/audiodsp_rvv.S  | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
index ddd561484f..6f38b7bc83 100644
--- a/libavcodec/riscv/audiodsp_init.c
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -29,6 +29,7 @@ void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float
 void ff_vector_clipf_rvv(float *dst, const float *src, int len, float min, float max);
 void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);
+int32_t ff_scalarproduct_int16_rvv(const int16_t *v1, const int16_t *v2, int len);
 
 av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
 {
@@ -38,6 +39,7 @@ av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
         c->vector_clipf = ff_vector_clipf_rvf;
 #if HAVE_RVV
     if (flags & AV_CPU_FLAG_RV_ZVE32X) {
+        c->scalarproduct_int16 = ff_scalarproduct_int16_rvv;
         c->vector_clip_int32 = ff_vector_clip_int32_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE32F)
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index 427b424cb9..f4308f27c5 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -20,6 +20,25 @@
 
 #include "libavutil/riscv/asm.S"
 
+func ff_scalarproduct_int16_rvv, zve32x
+        vsetvli     zero, zero, e16, m1, ta, ma
+        vmv.s.x     v8, zero
+1:
+        vsetvli     t0, a2, e16, m1, ta, ma
+        vle16.v     v16, (a0)
+        sub         a2, a2, t0
+        vle16.v     v24, (a1)
+        sh1add      a0, t0, a0
+        vwmul.vv    v0, v16, v24
+        sh1add      a1, t0, a1
+        vsetvli     zero, t0, e32, m2, ta, ma
+        vredsum.vs  v8, v0, v8
+        bnez        a2, 1b
+
+        vmv.x.s     a0, v8
+        ret
+endfunc
+
 func ff_vector_clip_int32_rvv, zve32x
 1:
         vsetvli t0, a4, e32, m1, ta, ma
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 23/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_scalar
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (21 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 22/31] lavc/audiodsp: RISC-V V scalarproduct_int16 remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 24/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8 remi
                   ` (8 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/fmtconvert.c            |  2 ++
 libavcodec/fmtconvert.h            |  1 +
 libavcodec/riscv/Makefile          |  2 ++
 libavcodec/riscv/fmtconvert_init.c | 39 ++++++++++++++++++++++++++++++
 libavcodec/riscv/fmtconvert_rvv.S  | 39 ++++++++++++++++++++++++++++++
 5 files changed, 83 insertions(+)
 create mode 100644 libavcodec/riscv/fmtconvert_init.c
 create mode 100644 libavcodec/riscv/fmtconvert_rvv.S

diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index cedfd61138..d889e61aca 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -52,6 +52,8 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c)
     ff_fmt_convert_init_arm(c);
 #elif ARCH_PPC
     ff_fmt_convert_init_ppc(c);
+#elif ARCH_RISCV
+    ff_fmt_convert_init_riscv(c);
 #elif ARCH_X86
     ff_fmt_convert_init_x86(c);
 #endif
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index da244e05a5..1cb4628a64 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -61,6 +61,7 @@ void ff_fmt_convert_init(FmtConvertContext *c);
 void ff_fmt_convert_init_aarch64(FmtConvertContext *c);
 void ff_fmt_convert_init_arm(FmtConvertContext *c);
 void ff_fmt_convert_init_ppc(FmtConvertContext *c);
+void ff_fmt_convert_init_riscv(FmtConvertContext *c);
 void ff_fmt_convert_init_x86(FmtConvertContext *c);
 void ff_fmt_convert_init_mips(FmtConvertContext *c);
 
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 99541b075e..682174e875 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,5 +1,7 @@
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
                            riscv/audiodsp_rvf.o
 RVV-OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_rvv.o
+OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_init.o
+RVV-OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_rvv.o
 OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
                               riscv/pixblockdsp_rvi.o
diff --git a/libavcodec/riscv/fmtconvert_init.c b/libavcodec/riscv/fmtconvert_init.c
new file mode 100644
index 0000000000..fd2f58d060
--- /dev/null
+++ b/libavcodec/riscv/fmtconvert_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/fmtconvert.h"
+
+void ff_int32_to_float_fmul_scalar_rvv(float *dst, const int32_t *src,
+                                       float mul, int len);
+
+av_cold void ff_fmt_convert_init_riscv(FmtConvertContext *c)
+{
+#ifdef HAVE_RVV
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_rvv;
+#endif
+}
diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S
new file mode 100644
index 0000000000..b7c78831a0
--- /dev/null
+++ b/libavcodec/riscv/fmtconvert_rvv.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "../libavutil/riscv/asm.S"
+
+func ff_int32_to_float_fmul_scalar_rvv, zve32f
+NOHWF   fmv.w.x     fa0, a2
+NOHWF   mv          a2, a3
+1:
+        vsetvli     t0, a2, e32, m1, ta, ma
+        vle32.v     v24, (a1)
+        sub         a2, a2, t0
+        vfcvt.f.x.v v24, v24
+        sh2add      a1, t0, a1
+        vfmul.vf    v24, v24, fa0
+        vse32.v     v24, (a0)
+        sh2add      a0, t0, a0
+        bnez        a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 24/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (22 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 23/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_scalar remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 25/31] lavc/vorbisdsp: RISC-V V inverse_coupling remi
                   ` (7 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/fmtconvert_init.c |  7 ++++++-
 libavcodec/riscv/fmtconvert_rvv.S  | 28 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/fmtconvert_init.c b/libavcodec/riscv/fmtconvert_init.c
index fd2f58d060..1796717a1c 100644
--- a/libavcodec/riscv/fmtconvert_init.c
+++ b/libavcodec/riscv/fmtconvert_init.c
@@ -27,13 +27,18 @@
 
 void ff_int32_to_float_fmul_scalar_rvv(float *dst, const int32_t *src,
                                        float mul, int len);
+void ff_int32_to_float_fmul_array8_rvv(FmtConvertContext *c, float *dst,
+                                       const int32_t *src, const float *mul,
+                                       int len);
 
 av_cold void ff_fmt_convert_init_riscv(FmtConvertContext *c)
 {
 #ifdef HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
         c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_rvv;
+        c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S
index b7c78831a0..c79f80cc47 100644
--- a/libavcodec/riscv/fmtconvert_rvv.S
+++ b/libavcodec/riscv/fmtconvert_rvv.S
@@ -37,3 +37,31 @@ NOHWF   mv          a2, a3
 
         ret
 endfunc
+
+func ff_int32_to_float_fmul_array8_rvv, zve32f
+        srai        a4, a4, 3
+
+1:      vsetvli     t0, a4, e32, m1, ta, ma
+        vle32.v     v24, (a3)
+        slli        t2, t0, 2 + 3
+        vlseg8e32.v v16, (a2)
+        vsetvli     t3, zero, e32, m8, ta, ma
+        vfcvt.f.x.v v16, v16
+        vsetvli     zero, a4, e32, m1, ta, ma
+        vfmul.vv    v16, v16, v24
+        sub         a4, a4, t0
+        vfmul.vv    v17, v17, v24
+        sh2add      a3, t0, a3
+        vfmul.vv    v18, v18, v24
+        add         a2, a2, t2
+        vfmul.vv    v19, v19, v24
+        vfmul.vv    v20, v20, v24
+        vfmul.vv    v21, v21, v24
+        vfmul.vv    v22, v22, v24
+        vfmul.vv    v23, v23, v24
+        vsseg8e32.v v16, (a1)
+        add         a1, a1, t2
+        bnez        a4, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 25/31] lavc/vorbisdsp: RISC-V V inverse_coupling
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (23 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 24/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8 remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 26/31] lavc/aacpsdsp: RISC-V V add_squares remi
                   ` (6 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

This uses the following vectorisation:

    for (i = 0; i < blocksize; i++) {
        ang[i] = mag[i] - copysignf(fmaxf(ang[i], 0.f), mag[i]);
        mag[i] = mag[i] - copysignf(fminf(ang[i], 0.f), mag[i]);
    }
---
 libavcodec/riscv/Makefile         |  2 ++
 libavcodec/riscv/vorbisdsp_init.c | 37 ++++++++++++++++++++++++++
 libavcodec/riscv/vorbisdsp_rvv.S  | 44 +++++++++++++++++++++++++++++++
 libavcodec/vorbisdsp.c            |  2 ++
 libavcodec/vorbisdsp.h            |  1 +
 5 files changed, 86 insertions(+)
 create mode 100644 libavcodec/riscv/vorbisdsp_init.c
 create mode 100644 libavcodec/riscv/vorbisdsp_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 682174e875..03a95301d7 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -5,3 +5,5 @@ OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_init.o
 RVV-OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_rvv.o
 OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
                               riscv/pixblockdsp_rvi.o
+OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
+RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vorbisdsp_init.c b/libavcodec/riscv/vorbisdsp_init.c
new file mode 100644
index 0000000000..d8432bc0f8
--- /dev/null
+++ b/libavcodec/riscv/vorbisdsp_init.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+
+void ff_vorbis_inverse_coupling_rvv(float *mag, float *ang,
+                                    ptrdiff_t blocksize);
+
+av_cold void ff_vorbisdsp_init_riscv(VorbisDSPContext *c)
+{
+#if HAVE_RVV
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+        c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_rvv;
+#endif
+}
diff --git a/libavcodec/riscv/vorbisdsp_rvv.S b/libavcodec/riscv/vorbisdsp_rvv.S
new file mode 100644
index 0000000000..e8953fb548
--- /dev/null
+++ b/libavcodec/riscv/vorbisdsp_rvv.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "../libavutil/riscv/asm.S"
+
+func ff_vorbis_inverse_coupling_rvv, zve32f
+        fmv.w.x   ft0, zero
+1:
+        vsetvli   t0, a2, e32, m1, ta, ma
+        vle32.v   v16, (a1)
+        sub       a2, a2, t0
+        vle32.v   v24, (a0)
+        vfmax.vf  v8, v16, ft0
+        vfmin.vf  v16, v16, ft0
+        vfsgnj.vv v8, v8, v24
+        vfsgnj.vv v16, v16, v24
+        vfsub.vv  v8, v24, v8
+        vfsub.vv  v24, v24, v16
+        vse32.v   v8, (a1)
+        sh2add    a1, t0, a1
+        vse32.v   v24, (a0)
+        sh2add    a0, t0, a0
+        bnez      a2, 1b
+
+        ret
+endfunc
diff --git a/libavcodec/vorbisdsp.c b/libavcodec/vorbisdsp.c
index 693c44dfcb..70022bd262 100644
--- a/libavcodec/vorbisdsp.c
+++ b/libavcodec/vorbisdsp.c
@@ -53,6 +53,8 @@ av_cold void ff_vorbisdsp_init(VorbisDSPContext *dsp)
     ff_vorbisdsp_init_arm(dsp);
 #elif ARCH_PPC
     ff_vorbisdsp_init_ppc(dsp);
+#elif ARCH_RISCV
+    ff_vorbisdsp_init_riscv(dsp);
 #elif ARCH_X86
     ff_vorbisdsp_init_x86(dsp);
 #endif
diff --git a/libavcodec/vorbisdsp.h b/libavcodec/vorbisdsp.h
index 1775a92cf2..5c369ecf22 100644
--- a/libavcodec/vorbisdsp.h
+++ b/libavcodec/vorbisdsp.h
@@ -34,5 +34,6 @@ void ff_vorbisdsp_init_aarch64(VorbisDSPContext *dsp);
 void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp);
 void ff_vorbisdsp_init_arm(VorbisDSPContext *dsp);
 void ff_vorbisdsp_init_ppc(VorbisDSPContext *dsp);
+void ff_vorbisdsp_init_riscv(VorbisDSPContext *dsp);
 
 #endif /* AVCODEC_VORBISDSP_H */
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 26/31] lavc/aacpsdsp: RISC-V V add_squares
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (24 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 25/31] lavc/vorbisdsp: RISC-V V inverse_coupling remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 27/31] lavc/aacpsdsp: RISC-V V mul_pair_single remi
                   ` (5 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/aacpsdsp.h            |  1 +
 libavcodec/aacpsdsp_template.c   |  2 ++
 libavcodec/riscv/Makefile        |  2 ++
 libavcodec/riscv/aacpsdsp_init.c | 37 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/aacpsdsp_rvv.S  | 37 ++++++++++++++++++++++++++++++++
 5 files changed, 79 insertions(+)
 create mode 100644 libavcodec/riscv/aacpsdsp_init.c
 create mode 100644 libavcodec/riscv/aacpsdsp_rvv.S

diff --git a/libavcodec/aacpsdsp.h b/libavcodec/aacpsdsp.h
index 917ac5303f..8b32761bdb 100644
--- a/libavcodec/aacpsdsp.h
+++ b/libavcodec/aacpsdsp.h
@@ -55,6 +55,7 @@ void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s);
 void ff_psdsp_init_arm(PSDSPContext *s);
 void ff_psdsp_init_aarch64(PSDSPContext *s);
 void ff_psdsp_init_mips(PSDSPContext *s);
+void ff_psdsp_init_riscv(PSDSPContext *s);
 void ff_psdsp_init_x86(PSDSPContext *s);
 
 #endif /* AVCODEC_AACPSDSP_H */
diff --git a/libavcodec/aacpsdsp_template.c b/libavcodec/aacpsdsp_template.c
index e3cbf3feec..c063788b89 100644
--- a/libavcodec/aacpsdsp_template.c
+++ b/libavcodec/aacpsdsp_template.c
@@ -230,6 +230,8 @@ av_cold void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s)
     ff_psdsp_init_aarch64(s);
 #elif ARCH_MIPS
     ff_psdsp_init_mips(s);
+#elif ARCH_RISCV
+    ff_psdsp_init_riscv(s);
 #elif ARCH_X86
     ff_psdsp_init_x86(s);
 #endif
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 03a95301d7..829a1823d2 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -1,3 +1,5 @@
+OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_init.o
+RVV-OBJS-$(CONFIG_AAC_DECODER) += riscv/aacpsdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o \
                            riscv/audiodsp_rvf.o
 RVV-OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_rvv.o
diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
new file mode 100644
index 0000000000..525fc9aa38
--- /dev/null
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/aacpsdsp.h"
+
+void ff_ps_add_squares_rvv(float *dst, const float (*src)[2], int n);
+
+av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
+{
+#if HAVE_RVV
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+        c->add_squares = ff_ps_add_squares_rvv;
+#endif
+}
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
new file mode 100644
index 0000000000..b516063ea7
--- /dev/null
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ps_add_squares_rvv, zve32f
+1:
+        vsetvli     t0, a2, e32, m1, ta, ma
+        vlseg2e32.v v24, (a1)
+        sub         a2, a2, t0
+        vle32.v     v16, (a0)
+        sh3add      a1, t0, a1
+        vfmacc.vv   v16, v24, v24
+        vfmacc.vv   v16, v25, v25
+        vse32.v     v16, (a0)
+        sh2add      a0, t0, a0
+        bnez        a2, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 27/31] lavc/aacpsdsp: RISC-V V mul_pair_single
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (25 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 26/31] lavc/aacpsdsp: RISC-V V add_squares remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 28/31] lavc/aacpsdsp: RISC-V V hybrid_analysis remi
                   ` (4 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c |  6 +++++-
 libavcodec/riscv/aacpsdsp_rvv.S  | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 525fc9aa38..90c9c501c3 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -25,13 +25,17 @@
 #include "libavcodec/aacpsdsp.h"
 
 void ff_ps_add_squares_rvv(float *dst, const float (*src)[2], int n);
+void ff_ps_mul_pair_single_rvv(float (*dst)[2], float (*src0)[2], float *src1,
+                               int n);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
         c->add_squares = ff_ps_add_squares_rvv;
+        c->mul_pair_single = ff_ps_mul_pair_single_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index b516063ea7..70b7b72218 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -35,3 +35,20 @@ func ff_ps_add_squares_rvv, zve32f
 
         ret
 endfunc
+
+func ff_ps_mul_pair_single_rvv, zve32f
+1:
+        vsetvli     t0, a3, e32, m1, ta, ma
+        vlseg2e32.v v24, (a1)
+        sub         a3, a3, t0
+        vle32.v     v16, (a2)
+        sh3add      a1, t0, a1
+        vfmul.vv    v24, v24, v16
+        sh2add      a2, t0, a2
+        vfmul.vv    v25, v25, v16
+        vsseg2e32.v v24, (a0)
+        sh3add      a0, t0, a0
+        bnez        a3, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 28/31] lavc/aacpsdsp: RISC-V V hybrid_analysis
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (26 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 27/31] lavc/aacpsdsp: RISC-V V mul_pair_single remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 29/31] lavc/aacpsdsp: RISC-V V hybrid_analysis_ileave remi
                   ` (3 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

This starts with one-time initialisation of the 26 constant factors
like  08edacc248bce3f8946d75e97188d189c74a6de6. That is done with
the scalar instruction set. While the formula can readily be vectored,
the gains would (probably) be more than lost in transfering the results
back to FP registers (or suitably reshuffling them into vector
registers).

Note that the main loop could likely be scheduled sligthly better by
expanding the filter macro and interleaving loads with arithmetic.
It is not clear yet if that would be relevant for vector processing (as
opposed to traditional SIMD).

We could also use fewer vectors, but there is not much point in sparing
them (they are *all* callee-clobbered).
---
 libavcodec/riscv/aacpsdsp_init.c |  3 +
 libavcodec/riscv/aacpsdsp_rvv.S  | 97 ++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 90c9c501c3..6222d6f787 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -27,6 +27,8 @@
 void ff_ps_add_squares_rvv(float *dst, const float (*src)[2], int n);
 void ff_ps_mul_pair_single_rvv(float (*dst)[2], float (*src0)[2], float *src1,
                                int n);
+void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
+                               const float (*filter)[8][2], ptrdiff_t, int n);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
@@ -36,6 +38,7 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
     if (flags & AV_CPU_FLAG_RV_ZVE32F) {
         c->add_squares = ff_ps_add_squares_rvv;
         c->mul_pair_single = ff_ps_mul_pair_single_rvv;
+        c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 70b7b72218..65e5e0be4f 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -52,3 +52,100 @@ func ff_ps_mul_pair_single_rvv, zve32f
 
         ret
 endfunc
+
+func ff_ps_hybrid_analysis_rvv, zve32f
+        /* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */
+        addi    sp, sp, -32
+        .irp n, 0, 1, 2, 3, 4, 5
+        fsw     fs\n, (4 * \n)(sp)
+        .endr
+
+        .macro input, j, fd0, fd1, fd2, fd3
+        flw     \fd0, (4 * ((\j * 2) + 0))(a1)
+        flw     fs4, (4 * (((12 - \j) * 2) + 0))(a1)
+        flw     \fd1, (4 * ((\j * 2) + 1))(a1)
+        fsub.s  \fd3, \fd0, fs4
+        flw     fs5, (4 * (((12 - \j) * 2) + 1))(a1)
+        fadd.s  \fd2, \fd1, fs5
+        fadd.s  \fd0, \fd0, fs4
+        fsub.s  \fd1, \fd1, fs5
+        .endm
+
+        //         re0, re1, im0, im1
+        input   0, ft0, ft1, ft2, ft3
+        input   1, ft4, ft5, ft6, ft7
+        input   2, ft8, ft9, ft10, ft11
+        input   3, fa0, fa1, fa2, fa3
+        input   4, fa4, fa5, fa6, fa7
+        input   5, fs0, fs1, fs2, fs3
+        flw     fs4, (4 * ((6 * 2) + 0))(a1)
+        flw     fs5, (4 * ((6 * 2) + 1))(a1)
+
+        add        a2, a2, 6 * 2 * 4 // point to filter[i][6][0]
+        li         t4, 8 * 2 * 4 // filter byte stride
+        slli       a3, a3, 3 // output byte stride
+1:
+        .macro filter, vs0, vs1, fo0, fo1, fo2, fo3
+        vfmacc.vf  v8, \fo0, \vs0
+        vfmacc.vf  v9, \fo2, \vs0
+        vfnmsac.vf v8, \fo1, \vs1
+        vfmacc.vf  v9, \fo3, \vs1
+        .endm
+
+        vsetvli    t0, a4, e32, m1, ta, ma
+        /*
+         * The filter (a2) has 16 segments, of which 13 need to be extracted.
+         * R-V V supports only up to 8 segments, so unrolling is unavoidable.
+         */
+        addi       t1, a2, -48
+        vlse32.v   v22, (a2), t4
+        addi       t2, a2, -44
+        vlse32.v   v16, (t1), t4
+        addi       t1, a2, -40
+        vfmul.vf   v8, v22, fs4
+        vlse32.v   v24, (t2), t4
+        addi       t2, a2, -36
+        vfmul.vf   v9, v22, fs5
+        vlse32.v   v17, (t1), t4
+        addi       t1, a2, -32
+        vlse32.v   v25, (t2), t4
+        addi       t2, a2, -28
+        filter     v16, v24, ft0, ft1, ft2, ft3
+        vlse32.v   v18, (t1), t4
+        addi       t1, a2, -24
+        vlse32.v   v26, (t2), t4
+        addi       t2, a2, -20
+        filter     v17, v25, ft4, ft5, ft6, ft7
+        vlse32.v   v19, (t1), t4
+        addi       t1, a2, -16
+        vlse32.v   v27, (t2), t4
+        addi       t2, a2, -12
+        filter     v18, v26, ft8, ft9, ft10, ft11
+        vlse32.v   v20, (t1), t4
+        addi       t1, a2, -8
+        vlse32.v   v28, (t2), t4
+        addi       t2, a2, -4
+        filter     v19, v27, fa0, fa1, fa2, fa3
+        vlse32.v   v21, (t1), t4
+        sub        a4, a4, t0
+        vlse32.v   v29, (t2), t4
+        slli       t1, t0, 3 + 1 + 2 // ctz(8 * 2 * 4)
+        add        a2, a2, t1
+        filter     v20, v28, fa4, fa5, fa6, fa7
+        filter     v21, v29, fs0, fs1, fs2, fs3
+
+        add        t2, a0, 4
+        vsse32.v   v8, (a0), a3
+        mul        t0, t0, a3
+        vsse32.v   v9, (t2), a3
+        add        a0, a0, t0
+        bnez       a4, 1b
+
+        .irp n, 5, 4, 3, 2, 1, 0
+        flw     fs\n, (4 * \n)(sp)
+        .endr
+        addi    sp, sp, 32
+        ret
+        .purgem input
+        .purgem filter
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 29/31] lavc/aacpsdsp: RISC-V V hybrid_analysis_ileave
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (27 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 28/31] lavc/aacpsdsp: RISC-V V hybrid_analysis remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 30/31] lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint remi
                   ` (2 subsequent siblings)
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c | 14 +++++++++----
 libavcodec/riscv/aacpsdsp_rvv.S  | 35 ++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 6222d6f787..76f55502ee 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -29,16 +29,22 @@ void ff_ps_mul_pair_single_rvv(float (*dst)[2], float (*src0)[2], float *src1,
                                int n);
 void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2], ptrdiff_t, int n);
+void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
+                                      int i, int len);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
-        c->add_squares = ff_ps_add_squares_rvv;
-        c->mul_pair_single = ff_ps_mul_pair_single_rvv;
-        c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+    if (flags & AV_CPU_FLAG_RV_ZVE32X) {
+        c->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_rvv;
+
+        if (flags & AV_CPU_FLAG_RV_ZVE32F) {
+            c->add_squares = ff_ps_add_squares_rvv;
+            c->mul_pair_single = ff_ps_mul_pair_single_rvv;
+            c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+        }
     }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 65e5e0be4f..c9cc15e73d 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -149,3 +149,38 @@ func ff_ps_hybrid_analysis_rvv, zve32f
         .purgem input
         .purgem filter
 endfunc
+
+func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
+        slli        t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4)
+        sh2add      a1, a2, a1
+        add         a0, a0, t0
+        addi        a2, a2, -64
+        li          t1, 38 * 64 * 4
+        li          t6, 64 * 4 // (uint8_t *)L[x][j+1][i] - L[x][j][i]
+        add         a4, a1, t1 // &L[1]
+        beqz        a2, 3f
+1:
+        mv          t0, a0
+        mv          t1, a1
+        mv          t3, a3
+        mv          t4, a4
+        addi        a2, a2, 1
+2:
+        vsetvli     t5, t3, e32, m1, ta, ma
+        vlse32.v    v16, (t1), t6
+        sub         t3, t3, t5
+        vlse32.v    v17, (t4), t6
+        mul         t2, t5, t6
+        vsseg2e32.v v16, (t0)
+        sh3add      t0, t5, t0
+        add         t1, t1, t2
+        add         t4, t4, t2
+        bnez        t3, 2b
+
+        add         a0, a0, 32 * 2 * 4
+        add         a1, a1, 4
+        add         a4, a4, 4
+        bnez        a2, 1b
+3:
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 30/31] lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (28 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 29/31] lavc/aacpsdsp: RISC-V V hybrid_analysis_ileave remi
@ 2022-09-25 14:26 ` remi
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 31/31] lavc/aacpsdsp: RISC-V V stereo_interpolate[0] remi
  2022-09-26  7:05 ` [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Lynne
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c |  3 +++
 libavcodec/riscv/aacpsdsp_rvv.S  | 35 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 76f55502ee..20b1a12741 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -31,6 +31,8 @@ void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2], ptrdiff_t, int n);
 void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
                                       int i, int len);
+void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
+                                      int i, int len);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
@@ -39,6 +41,7 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 
     if (flags & AV_CPU_FLAG_RV_ZVE32X) {
         c->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_rvv;
+        c->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE32F) {
             c->add_squares = ff_ps_add_squares_rvv;
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index c9cc15e73d..0cbe4c1d3c 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -184,3 +184,38 @@ func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
 3:
         ret
 endfunc
+
+func ff_ps_hybrid_synthesis_deint_rvv, zve32x
+        slli        t1, a2, 5 + 1 + 2
+        sh2add      a0, a2, a0
+        add         a1, a1, t1
+        addi        a2, a2, -64
+        li          t1, 38 * 64 * 4
+        li          t6, 64 * 4
+        add         a4, a0, t1
+        beqz        a2, 3f
+1:
+        mv          t0, a0
+        mv          t1, a1
+        mv          t3, a3
+        mv          t4, a4
+        addi        a2, a2, 1
+2:
+        vsetvli     t5, t3, e32, m1, ta, ma
+        vlseg2e32.v v16, (t1)
+        sub         t3, t3, t5
+        vsse32.v    v16, (t0), t6
+        mul         t2, t5, t6
+        vsse32.v    v17, (t4), t6
+        sh3add      t1, t5, t1
+        add         t0, t0, t2
+        add         t4, t4, t2
+        bnez        t3, 2b
+
+        add         a0, a0, 4
+        add         a1, a1, 32 * 2 * 4
+        add         a4, a4, 4
+        bnez        a2, 1b
+3:
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 31/31] lavc/aacpsdsp: RISC-V V stereo_interpolate[0]
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (29 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 30/31] lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint remi
@ 2022-09-25 14:26 ` remi
  2022-09-26  7:05 ` [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Lynne
  31 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-25 14:26 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c |  4 ++
 libavcodec/riscv/aacpsdsp_rvv.S  | 65 ++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 20b1a12741..58a4c61121 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -34,6 +34,9 @@ void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
 void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
                                       int i, int len);
 
+void ff_ps_stereo_interpolate_rvv(float (*l)[2], float (*r)[2],
+                                  float h[2][4], float h_step[2][4], int len);
+
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
 #if HAVE_RVV
@@ -47,6 +50,7 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
             c->add_squares = ff_ps_add_squares_rvv;
             c->mul_pair_single = ff_ps_mul_pair_single_rvv;
             c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+            c->stereo_interpolate[0] = ff_ps_stereo_interpolate_rvv;
         }
     }
 #endif
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 0cbe4c1d3c..a236dfe43c 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -219,3 +219,68 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve32x
 3:
         ret
 endfunc
+
+func ff_ps_stereo_interpolate_rvv, zve32f
+        vsetvli      t0, zero, e32, m1, ta, ma
+        vid.v        v24
+        flw          ft0,   (a2)
+        vadd.vi      v24, v24, 1   // v24[i] = i + 1
+        flw          ft1,  4(a2)
+        vfcvt.f.xu.v v24, v24
+        flw          ft2,  8(a2)
+        vfmv.v.f     v16, ft0
+        flw          ft3, 12(a2)
+        vfmv.v.f     v17, ft1
+        flw          ft0,   (a3)
+        vfmv.v.f     v18, ft2
+        flw          ft1,  4(a3)
+        vfmv.v.f     v19, ft3
+        flw          ft2,  8(a3)
+        vfmv.v.f     v20, ft0
+        flw          ft3, 12(a3)
+        vfmv.v.f     v21, ft1
+        fcvt.s.wu    ft4, t0       // (float)(vlenb / sizeof (float))
+        vfmv.v.f     v22, ft2
+        li           t1, 8
+        vfmv.v.f     v23, ft3
+        addi         a6, a0, 4     // l[*][1]
+        vfmacc.vv    v16, v24, v20 // h0 += (i + 1) * h0_step
+        addi         a7, a1, 4     // r[*][1]
+        vfmacc.vv    v17, v24, v21
+        fmul.s       ft0, ft0, ft4
+        vfmacc.vv    v18, v24, v22
+        fmul.s       ft1, ft1, ft4
+        vfmacc.vv    v19, v24, v23
+        fmul.s       ft2, ft2, ft4
+        fmul.s       ft3, ft3, ft4
+1:
+        vsetvli   t0, a4, e32, m1, ta, ma
+        vlse32.v  v8, (a0), t1   // l_re
+        sub       a4, a4, t0
+        vlse32.v  v9, (a6), t1   // l_im
+        vlse32.v  v10, (a1), t1  // r_re
+        vlse32.v  v11, (a7), t1  // r_im
+        vfmul.vv  v12, v8, v16
+        vfmul.vv  v13, v9, v16
+        vfmul.vv  v14, v8, v17
+        vfmul.vv  v15, v9, v17
+        vfmacc.vv v12, v10, v18
+        vfmacc.vv v13, v11, v18
+        vfmacc.vv v14, v10, v19
+        vfmacc.vv v15, v11, v19
+        vsse32.v  v12, (a0), t1
+        sh3add    a0, t0, a0
+        vsse32.v  v13, (a6), t1
+        sh3add    a6, t0, a6
+        vsse32.v  v14, (a1), t1
+        sh3add    a1, t0, a1
+        vsse32.v  v15, (a7), t1
+        sh3add    a7, t0, a7
+        vfadd.vf  v16, v16, ft0 // h0 += (vlenb / sizeof (float)) * h0_step
+        vfadd.vf  v17, v17, ft1
+        vfadd.vf  v18, v18, ft2
+        vfadd.vf  v19, v19, ft3
+        bnez      a4, 1b
+
+        ret
+endfunc
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension remi
@ 2022-09-26  6:51   ` Lynne
  2022-09-26  8:02     ` Andreas Rheinhardt
  2022-09-26  9:38     ` Rémi Denis-Courmont
  0 siblings, 2 replies; 40+ messages in thread
From: Lynne @ 2022-09-26  6:51 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Sep 25, 2022, 16:25 by remi@remlab.net:

> From: Rémi Denis-Courmont <remi@remlab.net>
> -    if ((flags & AV_CPU_FLAG_RVD) && !(flags & AV_CPU_FLAG_RVF)) {
> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE64X)) {
> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
> +               "_ZVE64X");
> +        flags |= AV_CPU_FLAG_RV_ZVE64X;
> +    }
> +
> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE32F)) {
> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
> +               "_ZVE32F");
>

I remember someone complaining about NULL contexts in av_log (mkver?).
I think it's okay, but I have no opinion on this.


> +        flags |= AV_CPU_FLAG_RV_ZVE32F;
> +    }
> +
> +    if ((flags & (AV_CPU_FLAG_RV_ZVE64X | AV_CPU_FLAG_RV_ZVE32F))
> +        && !(flags & AV_CPU_FLAG_RV_ZVE32X)) {
> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
> +               "_ZVE32X");
> +        flags |= AV_CPU_FLAG_RV_ZVE32X;
> +    }
> +
> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RVD)) {
> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "D");
> +        flags |= AV_CPU_FLAG_RVD;
> +    }
> +
> +    if ((flags & (AV_CPU_FLAG_RVD | AV_CPU_FLAG_RV_ZVE32F))
> +        && !(flags & AV_CPU_FLAG_RVF)) {
>  av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "F");
>  flags |= AV_CPU_FLAG_RVF;
>  }
> @@ -50,6 +75,11 @@ int ff_get_cpu_flags_riscv(void)
>  ret |= AV_CPU_FLAG_RVF;
>  if (hwcap & HWCAP_RV('D'))
>  ret |= AV_CPU_FLAG_RVD;
> +
> +    /* The V extension implies all Zve* functional subsets */
> +    if (hwcap & HWCAP_RV('V'))
> +        ret |= AV_CPU_FLAG_RV_ZVE32X | AV_CPU_FLAG_RV_ZVE64X
> +             | AV_CPU_FLAG_RV_ZVE32F | AV_CPU_FLAG_RV_ZVE64D;
>  #endif
>  
>  #ifdef __riscv_i
> @@ -60,6 +90,20 @@ int ff_get_cpu_flags_riscv(void)
>  #if (__riscv_flen >= 64)
>  ret |= AV_CPU_FLAG_RVD;
>  #endif
> +#endif
> +
> +    /* If RV-V is enabled statically at compile-time, check the details. */
> +#ifdef __riscv_vectors
> +    ret |= AV_CPU_FLAG_RV_ZVE32X;
> +#if __riscv_v_elen >= 64
> +    ret |= AV_CPU_FLAG_RV_ZVE64X;
> +#endif
> +#if __riscv_v_elen_fp >= 32
> +    ret |= AV_CPU_FLAG_RV_ZVE32F;
> +#if __riscv_v_elen_fp >= 64
> +    ret |= AV_CPU_FLAG_RV_ZVE64F;
> +#endif
> +#endif
>  #endif
>  
>  return ret;
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index e1135a84ac..f7d108e8ea 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -233,9 +233,13 @@ static const struct {
>  { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
>  { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
>  #elif ARCH_RISCV
> -    { "RVI",      "rvi",      AV_CPU_FLAG_RVI },
> -    { "RVF",      "rvf",      AV_CPU_FLAG_RVF },
> -    { "RVD",      "rvd",      AV_CPU_FLAG_RVD },
> +    { "RVI",        "rvi",       AV_CPU_FLAG_RVI },
> +    { "RVF",        "rvf",       AV_CPU_FLAG_RVF },
> +    { "RVD",        "rvd",       AV_CPU_FLAG_RVD },
> +    { "RV_Zve32x",  "rv_zve32x", AV_CPU_FLAG_RV_ZVE32X },
> +    { "RV_Zve32f",  "rv_zve32f", AV_CPU_FLAG_RV_ZVE32F },
> +    { "RV_Zve64x",  "rv_zve64x", AV_CPU_FLAG_RV_ZVE64X },
> +    { "RV_Zve64d",  "rv_zve64d", AV_CPU_FLAG_RV_ZVE64D }, 
>

I get that this is the official name for the extension, but... what about
simplifying it to something less like a password, like RVV32I/RVV32F/RVV64I/RVV64F?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar
  2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar remi
@ 2022-09-26  6:53   ` Lynne
  2022-09-26  9:42     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 40+ messages in thread
From: Lynne @ 2022-09-26  6:53 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Sep 25, 2022, 16:25 by remi@remlab.net:

> From: Rémi Denis-Courmont <remi@remlab.net>
>
> ---
>  libavutil/riscv/float_dsp_init.c |  9 ++++++++-
>  libavutil/riscv/float_dsp_rvv.S  | 17 +++++++++++++++++
>  2 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
> index de567c50d2..b829c0f736 100644
> --- a/libavutil/riscv/float_dsp_init.c
> +++ b/libavutil/riscv/float_dsp_init.c
> @@ -28,12 +28,19 @@
>  void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
>  int len);
>  
> +void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
> +                                int len);
> +
>  av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
>  {
>  #if HAVE_RVV
>  int flags = av_get_cpu_flags();
>  
> -    if (flags & AV_CPU_FLAG_RV_ZVE32F)
> +    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
>  fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
> +
> +        if (flags & AV_CPU_FLAG_RV_ZVE64D)
> +            fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
> +    }
>

You don't need to put doubles in the same branch as floats,
it's just extra indentation as one implies the other anyway. 

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions
  2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
                   ` (30 preceding siblings ...)
  2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 31/31] lavc/aacpsdsp: RISC-V V stereo_interpolate[0] remi
@ 2022-09-26  7:05 ` Lynne
  2022-09-26 12:01   ` Rémi Denis-Courmont
  31 siblings, 1 reply; 40+ messages in thread
From: Lynne @ 2022-09-26  7:05 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Sep 25, 2022, 16:25 by remi@remlab.net:

> Hello,
>
> Changes since version version 5:
> - Use shifted-add instructions where applicable (pointer arithmetic) to
>  minimise scalar operations to the absolute minimum.
> - Add AAC PS DSP stereo interpolation [0].
>
> The following changes since commit 7cd252ee41ddc693fa140c5b5eb472b6d6f27f9e:
>
>  avcodec/parser: Remove declaration of inexistent function (2022-09-24 20:25:19 +0200)
>
> are available in the Git repository at:
>
>  https://git.remlab.net/git/ffmpeg.git rv-cpu
>
> for you to fetch changes up to 2f7afca1e160b3bb1bc761ac3eb5e08a0852120b:
>
>  lavc/aacpsdsp: RISC-V V stereo_interpolate[0] (2022-09-25 17:19:09 +0300)
>

Took a look at the patchset, apart from those 2 minor nits,
it looks good to me. I can fix the branching before pushing
if you think it'd be okay. As for the name, if they really have
no alternative names, I'd be fine with RV_Zve64x... hunter2.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension
  2022-09-26  6:51   ` Lynne
@ 2022-09-26  8:02     ` Andreas Rheinhardt
  2022-09-26  9:38     ` Rémi Denis-Courmont
  1 sibling, 0 replies; 40+ messages in thread
From: Andreas Rheinhardt @ 2022-09-26  8:02 UTC (permalink / raw)
  To: ffmpeg-devel

Lynne:
> Sep 25, 2022, 16:25 by remi@remlab.net:
> 
>> From: Rémi Denis-Courmont <remi@remlab.net>
>> -    if ((flags & AV_CPU_FLAG_RVD) && !(flags & AV_CPU_FLAG_RVF)) {
>> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE64X)) {
>> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
>> +               "_ZVE64X");
>> +        flags |= AV_CPU_FLAG_RV_ZVE64X;
>> +    }
>> +
>> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE32F)) {
>> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
>> +               "_ZVE32F");
>>
> 
> I remember someone complaining about NULL contexts in av_log (mkver?).
> I think it's okay, but I have no opinion on this.
> 

You are probably referring to Anton; I also dislike them, but not as
much as he. Anyway, the actual caller provides no logcontext, so it is
fine by me to use NULL. This does not mean that I would add these
av_logs myself.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension
  2022-09-26  6:51   ` Lynne
  2022-09-26  8:02     ` Andreas Rheinhardt
@ 2022-09-26  9:38     ` Rémi Denis-Courmont
  1 sibling, 0 replies; 40+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-26  9:38 UTC (permalink / raw)
  To: FFmpeg development discussions and patches, Lynne

Le 26 septembre 2022 09:51:43 GMT+03:00, Lynne <dev@lynne.ee> a écrit :
>Sep 25, 2022, 16:25 by remi@remlab.net:
>
>> From: Rémi Denis-Courmont <remi@remlab.net>
>> -    if ((flags & AV_CPU_FLAG_RVD) && !(flags & AV_CPU_FLAG_RVF)) {
>> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE64X)) {
>> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
>> +               "_ZVE64X");
>> +        flags |= AV_CPU_FLAG_RV_ZVE64X;
>> +    }
>> +
>> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE32F)) {
>> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
>> +               "_ZVE32F");
>>
>
>I remember someone complaining about NULL contexts in av_log (mkver?).
>I think it's okay, but I have no opinion on this.

I don't particularly like them either but there is nowhere to get the log context from, in this case. To fix this, I guess we would need to break the API and the ABI.

This is the same as the existing x86 code anyhow. Any solution should be common to both platforms.

>
>> +        flags |= AV_CPU_FLAG_RV_ZVE32F;
>> +    }
>> +
>> +    if ((flags & (AV_CPU_FLAG_RV_ZVE64X | AV_CPU_FLAG_RV_ZVE32F))
>> +        && !(flags & AV_CPU_FLAG_RV_ZVE32X)) {
>> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
>> +               "_ZVE32X");
>> +        flags |= AV_CPU_FLAG_RV_ZVE32X;
>> +    }
>> +
>> +    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RVD)) {
>> +        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "D");
>> +        flags |= AV_CPU_FLAG_RVD;
>> +    }
>> +
>> +    if ((flags & (AV_CPU_FLAG_RVD | AV_CPU_FLAG_RV_ZVE32F))
>> +        && !(flags & AV_CPU_FLAG_RVF)) {
>>  av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "F");
>>  flags |= AV_CPU_FLAG_RVF;
>>  }
>> @@ -50,6 +75,11 @@ int ff_get_cpu_flags_riscv(void)
>>  ret |= AV_CPU_FLAG_RVF;
>>  if (hwcap & HWCAP_RV('D'))
>>  ret |= AV_CPU_FLAG_RVD;
>> +
>> +    /* The V extension implies all Zve* functional subsets */
>> +    if (hwcap & HWCAP_RV('V'))
>> +        ret |= AV_CPU_FLAG_RV_ZVE32X | AV_CPU_FLAG_RV_ZVE64X
>> +             | AV_CPU_FLAG_RV_ZVE32F | AV_CPU_FLAG_RV_ZVE64D;
>>  #endif
>>  
>>  #ifdef __riscv_i
>> @@ -60,6 +90,20 @@ int ff_get_cpu_flags_riscv(void)
>>  #if (__riscv_flen >= 64)
>>  ret |= AV_CPU_FLAG_RVD;
>>  #endif
>> +#endif
>> +
>> +    /* If RV-V is enabled statically at compile-time, check the details. */
>> +#ifdef __riscv_vectors
>> +    ret |= AV_CPU_FLAG_RV_ZVE32X;
>> +#if __riscv_v_elen >= 64
>> +    ret |= AV_CPU_FLAG_RV_ZVE64X;
>> +#endif
>> +#if __riscv_v_elen_fp >= 32
>> +    ret |= AV_CPU_FLAG_RV_ZVE32F;
>> +#if __riscv_v_elen_fp >= 64
>> +    ret |= AV_CPU_FLAG_RV_ZVE64F;
>> +#endif
>> +#endif
>>  #endif
>>  
>>  return ret;
>> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
>> index e1135a84ac..f7d108e8ea 100644
>> --- a/tests/checkasm/checkasm.c
>> +++ b/tests/checkasm/checkasm.c
>> @@ -233,9 +233,13 @@ static const struct {
>>  { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
>>  { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
>>  #elif ARCH_RISCV
>> -    { "RVI",      "rvi",      AV_CPU_FLAG_RVI },
>> -    { "RVF",      "rvf",      AV_CPU_FLAG_RVF },
>> -    { "RVD",      "rvd",      AV_CPU_FLAG_RVD },
>> +    { "RVI",        "rvi",       AV_CPU_FLAG_RVI },
>> +    { "RVF",        "rvf",       AV_CPU_FLAG_RVF },
>> +    { "RVD",        "rvd",       AV_CPU_FLAG_RVD },
>> +    { "RV_Zve32x",  "rv_zve32x", AV_CPU_FLAG_RV_ZVE32X },
>> +    { "RV_Zve32f",  "rv_zve32f", AV_CPU_FLAG_RV_ZVE32F },
>> +    { "RV_Zve64x",  "rv_zve64x", AV_CPU_FLAG_RV_ZVE64X },
>> +    { "RV_Zve64d",  "rv_zve64d", AV_CPU_FLAG_RV_ZVE64D }, 
>>
>
>I get that this is the official name for the extension, but... what about
>simplifying it to something less like a password, like RVV32I/RVV32F/RVV64I/RVV64F?

There are 2 prefixes: Zve for vector element, and Zvl for vector bit length. If we drop the E of element, it gets confusing.

Maybe we could use RVV_{I,F}{32,64} if you want to drop the gratuitous Z... ?
Inline...
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar
  2022-09-26  6:53   ` Lynne
@ 2022-09-26  9:42     ` Rémi Denis-Courmont
  0 siblings, 0 replies; 40+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-26  9:42 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le 26 septembre 2022 09:53:19 GMT+03:00, Lynne <dev@lynne.ee> a écrit :
>Sep 25, 2022, 16:25 by remi@remlab.net:
>
>> From: Rémi Denis-Courmont <remi@remlab.net>
>>
>> ---
>>  libavutil/riscv/float_dsp_init.c |  9 ++++++++-
>>  libavutil/riscv/float_dsp_rvv.S  | 17 +++++++++++++++++
>>  2 files changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
>> index de567c50d2..b829c0f736 100644
>> --- a/libavutil/riscv/float_dsp_init.c
>> +++ b/libavutil/riscv/float_dsp_init.c
>> @@ -28,12 +28,19 @@
>>  void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
>>  int len);
>>  
>> +void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
>> +                                int len);
>> +
>>  av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
>>  {
>>  #if HAVE_RVV
>>  int flags = av_get_cpu_flags();
>>  
>> -    if (flags & AV_CPU_FLAG_RV_ZVE32F)
>> +    if (flags & AV_CPU_FLAG_RV_ZVE32F) {
>>  fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
>> +
>> +        if (flags & AV_CPU_FLAG_RV_ZVE64D)
>> +            fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
>> +    }
>>
>
>You don't need to put doubles in the same branch as floats,
>it's just extra indentation as one implies the other anyway. 

Well, the idea was to skip an useless check if Zve32f is unsupported. As this is a cold path, I don't really mind either way though.

Note the same construct is used elsewhere. On top of my head, audiodsp and aacpsdsp.

Thanks for the review.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions
  2022-09-26  7:05 ` [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Lynne
@ 2022-09-26 12:01   ` Rémi Denis-Courmont
  0 siblings, 0 replies; 40+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-26 12:01 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le 26 septembre 2022 10:05:23 GMT+03:00, Lynne <dev@lynne.ee> a écrit :
>Sep 25, 2022, 16:25 by remi@remlab.net:
>
>> Hello,
>>
>> Changes since version version 5:
>> - Use shifted-add instructions where applicable (pointer arithmetic) to
>>  minimise scalar operations to the absolute minimum.
>> - Add AAC PS DSP stereo interpolation [0].
>>
>> The following changes since commit 7cd252ee41ddc693fa140c5b5eb472b6d6f27f9e:
>>
>>  avcodec/parser: Remove declaration of inexistent function (2022-09-24 20:25:19 +0200)
>>
>> are available in the Git repository at:
>>
>>  https://git.remlab.net/git/ffmpeg.git rv-cpu
>>
>> for you to fetch changes up to 2f7afca1e160b3bb1bc761ac3eb5e08a0852120b:
>>
>>  lavc/aacpsdsp: RISC-V V stereo_interpolate[0] (2022-09-25 17:19:09 +0300)
>>
>
>Took a look at the patchset, apart from those 2 minor nits,
>it looks good to me. I can fix the branching before pushing
>if you think it'd be okay. As for the name, if they really have
>no alternative names, I'd be fine with RV_Zve64x... hunter2.

If we lift the nested CPU flags test up, then we can drop the whole forced flags thing, and moot the NULL logger argument.

Anyway, I think I can improve the AAC PS stereo interleave code, so I'll send another patchset version soonish.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [FFmpeg-devel] [PATCH 13/31] lavu/floatdsp: RISC-V V vector_dmac_scalar
  2022-09-26 14:52 [FFmpeg-devel] [PATCHv6 00/31] initial " Rémi Denis-Courmont
@ 2022-09-26 14:52 ` remi
  0 siblings, 0 replies; 40+ messages in thread
From: remi @ 2022-09-26 14:52 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavutil/riscv/float_dsp_init.c |  3 +++
 libavutil/riscv/float_dsp_rvv.S  | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 9e19413d5d..a559bbb32b 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -34,6 +34,8 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
 
 void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
                          int len);
+void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
+                                int len);
 void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
                                 int len);
 
@@ -50,6 +52,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
 
     if (flags & AV_CPU_FLAG_RVV_F64) {
         fdsp->vector_dmul = ff_vector_dmul_rvv;
+        fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
         fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
     }
 #endif
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 4c325db9fd..048ec0bc40 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -91,6 +91,24 @@ func ff_vector_dmul_rvv, zve64d
         ret
 endfunc
 
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_dmac_scalar_rvv, zve64d
+NOHWD   fmv.d.x   fa0, a2
+NOHWD   mv        a2, a3
+1:
+        vsetvli   t0, a2, e64, m1, ta, ma
+        vle64.v   v24, (a1)
+        sub       a2, a2, t0
+        vle64.v   v16, (a0)
+        sh3add    a1, t0, a1
+        vfmacc.vf v16, fa0, v24
+        vse64.v   v16, (a0)
+        sh3add    a0, t0, a0
+        bnez      a2, 1b
+
+        ret
+endfunc
+
 // (a0) = (a1) * fa0 [0..a2-1]
 func ff_vector_dmul_scalar_rvv, zve64d
 NOHWD   fmv.d.x  fa0, a2
-- 
2.37.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2022-09-26 14:54 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-25 14:25 [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Rémi Denis-Courmont
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 01/31] lavu/cpu: detect RISC-V base extensions remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 02/31] lavu/riscv: initial common header for assembler macros remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 03/31] lavc/audiodsp: RISC-V F vector_clipf remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 04/31] lavc/pixblockdsp: RISC-V I get_pixels remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 05/31] lavu/cpu: CPU flags for the RISC-V Vector extension remi
2022-09-26  6:51   ` Lynne
2022-09-26  8:02     ` Andreas Rheinhardt
2022-09-26  9:38     ` Rémi Denis-Courmont
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 06/31] configure: probe " remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 07/31] lavu/riscv: fallback macros for SH{1, 2, 3}ADD remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 08/31] lavu/floatdsp: RISC-V V vector_fmul_scalar remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 09/31] lavu/floatdsp: RISC-V V vector_dmul_scalar remi
2022-09-26  6:53   ` Lynne
2022-09-26  9:42     ` Rémi Denis-Courmont
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 10/31] lavu/floatdsp: RISC-V V vector_fmul remi
2022-09-25 14:25 ` [FFmpeg-devel] [PATCH 11/31] lavu/floatdsp: RISC-V V vector_dmul remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 12/31] lavu/floatdsp: RISC-V V vector_fmac_scalar remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 13/31] lavu/floatdsp: RISC-V V vector_dmac_scalar remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 14/31] lavu/floatdsp: RISC-V V vector_fmul_add remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 15/31] lavu/floatdsp: RISC-V V butterflies_float remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 16/31] lavu/floatdsp: RISC-V V vector_fmul_reverse remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 17/31] lavu/floatdsp: RISC-V V vector_fmul_window remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 18/31] lavu/floatdsp: RISC-V V scalarproduct_float remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 19/31] lavu/fixeddsp: RISC-V V butterflies_fixed remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 20/31] lavc/audiodsp: RISC-V V vector_clip_int32 remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 21/31] lavc/audiodsp: RISC-V V vector_clipf remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 22/31] lavc/audiodsp: RISC-V V scalarproduct_int16 remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 23/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_scalar remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 24/31] lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8 remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 25/31] lavc/vorbisdsp: RISC-V V inverse_coupling remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 26/31] lavc/aacpsdsp: RISC-V V add_squares remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 27/31] lavc/aacpsdsp: RISC-V V mul_pair_single remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 28/31] lavc/aacpsdsp: RISC-V V hybrid_analysis remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 29/31] lavc/aacpsdsp: RISC-V V hybrid_analysis_ileave remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 30/31] lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint remi
2022-09-25 14:26 ` [FFmpeg-devel] [PATCH 31/31] lavc/aacpsdsp: RISC-V V stereo_interpolate[0] remi
2022-09-26  7:05 ` [FFmpeg-devel] [PATCHv5 00/31] RISC-V CPU extensions Lynne
2022-09-26 12:01   ` Rémi Denis-Courmont
2022-09-26 14:52 [FFmpeg-devel] [PATCHv6 00/31] initial " Rémi Denis-Courmont
2022-09-26 14:52 ` [FFmpeg-devel] [PATCH 13/31] lavu/floatdsp: RISC-V V vector_dmac_scalar remi

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git