* [FFmpeg-devel] [PATCH 01/12] lavu/riscv: add CPU flags for the RISC-V Vector extension
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 02/12] checkasm: register the RISC-V V subsets remi
` (10 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
RVV defines a total of 12 different extensions, including:
- 5 different instruction subsets:
- Zve32x: 8-, 16- and 32-bit integers,
- Zve32f: Zve32x plus single precision floats,
- Zve64x: Zve32x plus 64-bit integers,
- Zve64f: Zve32f plus Zve64x,
- Zve64d: Zve64f plus double precision floats.
- 6 different vector lengths:
- Zvl32b (embedded only),
- Zvl64b (embedded only),
- Zvl128b,
- Zvl256b,
- Zvl512b,
- Zvl1024b,
- and the V extension proper: equivalent to Zve64d and Zvl128b.
In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each element type: up-to-32-bit ints (ZVE32X), floats (ZVE32F),
64-bit ints (ZVE64X) and doubles (ZVE64D).
Whenever the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
---
libavutil/cpu.c | 15 +++++++++++
libavutil/cpu.h | 6 +++++
libavutil/cpu_internal.h | 1 +
libavutil/riscv/Makefile | 1 +
libavutil/riscv/cpu.c | 57 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 80 insertions(+)
create mode 100644 libavutil/riscv/Makefile
create mode 100644 libavutil/riscv/cpu.c
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 0035e927a5..89d2fb6f56 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -62,6 +62,8 @@ static int get_cpu_flags(void)
return ff_get_cpu_flags_arm();
#elif ARCH_PPC
return ff_get_cpu_flags_ppc();
+#elif ARCH_RISCV
+ return ff_get_cpu_flags_riscv();
#elif ARCH_X86
return ff_get_cpu_flags_x86();
#elif ARCH_LOONGARCH
@@ -178,6 +180,19 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
#elif ARCH_LOONGARCH
{ "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" },
{ "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" },
+#elif ARCH_RISCV
+#define AV_CPU_FLAG_ZVE32X_M (AV_CPU_FLAG_ZVE32X)
+#define AV_CPU_FLAG_ZVE32F_M (AV_CPU_FLAG_ZVE32X_M | AV_CPU_FLAG_ZVE32F)
+#define AV_CPU_FLAG_ZVE64X_M (AV_CPU_FLAG_ZVE32X_M | AV_CPU_FLAG_ZVE64X)
+#define AV_CPU_FLAG_ZVE64F_M (AV_CPU_FLAG_ZVE32F_M | AV_CPU_FLAG_ZVE64X)
+#define AV_CPU_FLAG_ZVE64D_M (AV_CPU_FLAG_ZVE64F_M | AV_CPU_FLAG_ZVE64D)
+#define AV_CPU_FLAG_VECTORS AV_CPU_FLAG_ZVE64D_M
+ { "vectors", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VECTORS }, .unit = "flags" },
+ { "zve32x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32X }, .unit = "flags" },
+ { "zve32f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32F_M }, .unit = "flags" },
+ { "zve64x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64X_M }, .unit = "flags" },
+ { "zve64f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64F_M }, .unit = "flags" },
+ { "zve64d", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64D_M }, .unit = "flags" },
#endif
{ NULL },
};
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9711e574c5..44836e50d6 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -78,6 +78,12 @@
#define AV_CPU_FLAG_LSX (1 << 0)
#define AV_CPU_FLAG_LASX (1 << 1)
+// RISC-V Vector extension: one bit per supported vector element type
+#define AV_CPU_FLAG_ZVE32X (1 << 0) /* vectors of 8-, 16-, 32-bit integers */
+#define AV_CPU_FLAG_ZVE32F (1 << 1) /* vectors of single precision floats */
+#define AV_CPU_FLAG_ZVE64X (1 << 2) /* vectors of 64-bit integers */
+#define AV_CPU_FLAG_ZVE64D (1 << 3) /* vectors of double precision floats */
+
/**
* Return the flags which specify extensions supported by the CPU.
* The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 650d47fc96..634f28bac4 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -48,6 +48,7 @@ int ff_get_cpu_flags_mips(void);
int ff_get_cpu_flags_aarch64(void);
int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_riscv(void);
int ff_get_cpu_flags_x86(void);
int ff_get_cpu_flags_loongarch(void);
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
new file mode 100644
index 0000000000..1f818043dc
--- /dev/null
+++ b/libavutil/riscv/Makefile
@@ -0,0 +1 @@
+OBJS += riscv/cpu.o
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
new file mode 100644
index 0000000000..9e4cce5e8b
--- /dev/null
+++ b/libavutil/riscv/cpu.c
@@ -0,0 +1,57 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"
+
+#if HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+
+#define HWCAP_RV(letter) (1ul << ((letter) - 'A')) /* AT_HWCAP mask of a single-letter extension: 'A' is bit 0, 'V' is bit 21 */
+
+/* Detect the RVV subsets (AV_CPU_FLAG_ZVE*) at run time (HWCAP) and from compiler macros. */
+int ff_get_cpu_flags_riscv(void)
+{
+    int ret = 0;
+#if HAVE_GETAUXVAL
+    const unsigned long hwcap = getauxval(AT_HWCAP); /* declared before any statement */
+
+    /* The V extension implies all subsets */
+    if (hwcap & HWCAP_RV('V'))
+        ret |= AV_CPU_FLAG_ZVE32X | AV_CPU_FLAG_ZVE64X
+             | AV_CPU_FLAG_ZVE32F | AV_CPU_FLAG_ZVE64D;
+#endif
+
+    /* If RV-V is enabled statically at compile-time, check the details. */
+#ifdef __riscv_vector /* macro name from the RISC-V C API (no trailing 's') */
+    ret |= AV_CPU_FLAG_ZVE32X;
+#if __riscv_v_elen >= 64
+    ret |= AV_CPU_FLAG_ZVE64X;
+#endif
+#if __riscv_v_elen_fp >= 32
+    ret |= AV_CPU_FLAG_ZVE32F;
+#if __riscv_v_elen_fp >= 64
+    ret |= AV_CPU_FLAG_ZVE64D; /* 64-bit FP elements, i.e. doubles */
+#endif
+#endif
+#endif
+
+    return ret;
+}
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 02/12] checkasm: register the RISC-V V subsets
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 01/12] lavu/riscv: add CPU flags for the RISC-V Vector extension remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 03/12] lavu/riscv: initial common header for assembler macros remi
` (9 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
tests/checkasm/checkasm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e56fd3850e..a5d0503811 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -226,6 +226,11 @@ static const struct {
{ "ALTIVEC", "altivec", AV_CPU_FLAG_ALTIVEC },
{ "VSX", "vsx", AV_CPU_FLAG_VSX },
{ "POWER8", "power8", AV_CPU_FLAG_POWER8 },
+#elif ARCH_RISCV
+ { "Zve32x", "zve32x", AV_CPU_FLAG_ZVE32X },
+ { "Zve32f", "zve32f", AV_CPU_FLAG_ZVE32F },
+ { "Zve64x", "zve64x", AV_CPU_FLAG_ZVE64X },
+ { "Zve64d", "zve64d", AV_CPU_FLAG_ZVE64D },
#elif ARCH_MIPS
{ "MMI", "mmi", AV_CPU_FLAG_MMI },
{ "MSA", "msa", AV_CPU_FLAG_MSA },
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 03/12] lavu/riscv: initial common header for assembler macros
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 01/12] lavu/riscv: add CPU flags for the RISC-V Vector extension remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 02/12] checkasm: register the RISC-V V subsets remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 04/12] lavu/riscv: float vector-scalar multiplication with RVV remi
` (8 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/asm.S | 74 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 libavutil/riscv/asm.S
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
new file mode 100644
index 0000000000..7623c161cf
--- /dev/null
+++ b/libavutil/riscv/asm.S
@@ -0,0 +1,74 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#if defined (__riscv_float_abi_soft) /* soft-float ABI: NOHWF/NOHWD lines are assembled; HWF/HWD expand to '#', turning their line into an assembler comment */
+#define NOHWF
+#define NOHWD
+#define HWF #
+#define HWD #
+#elif defined (__riscv_float_abi_single) /* single-float ABI: only HWF and NOHWD lines are assembled */
+#define NOHWF #
+#define NOHWD
+#define HWF
+#define HWD #
+#else /* any other ABI (presumably double-float): only HWF and HWD lines are assembled */
+#define NOHWF #
+#define NOHWD #
+#define HWF
+#define HWD
+#endif
+
+ .macro func sym, ext= /* open a global, hidden function; ext optionally names an ISA extension to enable */
+ .text
+ .align 2 /* power-of-two alignment: 4 bytes */
+
+ .option push /* save assembler options; endfunc restores them */
+ .ifnb \ext
+ .option arch, +\ext /* enable the extension for this function only */
+ .endif
+
+ .global \sym
+ .hidden \sym /* hidden ELF visibility */
+ .type \sym, %function
+ \sym:
+
+ .macro endfunc /* defined per function so that it can refer to the symbol name */
+ .size \sym, . - \sym /* record the size of the function */
+ .option pop /* undo the .option push above */
+ .previous /* back to the section that was active before .text */
+ .purgem endfunc /* drop this definition so the next func can define it again */
+ .endm
+ .endm
+
+ .macro const sym, align=3, relocate=0 /* open a read-only data object; align is a power of two (default 8 bytes) */
+ .if \relocate
+ .pushsection .data.rel.ro /* presumably for constants that need load-time relocation; confirm with callers */
+ .else
+ .pushsection .rodata
+ .endif
+ .align \align
+ \sym:
+
+ .macro endconst /* defined per object so that it can refer to the symbol name */
+ .size \sym, . - \sym /* record the size of the object */
+ .popsection /* back to the section saved by .pushsection */
+ .purgem endconst /* drop this definition so the next const can define it again */
+ .endm
+ .endm
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 04/12] lavu/riscv: float vector-scalar multiplication with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (2 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 03/12] lavu/riscv: initial common header for assembler macros remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 05/12] lavu/riscv: float vector-vector " remi
` (7 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
This is based on existing code from the VLC git tree with two minor
changes to account for the different function prototypes.
---
libavutil/float_dsp.c | 2 ++
libavutil/float_dsp.h | 1 +
libavutil/riscv/Makefile | 4 ++-
libavutil/riscv/float_dsp_init.c | 41 +++++++++++++++++++++++
libavutil/riscv/float_dsp_rvv.S | 56 ++++++++++++++++++++++++++++++++
5 files changed, 103 insertions(+), 1 deletion(-)
create mode 100644 libavutil/riscv/float_dsp_init.c
create mode 100644 libavutil/riscv/float_dsp_rvv.S
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 8676c8b0f8..742dd679d2 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -156,6 +156,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
ff_float_dsp_init_arm(fdsp);
#elif ARCH_PPC
ff_float_dsp_init_ppc(fdsp, bit_exact);
+#elif ARCH_RISCV
+ ff_float_dsp_init_riscv(fdsp);
#elif ARCH_X86
ff_float_dsp_init_x86(fdsp);
#elif ARCH_MIPS
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 9c664592bd..7cad9fc622 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -205,6 +205,7 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
+void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 1f818043dc..6bf8243e8d 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1 +1,3 @@
-OBJS += riscv/cpu.o
+OBJS += riscv/cpu.o \
+ riscv/float_dsp_init.o \
+ riscv/float_dsp_rvv.o
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
new file mode 100644
index 0000000000..279412c036
--- /dev/null
+++ b/libavutil/riscv/float_dsp_init.c
@@ -0,0 +1,41 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/float_dsp.h"
+
+void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
+ int len);
+
+void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
+ int len);
+
+av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp) /* install the RVV routines permitted by av_get_cpu_flags(); double routines also require ZVE64D */
+{
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_ZVE32F) {
+ fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+
+ if (flags & AV_CPU_FLAG_ZVE64D)
+ fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+ }
+}
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
new file mode 100644
index 0000000000..365e00190c
--- /dev/null
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -0,0 +1,56 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+// (a0) = (a1) * fa0 [0..a2-1]
+func ff_vector_fmul_scalar_rvv, zve32f
+NOHWF fmv.w.x fa0, a2
+NOHWF mv a2, a3
+
+1: vsetvli t0, a2, e32, m8, ta, ma // t0 = 32-bit elements handled this pass (at most a2)
+ slli t1, t0, 2 // t1 = t0 * 4: bytes handled this pass
+ vle32.v v16, (a1) // load the source elements
+ add a1, a1, t1 // advance the source pointer
+ vfmul.vf v16, v16, fa0 // multiply every element by the scalar
+ sub a2, a2, t0 // elements left to process
+ vse32.v v16, (a0) // store the products
+ add a0, a0, t1 // advance the destination pointer
+ bnez a2, 1b // loop until nothing is left
+
+ ret
+endfunc
+
+// (a0) = (a1) * fa0 [0..a2-1]
+func ff_vector_dmul_scalar_rvv, zve64d
+NOHWD fmv.d.x fa0, a2
+NOHWD mv a2, a3
+
+1: vsetvli t0, a2, e64, m8, ta, ma // t0 = 64-bit elements handled this pass (at most a2)
+ slli t1, t0, 3 // t1 = t0 * 8: bytes handled this pass
+ vle64.v v16, (a1) // load the source elements
+ add a1, a1, t1 // advance the source pointer
+ vfmul.vf v16, v16, fa0 // multiply every element by the scalar
+ sub a2, a2, t0 // elements left to process
+ vse64.v v16, (a0) // store the products
+ add a0, a0, t1 // advance the destination pointer
+ bnez a2, 1b // loop until nothing is left
+
+ ret
+endfunc
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 05/12] lavu/riscv: float vector-vector multiplication with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (3 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 04/12] lavu/riscv: float vector-scalar multiplication with RVV remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 06/12] lavu/riscv: float vector multiply-accumulate " remi
` (6 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 9 ++++++++-
libavutil/riscv/float_dsp_rvv.S | 34 ++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 279412c036..4135284c76 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -22,9 +22,13 @@
#include "libavutil/cpu.h"
#include "libavutil/float_dsp.h"
+void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
+ int len);
void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len);
+void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
+ int len);
void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
int len);
@@ -33,9 +37,12 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
int flags = av_get_cpu_flags();
if (flags & AV_CPU_FLAG_ZVE32F) {
+ fdsp->vector_fmul = ff_vector_fmul_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
- if (flags & AV_CPU_FLAG_ZVE64D)
+ if (flags & AV_CPU_FLAG_ZVE64D) {
+ fdsp->vector_dmul = ff_vector_dmul_rvv;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+ }
}
}
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 365e00190c..65c3a77b01 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -19,6 +19,23 @@
#include "config.h"
#include "asm.S"
+// (a0) = (a1) * (a2) [0..a3-1]
+func ff_vector_fmul_rvv, zve32f
+1: vsetvli t0, a3, e32, m8, ta, ma
+ slli t1, t0, 2 // t0 = elements taken by vsetvli this pass; t1 = t0 * 4 bytes
+ vle32.v v16, (a1) // load the first factors
+ add a1, a1, t1 // advance the first source pointer
+ vle32.v v24, (a2) // load the second factors
+ add a2, a2, t1 // advance the second source pointer
+ vfmul.vv v16, v16, v24 // element-wise product
+ sub a3, a3, t0 // elements left to process
+ vse32.v v16, (a0) // store the products
+ add a0, a0, t1 // advance the destination pointer
+ bnez a3, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a1) * fa0 [0..a2-1]
func ff_vector_fmul_scalar_rvv, zve32f
NOHWF fmv.w.x fa0, a2
@@ -37,6 +54,23 @@ NOHWF mv a2, a3
ret
endfunc
+// (a0) = (a1) * (a2) [0..a3-1]
+func ff_vector_dmul_rvv, zve64d
+1: vsetvli t0, a3, e64, m8, ta, ma
+ slli t1, t0, 3 // t0 = elements taken by vsetvli this pass; t1 = t0 * 8 bytes
+ vle64.v v16, (a1) // load the first factors
+ add a1, a1, t1 // advance the first source pointer
+ vle64.v v24, (a2) // load the second factors
+ add a2, a2, t1 // advance the second source pointer
+ vfmul.vv v16, v16, v24 // element-wise product
+ sub a3, a3, t0 // elements left to process
+ vse64.v v16, (a0) // store the products
+ add a0, a0, t1 // advance the destination pointer
+ bnez a3, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a1) * fa0 [0..a2-1]
func ff_vector_dmul_scalar_rvv, zve64d
NOHWD fmv.d.x fa0, a2
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 06/12] lavu/riscv: float vector multiply-accumulate with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (4 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 05/12] lavu/riscv: float vector-vector " remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 07/12] lavu/riscv: float vector multiplication-addition " remi
` (5 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 6 +++++
libavutil/riscv/float_dsp_rvv.S | 38 ++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 4135284c76..a1bb112ec7 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -24,11 +24,15 @@
void ff_vector_fmul_rvv(float *dst, const float *src0, const float *src1,
int len);
+void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
+ int len);
void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len);
void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
int len);
+void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
+ int len);
void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
int len);
@@ -38,10 +42,12 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
if (flags & AV_CPU_FLAG_ZVE32F) {
fdsp->vector_fmul = ff_vector_fmul_rvv;
+ fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
if (flags & AV_CPU_FLAG_ZVE64D) {
fdsp->vector_dmul = ff_vector_dmul_rvv;
+ fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
}
}
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 65c3a77b01..5a7d92abd6 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -36,6 +36,25 @@ func ff_vector_fmul_rvv, zve32f
ret
endfunc
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_fmac_scalar_rvv, zve32f
+NOHWF fmv.w.x fa0, a2
+NOHWF mv a2, a3
+
+1: vsetvli t0, a2, e32, m8, ta, ma // t0 = 32-bit elements handled this pass (at most a2)
+ slli t1, t0, 2 // t1 = t0 * 4: bytes handled this pass
+ vle32.v v24, (a1) // load the source elements
+ add a1, a1, t1 // advance the source pointer
+ vle32.v v16, (a0) // load the current destination values (the accumulator)
+ vfmacc.vf v16, fa0, v24 // v16 += fa0 * v24
+ sub a2, a2, t0 // elements left to process
+ vse32.v v16, (a0) // write the accumulated values back
+ add a0, a0, t1 // advance the destination pointer
+ bnez a2, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a1) * fa0 [0..a2-1]
func ff_vector_fmul_scalar_rvv, zve32f
NOHWF fmv.w.x fa0, a2
@@ -71,6 +90,25 @@ func ff_vector_dmul_rvv, zve64d
ret
endfunc
+// (a0) += (a1) * fa0 [0..a2-1]
+func ff_vector_dmac_scalar_rvv, zve64d
+NOHWD fmv.d.x fa0, a2
+NOHWD mv a2, a3
+
+1: vsetvli t0, a2, e64, m8, ta, ma // t0 = 64-bit elements handled this pass (at most a2)
+ slli t1, t0, 3 // t1 = t0 * 8: bytes handled this pass
+ vle64.v v24, (a1) // load the source elements
+ add a1, a1, t1 // advance the source pointer
+ vle64.v v16, (a0) // load the current destination values (the accumulator)
+ vfmacc.vf v16, fa0, v24 // v16 += fa0 * v24
+ sub a2, a2, t0 // elements left to process
+ vse64.v v16, (a0) // write the accumulated values back
+ add a0, a0, t1 // advance the destination pointer
+ bnez a2, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a1) * fa0 [0..a2-1]
func ff_vector_dmul_scalar_rvv, zve64d
NOHWD fmv.d.x fa0, a2
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 07/12] lavu/riscv: float vector multiplication-addition with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (5 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 06/12] lavu/riscv: float vector multiply-accumulate " remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 08/12] lavu/riscv: float vector sum-and-difference " remi
` (4 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 3 +++
libavutil/riscv/float_dsp_rvv.S | 19 +++++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index a1bb112ec7..8539fe9ac5 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -28,6 +28,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len);
+void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
+ const float *src2, int len);
void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
int len);
@@ -44,6 +46,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmul = ff_vector_fmul_rvv;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+ fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
if (flags & AV_CPU_FLAG_ZVE64D) {
fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 5a7d92abd6..efbf12179f 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -73,6 +73,25 @@ NOHWF mv a2, a3
ret
endfunc
+// (a0) = (a1) * (a2) + (a3) [0..a4-1]
+func ff_vector_fmul_add_rvv, zve32f
+1: vsetvli t0, a4, e32, m8, ta, ma
+ slli t1, t0, 2 // t0 = elements taken by vsetvli this pass; t1 = t0 * 4 bytes
+ vle32.v v8, (a1) // load the first factors
+ add a1, a1, t1 // advance the first source pointer
+ vle32.v v16, (a2) // load the second factors
+ add a2, a2, t1 // advance the second source pointer
+ vle32.v v24, (a3) // load the addends
+ add a3, a3, t1 // advance the addend pointer
+ vfmadd.vv v8, v16, v24 // v8 = v16 * v8 + v24 (the destination register is a multiplicand)
+ sub a4, a4, t0 // elements left to process
+ vse32.v v8, (a0) // store the results
+ add a0, a0, t1 // advance the destination pointer
+ bnez a4, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_dmul_rvv, zve64d
1: vsetvli t0, a3, e64, m8, ta, ma
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 08/12] lavu/riscv: float vector sum-and-difference with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (6 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 07/12] lavu/riscv: float vector multiplication-addition " remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 09/12] lavu/riscv: float reversed vector multiplication " remi
` (3 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 2 ++
libavutil/riscv/float_dsp_rvv.S | 18 ++++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 8539fe9ac5..2165394585 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -30,6 +30,7 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
const float *src2, int len);
+void ff_butterflies_float_rvv(float *v1, float *v2, int len);
void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
int len);
@@ -47,6 +48,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
+ fdsp->butterflies_float = ff_butterflies_float_rvv;
if (flags & AV_CPU_FLAG_ZVE64D) {
fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index efbf12179f..1c3b08b94f 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -92,6 +92,24 @@ func ff_vector_fmul_add_rvv, zve32f
ret
endfunc
+// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
+func ff_butterflies_float_rvv, zve32f
+1: vsetvli t0, a2, e32, m8, ta, ma
+ slli t1, t0, 2 // t0 = elements taken by vsetvli this pass; t1 = t0 * 4 bytes
+ vle32.v v16, (a0) // load the first vector
+ vle32.v v24, (a1) // load the second vector
+ vfadd.vv v0, v16, v24 // sums; both results use the values loaded above
+ vfsub.vv v8, v16, v24 // differences (first minus second)
+ sub a2, a2, t0 // elements left to process
+ vse32.v v0, (a0) // sums replace the first vector
+ add a0, a0, t1 // advance the first pointer
+ vse32.v v8, (a1) // differences replace the second vector
+ add a1, a1, t1 // advance the second pointer
+ bnez a2, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_dmul_rvv, zve64d
1: vsetvli t0, a3, e64, m8, ta, ma
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 09/12] lavu/riscv: float reversed vector multiplication with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (7 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 08/12] lavu/riscv: float vector sum-and-difference " remi
@ 2022-09-06 18:43 ` remi
2022-09-06 18:44 ` [FFmpeg-devel] [PATCH 10/12] lavu/riscv: float vector windowed overlap/add " remi
` (2 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:43 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 3 +++
libavutil/riscv/float_dsp_rvv.S | 22 ++++++++++++++++++++++
2 files changed, 25 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 2165394585..1183460181 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -30,6 +30,8 @@ void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
const float *src2, int len);
+void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
+ const float *src1, int len);
void ff_butterflies_float_rvv(float *v1, float *v2, int len);
void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
@@ -48,6 +50,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
+ fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
fdsp->butterflies_float = ff_butterflies_float_rvv;
if (flags & AV_CPU_FLAG_ZVE64D) {
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 1c3b08b94f..b376392294 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -92,6 +92,28 @@ func ff_vector_fmul_add_rvv, zve32f
ret
endfunc
+// (a0) = (a1) * reverse(a2) [0..a3-1]
+func ff_vector_fmul_reverse_rvv, zve32f
+ add t3, a3, -1
+ li t2, -4 // byte stride
+ slli t3, t3, 2 // t3 = (a3 - 1) * 4: byte offset of the last element
+ add a2, a2, t3 // a2 now points at the last element of the reversed input
+
+1: vsetvli t0, a3, e32, m8, ta, ma // t0 = 32-bit elements handled this pass (at most a3)
+ slli t1, t0, 2 // t1 = t0 * 4: bytes handled this pass
+ vle32.v v16, (a1) // load the forward factors
+ add a1, a1, t1 // advance the forward source pointer
+ vlse32.v v24, (a2), t2 // strided load with stride -4: reads the other input backwards
+ sub a2, a2, t1 // move the backward pointer down by the bytes consumed
+ vfmul.vv v16, v16, v24 // element-wise product
+ sub a3, a3, t0 // elements left to process
+ vse32.v v16, (a0) // store the products
+ add a0, a0, t1 // advance the destination pointer
+ bnez a3, 1b // loop until nothing is left
+
+ ret
+endfunc
+
// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
func ff_butterflies_float_rvv, zve32f
1: vsetvli t0, a2, e32, m8, ta, ma
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 10/12] lavu/riscv: float vector windowed overlap/add with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (8 preceding siblings ...)
2022-09-06 18:43 ` [FFmpeg-devel] [PATCH 09/12] lavu/riscv: float reversed vector multiplication " remi
@ 2022-09-06 18:44 ` remi
2022-09-06 18:44 ` [FFmpeg-devel] [PATCH 11/12] lavu/riscv: float vector dot product " remi
2022-09-06 18:44 ` [FFmpeg-devel] [PATCH 12/12] lavu/riscv: fixed vector sum-and-difference " remi
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:44 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 3 +++
libavutil/riscv/float_dsp_rvv.S | 35 ++++++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 1183460181..887706d899 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -28,6 +28,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
int len);
void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len);
+void ff_vector_fmul_window_rvv(float *dst, const float *src0,
+ const float *src1, const float *win, int len);
void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
const float *src2, int len);
void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
@@ -49,6 +51,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmul = ff_vector_fmul_rvv;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+ fdsp->vector_fmul_window = ff_vector_fmul_window_rvv;
fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
fdsp->butterflies_float = ff_butterflies_float_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index b376392294..65daaa2d27 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -73,6 +73,41 @@ NOHWF mv a2, a3
ret
endfunc
+func ff_vector_fmul_window_rvv, zve32f
+ // a0: dst, a1: src0, a2: src1, a3: window, a4: length (dst and window are indexed up to 2 * length - 1)
+ addi t0, a4, -1 // t0 = length - 1
+ add t1, t0, a4 // t1 = 2 * length - 1
+ slli t0, t0, 2 // t0 = byte offset of element length - 1
+ slli t1, t1, 2 // t1 = byte offset of element 2 * length - 1
+ add a2, a2, t0 // a2 -> src1[length - 1], read downwards
+ add t0, a0, t1 // t0 -> dst[2 * length - 1], written downwards
+ add t3, a3, t1 // t3 -> window[2 * length - 1], read downwards
+ li t1, -4 // byte stride: step back one 32-bit element per lane
+
+1: vsetvli t2, a4, e32, m4, ta, ma // t2 = number of elements handled in this pass
+ slli t4, t2, 2 // t4 = t2 * 4 = bytes handled in this pass
+ vle32.v v16, (a1) // v16 = src0, ascending
+ add a1, a1, t4
+ vlse32.v v20, (a2), t1 // v20 = src1, descending
+ sub a2, a2, t4
+ vle32.v v24, (a3) // v24 = window, ascending from its start
+ add a3, a3, t4
+ vlse32.v v28, (t3), t1 // v28 = window, descending from its end
+ sub t3, t3, t4
+ vfmul.vv v0, v16, v28 // v0 = v16 * v28
+ sub a4, a4, t2 // elements still to do
+ vfmul.vv v8, v16, v24 // v8 = v16 * v24
+ vfnmsac.vv v0, v20, v24 // v0 = v0 - v20 * v24
+ vfmacc.vv v8, v20, v28 // v8 = v8 + v20 * v28
+ vse32.v v0, (a0) // ascending store at the front of dst
+ add a0, a0, t4
+ vsse32.v v8, (t0), t1 // descending store at the back of dst
+ sub t0, t0, t4
+ bnez a4, 1b // loop until every element is processed
+
+ ret
+endfunc
+
// (a0) = (a1) * (a2) + (a3) [0..a4-1]
func ff_vector_fmul_add_rvv, zve32f
1: vsetvli t0, a4, e32, m8, ta, ma
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 11/12] lavu/riscv: float vector dot product with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (9 preceding siblings ...)
2022-09-06 18:44 ` [FFmpeg-devel] [PATCH 10/12] lavu/riscv: float vector windowed overlap/add " remi
@ 2022-09-06 18:44 ` remi
2022-09-06 18:44 ` [FFmpeg-devel] [PATCH 12/12] lavu/riscv: fixed vector sum-and-difference " remi
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:44 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/float_dsp_init.c | 2 ++
libavutil/riscv/float_dsp_rvv.S | 21 +++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 887706d899..7c2fc10e99 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -35,6 +35,7 @@ void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
const float *src1, int len);
void ff_butterflies_float_rvv(float *v1, float *v2, int len);
+float ff_scalarproduct_float_rvv(const float *v1, const float *v2, int len);
void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
int len);
@@ -55,6 +56,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
fdsp->butterflies_float = ff_butterflies_float_rvv;
+ fdsp->scalarproduct_float = ff_scalarproduct_float_rvv;
if (flags & AV_CPU_FLAG_ZVE64D) {
fdsp->vector_dmul = ff_vector_dmul_rvv;
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 65daaa2d27..81bd0e510a 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -167,6 +167,27 @@ func ff_butterflies_float_rvv, zve32f
ret
endfunc
+// fa0 = (a0).(a1) [0..a2-1], i.e. the dot product of two float arrays of a2 elements (result also copied to a0 on the NOHWF path)
+func ff_scalarproduct_float_rvv, zve32f
+ vsetivli zero, 1, e32, m1, ta, ma // force vl = 1: vl/vtype on entry are unspecified, so "vsetvli zero, zero" could leave vl = 0
+ vmv.s.x v8, zero // v8[0] = 0 (bit pattern of +0.0f), the running sum
+
+1: vsetvli t0, a2, e32, m8, ta, ma // t0 = number of elements handled in this pass
+ slli t1, t0, 2 // t1 = t0 * 4 = bytes handled in this pass
+ vle32.v v16, (a0) // load a slice of the first array
+ add a0, a0, t1
+ vle32.v v24, (a1) // load a slice of the second array
+ add a1, a1, t1
+ vfmul.vv v16, v16, v24 // element-wise products
+ sub a2, a2, t0 // elements still to do
+ vfredusum.vs v8, v16, v8 // v8[0] += sum of the products (unordered reduction)
+ bnez a2, 1b // loop until every element is processed
+
+ vfmv.f.s fa0, v8 // move the scalar result to the float return register
+NOHWF fmv.x.w a0, fa0 // NOTE(review): NOHWF is defined outside this patch; presumably emits this only for soft-float ABIs
+ ret
+endfunc
+
// (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_dmul_rvv, zve64d
1: vsetvli t0, a3, e64, m8, ta, ma
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 12/12] lavu/riscv: fixed vector sum-and-difference with RVV
2022-09-06 18:43 [FFmpeg-devel] [PATCHv3 0/12] RISC-V Vector functions for lavu float&fixed DSP Rémi Denis-Courmont
` (10 preceding siblings ...)
2022-09-06 18:44 ` [FFmpeg-devel] [PATCH 11/12] lavu/riscv: float vector dot product " remi
@ 2022-09-06 18:44 ` remi
11 siblings, 0 replies; 13+ messages in thread
From: remi @ 2022-09-06 18:44 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/fixed_dsp.c | 4 +++-
libavutil/fixed_dsp.h | 1 +
libavutil/riscv/Makefile | 2 ++
libavutil/riscv/fixed_dsp_init.c | 33 +++++++++++++++++++++++++++
libavutil/riscv/fixed_dsp_rvv.S | 38 ++++++++++++++++++++++++++++++++
5 files changed, 77 insertions(+), 1 deletion(-)
create mode 100644 libavutil/riscv/fixed_dsp_init.c
create mode 100644 libavutil/riscv/fixed_dsp_rvv.S
diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c
index 154f3bc2d3..bc847949dc 100644
--- a/libavutil/fixed_dsp.c
+++ b/libavutil/fixed_dsp.c
@@ -162,7 +162,9 @@ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact)
fdsp->butterflies_fixed = butterflies_fixed_c;
fdsp->scalarproduct_fixed = scalarproduct_fixed_c;
-#if ARCH_X86
+#if ARCH_RISCV
+ ff_fixed_dsp_init_riscv(fdsp);
+#elif ARCH_X86
ff_fixed_dsp_init_x86(fdsp);
#endif
diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
index fec806ff2d..1217d3a53b 100644
--- a/libavutil/fixed_dsp.h
+++ b/libavutil/fixed_dsp.h
@@ -161,6 +161,7 @@ typedef struct AVFixedDSPContext {
*/
AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict);
+void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp);
void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp);
/**
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 6bf8243e8d..0f2fcbd41d 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1,3 +1,5 @@
OBJS += riscv/cpu.o \
+ riscv/fixed_dsp_init.o \
+ riscv/fixed_dsp_rvv.o \
riscv/float_dsp_init.o \
riscv/float_dsp_rvv.o
diff --git a/libavutil/riscv/fixed_dsp_init.c b/libavutil/riscv/fixed_dsp_init.c
new file mode 100644
index 0000000000..08d4c4d9a7
--- /dev/null
+++ b/libavutil/riscv/fixed_dsp_init.c
@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/fixed_dsp.h"
+
+void ff_butterflies_fixed_rvv(int *v1, int *v2, int len);
+
+av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
+{
+    /* Install the RVV butterflies only when the CPU reports the ZVE32X flag. */
+    const int cpu_flags = av_get_cpu_flags();
+    if (cpu_flags & AV_CPU_FLAG_ZVE32X)
+        fdsp->butterflies_fixed = ff_butterflies_fixed_rvv;
+}
diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S
new file mode 100644
index 0000000000..beb1b949f7
--- /dev/null
+++ b/libavutil/riscv/fixed_dsp_rvv.S
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] (in-place sum/difference of two 32-bit integer arrays)
+func ff_butterflies_fixed_rvv, zve32x
+1: vsetvli t0, a2, e32, m8, ta, ma // t0 = number of elements handled in this pass
+ slli t1, t0, 2 // t1 = t0 * 4 = bytes handled in this pass
+ vle32.v v16, (a0) // load a slice of the first array
+ vle32.v v24, (a1) // load a slice of the second array
+ vadd.vv v0, v16, v24 // v0 = first + second
+ vsub.vv v8, v16, v24 // v8 = first - second, computed from the original values
+ sub a2, a2, t0 // elements still to do
+ vse32.v v0, (a0) // sums overwrite the first array
+ add a0, a0, t1
+ vse32.v v8, (a1) // differences overwrite the second array
+ add a1, a1, t1
+ bnez a2, 1b // loop until every element is processed
+
+ ret
+endfunc
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 13+ messages in thread