* [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-03 19:01 [FFmpeg-devel] [PATCHv1 0/3] Float DSP for RISC-V Vector extension - part I Rémi Denis-Courmont
@ 2022-09-03 19:01 ` remi
2022-09-03 19:05 ` Rémi Denis-Courmont
2022-09-03 19:20 ` Lynne
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 2/3] riscv: initial common header for assembler macros remi
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication remi
2 siblings, 2 replies; 15+ messages in thread
From: remi @ 2022-09-03 19:01 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
RVV defines a total of 12 different extensions: V, Zvl32b, Zvl64b,
Zvl128b, Zvl256b, Zvl512b, Zvl1024b, Zve32x, Zve32f, Zve64x, Zve64f and
Zve64d.
At this stage, we don't care about the vector length extensions Zvl*,
as most or all optimisations will be running in a loop that is
independent of the data set size.
Zve64f is equivalent to Zve32f plus Zve64x, so it is exposed as a
convenience flag, but not tracked internally. Likewise V is the
equivalent of Zve64d plus Zvl128b.
Technically, Zve32f and Zve64x are both implied by Zve64d and both
imply Zve32x, leaving only 5 possibilities (including no vector
support), but we keep 4 separate bits for easy run-time checks as on
other instruction set architectures.
---
libavutil/cpu.c | 14 ++++++++++
libavutil/cpu.h | 6 +++++
libavutil/cpu_internal.h | 1 +
libavutil/riscv/Makefile | 1 +
libavutil/riscv/cpu.c | 58 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 80 insertions(+)
create mode 100644 libavutil/riscv/Makefile
create mode 100644 libavutil/riscv/cpu.c
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 0035e927a5..83bf513cf2 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -62,6 +62,8 @@ static int get_cpu_flags(void)
return ff_get_cpu_flags_arm();
#elif ARCH_PPC
return ff_get_cpu_flags_ppc();
+#elif ARCH_RISCV
+ return ff_get_cpu_flags_riscv();
#elif ARCH_X86
return ff_get_cpu_flags_x86();
#elif ARCH_LOONGARCH
@@ -178,6 +180,18 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
#elif ARCH_LOONGARCH
{ "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" },
{ "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" },
+#elif ARCH_RISCV
+#define AV_CPU_FLAG_ZVE32F_M (AV_CPU_FLAG_ZVE32F | AV_CPU_FLAG_ZVE32X)
+#define AV_CPU_FLAG_ZVE64X_M (AV_CPU_FLAG_ZVE64X | AV_CPU_FLAG_ZVE32X)
+#define AV_CPU_FLAG_ZVE64D_M (AV_CPU_FLAG_ZVE64D | AV_CPU_FLAG_ZVE64F_M)
+#define AV_CPU_FLAG_ZVE64F_M (AV_CPU_FLAG_ZVE32F_M | AV_CPU_FLAG_ZVE64X_M)
+#define AV_CPU_FLAG_VECTORS AV_CPU_FLAG_ZVE64D_M
+ { "vectors", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VECTORS }, .unit = "flags" },
+ { "zve32x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32X }, .unit = "flags" },
+ { "zve32f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32F_M }, .unit = "flags" },
+ { "zve64x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64X_M }, .unit = "flags" },
+ { "zve64f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64F_M }, .unit = "flags" },
+ { "zve64d", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64D_M }, .unit = "flags" },
#endif
{ NULL },
};
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9711e574c5..44836e50d6 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -78,6 +78,12 @@
#define AV_CPU_FLAG_LSX (1 << 0)
#define AV_CPU_FLAG_LASX (1 << 1)
+// RISC-V Vector extension (one bit per Zve* functional subset)
+#define AV_CPU_FLAG_ZVE32X (1 << 0) /* Zve32x: vectors of 8-, 16-, 32-bit integers */
+#define AV_CPU_FLAG_ZVE32F (1 << 1) /* Zve32f: vectors of single-precision floats */
+#define AV_CPU_FLAG_ZVE64X (1 << 2) /* Zve64x: vectors of 64-bit integers */
+#define AV_CPU_FLAG_ZVE64D (1 << 3) /* Zve64d: vectors of double-precision floats */
+
/**
* Return the flags which specify extensions supported by the CPU.
* The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 650d47fc96..634f28bac4 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -48,6 +48,7 @@ int ff_get_cpu_flags_mips(void);
int ff_get_cpu_flags_aarch64(void);
int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_riscv(void);
int ff_get_cpu_flags_x86(void);
int ff_get_cpu_flags_loongarch(void);
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
new file mode 100644
index 0000000000..1f818043dc
--- /dev/null
+++ b/libavutil/riscv/Makefile
@@ -0,0 +1 @@
+OBJS += riscv/cpu.o
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
new file mode 100644
index 0000000000..96726f2f85
--- /dev/null
+++ b/libavutil/riscv/cpu.c
@@ -0,0 +1,58 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"
+
+#if HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+
+#define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
+#define ZVE_UP_TO(cap) ((2 * (cap)) - 1)
+
+/* Return the AV_CPU_FLAG_ZVE* mask from compile-time macros and AT_HWCAP. */
+int ff_get_cpu_flags_riscv(void)
+{
+    int ret = 0;
+    /* Compile-time baseline (__riscv_vector is set for V and any Zve*). */
+#ifdef __riscv_vector
+    ret |= AV_CPU_FLAG_ZVE32X;
+#if __riscv_v_elen >= 64
+    ret |= AV_CPU_FLAG_ZVE64X;
+#endif
+#if __riscv_v_elen_fp >= 32
+    ret |= AV_CPU_FLAG_ZVE32F;
+#endif
+#if __riscv_v_elen_fp >= 64
+    ret |= AV_CPU_FLAG_ZVE64D; /* 64-bit FP elements mean Zve64d */
+#endif
+#endif
+
+#if HAVE_GETAUXVAL
+    const unsigned long hwcap = getauxval(AT_HWCAP);
+
+    /* Run-time detection: the full V extension implies every Zve* subset. */
+    if (hwcap & HWCAP_RV('V'))
+        ret |= AV_CPU_FLAG_ZVE32X | AV_CPU_FLAG_ZVE64X
+             | AV_CPU_FLAG_ZVE32F | AV_CPU_FLAG_ZVE64D;
+#endif
+
+    return ret;
+}
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension remi
@ 2022-09-03 19:05 ` Rémi Denis-Courmont
2022-09-03 19:20 ` Lynne
1 sibling, 0 replies; 15+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-03 19:05 UTC (permalink / raw)
To: ffmpeg-devel
Le lauantaina 3. syyskuuta 2022, 22.01.45 EEST remi@remlab.net a écrit :
> +#define ZVE_UP_TO(cap) ((2 * (cap)) - 1)
Stray code. Ignore.
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension remi
2022-09-03 19:05 ` Rémi Denis-Courmont
@ 2022-09-03 19:20 ` Lynne
2022-09-03 19:59 ` Rémi Denis-Courmont
1 sibling, 1 reply; 15+ messages in thread
From: Lynne @ 2022-09-03 19:20 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Sep 3, 2022, 21:01 by remi@remlab.net:
> From: Rémi Denis-Courmont <remi@remlab.net>
>
> RVV defines a total of 12 different extensions: V, Zvl32b, Zvl64b,
> Zvl128b, Zvl256b, Zvl512b, Zvl1024b, Zve32x, Zve32f, Zve64x, Zve64f and
> Zve64d.
>
> At this stage, we don't care about the vector length extensions Zvl*,
> as most or all optimisations will be running in a loop that is
> independent on the data set size.
>
I need to know the maximum length to write an FFT.
Could you add flags for it? I don't mind a 5-bit bitfield for a log2 of it,
or one flag per length (up to 65536).
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-03 19:20 ` Lynne
@ 2022-09-03 19:59 ` Rémi Denis-Courmont
2022-09-03 21:38 ` Lynne
0 siblings, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-03 19:59 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le lauantaina 3. syyskuuta 2022, 22.20.20 EEST Lynne a écrit :
> Sep 3, 2022, 21:01 by remi@remlab.net:
> > From: Rémi Denis-Courmont <remi@remlab.net>
> >
> > RVV defines a total of 12 different extensions: V, Zvl32b, Zvl64b,
> > Zvl128b, Zvl256b, Zvl512b, Zvl1024b, Zve32x, Zve32f, Zve64x, Zve64f and
> > Zve64d.
> >
> > At this stage, we don't care about the vector length extensions Zvl*,
> > as most or all optimisations will be running in a loop that is
> > independent on the data set size.
>
> I need to know the maximum length to write an FFT.
> Could you add flags for it?
I think we should cross that bridge if/when the need actually arises. In most
cases, the vector length returned at run-time from VSETVL is good enough.
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-03 19:59 ` Rémi Denis-Courmont
@ 2022-09-03 21:38 ` Lynne
2022-09-04 5:41 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: Lynne @ 2022-09-03 21:38 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Sep 3, 2022, 21:59 by remi@remlab.net:
> Le lauantaina 3. syyskuuta 2022, 22.20.20 EEST Lynne a écrit :
>
>> Sep 3, 2022, 21:01 by remi@remlab.net:
>> > From: Rémi Denis-Courmont <remi@remlab.net>
>> >
>> > RVV defines a total of 12 different extensions: V, Zvl32b, Zvl64b,
>> > Zvl128b, Zvl256b, Zvl512b, Zvl1024b, Zve32x, Zve32f, Zve64x, Zve64f and
>> > Zve64d.
>> >
>> > At this stage, we don't care about the vector length extensions Zvl*,
>> > as most or all optimisations will be running in a loop that is
>> > independent on the data set size.
>>
>> I need to know the maximum length to write an FFT.
>> Could you add flags for it?
>>
>
> I think we should cross that bridge if/when the need actually arises. In most
> cases, the vector length returned at run-time from VSETVL is good enough.
>
I need to know the length in C, not assembly. Whilst you're at adding
initial support, I think it makes sense to support all code that's targeting
RISC-V, not just the ones it's convenient to. I'll probably write the FFT
as soon as I get access to a real machine.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-03 21:38 ` Lynne
@ 2022-09-04 5:41 ` Rémi Denis-Courmont
2022-09-04 6:39 ` Lynne
0 siblings, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-04 5:41 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le sunnuntaina 4. syyskuuta 2022, 0.38.32 EEST Lynne a écrit :
> I need to know the length in C, not assembly.
There may be some corner cases where that makes sense, but typically it
doesn't. Even if you're dealing in fixed-size macro blocks, you should leverage
the larger vectors to unroll and process multiple macro blocks in parallel.
And besides, how do you want to get the value if not with assembler? This is
currently not found in ELF HWCAP and probably never will be.
So the only way to find out in pure C is in the embedded case, by checking out
the __riscv_zvlXXXb preprocessor predefined constants. But that only tells what
is the guaranteed minimum vector size for the compile-time target.
Outside of embedded world, that's currently always undefined because everybody
uses RVA20 as the baseline, which does not require vector support. Going
forward, RVA22 will require 128 bits, but that says nothing of what the run-
time CPU can actually do.
> I think it makes sense to support all code that's targetting RISC-V, not
> just the ones it's convenient to.
I disagree. There are currently no means to negotiate a vector length with the
OS, so that seems highly premature. And even if there was such a mechanism,
it's simply much faster to call VSETVL in an inline assembler macro where
needed than to compute the whole set of CPU flags.
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-04 5:41 ` Rémi Denis-Courmont
@ 2022-09-04 6:39 ` Lynne
2022-09-04 8:27 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: Lynne @ 2022-09-04 6:39 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Sep 4, 2022, 07:41 by remi@remlab.net:
> Le sunnuntaina 4. syyskuuta 2022, 0.38.32 EEST Lynne a écrit :
>
>> I need to know the length in C, not assembly.
>>
>
> There may be some corner cases where that makes sense, but typically it
> doesn't. Even if you're dealing in fixed-size macro blocks, you should leverage
> the larger vectors to unroll and process multiple macro blocks in parallel.
>
Some aspects of a split-radix FFT work better if you know how
much you could fit into a register upfront. In particular, doing
the tail, which consists of 2 equal length transforms. On AVX
we interleave the coefficients from 2x4pt transforms during
lookups since we can do them simultaneously and save on
shuffles. Doing them individually wouldn't be as efficient.
Since interleaving is done during the permute step, we have
to know from C how much to interleave.
Of course if you switched away from a split-radix algorithm (X+X/2+X/2),
you could have a very simple 100-line FFT if you had arbitrarily
long vectors (or the pretense of such), but if you didn't have
the hardware to back that up, the penalty for using a suboptimal
algorithm wouldn't be worth it.
> And besides, how do you want to get the value if not with assembler? This is
> currently not found in ELF HWCAP and probably never will be.
>
Sucks, knowing how wide the units are is as important as
knowing how much L1 cache you have for me.
> I disagree. There are currently no means to negotiate a vector length with the
> OS, so that seems highly premature. And even if there was such a mechanism,
> it's simply much faster to call VSETVL in an inline assembler macro where
> needed than to compute the whole set of CPU flags.
>
Guess that's what I'll have to do. In due time anyway, who knows how many years it'll be until
a cheap enough device appears with vector support that
doesn't merely do what SVE2 devices did by reusing old NEON
unit designs.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension
2022-09-04 6:39 ` Lynne
@ 2022-09-04 8:27 ` Rémi Denis-Courmont
0 siblings, 0 replies; 15+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-04 8:27 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le sunnuntaina 4. syyskuuta 2022, 9.39.36 EEST Lynne a écrit :
> In particular, doing the tail, which consists of 2 equal length transforms.
> On AVX we interleave the coefficients from 2x4pt transforms during
> lookups since we can do them simultaneously and save on
> shuffles. Doing them individually wouldn't be as efficient.
I'm not going to boldly state that one size fits all, because I am pretty sure
that it would come back to bite me in soft and sensitive tissue. But unlike
SIMD extensions, RISC-V V and ARM SVE favour the use of offsets and masks to
deal with misaligned edges, so I'm not sure how useful the insights from AVX
are.
> > And besides, how do you want to get the value if not with assembler? This
> > is currently not found in ELF HWCAP and probably never will be.
> Sucks, knowing how wide the units are is as important as
> knowing how much L1 cache you have for me.
I understand that for some multidimensional calculations, you need to make
special cases. The obvious case would be if the vector is too short to fit a
column or row of elements whilst performing a transposition.
But even then, and even if we end up later on with, say, an arch_prctl() call
to find the vector size, I don't think exposing it in CPU flags would be a good
idea. VSETVL & VSETIVLI also account for the element size and the vector group
multiplier, so it seems better to use either of them than to reimplement the
same logic in C based on the raw vector bit length.
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 2/3] riscv: initial common header for assembler macros
2022-09-03 19:01 [FFmpeg-devel] [PATCHv1 0/3] Float DSP for RISC-V Vector extension - part I Rémi Denis-Courmont
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension remi
@ 2022-09-03 19:01 ` remi
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication remi
2 siblings, 0 replies; 15+ messages in thread
From: remi @ 2022-09-03 19:01 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libavutil/riscv/asm.h | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
create mode 100644 libavutil/riscv/asm.h
diff --git a/libavutil/riscv/asm.h b/libavutil/riscv/asm.h
new file mode 100644
index 0000000000..31001b8bdb
--- /dev/null
+++ b/libavutil/riscv/asm.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+ .macro func sym /* start a global function: emits its label and defines endfunc */
+ .text
+ .align 2 /* NOTE(review): presumably a power-of-two alignment, i.e. 4 bytes - confirm */
+
+ .global \sym
+ .type \sym, %function
+ \sym:
+
+ .macro endfunc /* defined inside func so that .size can name the current symbol */
+ .size \sym, . - \sym
+ .purgem endfunc /* drop this definition so the next func can define it again */
+ .endm
+ .endm
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication
2022-09-03 19:01 [FFmpeg-devel] [PATCHv1 0/3] Float DSP for RISC-V Vector extension - part I Rémi Denis-Courmont
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 1/3] riscv: add CPU flags for the RISC-V Vector extension remi
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 2/3] riscv: initial common header for assembler macros remi
@ 2022-09-03 19:01 ` remi
2022-09-03 19:11 ` Lynne
2 siblings, 1 reply; 15+ messages in thread
From: remi @ 2022-09-03 19:01 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
This is based on existing code from the VLC git tree, though the size
and scalar arguments are swapped.
---
libavutil/float_dsp.c | 2 ++
libavutil/float_dsp.h | 1 +
libavutil/riscv/Makefile | 4 ++-
libavutil/riscv/float_dsp_init.c | 42 ++++++++++++++++++++++
libavutil/riscv/float_dsp_rvv.S | 60 ++++++++++++++++++++++++++++++++
5 files changed, 108 insertions(+), 1 deletion(-)
create mode 100644 libavutil/riscv/float_dsp_init.c
create mode 100644 libavutil/riscv/float_dsp_rvv.S
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 8676c8b0f8..742dd679d2 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -156,6 +156,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
ff_float_dsp_init_arm(fdsp);
#elif ARCH_PPC
ff_float_dsp_init_ppc(fdsp, bit_exact);
+#elif ARCH_RISCV
+ ff_float_dsp_init_riscv(fdsp);
#elif ARCH_X86
ff_float_dsp_init_x86(fdsp);
#elif ARCH_MIPS
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 9c664592bd..7cad9fc622 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -205,6 +205,7 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
+void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
index 1f818043dc..6bf8243e8d 100644
--- a/libavutil/riscv/Makefile
+++ b/libavutil/riscv/Makefile
@@ -1 +1,3 @@
-OBJS += riscv/cpu.o
+OBJS += riscv/cpu.o \
+ riscv/float_dsp_init.o \
+ riscv/float_dsp_rvv.o
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
new file mode 100644
index 0000000000..9a5b981917
--- /dev/null
+++ b/libavutil/riscv/float_dsp_init.c
@@ -0,0 +1,42 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/float_dsp.h"
+
+void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
+ int len);
+
+void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
+ int len);
+
+/* Hook up the RVV float DSP routines that the run-time CPU flags permit. */
+av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
+{
+    const int cpu_flags = av_get_cpu_flags();
+
+    if (!(cpu_flags & AV_CPU_FLAG_ZVE32F))
+        return; /* every routine below needs single-precision vectors */
+    fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
+    /* The double-precision variant additionally requires Zve64d. */
+    if (cpu_flags & AV_CPU_FLAG_ZVE64D)
+        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+}
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
new file mode 100644
index 0000000000..54ea1d9d6d
--- /dev/null
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -0,0 +1,60 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.h"
+
+ .option arch, +v
+
+/* dst (a0)[i] = src (a1)[i] * mul for i in [0, len). len counts floats, not */
+/* bytes, so it is handed to vsetvli unscaled (no right shift). */
+func ff_vector_fmul_scalar_rvv
+#if defined (__riscv_float_abi_soft)
+        fmv.w.x fa0, a2 /* soft-float ABI: mul arrives in a2, len in a3 */
+        mv      a2, a3
+#endif
+1:      vsetvli t0, a2, e32, m8, ta, ma /* t0 = elements handled this pass */
+        slli    t1, t0, 2 /* t1 = bytes handled this pass */
+        vle32.v v16, (a1)
+        add     a1, a1, t1
+        vfmul.vf v16, v16, fa0
+        sub     a2, a2, t0
+        vse32.v v16, (a0)
+        add     a0, a0, t1
+        bnez    a2, 1b
+        ret
+endfunc
+
+/* dst (a0)[i] = src (a1)[i] * mul for i in [0, len). len counts doubles, not */
+/* bytes, so it is handed to vsetvli unscaled (no right shift). */
+func ff_vector_dmul_scalar_rvv
+#if defined (__riscv_float_abi_soft) || defined (__riscv_float_abi_single)
+        fmv.d.x fa0, a2 /* no double FP argument register: mul in a2, len in a3 */
+        mv      a2, a3
+#endif
+1:      vsetvli t0, a2, e64, m8, ta, ma /* t0 = elements handled this pass */
+        slli    t1, t0, 3 /* t1 = bytes handled this pass */
+        vle64.v v16, (a1)
+        add     a1, a1, t1
+        vfmul.vf v16, v16, fa0
+        sub     a2, a2, t0
+        vse64.v v16, (a0)
+        add     a0, a0, t1
+        bnez    a2, 1b
+        ret
+endfunc
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication
2022-09-03 19:01 ` [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication remi
@ 2022-09-03 19:11 ` Lynne
2022-09-03 19:34 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: Lynne @ 2022-09-03 19:11 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Sep 3, 2022, 21:01 by remi@remlab.net:
> From: Rémi Denis-Courmont <remi@remlab.net>
>
> This is based on existing code from the VLC git tree, though the size
> and scalar arguments are swapped.
> ---
> libavutil/float_dsp.c | 2 ++
> libavutil/float_dsp.h | 1 +
> libavutil/riscv/Makefile | 4 ++-
> libavutil/riscv/float_dsp_init.c | 42 ++++++++++++++++++++++
> libavutil/riscv/float_dsp_rvv.S | 60 ++++++++++++++++++++++++++++++++
> 5 files changed, 108 insertions(+), 1 deletion(-)
> create mode 100644 libavutil/riscv/float_dsp_init.c
> create mode 100644 libavutil/riscv/float_dsp_rvv.S
>
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index 8676c8b0f8..742dd679d2 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -156,6 +156,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
> ff_float_dsp_init_arm(fdsp);
> #elif ARCH_PPC
> ff_float_dsp_init_ppc(fdsp, bit_exact);
> +#elif ARCH_RISCV
> + ff_float_dsp_init_riscv(fdsp);
> #elif ARCH_X86
> ff_float_dsp_init_x86(fdsp);
> #elif ARCH_MIPS
> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
> index 9c664592bd..7cad9fc622 100644
> --- a/libavutil/float_dsp.h
> +++ b/libavutil/float_dsp.h
> @@ -205,6 +205,7 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
> void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
> +void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
>
> diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
> index 1f818043dc..6bf8243e8d 100644
> --- a/libavutil/riscv/Makefile
> +++ b/libavutil/riscv/Makefile
> @@ -1 +1,3 @@
> -OBJS += riscv/cpu.o
> +OBJS += riscv/cpu.o \
> + riscv/float_dsp_init.o \
> + riscv/float_dsp_rvv.o
> diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
> new file mode 100644
> index 0000000000..9a5b981917
> --- /dev/null
> +++ b/libavutil/riscv/float_dsp_init.c
> @@ -0,0 +1,42 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +
> +#include "libavutil/attributes.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/float_dsp.h"
> +
> +void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
> + int len);
> +
> +void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
> + int len);
> +
> +av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
> +{
> + int flags = av_get_cpu_flags();
> +
> + if (flags & AV_CPU_FLAG_ZVE32F) {
> + fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
> +
> + if (flags & AV_CPU_FLAG_ZVE64D) {
> + fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
> + }
>
Style.
> + }
> +}
> diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
> new file mode 100644
> index 0000000000..54ea1d9d6d
> --- /dev/null
> +++ b/libavutil/riscv/float_dsp_rvv.S
> @@ -0,0 +1,60 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "config.h"
> +#include "asm.h"
> +
> + .option arch, +v
> +
> +func ff_vector_fmul_scalar_rvv
> +#if !defined (__riscv_float_abi_soft)
> + srli a2, a2, 2
> +#else
> + fmv.w.x fa0, a2
> + srli a2, a3, 2
> +#endif
>
Can't this be handled by a macro, like it's done by arm64 and x86?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication
2022-09-03 19:11 ` Lynne
@ 2022-09-03 19:34 ` Rémi Denis-Courmont
2022-09-03 19:48 ` Lynne
0 siblings, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-03 19:34 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le lauantaina 3. syyskuuta 2022, 22.11.26 EEST Lynne a écrit :
> > diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
> > new file mode 100644
> > index 0000000000..54ea1d9d6d
> > --- /dev/null
> > +++ b/libavutil/riscv/float_dsp_rvv.S
> > @@ -0,0 +1,60 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#include "config.h"
> > +#include "asm.h"
> > +
> > + .option arch, +v
> > +
> > +func ff_vector_fmul_scalar_rvv
> > +#if !defined (__riscv_float_abi_soft)
> > + srli a2, a2, 2
> > +#else
> > + fmv.w.x fa0, a2
> > + srli a2, a3, 2
> > +#endif
>
> Can't this be handled by a macro, like it's done by arm64 and x86?
Err, from a quick glance, the float DSP code for AArch64 just assumes a
hardware floating ABI, and has no conditionals, so I'm not sure what you mean
by that. Do you mean something like VFP/NOVFP on AArch32?
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication
2022-09-03 19:34 ` Rémi Denis-Courmont
@ 2022-09-03 19:48 ` Lynne
2022-09-03 20:01 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: Lynne @ 2022-09-03 19:48 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Sep 3, 2022, 21:34 by remi@remlab.net:
> Le lauantaina 3. syyskuuta 2022, 22.11.26 EEST Lynne a écrit :
>
>> > diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
>> > new file mode 100644
>> > index 0000000000..54ea1d9d6d
>> > --- /dev/null
>> > +++ b/libavutil/riscv/float_dsp_rvv.S
>> > @@ -0,0 +1,60 @@
>> > +/*
>> > + * This file is part of FFmpeg.
>> > + *
>> > + * FFmpeg is free software; you can redistribute it and/or
>> > + * modify it under the terms of the GNU Lesser General Public
>> > + * License as published by the Free Software Foundation; either
>> > + * version 2.1 of the License, or (at your option) any later version.
>> > + *
>> > + * FFmpeg is distributed in the hope that it will be useful,
>> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> > + * Lesser General Public License for more details.
>> > + *
>> > + * You should have received a copy of the GNU Lesser General Public
>> > + * License along with FFmpeg; if not, write to the Free Software
>> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> > + */
>> > +
>> > +#include "config.h"
>> > +#include "asm.h"
>> > +
>> > + .option arch, +v
>> > +
>> > +func ff_vector_fmul_scalar_rvv
>> > +#if !defined (__riscv_float_abi_soft)
>> > + srli a2, a2, 2
>> > +#else
>> > + fmv.w.x fa0, a2
>> > + srli a2, a3, 2
>> > +#endif
>>
>> Can't this be handled by a macro, like it's done by arm64 and x86?
>>
>
> Err, from a quick glance, the float DSP code for AArch64 just assumes a
> hardware floating ABI, and has no conditionals, so I'm not sure what you mean
> by that. Do you mean something like VFP/NOVFP on AArch32?
>
I meant all ABI stuff to be handled by macros, either `func` or the instructions
themselves.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] riscv: add float vector-scalar multiplication
2022-09-03 19:48 ` Lynne
@ 2022-09-03 20:01 ` Rémi Denis-Courmont
0 siblings, 0 replies; 15+ messages in thread
From: Rémi Denis-Courmont @ 2022-09-03 20:01 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le lauantaina 3. syyskuuta 2022, 22.48.45 EEST Lynne a écrit :
> Sep 3, 2022, 21:34 by remi@remlab.net:
> > Le lauantaina 3. syyskuuta 2022, 22.11.26 EEST Lynne a écrit :
> >> > diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
> >> > new file mode 100644
> >> > index 0000000000..54ea1d9d6d
> >> > --- /dev/null
> >> > +++ b/libavutil/riscv/float_dsp_rvv.S
> >> > @@ -0,0 +1,60 @@
> >> > +/*
> >> > + * This file is part of FFmpeg.
> >> > + *
> >> > + * FFmpeg is free software; you can redistribute it and/or
> >> > + * modify it under the terms of the GNU Lesser General Public
> >> > + * License as published by the Free Software Foundation; either
> >> > + * version 2.1 of the License, or (at your option) any later version.
> >> > + *
> >> > + * FFmpeg is distributed in the hope that it will be useful,
> >> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> >> > + * Lesser General Public License for more details.
> >> > + *
> >> > + * You should have received a copy of the GNU Lesser General Public
> >> > + * License along with FFmpeg; if not, write to the Free Software
> >> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >> > + */
> >> > +
> >> > +#include "config.h"
> >> > +#include "asm.h"
> >> > +
> >> > + .option arch, +v
> >> > +
> >> > +func ff_vector_fmul_scalar_rvv
> >> > +#if !defined (__riscv_float_abi_soft)
> >> > + srli a2, a2, 2
> >> > +#else
> >> > + fmv.w.x fa0, a2
> >> > + srli a2, a3, 2
> >> > +#endif
> >>
> >> Can't this be handled by a macro, like it's done by arm64 and x86?
> >
> > Err, from a quick glance, the float DSP code for AArch64 just assumes a
> > hardware floating ABI, and has no conditionals, so I'm not sure what you
> > mean by that. Do you mean something like VFP/NOVFP on AArch32?
>
> I meant all ABI stuff to be handled by macros, either `func` or the
> instructions themselves.
I don't think that's possible here, at least not with reasonably low
complexity. AArch32 has the same challenge and the func/endfunc macros don't
handle it.
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread