* [FFmpeg-devel] [PATCH 1/4] lavc/opusdsp: RISC-V V (128-bit) postfilter
2022-10-05 16:12 [FFmpeg-devel] [PATCHv2 0/4] RISC-V V Opus postfilter Rémi Denis-Courmont
@ 2022-10-05 16:12 ` Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 2/4] lavu/riscv: helper macro for VTYPE encoding Rémi Denis-Courmont
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Rémi Denis-Courmont @ 2022-10-05 16:12 UTC (permalink / raw)
To: ffmpeg-devel
This is implemented for a vector size of 128-bit. Since the scalar
product in the inner loop covers 5 samples or 160 bits, we need a group
multiplier of 2.
To avoid reconfiguring the vector type, the outer loop, which loads
multiple input samples, sticks to the same multiplier. Consequently, the
outer loop loads 8 samples per iteration. This is safe since the minimum
period of the CELT codec is 15 samples.
The same code would also work, albeit needlessly inefficiently with a
vector length of 256 bits. A proper implementation will follow instead.
---
libavcodec/opusdsp.c | 2 ++
libavcodec/opusdsp.h | 1 +
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/opusdsp_init.c | 42 ++++++++++++++++++++++++
libavcodec/riscv/opusdsp_rvv.S | 57 +++++++++++++++++++++++++++++++++
5 files changed, 104 insertions(+)
create mode 100644 libavcodec/riscv/opusdsp_init.c
create mode 100644 libavcodec/riscv/opusdsp_rvv.S
diff --git a/libavcodec/opusdsp.c b/libavcodec/opusdsp.c
index badcfcc884..0764d712e4 100644
--- a/libavcodec/opusdsp.c
+++ b/libavcodec/opusdsp.c
@@ -58,6 +58,8 @@ av_cold void ff_opus_dsp_init(OpusDSP *ctx)
#if ARCH_AARCH64
ff_opus_dsp_init_aarch64(ctx);
+#elif ARCH_RISCV
+ ff_opus_dsp_init_riscv(ctx);
#elif ARCH_X86
ff_opus_dsp_init_x86(ctx);
#endif
diff --git a/libavcodec/opusdsp.h b/libavcodec/opusdsp.h
index 3ea3d14bf0..c2a301e832 100644
--- a/libavcodec/opusdsp.h
+++ b/libavcodec/opusdsp.h
@@ -30,5 +30,6 @@ void ff_opus_dsp_init(OpusDSP *ctx);
void ff_opus_dsp_init_x86(OpusDSP *ctx);
void ff_opus_dsp_init_aarch64(OpusDSP *ctx);
+void ff_opus_dsp_init_riscv(OpusDSP *ctx);
#endif /* AVCODEC_OPUSDSP_H */
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index eae87ea231..965942f4df 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -12,6 +12,8 @@ OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_init.o
RVV-OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
RVV-OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_rvv.o
+OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_init.o
+RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
new file mode 100644
index 0000000000..f1d2c871e3
--- /dev/null
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/opusdsp.h"
+
+void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len);
+
+av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_F32)
+ switch (ff_get_rv_vlenb()) {
+ case 16:
+ d->postfilter = ff_opus_postfilter_rvv_128;
+ break;
+ }
+#endif
+}
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
new file mode 100644
index 0000000000..79b46696cd
--- /dev/null
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_opus_postfilter_rvv_128, zve32f
+ addi a1, a1, 2
+ slli a1, a1, 2
+ lw t1, 4(a2)
+ vsetivli zero, 3, e32, m1, ta, ma
+ vle32.v v24, (a2)
+ sub a1, a0, a1 // a1 = &x4 = &data[-(period + 2)]
+ vsetivli zero, 5, e32, m2, ta, ma
+ vslide1up.vx v8, v24, t1
+ lw t2, 8(a2)
+ vle32.v v16, (a1)
+ vslide1up.vx v24, v8, t2 // v24 = { g[2], g[1], g[0], g[1], g[2] }
+2:
+ vsetvli t0, a3, e32, m2, ta, ma
+ vle32.v v0, (a0)
+ sub a3, a3, t0
+3:
+ vsetivli zero, 5, e32, m2, ta, ma
+ lw t2, 20(a1)
+ vfmul.vv v8, v24, v16
+ addi a0, a0, 4
+ vslide1down.vx v16, v16, t2
+ addi a1, a1, 4
+ vfredusum.vs v0, v8, v0
+ vsetvli zero, t0, e32, m2, ta, ma
+ vmv.x.s t1, v0
+ addi t0, t0, -1
+ vslide1down.vx v0, v0, zero
+ sw t1, -4(a0)
+ bnez t0, 3b
+
+ bnez a3, 2b
+
+ ret
+endfunc
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 2/4] lavu/riscv: helper macro for VTYPE encoding
2022-10-05 16:12 [FFmpeg-devel] [PATCHv2 0/4] RISC-V V Opus postfilter Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 1/4] lavc/opusdsp: RISC-V V (128-bit) postfilter Rémi Denis-Courmont
@ 2022-10-05 16:12 ` Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 3/4] lavc/opusdsp: RISC-V V (256-bit) postfilter Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 4/4] lavc/opusdsp: RISC-V V (512-bit) postfilter Rémi Denis-Courmont
3 siblings, 0 replies; 5+ messages in thread
From: Rémi Denis-Courmont @ 2022-10-05 16:12 UTC (permalink / raw)
To: ffmpeg-devel
In most cases, the vector type (VTYPE) for the RISC-V Vector extension
is supplied as an immediate value, with either of the VSETVLI or
VSETIVLI instructions. There is however a third instruction VSETVL
which takes the vector type from a general purpose register. That is so
the type can be selected at run-time.
This introduces a macro to load a (valid) vector type into a register.
The syntax follows that of VSETVLI and VSETIVLI, with element size,
group multiplier, then tail and mask policies.
---
libavutil/riscv/asm.S | 75 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index ffa0bd9068..6ca74f263a 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -92,3 +92,78 @@
shnadd 3, \rd, \rs1, \rs2
.endm
#endif
+
+ /* Convenience macro to load a Vector type (vtype) as immediate */
+ .macro lvtypei rd, e, m=m1, tp=tu, mp=mu
+
+ .ifc \e,e8
+ .equ ei, 0
+ .else
+ .ifc \e,e16
+ .equ ei, 8
+ .else
+ .ifc \e,e32
+ .equ ei, 16
+ .else
+ .ifc \e,e64
+ .equ ei, 24
+ .else
+ .error "Unknown element type"
+ .endif
+ .endif
+ .endif
+ .endif
+
+ .ifc \m,m1
+ .equ mi, 0
+ .else
+ .ifc \m,m2
+ .equ mi, 1
+ .else
+ .ifc \m,m4
+ .equ mi, 2
+ .else
+ .ifc \m,m8
+ .equ mi, 3
+ .else
+ .ifc \m,mf8
+ .equ mi, 5
+ .else
+ .ifc \m,mf4
+ .equ mi, 6
+ .else
+ .ifc \m,mf2
+ .equ mi, 7
+ .else
+ .error "Unknown multiplier"
+ .equ mi, 3
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+ .endif
+
+ .ifc \tp,tu
+ .equ tpi, 0
+ .else
+ .ifc \tp,ta
+ .equ tpi, 64
+ .else
+ .error "Unknown tail policy"
+ .endif
+ .endif
+
+ .ifc \mp,mu
+ .equ mpi, 0
+ .else
+ .ifc \mp,ma
+ .equ mpi, 128
+ .else
+ .error "Unknown mask policy"
+ .endif
+ .endif
+
+ li \rd, (ei | mi | tpi | mpi)
+ .endm
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 3/4] lavc/opusdsp: RISC-V V (256-bit) postfilter
2022-10-05 16:12 [FFmpeg-devel] [PATCHv2 0/4] RISC-V V Opus postfilter Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 1/4] lavc/opusdsp: RISC-V V (128-bit) postfilter Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 2/4] lavu/riscv: helper macro for VTYPE encoding Rémi Denis-Courmont
@ 2022-10-05 16:12 ` Rémi Denis-Courmont
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 4/4] lavc/opusdsp: RISC-V V (512-bit) postfilter Rémi Denis-Courmont
3 siblings, 0 replies; 5+ messages in thread
From: Rémi Denis-Courmont @ 2022-10-05 16:12 UTC (permalink / raw)
To: ffmpeg-devel
This adds a variant of the postfilter for use with 256-bit vectors.
As a single vector is then large enough to perform the scalar product,
the group multiplier is reduced to just one at run-time.
The different vector type is passed via register. Unfortunately,
there is no VSETIVL instruction, so the constant vector size (5) also
needs to be passed via a register.
---
libavcodec/riscv/opusdsp_init.c | 4 ++++
libavcodec/riscv/opusdsp_rvv.S | 16 ++++++++++++----
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
index f1d2c871e3..e6f9505f77 100644
--- a/libavcodec/riscv/opusdsp_init.c
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -26,6 +26,7 @@
#include "libavcodec/opusdsp.h"
void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len);
+void ff_opus_postfilter_rvv_256(float *data, int period, float *g, int len);
av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
{
@@ -37,6 +38,9 @@ av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
case 16:
d->postfilter = ff_opus_postfilter_rvv_128;
break;
+ case 32:
+ d->postfilter = ff_opus_postfilter_rvv_256;
+ break;
}
#endif
}
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 79b46696cd..243c9a5e52 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -21,30 +21,38 @@
#include "libavutil/riscv/asm.S"
func ff_opus_postfilter_rvv_128, zve32f
+ lvtypei a5, e32, m2, ta, ma
+ j 1f
+endfunc
+
+func ff_opus_postfilter_rvv_256, zve32f
+ lvtypei a5, e32, m1, ta, ma
+1:
+ li a4, 5
addi a1, a1, 2
slli a1, a1, 2
lw t1, 4(a2)
vsetivli zero, 3, e32, m1, ta, ma
vle32.v v24, (a2)
sub a1, a0, a1 // a1 = &x4 = &data[-(period + 2)]
- vsetivli zero, 5, e32, m2, ta, ma
+ vsetvl zero, a4, a5
vslide1up.vx v8, v24, t1
lw t2, 8(a2)
vle32.v v16, (a1)
vslide1up.vx v24, v8, t2 // v24 = { g[2], g[1], g[0], g[1], g[2] }
2:
- vsetvli t0, a3, e32, m2, ta, ma
+ vsetvl t0, a3, a5
vle32.v v0, (a0)
sub a3, a3, t0
3:
- vsetivli zero, 5, e32, m2, ta, ma
+ vsetvl zero, a4, a5
lw t2, 20(a1)
vfmul.vv v8, v24, v16
addi a0, a0, 4
vslide1down.vx v16, v16, t2
addi a1, a1, 4
vfredusum.vs v0, v8, v0
- vsetvli zero, t0, e32, m2, ta, ma
+ vsetvl zero, t0, a5
vmv.x.s t1, v0
addi t0, t0, -1
vslide1down.vx v0, v0, zero
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 4/4] lavc/opusdsp: RISC-V V (512-bit) postfilter
2022-10-05 16:12 [FFmpeg-devel] [PATCHv2 0/4] RISC-V V Opus postfilter Rémi Denis-Courmont
` (2 preceding siblings ...)
2022-10-05 16:12 ` [FFmpeg-devel] [PATCH 3/4] lavc/opusdsp: RISC-V V (256-bit) postfilter Rémi Denis-Courmont
@ 2022-10-05 16:12 ` Rémi Denis-Courmont
3 siblings, 0 replies; 5+ messages in thread
From: Rémi Denis-Courmont @ 2022-10-05 16:12 UTC (permalink / raw)
To: ffmpeg-devel
This adds a variant of the postfilter for use with 512-bit vectors.
Half a vector is enough to perform the scalar product. Normally a whole
vector would be used anyhow. Indeed fractional multipliers are no faster
than the unit multiplier.
But in this particular function, a full vector makes up 16 samples,
which would be loaded at each iteration of the outer loop. The minimum
guaranteed CELT postfilter period is only 15. Accounting for the edges,
we can only safely preload up to 13 samples.
The fractional multiplier is thus used to cap the selected vector length
to a safe value of 8 elements or 256 bits.
Likewise, we have the 1024-bit variant with the quarter multiplier. In
theory, a 2048-bit one would be possible with the eighth multiplier, but
that length is not even defined in the specifications as of yet, nor is
it supported by any emulator - forget actual hardware.
---
libavcodec/riscv/opusdsp_init.c | 8 ++++++++
libavcodec/riscv/opusdsp_rvv.S | 10 ++++++++++
2 files changed, 18 insertions(+)
diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
index e6f9505f77..d564cca50c 100644
--- a/libavcodec/riscv/opusdsp_init.c
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -27,6 +27,8 @@
void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len);
void ff_opus_postfilter_rvv_256(float *data, int period, float *g, int len);
+void ff_opus_postfilter_rvv_512(float *data, int period, float *g, int len);
+void ff_opus_postfilter_rvv_1024(float *data, int period, float *g, int len);
av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
{
@@ -41,6 +43,12 @@ av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
case 32:
d->postfilter = ff_opus_postfilter_rvv_256;
break;
+ case 64:
+ d->postfilter = ff_opus_postfilter_rvv_512;
+ break;
+ case 128:
+ d->postfilter = ff_opus_postfilter_rvv_1024; /* mf4 variant: caps VL at 8 */
+ break;
}
#endif
}
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 243c9a5e52..b3d23a9de5 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -25,6 +25,16 @@ func ff_opus_postfilter_rvv_128, zve32f
j 1f
endfunc
+func ff_opus_postfilter_rvv_512, zve32f
+ lvtypei a5, e32, mf2, ta, ma
+ j 1f
+endfunc
+
+func ff_opus_postfilter_rvv_1024, zve32f
+ lvtypei a5, e32, mf4, ta, ma
+ j 1f
+endfunc
+
func ff_opus_postfilter_rvv_256, zve32f
lvtypei a5, e32, m1, ta, ma
1:
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread