* [FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32
@ 2024-05-12 17:07 Rémi Denis-Courmont
2024-05-12 18:37 ` James Almer
0 siblings, 1 reply; 3+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-12 17:07 UTC (permalink / raw)
To: ffmpeg-devel
T-Head C908:
flac_wasted_32_c: 949.0
flac_wasted_32_rvv_i32: 278.7
---
libavcodec/riscv/flacdsp_init.c | 7 ++++++-
libavcodec/riscv/flacdsp_rvv.S | 15 +++++++++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
index 6cfb50ead8..4043715a3b 100644
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@@ -31,6 +31,7 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
int pred_order, int qlevel, int len);
void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
int pred_order, int qlevel, int len);
+void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
int channels, int len, int shift);
void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
@@ -76,8 +77,12 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
c->lpc32 = ff_flac_lpc32_rvv_simple;
else
c->lpc32 = ff_flac_lpc32_rvv;
+# endif
}
+ c->wasted32 = ff_flac_wasted32_rvv;
+
+# if (__riscv_xlen >= 64)
switch (fmt) {
case AV_SAMPLE_FMT_S16:
switch (channels) {
@@ -117,8 +122,8 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv;
c->decorrelate[3] = ff_flac_decorrelate_ms_32_rvv;
break;
-# endif
}
+# endif
}
#endif
}
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index 2a0b50f7a9..d576a0cc21 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -100,7 +100,22 @@ func ff_flac_lpc32_rvv_simple, zve32x
ret
endfunc
+#endif
+
+func ff_flac_wasted32_rvv, zve32x
+1:
+ vsetvli t0, a2, e32, m8, ta, ma
+ vle32.v v8, (a0)
+ sub a2, a2, t0
+ vsll.vx v8, v8, a1
+ vse32.v v8, (a0)
+ sh2add a0, t0, a0
+ bnez a2, 1b
+ ret
+endfunc
+
+#if (__riscv_xlen == 64)
func ff_flac_decorrelate_indep2_16_rvv, zve32x
ld a0, (a0)
ld a2, 8(a1)
--
2.43.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32
2024-05-12 17:07 [FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32 Rémi Denis-Courmont
@ 2024-05-12 18:37 ` James Almer
2024-05-12 19:41 ` Rémi Denis-Courmont
0 siblings, 1 reply; 3+ messages in thread
From: James Almer @ 2024-05-12 18:37 UTC (permalink / raw)
To: ffmpeg-devel
On 5/12/2024 2:07 PM, Rémi Denis-Courmont wrote:
> T-Head C908:
> flac_wasted_32_c: 949.0
> flac_wasted_32_rvv_i32: 278.7
> ---
> libavcodec/riscv/flacdsp_init.c | 7 ++++++-
> libavcodec/riscv/flacdsp_rvv.S | 15 +++++++++++++++
> 2 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
> index 6cfb50ead8..4043715a3b 100644
> --- a/libavcodec/riscv/flacdsp_init.c
> +++ b/libavcodec/riscv/flacdsp_init.c
> @@ -31,6 +31,7 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
> int pred_order, int qlevel, int len);
> void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
> int pred_order, int qlevel, int len);
> +void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
> void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
> int channels, int len, int shift);
> void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
> @@ -76,8 +77,12 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
> c->lpc32 = ff_flac_lpc32_rvv_simple;
> else
> c->lpc32 = ff_flac_lpc32_rvv;
> +# endif
> }
>
> + c->wasted32 = ff_flac_wasted32_rvv;
> +
> +# if (__riscv_xlen >= 64)
> switch (fmt) {
> case AV_SAMPLE_FMT_S16:
> switch (channels) {
> @@ -117,8 +122,8 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
> c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv;
> c->decorrelate[3] = ff_flac_decorrelate_ms_32_rvv;
> break;
> -# endif
> }
> +# endif
> }
> #endif
> }
> diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
> index 2a0b50f7a9..d576a0cc21 100644
> --- a/libavcodec/riscv/flacdsp_rvv.S
> +++ b/libavcodec/riscv/flacdsp_rvv.S
> @@ -100,7 +100,22 @@ func ff_flac_lpc32_rvv_simple, zve32x
>
> ret
> endfunc
> +#endif
> +
> +func ff_flac_wasted32_rvv, zve32x
> +1:
> + vsetvli t0, a2, e32, m8, ta, ma
> + vle32.v v8, (a0)
> + sub a2, a2, t0
> + vsll.vx v8, v8, a1
> + vse32.v v8, (a0)
> + sh2add a0, t0, a0
> + bnez a2, 1b
Not sure if you're taking it into account, but the minimum blocksize is
16 and the buffer is always allocated for max_blocksize plus padding, so
you should be able to do more samples per loop than this. Same for wasted33.
>
> + ret
> +endfunc
> +
> +#if (__riscv_xlen == 64)
> func ff_flac_decorrelate_indep2_16_rvv, zve32x
> ld a0, (a0)
> ld a2, 8(a1)
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32
2024-05-12 18:37 ` James Almer
@ 2024-05-12 19:41 ` Rémi Denis-Courmont
0 siblings, 0 replies; 3+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-12 19:41 UTC (permalink / raw)
To: ffmpeg-devel
Le sunnuntaina 12. toukokuuta 2024, 21.37.28 EEST James Almer a écrit :
> Not sure if you're taking it into account, but the minimum blocksize is
> 16
Granted, this only fills a single 8-vector vector group (v8-v15), so only a
quarter of the register bank (v0-v31), which is unusually low. But that
already adds up to 32 ints per iteration with 128-bit vectors.
IIUC, the x86 implementation is only half as much.
> and the buffer is always allocated for max_blocksize plus padding,
RVV really wants element-size alignment, so 32/64-bit here. Beyond that, it
really does not care. (I think Arm SVE works the same way?)
> so you should be able to do more samples per loop than this.
In my experience, this particular hardware would likely exhibit marginally
better performance with only *half* as many samples per iteration. I just
don't want to overfit to this relatively early and low-end hardware design. In
fact, Yuechi already has newer better hardware.
> Same for wasted33.
The wasted33 kernel actually already uses three eighths (v8-v11, v16-v23) of the
bank, for 16 ints per iteration. I doubt that unrolling explicitly would help.
The performance (~1.5x) is pretty disappointing to be sure. The root cause is
RVV's notorious lack of widening left-shifts. FWIW, Zvbb only adds the
unsigned variant, which is not what we need here. Plus there is no
commercially available hardware with Zvbb yet. So in the end, we have an extra
size conversion. And then 64-bit shift and 64-bit element stores which are
half as fast as 32-bit ones on a weighted basis.
Maybe widening signed multiplication would be faster though, I will try that.
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-05-12 19:41 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-12 17:07 [FFmpeg-devel] [PATCH 1/2] lavc/flacdsp: R-V V flac_wasted32 Rémi Denis-Courmont
2024-05-12 18:37 ` James Almer
2024-05-12 19:41 ` Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git