* [FFmpeg-devel] [PATCH] lavc/vp8dsp: R-V V vp8_idct_add
@ 2024-06-05 19:45 Rémi Denis-Courmont
0 siblings, 0 replies; only message in thread
From: Rémi Denis-Courmont @ 2024-06-05 19:45 UTC (permalink / raw)
To: ffmpeg-devel
T-Head C908 (cycles):
vp8_idct_add_c: 312.2
vp8_idct_add_rvv_i32: 117.0
---
libavcodec/riscv/vp8dsp_init.c | 2 ++
libavcodec/riscv/vp8dsp_rvv.S | 59 ++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 5911d195ba..d9e2beb237 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -27,6 +27,7 @@
#include "vp8dsp.h"
void ff_vp8_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
+void ff_vp8_idct_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4uv_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
@@ -129,6 +130,7 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
if (flags & AV_CPU_FLAG_RVV_I64)
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_rvv;
#endif
+ c->vp8_idct_add = ff_vp8_idct_add_rvv;
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
if (flags & AV_CPU_FLAG_RVV_I64)
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 02eeb4cc2c..b4b7b63195 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -100,6 +100,65 @@ func ff_vp8_luma_dc_wht_rvv, zve64x
endfunc
#endif
+func ff_vp8_idct_add_rvv, zve32x  # a0 = dst, a1 = block[16], a2 = stride: adds the 2-D transform of block to the 4x4 pixels at dst and zeroes block
+ csrwi vxrm, 0  # fixed-point rounding mode 0 (round-to-nearest-up), used by vssra below
+ vsetivli zero, 4, e16, mf2, ta, ma  # vl = 4 elements of 16 bits: one 4-coefficient row per vector register
+ addi a3, a1, 1 * 4 * 2  # a3 = address of row 1 (4 coefficients * 2 bytes further)
+ addi a4, a1, 2 * 4 * 2  # a4 = address of row 2
+ addi a5, a1, 3 * 4 * 2  # a5 = address of row 3
+ li t1, 20091  # multiplier used as x + ((x * 20091) >> 16) in the subroutine
+ li t2, 35468  # multiplier used as (x * 35468) >> 16; exceeds 32767, so it must be treated as unsigned
+ jal t0, 1f  # first 1-D pass; t0 is the link register so ra is left untouched
+ vsseg4e16.v v0, (a1)  # segment store interleaves v0..v3, i.e. writes the pass result back transposed
+ jal t0, 1f  # second 1-D pass, now on the transposed data
+ vlsseg4e8.v v4, (a0), a2  # strided segment load: 4 bytes from each of 4 dst rows; v4..v7 = pixel columns 0..3
+ vssra.vi v0, v0, 3  # rounding shift right by 3, i.e. (x + 4) >> 3 with vxrm = 0
+ sd zero, (a1)  # clear block, 8 bytes per store, interleaved with the vector shifts; NOTE(review): sd is an RV64 store, confirm this code is never assembled for RV32
+ vssra.vi v1, v1, 3
+ sd zero, 8(a1)
+ vssra.vi v2, v2, 3
+ sd zero, 16(a1)
+ vssra.vi v3, v3, 3
+ sd zero, 24(a1)  # all 32 bytes of block are now zero
+ vsetvli zero, zero, e8, mf4, ta, ma  # SEW = 8 with vl unchanged, as required for the widening adds
+ vwaddu.wv v0, v0, v4  # 16-bit residual + zero-extended 8-bit pixel -> 16-bit sum
+ vwaddu.wv v1, v1, v5
+ vwaddu.wv v2, v2, v6
+ vwaddu.wv v3, v3, v7
+ vsetvli zero, zero, e16, mf2, ta, ma  # back to 16-bit elements for the signed clamp
+ vmax.vx v0, v0, zero  # clamp negative sums to 0, since vnclipu reads its source as unsigned
+ vmax.vx v1, v1, zero
+ vmax.vx v2, v2, zero
+ vmax.vx v3, v3, zero
+ vsetvli zero, zero, e8, mf4, ta, ma  # SEW = 8 for the narrowing step
+ vnclipu.wi v4, v0, 0  # narrow 16 -> 8 bits with unsigned saturation (no shift), capping at 255
+ vnclipu.wi v5, v1, 0
+ vnclipu.wi v6, v2, 0
+ vnclipu.wi v7, v3, 0
+ vssseg4e8.v v4, (a0), a2  # strided segment store: write the 4 bytes of each of the 4 rows back to dst
+ ret
+1:  # local subroutine: one 1-D pass; reads rows at a1/a3/a4/a5, returns v0..v3, clobbers v4..v9, returns via t0
+ vle16.v v0, (a1)  # row 0
+ vle16.v v2, (a4)  # row 2
+ vle16.v v1, (a3)  # row 1
+ vle16.v v3, (a5)  # row 3
+ vadd.vv v4, v0, v2 # t0 = row0 + row2 (temporary name, not the t0 register)
+ vsub.vv v5, v0, v2 # t1 = row0 - row2 (temporary name, not the t1 register)
+ vmulhsu.vx v8, v3, t1  # high half of signed * unsigned product: (row3 * 20091) >> 16
+ vmulhsu.vx v6, v1, t2  # (row1 * 35468) >> 16
+ vadd.vv v8, v8, v3  # v8 = row3 + ((row3 * 20091) >> 16)
+ vmulhsu.vx v7, v1, t1  # (row1 * 20091) >> 16
+ vmulhsu.vx v9, v3, t2  # (row3 * 35468) >> 16
+ vadd.vv v7, v7, v1  # v7 = row1 + ((row1 * 20091) >> 16)
+ vsub.vv v6, v6, v8 # t2 = v6 - v8
+ vadd.vv v7, v7, v9 # t3 = v7 + v9
+ vadd.vv v1, v5, v6  # out1 = t1 + t2
+ vsub.vv v2, v5, v6  # out2 = t1 - t2
+ vadd.vv v0, v4, v7  # out0 = t0 + t3
+ vsub.vv v3, v4, v7  # out3 = t0 - t3
+ jr t0  # return to the caller of this local subroutine
+endfunc
+
func ff_vp8_idct_dc_add_rvv, zve32x
lh a3, (a1)
addi a3, a3, 4
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-06-05 19:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-05 19:45 [FFmpeg-devel] [PATCH] lavc/vp8dsp: R-V V vp8_idct_add Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git