From 1388287e7fb00cae1709bc3b82bc373fff4cd0a8 Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Fri, 2 Feb 2024 12:50:07 +0800
Subject: [PATCH 2/3] lavc/vp8dsp: R-V V vp8_idct_dc_add4y

c908:
vp8_idct_dc_add4y_c: 368.5
vp8_idct_dc_add4y_rvv_i32: 134.5
---
/*
 * Review notes. This text sits between the "---" separator and the
 * diffstat, so it is not part of the commit message and does not alter
 * the diff below.
 *
 * What the patch does:
 *  - vp8dsp_init.c: declares ff_vp8_idct_dc_add4y_rvv() and assigns it to
 *    c->vp8_idct_dc_add4y inside the existing guard, i.e. under the same
 *    condition as vp8_idct_dc_add (AV_CPU_FLAG_RVV_I32 set and
 *    ff_get_rv_vlenb() >= 16).
 *  - vp8dsp_rvv.S: adds the helper macro vp8_idct_dc_addy, which expands
 *    vp8_idct_dc_add and then advances a0 by 4 and a1 by 32.
 *  - ff_vp8_idct_dc_add4y_rvv issues one vsetivli (vl = 4, e8, mf4) and
 *    then expands vp8_idct_dc_addy three times followed by a plain
 *    vp8_idct_dc_add, so the pointers are not advanced after the fourth
 *    expansion.
 *
 * Reading of the increments (assumes the standard RISC-V calling
 * convention, a0 = dst, a1 = block, a2 = stride; a2 is the stride operand
 * of the visible vsse32.v store):
 *  - a0 += 4  moves dst (uint8_t *) forward by four bytes.
 *  - a1 += 32 moves block forward by 16 int16_t values, i.e. from
 *    block[i] to block[i + 1] of the int16_t block[4][16] argument.
 *
 * NOTE(review): the body of vp8_idct_dc_add is outside this hunk; only
 * its final store is visible. The single vsetivli at function entry is
 * sufficient only if each expansion of that macro leaves vl/vtype as the
 * next expansion expects -- confirm against the full macro.
 *
 * NOTE(review): the commit message does not state the unit of the
 * benchmark figures (368.5 vs 134.5); presumably a cycle/timer count on
 * C908 -- confirm with the author.
 */
 libavcodec/riscv/vp8dsp_init.c |  2 ++
 libavcodec/riscv/vp8dsp_rvv.S  | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index ab020070eb..6615d3d440 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -26,6 +26,7 @@
 #include "libavcodec/vp8dsp.h"
 
 void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
 
 av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
 {
@@ -34,6 +35,7 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
 
     if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
         c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
+        c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 8609b79937..87c369fb16 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -36,9 +36,25 @@
         vsse32.v      v0, (a0), a2
 .endm
 
+.macro vp8_idct_dc_addy
+        vp8_idct_dc_add
+        addi          a0, a0, 4
+        addi          a1, a1, 32
+.endm
+
 func ff_vp8_idct_dc_add_rvv, zve32x
         vsetivli      zero, 4, e8, mf4, ta, ma
         vp8_idct_dc_add
 
         ret
 endfunc
+
+func ff_vp8_idct_dc_add4y_rvv, zve32x
+        vsetivli      zero, 4, e8, mf4, ta, ma
+        vp8_idct_dc_addy
+        vp8_idct_dc_addy
+        vp8_idct_dc_addy
+        vp8_idct_dc_add
+
+        ret
+endfunc
-- 
2.43.0