From e791fada3a4777fae87dec806c0b46b595d265db Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Tue, 27 Feb 2024 00:06:25 +0800
Subject: [PATCH 2/3] lavc/vp9dsp: R-V V ipred hor

C908:
vp9_hor_4x4_8bpp_c: 37.7
vp9_hor_4x4_8bpp_rvv_i32: 33.7
vp9_hor_8x8_8bpp_c: 82.7
vp9_hor_8x8_8bpp_rvv_i32: 51.5
vp9_hor_16x16_8bpp_c: 182.2
vp9_hor_16x16_8bpp_rvv_i32: 89.5
vp9_hor_32x32_8bpp_c: 518.2
vp9_hor_32x32_8bpp_rvv_i32: 270.7
---
 libavcodec/riscv/vp9dsp_init.c |  8 ++++
 libavcodec/riscv/vp9dsp_rvv.S  | 82 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 58db936f31..5b68302235 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -25,6 +25,10 @@
 
 void ff_vp9_ipred_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
 void ff_vp9_ipred_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
+void ff_vp9_ipred_h_4x4_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a);
 
 av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
 {
@@ -35,6 +39,10 @@ av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
         if (bpp == 8) {
             dsp->intra_pred[TX_32X32][VERT_PRED] = ff_vp9_ipred_v_32x32_rvv;
             dsp->intra_pred[TX_16X16][VERT_PRED] = ff_vp9_ipred_v_16x16_rvv;
+            dsp->intra_pred[TX_32X32][HOR_PRED] = ff_vp9_ipred_h_32x32_rvv;
+            dsp->intra_pred[TX_16X16][HOR_PRED] = ff_vp9_ipred_h_16x16_rvv;
+            dsp->intra_pred[TX_8X8][HOR_PRED] = ff_vp9_ipred_h_8x8_rvv;
+            dsp->intra_pred[TX_4X4][HOR_PRED] = ff_vp9_ipred_h_4x4_rvv;
         }
     }
 #endif
diff --git a/libavcodec/riscv/vp9dsp_rvv.S b/libavcodec/riscv/vp9dsp_rvv.S
index 0645567f1b..578fbce061 100644
--- a/libavcodec/riscv/vp9dsp_rvv.S
+++ b/libavcodec/riscv/vp9dsp_rvv.S
@@ -45,3 +45,85 @@ func ff_vp9_ipred_v_16x16_rvv, zve32x
 
         ret
 endfunc
+
+func ff_vp9_ipred_h_32x32_rvv, zve32x
+        addi         a2, a2, 31 // a2 (3rd argument, l) now points at l[31]; l[] is consumed from the last byte downwards
+        li           t0, 32 // requested vl for the 32-byte splats (too large for the vsetivli immediate)
+
+        .rept 2
+        vsetvli      zero, t0, e8, m2, ta, ma // 32 x 8-bit elements per even-numbered register pair
+        .irp n 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
+        lbu          t1, (a2) // fetch one byte of l[]
+        addi         a2, a2, -1 // step to the previous byte of l[]
+        vmv.v.x      v\n, t1 // replicate that byte across the whole row
+        .endr
+
+        vsetivli     zero, 8, e8, mf2, ta, ma // for vse32.v: EMUL = (32/8) * 1/2 = 2, vl=8 words = 32 bytes
+        .irp n 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30
+        vse32.v      v\n, (a0) // write one 32-byte row at a0 (1st argument, dst)
+        add          a0, a0, a1 // advance by a1 (2nd argument, stride)
+        .endr
+        .endr
+
+        ret
+endfunc
+
+func ff_vp9_ipred_h_16x16_rvv, zve32x
+        addi         a2, a2, 15 // start at l[15] and walk backwards
+        vsetivli     zero, 16, e8, m1, ta, ma // 16 x 8-bit elements per register
+
+        .irp n 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
+        lbu          t1, (a2) // fetch one byte of l[]
+        addi         a2, a2, -1 // step to the previous byte of l[]
+        vmv.v.x      v\n, t1 // replicate that byte across the whole row
+        .endr
+
+        vsetivli     zero, 4, e8, mf4, ta, ma // for vse32.v: EMUL = (32/8) * 1/4 = 1, vl=4 words = 16 bytes
+        .irp n 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22
+        vse32.v      v\n, (a0) // write one 16-byte row
+        add          a0, a0, a1 // advance by stride
+        .endr
+        vse32.v      v23, (a0) // last row: no pointer advance needed afterwards
+
+        ret
+endfunc
+
+func ff_vp9_ipred_h_8x8_rvv, zve32x
+        addi         a2, a2, 7 // start at l[7] and walk backwards
+        vsetivli     zero, 8, e8, mf2, ta, ma // 8 x 8-bit elements per register
+
+        .irp n 8,9,10,11,12,13,14,15
+        lbu          t1, (a2) // fetch one byte of l[]
+        addi         a2, a2, -1 // step to the previous byte of l[]
+        vmv.v.x      v\n, t1 // replicate that byte across the whole row
+        .endr
+
+        vsetivli     zero, 2, e8, mf4, ta, ma // for vse32.v: EMUL = 1, vl=2 words = 8 bytes
+        .irp n 8,9,10,11,12,13,14
+        vse32.v      v\n, (a0) // write one 8-byte row
+        add          a0, a0, a1 // advance by stride
+        .endr
+        vse32.v      v15, (a0) // last row: no pointer advance needed afterwards
+
+        ret
+endfunc
+
+func ff_vp9_ipred_h_4x4_rvv, zve32x
+        addi         a2, a2, 3 // start at l[3] and walk backwards
+        vsetivli     zero, 4, e8, mf2, ta, ma // 4 x 8-bit elements per register
+
+        .irp n 8,9,10,11
+        lbu          t1, (a2) // fetch one byte of l[]
+        addi         a2, a2, -1 // step to the previous byte of l[]
+        vmv.v.x      v\n, t1 // replicate that byte across the whole row
+        .endr
+
+        vsetivli     zero, 1, e8, mf4, ta, ma // for vse32.v: EMUL = 1, vl=1 word = 4 bytes
+        .irp n 8,9,10
+        vse32.v      v\n, (a0) // write one 4-byte row
+        add          a0, a0, a1 // advance by stride
+        .endr
+        vse32.v      v11, (a0) // last row: no pointer advance needed afterwards
+
+        ret
+endfunc
-- 
2.44.0