From a4672687a10a49702623449e8569d68913e91346 Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Thu, 21 Mar 2024 21:39:50 +0800
Subject: [PATCH 3/4] lavc/vp9dsp: R-V V ipred hor

C908:
vp9_hor_8x8_8bpp_c: 74.7
vp9_hor_8x8_8bpp_rvv_i32: 35.7
vp9_hor_16x16_8bpp_c: 175.5
vp9_hor_16x16_8bpp_rvv_i32: 80.2
vp9_hor_32x32_8bpp_c: 510.2
vp9_hor_32x32_8bpp_rvv_i32: 264.0
---
Review notes (commentary only; this area between the three-dash line and
the first file diff is not part of the commit message or of the change):

  * Purpose: adds three RVV routines, ff_h_32x32_rvv, ff_h_16x16_rvv and
    ff_h_8x8_rvv, declares them in vp9dsp.h and installs them in the
    intra_pred table under HOR_PRED for TX_32X32, TX_16X16 and TX_8X8.

  * Common shape of each routine (a0 = dst, a1 = stride, a2 = l, following
    the prototype order): a2 is first advanced to l + (N - 1); then for
    every row one byte is loaded with lbu, a2 is decremented, and the byte
    is broadcast into a vector register with vmv.v.x. The registers are
    afterwards stored with vse8.v, one register per row, with a0 advanced
    by a1 between rows. The left edge is therefore consumed from its last
    byte towards its first while dst is written top to bottom.

  * 32x32: requests vl = 32 at e8/m2, so only even-numbered registers
    (v0 .. v30) are named. The ".rept 2" wrapper emits the 16-row
    load/store batch twice, giving 32 rows. Unlike the smaller sizes, the
    last vse8.v is still followed by one "add a0, a0, a1"; the result of
    that add is not used before ret.

  * 16x16 (e8/m1, v8 .. v23) and 8x8 (e8/mf2, v8 .. v15): the final store
    is written outside the .irp list so that no pointer add follows it.

  * NOTE(review): the requested vl values (32, 16, 8) are only granted in
    full if the hardware vector length is large enough for the chosen
    LMUL. The condition guarding these table assignments lies outside the
    visible hunk context - confirm it covers the 32-byte m2 case.

  * NOTE(review): this copy of the patch was recovered from a
    whitespace-collapsed form. Column alignment inside lines, and the
    blank context line in the first two hunks, are reconstructed from the
    hunk line counts; re-check against the target tree before applying.

 libavcodec/riscv/vp9_intra_rvv.S | 56 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp.h        |  6 ++++
 libavcodec/riscv/vp9dsp_init.c   |  3 ++
 3 files changed, 65 insertions(+)

diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
index d81513cd3c..bf7f6d8692 100644
--- a/libavcodec/riscv/vp9_intra_rvv.S
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -157,3 +157,59 @@ func ff_v_8x8_rvv, zve64x
 
         ret
 endfunc
+
+func ff_h_32x32_rvv, zve32x
+        li           t0, 32
+        addi         a2, a2, 31
+        vsetvli      zero, t0, e8, m2, ta, ma
+
+        .rept 2
+        .irp n 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+        lbu          t1, (a2)
+        addi         a2, a2, -1
+        vmv.v.x      v\n, t1
+        .endr
+        .irp n 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+        vse8.v       v\n, (a0)
+        add          a0, a0, a1
+        .endr
+        .endr
+
+        ret
+endfunc
+
+func ff_h_16x16_rvv, zve32x
+        addi         a2, a2, 15
+        vsetivli     zero, 16, e8, m1, ta, ma
+
+        .irp n 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
+        lbu          t1, (a2)
+        addi         a2, a2, -1
+        vmv.v.x      v\n, t1
+        .endr
+        .irp n 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
+        vse8.v       v\n, (a0)
+        add          a0, a0, a1
+        .endr
+        vse8.v       v23, (a0)
+
+        ret
+endfunc
+
+func ff_h_8x8_rvv, zve32x
+        addi         a2, a2, 7
+        vsetivli     zero, 8, e8, mf2, ta, ma
+
+        .irp n 8, 9, 10, 11, 12, 13, 14, 15
+        lbu          t1, (a2)
+        addi         a2, a2, -1
+        vmv.v.x      v\n, t1
+        .endr
+        .irp n 8, 9, 10, 11, 12, 13, 14
+        vse8.v       v\n, (a0)
+        add          a0, a0, a1
+        .endr
+        vse8.v       v15, (a0)
+
+        ret
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index 17663c6107..51d9bc3957 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -66,6 +66,12 @@ void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                     const uint8_t *a);
 void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                   const uint8_t *a);
+void ff_h_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_h_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_h_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                  const uint8_t *a);
 
 #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
 void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,    \
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 9c550d40b5..16aeeb260a 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -54,6 +54,9 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
         dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
         dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv;
         dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv;
+        dsp->intra_pred[TX_32X32][HOR_PRED] = ff_h_32x32_rvv;
+        dsp->intra_pred[TX_16X16][HOR_PRED] = ff_h_16x16_rvv;
+        dsp->intra_pred[TX_8X8][HOR_PRED] = ff_h_8x8_rvv;
     }
 #endif
 }
-- 
2.44.0