From 7ad03f4bc70e4c334d8e52dce2ea2b6f09a9a244 Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Thu, 21 Mar 2024 22:11:26 +0800
Subject: [PATCH 2/7] lavc/vp9dsp: R-V V mc bilin h

C908:
vp9_avg_bilin_4h_8bpp_c: 5.5
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.5
vp9_avg_bilin_8h_8bpp_c: 19.7
vp9_avg_bilin_8h_8bpp_rvv_i64: 5.0
vp9_avg_bilin_16h_8bpp_c: 78.2
vp9_avg_bilin_16h_8bpp_rvv_i64: 10.0
vp9_avg_bilin_32h_8bpp_c: 325.2
vp9_avg_bilin_32h_8bpp_rvv_i64: 28.5
vp9_avg_bilin_64h_8bpp_c: 1266.2
vp9_avg_bilin_64h_8bpp_rvv_i64: 115.0
vp9_put_bilin_4h_8bpp_c: 4.5
vp9_put_bilin_4h_8bpp_rvv_i64: 2.2
vp9_put_bilin_8h_8bpp_c: 16.7
vp9_put_bilin_8h_8bpp_rvv_i64: 4.2
vp9_put_bilin_16h_8bpp_c: 65.2
vp9_put_bilin_16h_8bpp_rvv_i64: 8.7
vp9_put_bilin_32h_8bpp_c: 273.5
vp9_put_bilin_32h_8bpp_rvv_i64: 26.7
vp9_put_bilin_64h_8bpp_c: 1041.0
vp9_put_bilin_64h_8bpp_rvv_i64: 87.2
---
Notes (below the "---" marker, so not part of the commit message):

bilin_h_load dst, len, type
  Produces one row of len output bytes in \dst.
  - Loads len + 1 bytes from (a2) into v8, then builds v0 with
    vslide1down.vx so that v0[i] = v8[i + 1]. The element slid in at the
    top comes from t5 and lies outside the len elements used afterwards.
  - With vl reduced to len, computes at double width
        (a5 * v0[i] + t1 * v8[i] + t4) >> 4
    and narrows back to 8 bits. bilin_h sets t1 = -a5 and t4 = 8, so this
    is ((a5 * (v8[i + 1] - v8[i]) + 8) >> 4), to which v8[i] is added.
  - type == put: that sum is the result.
    type == avg: the sum is averaged with the len bytes currently at (a0)
    using vaaddu.vv.
  Clobbers t0 (len 32 and 64 only), v0, v8 and v16 plus the register
  groups they head.

bilin_h len, type
  Row loop around bilin_h_load: a4 is the remaining row count, a2 and a0
  advance by a3 and a1 after each row, and the row is stored to (a0).
  For avg, vxrm is written with 0 before the loop, which is the rounding
  mode vaaddu.vv then uses.
  NOTE(review): a0..a5 presumably correspond to dst, dst_stride, src,
  src_stride, h and mx of the VP9 mc function signature; confirm against
  the prototypes used by vp9dsp_init.c.

Changes relative to the previous revision of this patch:
  - bilin_h_load: dropped the unconditional "vadd.vv \dst, v16, v8" that
    preceded the put/avg selection. The put branch issues the identical
    instruction, and the avg branch overwrites \dst with vle8.v before
    reading it, so the extra instruction had no effect on the output.
  - bilin_h_load: the avg branch is now selected with ".ifc \type,avg",
    matching bilin_h, instead of ".elseif \type == avg", which compared
    the bare symbol "avg" with itself numerically.
  Added-line counts are unchanged, so the hunk headers still hold.

NOTE(review): the whitespace of this patch was reconstructed; if context
lines do not match the tree, apply with "git am --ignore-whitespace".

 libavcodec/riscv/vp9_mc_rvv.S  | 73 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c | 17 ++++++++
 2 files changed, 90 insertions(+)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index ba9ec3431f..a97807633e 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -51,6 +51,72 @@
         ret
 .endm
 
+.macro bilin_h_load dst len type
+.ifc \len,4
+        vsetivli        zero, 5, e8, mf2, ta, ma
+.elseif \len == 8
+        vsetivli        zero, 9, e8, m1, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 17, e8, m2, ta, ma
+.elseif \len == 32
+        li              t0, 33
+        vsetvli         zero, t0, e8, m4, ta, ma
+.elseif \len == 64
+        li              t0, 65
+        vsetvli         zero, t0, e8, m8, ta, ma
+.endif
+
+        vle8.v          v8, (a2)
+        vslide1down.vx  v0, v8, t5
+
+.ifc \len,4
+        vsetivli        zero, 4, e8, mf4, ta, ma
+.elseif \len == 8
+        vsetivli        zero, 8, e8, mf2, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 16, e8, m1, ta, ma
+.elseif \len == 32
+        li              t0, 32
+        vsetvli         zero, t0, e8, m2, ta, ma
+.elseif \len == 64
+        li              t0, 64
+        vsetvli         zero, t0, e8, m4, ta, ma
+.endif
+
+        vwmulu.vx       v16, v0, a5
+        vwmaccsu.vx     v16, t1, v8
+        vwadd.wx        v16, v16, t4
+        vnsra.wi        v16, v16, 4
+
+.ifc \type,put
+        vadd.vv         \dst, v16, v8
+.endif
+.ifc \type,avg
+        vadd.vv         v16, v16, v8
+        vle8.v          \dst, (a0)
+        vaaddu.vv       \dst, \dst, v16
+.endif
+
+.endm
+
+.macro bilin_h len type
+.ifc \type,avg
+        csrwi           vxrm, 0
+.endif
+        li              t4, 8
+        li              t5, 1
+        neg             t1, a5
+1:
+        addi            a4, a4, -1
+        bilin_h_load    v0, \len, \type
+        vse8.v          v0, (a0)
+        add             a2, a2, a3
+        add             a0, a0, a1
+        bnez            a4, 1b
+
+        ret
+.endm
+
 .irp len 64, 32, 16
 func ff_copy\len\()_rvv, zve32x
         copy_avg \len copy
@@ -61,4 +127,11 @@ endfunc
 func ff_avg\len\()_rvv, zve32x
         copy_avg \len avg
 endfunc
+
+func ff_put_bilin_\len\()h_rvv, zve32x
+        bilin_h \len put
+endfunc
+func ff_avg_bilin_\len\()h_rvv, zve32x
+        bilin_h \len avg
+endfunc
 .endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index c602c38bb2..d6d6fb52cc 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -106,6 +106,23 @@ static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
 #undef init_copy_avg
 #undef init_avg
 #undef init_fpel
+
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type)  \
+    dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] =      \
+        ff_##type##_bilin_##sz##dir##_rvv;
+
+#define init_subpel2(idx, idxh, idxv, dir, type)      \
+    init_subpel1(0, idx, idxh, idxv, 64, dir, type);  \
+    init_subpel1(1, idx, idxh, idxv, 32, dir, type);  \
+    init_subpel1(2, idx, idxh, idxv, 16, dir, type);  \
+    init_subpel1(3, idx, idxh, idxv, 8, dir, type);   \
+    init_subpel1(4, idx, idxh, idxv, 4, dir, type)
+
+#define init_subpel3(idx, type)  \
+    init_subpel2(idx, 1, 0, h, type)
+
+    init_subpel3(0, put);
+    init_subpel3(1, avg);
 }
 #endif
 #endif
-- 
2.44.0