From 94aacf6d1d49cc009669f89c91db71038a13285d Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Thu, 21 Mar 2024 23:08:01 +0800
Subject: [PATCH 5/7] lavc/vp9dsp: R-V V mc tap v

C908:
vp9_avg_8tap_smooth_4v_8bpp_c: 13.7
vp9_avg_8tap_smooth_4v_8bpp_rvv_i64: 5.0
vp9_avg_8tap_smooth_8v_8bpp_c: 49.7
vp9_avg_8tap_smooth_8v_8bpp_rvv_i64: 9.2
vp9_avg_8tap_smooth_16v_8bpp_c: 191.5
vp9_avg_8tap_smooth_16v_8bpp_rvv_i64: 21.2
vp9_avg_8tap_smooth_32v_8bpp_c: 770.5
vp9_avg_8tap_smooth_32v_8bpp_rvv_i64: 66.0
vp9_avg_8tap_smooth_64v_8bpp_c: 3068.0
vp9_avg_8tap_smooth_64v_8bpp_rvv_i64: 262.5
vp9_put_8tap_smooth_4v_8bpp_c: 12.0
vp9_put_8tap_smooth_4v_8bpp_rvv_i64: 4.5
vp9_put_8tap_smooth_8v_8bpp_c: 43.7
vp9_put_8tap_smooth_8v_8bpp_rvv_i64: 8.5
vp9_put_8tap_smooth_16v_8bpp_c: 168.7
vp9_put_8tap_smooth_16v_8bpp_rvv_i64: 20.0
vp9_put_8tap_smooth_32v_8bpp_c: 681.5
vp9_put_8tap_smooth_32v_8bpp_rvv_i64: 63.7
vp9_put_8tap_smooth_64v_8bpp_c: 2692.7
vp9_put_8tap_smooth_64v_8bpp_rvv_i64: 253.5
---
 libavcodec/riscv/vp9_mc_rvv.S  | 32 +++++++++++++++++++++++++++++++-
 libavcodec/riscv/vp9dsp_init.c |  3 ++-
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 9458a2e82b..2d4b56516f 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -221,7 +221,11 @@ subpel_filters_smooth:
 .macro epel_filter name type regtype
         lla \regtype\()2, subpel_filters_\name
         li \regtype\()1, 8
+.ifc \type,v
+        mul \regtype\()0, a6, \regtype\()1
+.elseif \type == h
         mul \regtype\()0, a5, \regtype\()1
+.endif
         add \regtype\()0, \regtype\()0, \regtype\()2
         .irp n 1,2,3,4,5,6
         lb \regtype\n, \n(\regtype\()0)
@@ -238,6 +242,19 @@ subpel_filters_smooth:
         li a5, 64
 .ifc \from_mem, 1
         vle8.v v22, (a2)
+.ifc \type,v
+        sub a2, a2, a3
+        vle8.v v20, (a2)
+        add a2, a2, a3
+        add a2, a2, a3
+        vle8.v v24, (a2)
+        add a2, a2, a3
+        vle8.v v26, (a2)
+        add a2, a2, a3
+        vle8.v v28, (a2)
+        add a2, a2, a3
+        vle8.v v30, (a2)
+.elseif \type == h
         addi a2, a2, -1
         vle8.v v20, (a2)
         addi a2, a2, 2
@@ -248,6 +265,7 @@ subpel_filters_smooth:
         vle8.v v28, (a2)
         addi a2, a2, 1
         vle8.v v30, (a2)
+.endif
 
 .ifc \name,smooth
         vwmulu.vx v16, v24, \regtype\()4
@@ -266,11 +284,23 @@ subpel_filters_smooth:
         vwmaccsu.vx v16, s7, v30
 .endif
 
+.ifc \type,v
+        .rept 6
+            sub a2, a2, a3
+        .endr
+        vle8.v v28, (a2)
+        sub a2, a2, a3
+        vle8.v v26, (a2)
+        .rept 3
+            add a2, a2, a3
+        .endr
+.elseif \type == h
         addi a2, a2, -6
         vle8.v v28, (a2)
         addi a2, a2, -1
         vle8.v v26, (a2)
         addi a2, a2, 3
+.endif
 
 .ifc \name,smooth
         vwmaccsu.vx v16, \regtype\()1, v28
@@ -410,7 +440,7 @@ endfunc
 
 .irp name regular sharp smooth
         .irp do put avg
-                .irp type h
+                .irp type h v
                         gen_epel \len \do \name \type
                 .endr
         .endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 413b203e5f..da09918796 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -125,7 +125,8 @@ static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
     init_subpel1(4, idx, idxh, idxv, 4, dir, type)
 
 #define init_subpel3(idx, type) \
-    init_subpel2(idx, 1, 0, h, type)
+    init_subpel2(idx, 1, 0, h, type); \
+    init_subpel2(idx, 0, 1, v, type)
 
     init_subpel3(0, put);
     init_subpel3(1, avg);
-- 
2.44.0