From 80b6694bc29ed1c37852dc079a6d91a24dd6f18e Mon Sep 17 00:00:00 2001
From: sunyuechi
Date: Tue, 19 Dec 2023 09:11:28 +0800
Subject: [PATCH] libavfilter/af_afir: R-V V dcmul_add

c908:
dcmul_add_c: 88.0
dcmul_add_rvv_f64: 46.2
---
 libavfilter/riscv/af_afir_init.c |  3 +++
 libavfilter/riscv/af_afir_rvv.S  | 41 ++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
index 52aa18c126..f9a76f108b 100644
--- a/libavfilter/riscv/af_afir_init.c
+++ b/libavfilter/riscv/af_afir_init.c
@@ -27,6 +27,8 @@
 
 void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
                       ptrdiff_t len);
+void ff_dcmul_add_rvv(double *sum, const double *t, const double *c,
+                      ptrdiff_t len);
 
 av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
 {
@@ -36,6 +38,7 @@ av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
     if (flags & AV_CPU_FLAG_RVV_F64) {
         if (flags & AV_CPU_FLAG_RVB_ADDR) {
             s->fcmul_add = ff_fcmul_add_rvv;
+            s->dcmul_add = ff_dcmul_add_rvv;
         }
     }
 #endif
diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S
index 04ec2e50d8..d1fa6e22e5 100644
--- a/libavfilter/riscv/af_afir_rvv.S
+++ b/libavfilter/riscv/af_afir_rvv.S
@@ -53,3 +53,44 @@ func ff_fcmul_add_rvv, zve64f
 
         ret
 endfunc
+
+func ff_dcmul_add_rvv, zve64d           // a0 = sum, a1 = t, a2 = c, a3 = len (complex pairs)
+        li          t1, 16              // byte stride between consecutive complex doubles
+        li          t2, 8               // byte offset from the real to the imaginary part
+1:
+        vsetvli     t0, a3, e64, m4, ta, ma
+        vlse64.v    v0, (a1), t1        // v0  = Re(t)
+        add         a1, a1, t2
+        vlse64.v    v4, (a2), t1        // v4  = Re(c)
+        add         a2, a2, t2
+        vlse64.v    v12, (a0), t1       // v12 = Re(sum)
+        add         a0, a0, t2
+        vfmacc.vv   v12, v0, v4         // Re(sum) += Re(t) * Re(c)
+        sub         a3, a3, t0          // len -= vl
+        vlse64.v    v8, (a2), t1        // v8  = Im(c)
+        sub         a2, a2, t2
+        sh3add      a2, t0, a2
+        vlse64.v    v16, (a0), t1       // v16 = Im(sum)
+        sub         a0, a0, t2
+        vfmacc.vv   v16, v0, v8         // Im(sum) += Re(t) * Im(c)
+        sh3add      a2, t0, a2          // c advanced by 2 * vl doubles in total
+        vlse64.v    v0, (a1), t1        // v0  = Im(t), Re(t) is no longer needed
+        sub         a1, a1, t2
+        sh3add      a1, t0, a1
+        vfnmsac.vv  v12, v0, v8         // Re(sum) -= Im(t) * Im(c)
+        sh3add      a1, t0, a1          // t advanced by 2 * vl doubles in total
+        vfmacc.vv   v16, v0, v4         // Im(sum) += Im(t) * Re(c)
+        vsse64.v    v12, (a0), t1       // store real parts
+        add         a0, a0, t2
+        vsse64.v    v16, (a0), t1       // store imaginary parts
+        sub         a0, a0, t2
+        sh3add      a0, t0, a0
+        sh3add      a0, t0, a0          // sum advanced by 2 * vl doubles in total
+        bgtz        a3, 1b
+        fld         fa0, 0(a1)          // one trailing real-only element follows the pairs
+        fld         fa1, 0(a2)
+        fld         fa2, 0(a0)
+        fmadd.d     fa2, fa0, fa1, fa2  // sum[2 * len] += t[2 * len] * c[2 * len]
+        fsd         fa2, 0(a0)
+        ret
+endfunc
-- 
2.43.0