From: "zhanheng.yang--- via ffmpeg-devel" <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Zhanheng Yang <zhanheng.yang@linux.alibaba.com>
Subject: [FFmpeg-devel] [PATCH 6/6] libavcodec/riscv: add RVV optimized for epel_hv in HEVC.
Date: Thu, 22 Jan 2026 12:23:57 +0800
Message-ID: <20260122042357.1438-6-zhanheng.yang@linux.alibaba.com> (raw)
In-Reply-To: <20260122042357.1438-1-zhanheng.yang@linux.alibaba.com>
From: Zhanheng Yang <zhanheng.yang@linux.alibaba.com>
Benchmarked on an A210 C908 core (VLEN 128).
put_hevc_epel_hv4_8_c: 390.0 ( 1.00x)
put_hevc_epel_hv4_8_rvv_i32: 213.0 ( 1.83x)
put_hevc_epel_hv6_8_c: 749.8 ( 1.00x)
put_hevc_epel_hv6_8_rvv_i32: 290.8 ( 2.58x)
put_hevc_epel_hv8_8_c: 1215.5 ( 1.00x)
put_hevc_epel_hv8_8_rvv_i32: 360.7 ( 3.37x)
put_hevc_epel_hv12_8_c: 2602.5 ( 1.00x)
put_hevc_epel_hv12_8_rvv_i32: 515.4 ( 5.05x)
put_hevc_epel_hv16_8_c: 4417.0 ( 1.00x)
put_hevc_epel_hv16_8_rvv_i32: 661.8 ( 6.67x)
put_hevc_epel_hv24_8_c: 9524.8 ( 1.00x)
put_hevc_epel_hv24_8_rvv_i32: 1909.2 ( 4.99x)
put_hevc_epel_hv32_8_c: 16589.1 ( 1.00x)
put_hevc_epel_hv32_8_rvv_i32: 2508.0 ( 6.61x)
put_hevc_epel_hv48_8_c: 37145.4 ( 1.00x)
put_hevc_epel_hv48_8_rvv_i32: 5526.8 ( 6.72x)
put_hevc_epel_hv64_8_c: 65015.9 ( 1.00x)
put_hevc_epel_hv64_8_rvv_i32: 9751.9 ( 6.67x)
put_hevc_epel_uni_hv4_8_c: 434.8 ( 1.00x)
put_hevc_epel_uni_hv4_8_rvv_i32: 238.8 ( 1.82x)
put_hevc_epel_uni_hv6_8_c: 856.8 ( 1.00x)
put_hevc_epel_uni_hv6_8_rvv_i32: 329.6 ( 2.60x)
put_hevc_epel_uni_hv8_8_c: 1474.2 ( 1.00x)
put_hevc_epel_uni_hv8_8_rvv_i32: 412.9 ( 3.57x)
put_hevc_epel_uni_hv12_8_c: 2995.9 ( 1.00x)
put_hevc_epel_uni_hv12_8_rvv_i32: 593.9 ( 5.04x)
put_hevc_epel_uni_hv16_8_c: 5128.2 ( 1.00x)
put_hevc_epel_uni_hv16_8_rvv_i32: 770.6 ( 6.66x)
put_hevc_epel_uni_hv24_8_c: 11159.5 ( 1.00x)
put_hevc_epel_uni_hv24_8_rvv_i32: 2223.1 ( 5.02x)
put_hevc_epel_uni_hv32_8_c: 19462.3 ( 1.00x)
put_hevc_epel_uni_hv32_8_rvv_i32: 2925.1 ( 6.65x)
put_hevc_epel_uni_hv48_8_c: 43480.5 ( 1.00x)
put_hevc_epel_uni_hv48_8_rvv_i32: 6476.7 ( 6.71x)
put_hevc_epel_uni_hv64_8_c: 76411.2 ( 1.00x)
put_hevc_epel_uni_hv64_8_rvv_i32: 11456.7 ( 6.67x)
put_hevc_epel_uni_w_hv4_8_c: 557.8 ( 1.00x)
put_hevc_epel_uni_w_hv4_8_rvv_i32: 287.9 ( 1.94x)
put_hevc_epel_uni_w_hv6_8_c: 1068.0 ( 1.00x)
put_hevc_epel_uni_w_hv6_8_rvv_i32: 399.4 ( 2.67x)
put_hevc_epel_uni_w_hv8_8_c: 1835.2 ( 1.00x)
put_hevc_epel_uni_w_hv8_8_rvv_i32: 507.3 ( 3.62x)
put_hevc_epel_uni_w_hv12_8_c: 3758.9 ( 1.00x)
put_hevc_epel_uni_w_hv12_8_rvv_i32: 729.2 ( 5.15x)
put_hevc_epel_uni_w_hv16_8_c: 6524.5 ( 1.00x)
put_hevc_epel_uni_w_hv16_8_rvv_i32: 954.7 ( 6.83x)
put_hevc_epel_uni_w_hv24_8_c: 14094.2 ( 1.00x)
put_hevc_epel_uni_w_hv24_8_rvv_i32: 2764.9 ( 5.10x)
put_hevc_epel_uni_w_hv32_8_c: 24887.0 ( 1.00x)
put_hevc_epel_uni_w_hv32_8_rvv_i32: 3640.5 ( 6.84x)
put_hevc_epel_uni_w_hv48_8_c: 55341.0 ( 1.00x)
put_hevc_epel_uni_w_hv48_8_rvv_i32: 8083.8 ( 6.85x)
put_hevc_epel_uni_w_hv64_8_c: 97377.8 ( 1.00x)
put_hevc_epel_uni_w_hv64_8_rvv_i32: 14322.9 ( 6.80x)
put_hevc_epel_bi_hv4_8_c: 472.2 ( 1.00x)
put_hevc_epel_bi_hv4_8_rvv_i32: 250.0 ( 1.89x)
put_hevc_epel_bi_hv6_8_c: 903.1 ( 1.00x)
put_hevc_epel_bi_hv6_8_rvv_i32: 341.3 ( 2.65x)
put_hevc_epel_bi_hv8_8_c: 1583.5 ( 1.00x)
put_hevc_epel_bi_hv8_8_rvv_i32: 433.1 ( 3.66x)
put_hevc_epel_bi_hv12_8_c: 3205.8 ( 1.00x)
put_hevc_epel_bi_hv12_8_rvv_i32: 615.0 ( 5.21x)
put_hevc_epel_bi_hv16_8_c: 5504.1 ( 1.00x)
put_hevc_epel_bi_hv16_8_rvv_i32: 800.3 ( 6.88x)
put_hevc_epel_bi_hv24_8_c: 11897.2 ( 1.00x)
put_hevc_epel_bi_hv24_8_rvv_i32: 2309.9 ( 5.15x)
put_hevc_epel_bi_hv32_8_c: 20823.8 ( 1.00x)
put_hevc_epel_bi_hv32_8_rvv_i32: 3031.2 ( 6.87x)
put_hevc_epel_bi_hv48_8_c: 46854.5 ( 1.00x)
put_hevc_epel_bi_hv48_8_rvv_i32: 6713.2 ( 6.98x)
put_hevc_epel_bi_hv64_8_c: 82399.2 ( 1.00x)
put_hevc_epel_bi_hv64_8_rvv_i32: 11901.4 ( 6.92x)
Signed-off-by: Zhanheng Yang <zhanheng.yang@linux.alibaba.com>
---
libavcodec/riscv/h26x/h2656dsp.h | 11 +
libavcodec/riscv/h26x/hevcepel_rvv.S | 325 +++++++++++++++++++++++++--
libavcodec/riscv/hevcdsp_init.c | 4 +
3 files changed, 325 insertions(+), 15 deletions(-)
diff --git a/libavcodec/riscv/h26x/h2656dsp.h b/libavcodec/riscv/h26x/h2656dsp.h
index 7e320bd795..b8a116bdf7 100644
--- a/libavcodec/riscv/h26x/h2656dsp.h
+++ b/libavcodec/riscv/h26x/h2656dsp.h
@@ -81,4 +81,15 @@ void ff_hevc_put_epel_uni_w_v_8_m1_rvv(uint8_t *_dst, ptrdiff_t _dststride,
void ff_hevc_put_epel_bi_v_8_m1_rvv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t
mx, intptr_t my, int width);
+void ff_hevc_put_epel_hv_8_m1_rvv(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
+ intptr_t mx, intptr_t my, int width); /* 8-bit EPEL h+v filter, writes 16-bit samples to dst */
+void ff_hevc_put_epel_uni_hv_8_m1_rvv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
+ ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); /* 8-bit EPEL h+v filter, writes 8-bit pixels to _dst */
+void ff_hevc_put_epel_uni_w_hv_8_m1_rvv(uint8_t *_dst, ptrdiff_t _dststride,
+ const uint8_t *_src, ptrdiff_t _srcstride,
+ int height, int denom, int wx, int ox,
+ intptr_t mx, intptr_t my, int width); /* as uni_hv, with weight wx, shift denom + 6 and offset ox applied */
+void ff_hevc_put_epel_bi_hv_8_m1_rvv(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
+ ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t
+ mx, intptr_t my, int width); /* as uni_hv, with the 16-bit src2 samples added before the final shift */
#endif
diff --git a/libavcodec/riscv/h26x/hevcepel_rvv.S b/libavcodec/riscv/h26x/hevcepel_rvv.S
index caca0b88ab..7a4a3f3318 100644
--- a/libavcodec/riscv/h26x/hevcepel_rvv.S
+++ b/libavcodec/riscv/h26x/hevcepel_rvv.S
@@ -285,8 +285,8 @@ func ff_hevc_put_epel_v_8_\lmul\()_rvv, zve32x
sx s4, 24(sp)
load_filter a5
sub a1, a1, a2 # src - src_stride
- li t1, 0 # offset
- mv t4, a3
+ li t1, 0 # offset
+ mv t4, a3
1:
add t2, a1, t1
@@ -310,7 +310,7 @@ func ff_hevc_put_epel_v_8_\lmul\()_rvv, zve32x
vse16.v v0, (t3)
add t3, t3, 2*HEVC_MAX_PB_SIZE
addi a3, a3, -1
- bgt a3, zero, 2b
+ bgt a3, zero, 2b
add t1, t1, t5
sub a6, a6, t5
mv a3, t4
@@ -325,7 +325,7 @@ func ff_hevc_put_epel_v_8_\lmul\()_rvv, zve32x
endfunc
func ff_hevc_put_epel_uni_v_8_\lmul\()_rvv, zve32x
- csrwi vxrm, 0
+ csrwi vxrm, 0
addi sp, sp, -32
sx s1, 0(sp)
sx s2, 8(sp)
@@ -333,8 +333,8 @@ func ff_hevc_put_epel_uni_v_8_\lmul\()_rvv, zve32x
sx s4, 24(sp)
load_filter a6
sub a2, a2, a3 # src - src_stride
- li t1, 0 # offset
- mv t4, a4
+ li t1, 0 # offset
+ mv t4, a4
1:
add t2, a2, t1
@@ -360,7 +360,7 @@ func ff_hevc_put_epel_uni_v_8_\lmul\()_rvv, zve32x
vse8.v v0, (t3)
add t3, t3, a1
addi a4, a4, -1
- bgt a4, zero, 2b
+ bgt a4, zero, 2b
add t1, t1, t5
sub a7, a7, t5
mv a4, t4
@@ -375,7 +375,7 @@ func ff_hevc_put_epel_uni_v_8_\lmul\()_rvv, zve32x
endfunc
func ff_hevc_put_epel_uni_w_v_8_\lmul\()_rvv, zve32x
- csrwi vxrm, 0
+ csrwi vxrm, 0
#if (__riscv_xlen == 32)
lw t1, 4(sp) # my
lw t6, 8(sp) # width
@@ -391,8 +391,8 @@ func ff_hevc_put_epel_uni_w_v_8_\lmul\()_rvv, zve32x
load_filter t1
addi a5, a5, 6 # shift
sub a2, a2, a3 # src - src_stride
- li t1, 0 # offset
- mv t4, a4
+ li t1, 0 # offset
+ mv t4, a4
1:
add t2, a2, t1
@@ -424,7 +424,7 @@ func ff_hevc_put_epel_uni_w_v_8_\lmul\()_rvv, zve32x
vse8.v v0, (t3)
add t3, t3, a1
addi a4, a4, -1
- bgt a4, zero, 2b
+ bgt a4, zero, 2b
add t1, t1, t5
sub t6, t6, t5
mv a4, t4
@@ -439,7 +439,7 @@ func ff_hevc_put_epel_uni_w_v_8_\lmul\()_rvv, zve32x
endfunc
func ff_hevc_put_epel_bi_v_8_\lmul\()_rvv, zve32x
- csrwi vxrm, 0
+ csrwi vxrm, 0
lw t6, 0(sp) # width
addi sp, sp, -32
sx s1, 0(sp)
@@ -448,8 +448,8 @@ func ff_hevc_put_epel_bi_v_8_\lmul\()_rvv, zve32x
sx s4, 24(sp)
load_filter a7
sub a2, a2, a3 # src - src_stride
- li t1, 0 # offset
- mv t4, a5
+ li t1, 0 # offset
+ mv t4, a5
1:
add t2, a2, t1
@@ -495,4 +495,299 @@ func ff_hevc_put_epel_bi_v_8_\lmul\()_rvv, zve32x
endfunc
.endm
-hevc_epel_v m1, m2, m4
\ No newline at end of file
+hevc_epel_v m1, m2, m4
+/* filter_v_s: vdst = s5*vsrc0 + s6*vsrc1 + s7*vsrc2 + s8*vsrc3 (widening), then slide the row window: vsrc0..vsrc2 <- vsrc1..vsrc3 */
+.macro filter_v_s vdst, vsrc0, vsrc1, vsrc2, vsrc3
+ vwmul.vx \vdst, \vsrc0, s5 # vdst = vsrc0 * s5, result elements are 2*SEW wide
+ vwmacc.vx \vdst, s6, \vsrc1 # vdst += s6 * vsrc1
+ vmv.v.v \vsrc0, \vsrc1 # old vsrc0 is consumed: slide vsrc1 into it
+ vwmacc.vx \vdst, s7, \vsrc2 # vdst += s7 * vsrc2
+ vmv.v.v \vsrc1, \vsrc2 # slide vsrc2 into vsrc1
+ vwmacc.vx \vdst, s8, \vsrc3 # vdst += s8 * vsrc3
+ vmv.v.v \vsrc2, \vsrc3 # slide vsrc3 into vsrc2; vsrc3 itself is left unchanged
+.endm
+
+/* load_filter2 m: s5..s8 = the four signed bytes at qpel_filters + 4*m; clobbers t0, t1 */
+.macro load_filter2 m
+ la t0, qpel_filters # table base; NOTE(review): symbol is defined outside this hunk, confirm it is the 4-tap EPEL table despite its name
+ slli t1, \m, 2 # byte offset = m * 4 (four 1-byte taps per entry)
+ add t0, t0, t1 # t0 = address of the selected entry
+ lb s5, 0(t0) # tap 0, sign-extended
+ lb s6, 1(t0) # tap 1, sign-extended
+ lb s7, 2(t0) # tap 2, sign-extended
+ lb s8, 3(t0) # tap 3, sign-extended
+.endm
+
+.macro hevc_epel_hv lmul, lmul2, lmul4
+func ff_hevc_put_epel_hv_8_\lmul\()_rvv, zve32x /* a0=dst a1=src a2=srcstride a3=height a4=mx a5=my a6=width; stores 16-bit results */
+ csrwi vxrm, 2 # fixed-point rounding mode 2: round-down (truncate)
+ addi sp, sp, -64 # stack room for the eight saved registers s1..s8
+ sx s1, 0(sp) # sx/lx are macros defined outside this hunk
+ sx s2, 8(sp)
+ sx s3, 16(sp)
+ sx s4, 24(sp)
+ sx s5, 32(sp)
+ sx s6, 40(sp)
+ sx s7, 48(sp)
+ sx s8, 56(sp)
+ load_filter a4 # macro defined outside this hunk, invoked with mx; presumably sets up the horizontal taps
+ load_filter2 a5 # s5..s8 = taps selected by my, consumed by filter_v_s
+ sub a1, a1, a2 # src - src_stride: start one row above the block
+ mv t0, a3 # keep height so the row counter can be reloaded per strip
+ li t1, 0 # offset: columns already processed
+
+1: # outer loop: one strip of t6 columns per iteration
+ add t2, a1, t1 # t2 = source pointer for this strip
+ slli t3, t1, 1 # destination elements are 2 bytes wide
+ add t3, a0, t3 # t3 = destination pointer for this strip
+
+ vsetvli t6, a6, e8, \lmul, ta, ma # t6 = number of columns handled in this strip
+ filter_h v4, v24, v26, v28, v30, t2 # filter_h is defined outside this hunk; v4 is consumed below as a 16-bit row
+ add t2, t2, a2 # next source row
+ filter_h v8, v24, v26, v28, v30, t2 # second primed row
+ add t2, t2, a2
+ filter_h v12, v24, v26, v28, v30, t2 # third primed row
+ add t2, t2, a2
+
+2: # inner loop: one output row per iteration
+ vsetvli zero, zero, e8, \lmul, ta, ma # back to 8-bit elements, vl unchanged
+ filter_h v16, v24, v26, v28, v30, t2 # newest row of the 4-row window
+ add t2, t2, a2
+
+ vsetvli zero, zero, e16, \lmul2, ta, ma # rows are 16-bit, sums widen to 32-bit
+ filter_v_s v0, v4, v8, v12, v16 # v0 = vertical 4-tap sum; window slides down one row
+ vnclip.wi v0, v0, 6 # narrow 32 -> 16 bits with >> 6 (rounding per vxrm, signed saturation)
+ vse16.v v0, (t3) # store one row of 16-bit results
+ addi a3, a3, -1 # one row done
+ addi t3, t3, 2*HEVC_MAX_PB_SIZE # dst row stride is HEVC_MAX_PB_SIZE 16-bit elements
+ bgt a3, zero, 2b
+ mv a3, t0 # reload the row counter for the next strip
+ add t1, t1, t6 # advance the column offset
+ sub a6, a6, t6 # columns remaining
+ bgt a6, zero, 1b
+
+ lx s1, 0(sp) # restore saved registers
+ lx s2, 8(sp)
+ lx s3, 16(sp)
+ lx s4, 24(sp)
+ lx s5, 32(sp)
+ lx s6, 40(sp)
+ lx s7, 48(sp)
+ lx s8, 56(sp)
+ addi sp, sp, 64
+ ret
+endfunc
+/* uni_hv: a0=dst a1=dststride a2=src a3=srcstride a4=height a5=mx a6=my a7=width; stores 8-bit pixels */
+func ff_hevc_put_epel_uni_hv_8_\lmul\()_rvv, zve32x
+ csrwi vxrm, 0 # fixed-point rounding mode 0: round-to-nearest-up
+ addi sp, sp, -64 # stack room for the eight saved registers s1..s8
+ sx s1, 0(sp) # sx/lx are macros defined outside this hunk
+ sx s2, 8(sp)
+ sx s3, 16(sp)
+ sx s4, 24(sp)
+ sx s5, 32(sp)
+ sx s6, 40(sp)
+ sx s7, 48(sp)
+ sx s8, 56(sp)
+ load_filter a5 # macro defined outside this hunk, invoked with mx; presumably sets up the horizontal taps
+ load_filter2 a6 # s5..s8 = taps selected by my, consumed by filter_v_s
+ sub a2, a2, a3 # src - src_stride: start one row above the block
+ mv t0, a4 # keep height so the row counter can be reloaded per strip
+ li t1, 0 # offset: columns already processed
+
+1: # outer loop: one strip of t6 columns per iteration
+ add t2, a2, t1 # t2 = source pointer for this strip
+ add t3, a0, t1 # t3 = destination pointer (1 byte per pixel)
+
+ vsetvli t6, a7, e8, \lmul, ta, ma # t6 = number of columns handled in this strip
+ filter_h v4, v24, v26, v28, v30, t2 # filter_h is defined outside this hunk; v4 is consumed below as a 16-bit row
+ add t2, t2, a3 # next source row
+ filter_h v8, v24, v26, v28, v30, t2 # second primed row
+ add t2, t2, a3
+ filter_h v12, v24, v26, v28, v30, t2 # third primed row
+ add t2, t2, a3
+
+2: # inner loop: one output row per iteration
+ vsetvli zero, zero, e8, \lmul, ta, ma # back to 8-bit elements, vl unchanged
+ filter_h v16, v24, v26, v28, v30, t2 # newest row of the 4-row window
+ add t2, t2, a3
+
+ vsetvli zero, zero, e16, \lmul2, ta, ma # rows are 16-bit, sums widen to 32-bit
+ filter_v_s v0, v4, v8, v12, v16 # v0 = vertical 4-tap sum; window slides down one row
+ vsetvli zero, zero, e32, \lmul4, ta, ma # operate on the 32-bit sums in place
+ vsra.vi v0, v0, 6 # first-stage shift: >> 6 without rounding
+ vmax.vx v0, v0, zero # clamp negatives to 0 before the unsigned narrowing
+ vsetvli zero, zero, e16, \lmul2, ta, ma
+ vnclipu.wi v0, v0, 6 # narrow 32 -> 16 bits: rounded >> 6, unsigned saturation
+ vsetvli zero, zero, e8, \lmul, ta, ma
+ vnclipu.wi v0, v0, 0 # narrow 16 -> 8 bits, saturating to 0..255
+ vse8.v v0, (t3) # store one row of pixels
+ addi a4, a4, -1 # one row done
+ add t3, t3, a1 # next destination row
+ bgt a4, zero, 2b
+ mv a4, t0 # reload the row counter for the next strip
+ add t1, t1, t6 # advance the column offset
+ sub a7, a7, t6 # columns remaining
+ bgt a7, zero, 1b
+
+ lx s1, 0(sp) # restore saved registers
+ lx s2, 8(sp)
+ lx s3, 16(sp)
+ lx s4, 24(sp)
+ lx s5, 32(sp)
+ lx s6, 40(sp)
+ lx s7, 48(sp)
+ lx s8, 56(sp)
+ addi sp, sp, 64
+ ret
+endfunc
+/* uni_w_hv: a0=dst a1=dststride a2=src a3=srcstride a4=height a5=denom a6=wx a7=ox; mx, my, width are read from the stack */
+func ff_hevc_put_epel_uni_w_hv_8_\lmul\()_rvv, zve32x
+ csrwi vxrm, 0 # fixed-point rounding mode 0: round-to-nearest-up
+ lx t2, 0(sp) # mx
+#if (__riscv_xlen == 32)
+ lw t4, 4(sp) # my
+ lw t5, 8(sp) # width
+#elif (__riscv_xlen == 64)
+ ld t4, 8(sp) # my
+ lw t5, 16(sp) # width
+#endif
+ addi a5, a5, 6 # shift
+ addi sp, sp, -64 # stack arguments were read above, before sp moves
+ sx s1, 0(sp) # sx/lx are macros defined outside this hunk
+ sx s2, 8(sp)
+ sx s3, 16(sp)
+ sx s4, 24(sp)
+ sx s5, 32(sp)
+ sx s6, 40(sp)
+ sx s7, 48(sp)
+ sx s8, 56(sp)
+ load_filter t2 # macro defined outside this hunk, invoked with mx; presumably sets up the horizontal taps
+ load_filter2 t4 # s5..s8 = taps selected by my, consumed by filter_v_s
+ sub a2, a2, a3 # src - src_stride: start one row above the block
+ mv t0, a4 # keep height so the row counter can be reloaded per strip
+ li t1, 0 # offset: columns already processed
+
+1: # outer loop: one strip of t6 columns per iteration
+ add t2, a2, t1 # t2 = source pointer for this strip (mx is no longer needed)
+ add t3, a0, t1 # t3 = destination pointer (1 byte per pixel)
+
+ vsetvli t6, t5, e8, \lmul, ta, ma # t6 = number of columns handled in this strip
+ filter_h v4, v24, v26, v28, v30, t2 # filter_h is defined outside this hunk; v4 is consumed below as a 16-bit row
+ add t2, t2, a3 # next source row
+ filter_h v8, v24, v26, v28, v30, t2 # second primed row
+ add t2, t2, a3
+ filter_h v12, v24, v26, v28, v30, t2 # third primed row
+ add t2, t2, a3
+
+2: # inner loop: one output row per iteration
+ vsetvli zero, zero, e8, \lmul, ta, ma # back to 8-bit elements, vl unchanged
+ filter_h v16, v24, v26, v28, v30, t2 # newest row of the 4-row window
+ add t2, t2, a3
+
+ vsetvli zero, zero, e16, \lmul2, ta, ma # rows are 16-bit, sums widen to 32-bit
+ filter_v_s v0, v4, v8, v12, v16 # v0 = vertical 4-tap sum; window slides down one row
+ vsetvli zero, zero, e32, \lmul4, ta, ma # weighting is done on the 32-bit sums
+ vsra.vi v0, v0, 6 # first-stage shift: >> 6 without rounding
+ vmul.vx v0, v0, a6 # multiply by wx
+ vssra.vx v0, v0, a5 # rounded arithmetic shift right by denom + 6
+ vsadd.vx v0, v0, a7 # saturating add of ox
+ vmax.vx v0, v0, zero # clamp negatives to 0
+ vsetvli zero, zero, e16, \lmul2, ta, ma
+ vnclip.wi v0, v0, 0 # narrow 32 -> 16 bits with signed saturation
+ vsetvli zero, zero, e8, \lmul, ta, ma
+ vnclipu.wi v0, v0, 0 # narrow 16 -> 8 bits, saturating to 0..255
+ vse8.v v0, (t3) # store one row of pixels
+ addi a4, a4, -1 # one row done
+ add t3, t3, a1 # next destination row
+ bgt a4, zero, 2b
+ mv a4, t0 # reload the row counter for the next strip
+ add t1, t1, t6 # advance the column offset
+ sub t5, t5, t6 # columns remaining
+ bgt t5, zero, 1b
+
+ lx s1, 0(sp) # restore saved registers
+ lx s2, 8(sp)
+ lx s3, 16(sp)
+ lx s4, 24(sp)
+ lx s5, 32(sp)
+ lx s6, 40(sp)
+ lx s7, 48(sp)
+ lx s8, 56(sp)
+ addi sp, sp, 64
+ ret
+endfunc
+/* bi_hv: a0=dst a1=dststride a2=src a3=srcstride a4=src2 a5=height a6=mx a7=my; width is read from the stack */
+func ff_hevc_put_epel_bi_hv_8_\lmul\()_rvv, zve32x
+ csrwi vxrm, 0 # fixed-point rounding mode 0: round-to-nearest-up
+ lw t3, 0(sp) # width
+ addi sp, sp, -64 # the stack argument was read above, before sp moves
+ sx s1, 0(sp) # sx/lx are macros defined outside this hunk
+ sx s2, 8(sp)
+ sx s3, 16(sp)
+ sx s4, 24(sp)
+ sx s5, 32(sp)
+ sx s6, 40(sp)
+ sx s7, 48(sp)
+ sx s8, 56(sp)
+ load_filter a6 # macro defined outside this hunk, invoked with mx; NOTE(review): must not clobber t3 (width)
+ load_filter2 a7 # s5..s8 = taps selected by my, consumed by filter_v_s
+ mv a6, t3 # mx is consumed: a6 now holds the remaining width
+ sub a2, a2, a3 # src - src_stride: start one row above the block
+ mv t0, a5 # keep height so the row counter can be reloaded per strip
+ li t1, 0 # offset: columns already processed
+
+1: # outer loop: one strip of t6 columns per iteration
+ add t2, a2, t1 # t2 = source pointer for this strip
+ add t3, a0, t1 # t3 = destination pointer (1 byte per pixel)
+ slli t5, t1, 1 # src2 elements are 2 bytes wide
+ add t5, a4, t5 # t5 = src2 pointer for this strip
+
+ vsetvli t6, a6, e8, \lmul, ta, ma # t6 = number of columns handled in this strip
+ filter_h v4, v24, v26, v28, v30, t2 # filter_h is defined outside this hunk; v4 is consumed below as a 16-bit row
+ add t2, t2, a3 # next source row
+ filter_h v8, v24, v26, v28, v30, t2 # second primed row
+ add t2, t2, a3
+ filter_h v12, v24, v26, v28, v30, t2 # third primed row
+ add t2, t2, a3
+
+2: # inner loop: one output row per iteration
+ vsetvli zero, zero, e8, \lmul, ta, ma # back to 8-bit elements, vl unchanged
+ filter_h v16, v24, v26, v28, v30, t2 # newest row of the 4-row window
+ add t2, t2, a3
+
+ vsetvli zero, zero, e16, \lmul2, ta, ma # rows are 16-bit, sums widen to 32-bit
+ vle16.v v24, (t5) # load one row of src2; v24 is reused after filter_h is done with it
+ addi t5, t5, 2*HEVC_MAX_PB_SIZE # src2 row stride is HEVC_MAX_PB_SIZE 16-bit elements
+ filter_v_s v0, v4, v8, v12, v16 # v0 = vertical 4-tap sum; window slides down one row
+ vsetvli zero, zero, e32, \lmul4, ta, ma # operate on the 32-bit sums in place
+ vsra.vi v0, v0, 6 # first-stage shift: >> 6 without rounding
+ vsetvli zero, zero, e16, \lmul2, ta, ma
+ vwadd.wv v0, v0, v24 # 32-bit sum += sign-extended 16-bit src2
+ vnclip.wi v0, v0, 7 # narrow 32 -> 16 bits: rounded >> 7, signed saturation
+ vmax.vx v0, v0, zero # clamp negatives to 0 before the unsigned narrowing
+ vsetvli zero, zero, e8, \lmul, ta, ma
+ vnclipu.wi v0, v0, 0 # narrow 16 -> 8 bits, saturating to 0..255
+ vse8.v v0, (t3) # store one row of pixels
+ addi a5, a5, -1 # one row done
+ add t3, t3, a1 # next destination row
+ bgt a5, zero, 2b
+ mv a5, t0 # reload the row counter for the next strip
+ add t1, t1, t6 # advance the column offset
+ sub a6, a6, t6 # columns remaining
+ bgt a6, zero, 1b
+
+ lx s1, 0(sp) # restore saved registers
+ lx s2, 8(sp)
+ lx s3, 16(sp)
+ lx s4, 24(sp)
+ lx s5, 32(sp)
+ lx s6, 40(sp)
+ lx s7, 48(sp)
+ lx s8, 56(sp)
+ addi sp, sp, 64
+ ret
+endfunc
+.endm
+
+hevc_epel_hv m1, m2, m4
diff --git a/libavcodec/riscv/hevcdsp_init.c b/libavcodec/riscv/hevcdsp_init.c
index 53c800626f..1df7eb654a 100644
--- a/libavcodec/riscv/hevcdsp_init.c
+++ b/libavcodec/riscv/hevcdsp_init.c
@@ -102,6 +102,10 @@ void ff_hevc_dsp_init_riscv(HEVCDSPContext *c, const int bit_depth)
RVV_FNASSIGN_PEL(c->put_hevc_epel_uni, 1, 0, ff_hevc_put_epel_uni_v_8_m1_rvv);
RVV_FNASSIGN_PEL(c->put_hevc_epel_uni_w, 1, 0, ff_hevc_put_epel_uni_w_v_8_m1_rvv);
RVV_FNASSIGN_PEL(c->put_hevc_epel_bi, 1, 0, ff_hevc_put_epel_bi_v_8_m1_rvv);
+ RVV_FNASSIGN_PEL(c->put_hevc_epel, 1, 1, ff_hevc_put_epel_hv_8_m1_rvv);
+ RVV_FNASSIGN_PEL(c->put_hevc_epel_uni, 1, 1, ff_hevc_put_epel_uni_hv_8_m1_rvv);
+ RVV_FNASSIGN_PEL(c->put_hevc_epel_uni_w, 1, 1, ff_hevc_put_epel_uni_w_hv_8_m1_rvv);
+ RVV_FNASSIGN_PEL(c->put_hevc_epel_bi, 1, 1, ff_hevc_put_epel_bi_hv_8_m1_rvv);
break;
default:
break;
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
prev parent reply other threads:[~2026-01-24 15:58 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-22 4:23 [FFmpeg-devel] [PATCH 1/6] libavcodec/riscv: add RVV optimized for qpel_h " zhanheng.yang--- via ffmpeg-devel
2026-01-22 4:23 ` [FFmpeg-devel] [PATCH 2/6] libavcodec/riscv: add RVV optimized for qpel_v " zhanheng.yang--- via ffmpeg-devel
2026-01-22 4:23 ` [FFmpeg-devel] [PATCH 3/6] libavcodec/riscv: add RVV optimized for epel_h " zhanheng.yang--- via ffmpeg-devel
2026-01-22 4:23 ` [FFmpeg-devel] [PATCH 4/6] libavcodec/riscv: add RVV optimized for epel_v " zhanheng.yang--- via ffmpeg-devel
2026-01-22 4:23 ` [FFmpeg-devel] [PATCH 5/6] libavcodec/riscv: add RVV optimized for qpel_hv " zhanheng.yang--- via ffmpeg-devel
2026-01-22 4:23 ` zhanheng.yang--- via ffmpeg-devel [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260122042357.1438-6-zhanheng.yang@linux.alibaba.com \
--to=ffmpeg-devel@ffmpeg.org \
--cc=zhanheng.yang@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git