From: "Rémi Denis-Courmont" <remi@remlab.net> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH] lavc/h264dsp: R-V V 8-bit h264_weight_pixels Date: Fri, 5 Jul 2024 21:28:16 +0300 Message-ID: <20240705182816.27464-1-remi@remlab.net> (raw) There are two implementations here: - a generic scalable one processing one column at a time, - a specialised one processing one (fixed-size) row at a time. Unsurprisingly, the generic one works out better with smaller widths. With larger widths, the gains from filling vectors are outweighed by the extra cost of strided loads and stores. In other words, memory accesses become the bottleneck. T-Head C908: h264_weight2_8_c: 54.2 h264_weight2_8_rvv_i32: 17.5 h264_weight4_8_c: 102.0 h264_weight4_8_rvv_i32: 34.7 h264_weight8_8_c: 213.7 h264_weight8_8_rvv_i32: 79.7 h264_weight16_8_c: 401.0 h264_weight16_8_rvv_i32: 74.2 SpacemiT X60: h264_weight2_8_c: 48.5 h264_weight2_8_rvv_i32: 11.7 h264_weight4_8_c: 90.5 h264_weight4_8_rvv_i32: 23.7 h264_weight8_8_c: 175.0 h264_weight8_8_rvv_i32: 58.0 h264_weight16_8_c: 342.2 h264_weight16_8_rvv_i32: 66.0 --- libavcodec/riscv/h264dsp_init.c | 7 +++ libavcodec/riscv/h264dsp_rvv.S | 77 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index bf9743eb6b..e1b725dcbb 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -21,12 +21,15 @@ #include "config.h" #include <stdint.h> +#include <string.h> #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/riscv/cpu.h" #include "libavcodec/h264dsp.h" +extern const h264_weight_func ff_h264_weight_funcs_8_rvv[]; + void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride, @@ -60,6 +63,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, # if HAVE_RVV if (flags & 
AV_CPU_FLAG_RVV_I32) { if (bit_depth == 8 && ff_rv_vlen_least(128)) { + memcpy(dsp->weight_h264_pixels_tab, + ff_h264_weight_funcs_8_rvv, + sizeof (dsp->weight_h264_pixels_tab)); + dsp->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv; dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv; dsp->h264_h_loop_filter_luma_mbaff = diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S index 96a8a0a8a3..ab85bfbd69 100644 --- a/libavcodec/riscv/h264dsp_rvv.S +++ b/libavcodec/riscv/h264dsp_rvv.S @@ -26,6 +26,83 @@ #include "libavutil/riscv/asm.S" +func ff_h264_weight_pixels_simple_8_rvv, zve32x + csrwi vxrm, 0 + sll a5, a5, a3 +1: + vsetvli zero, a6, e32, m4, ta, ma + vle8.v v8, (a0) + addi a2, a2, -1 + vmv.v.x v16, a5 + vsetvli zero, zero, e16, m2, ta, ma + vzext.vf2 v24, v8 + vwmaccsu.vx v16, a4, v24 + vnclip.wi v16, v16, 0 + vmax.vx v16, v16, zero + vsetvli zero, zero, e8, m1, ta, ma + vnclipu.wx v8, v16, a3 + vse8.v v8, (a0) + add a0, a0, a1 + bnez a2, 1b + + ret +endfunc + +func ff_h264_weight_pixels_8_rvv, zve32x + csrwi vxrm, 0 + sll a5, a5, a3 +1: + mv t0, a0 + mv t6, a6 +2: + vsetvli t2, a2, e32, m8, ta, ma + vlse8.v v8, (t0), a1 + addi t6, t6, -1 + vmv.v.x v16, a5 + vsetvli zero, zero, e16, m4, ta, ma + vzext.vf2 v24, v8 + vwmaccsu.vx v16, a4, v24 + vnclip.wi v16, v16, 0 + vmax.vx v16, v16, zero + vsetvli zero, zero, e8, m2, ta, ma + vnclipu.wx v8, v16, a3 + vsse8.v v8, (t0), a1 + addi t0, t0, 1 + bnez t6, 2b + + mul t3, a1, t2 + sub a2, a2, t2 + add a0, a0, t3 + bnez a2, 1b + + ret +endfunc + +.irp w, 16, 8, 4, 2 +func ff_h264_weight_pixels\w\()_8_rvv, zve32x + li a6, \w + .if \w == 16 + j ff_h264_weight_pixels_simple_8_rvv + .else + j ff_h264_weight_pixels_8_rvv + .endif +endfunc +.endr + + .global ff_h264_weight_funcs_8_rvv + .hidden ff_h264_weight_funcs_8_rvv +const ff_h264_weight_funcs_8_rvv + .irp w, 16, 8, 4, 2 +#if __riscv_xlen == 32 + .word ff_h264_weight_pixels\w\()_8_rvv +#elif __riscv_xlen == 64 + .dword 
ff_h264_weight_pixels\w\()_8_rvv +#else + .qword ff_h264_weight_pixels\w\()_8_rvv +#endif + .endr +endconst + .variant_cc ff_h264_loop_filter_luma_8_rvv func ff_h264_loop_filter_luma_8_rvv, zve32x # p2: v8, p1: v9, p0: v10, q0: v11, q1: v12, q2: v13 -- 2.45.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2024-07-05 18:28 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-07-05 18:28 Rémi Denis-Courmont [this message] 2024-07-05 20:23 ` [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V 8-bit h264_biweight_pixels Rémi Denis-Courmont 2024-07-06 10:52 ` [FFmpeg-devel] [PATCH] lavc/h264dsp: R-V V 8-bit h264_weight_pixels Rémi Denis-Courmont
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240705182816.27464-1-remi@remlab.net \ --to=remi@remlab.net \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git