From: "Rémi Denis-Courmont" <remi@remlab.net> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH] lavc/h264dsp: R-V V idct4_add8 (all depth) Date: Wed, 31 Jul 2024 22:06:50 +0300 Message-ID: <20240731190650.636970-1-remi@remlab.net> (raw) This is really just a wrapper for idct4_add16intra, which is in turn mostly a wrapper for idct4_add and idct4_dc_add. For benchmarks refer to the latter two. --- libavcodec/riscv/h264dsp_init.c | 14 ++++++-- libavcodec/riscv/h264idct_rvv.S | 59 +++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index e892c335a6..6b81587003 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -53,6 +53,9 @@ void ff_h264_idct_add16intra_##depth##_rvv(uint8_t *d, const int *soffset, \ int16_t *s, int stride, \ const uint8_t nnzc[5 * 8]); \ void ff_h264_idct8_add4_##depth##_rvv(uint8_t *d, const int *soffset, \ + int16_t *s, int stride, \ + const uint8_t nnzc[5 * 8]); \ +void ff_h264_idct4_add8_##depth##_rvv(uint8_t **d, const int *soffset, \ int16_t *s, int stride, \ const uint8_t nnzc[5 * 8]); @@ -104,6 +107,8 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv; # if __riscv_xlen == 64 dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv; + if (chroma_format_idc <= 1) + dsp->h264_idct_add8 = ff_h264_idct4_add8_8_rvv; # endif } if (flags & AV_CPU_FLAG_RVV_I64) { @@ -123,10 +128,13 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, if (zvl128b && (flags & AV_CPU_FLAG_RVB)) { \ dsp->h264_idct_dc_add = ff_h264_idct4_dc_add_##depth##_rvv; \ dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_##depth##_rvv; \ + dsp->h264_idct_add16 = ff_h264_idct_add16_##depth##_rvv; \ + dsp->h264_idct_add16intra = \ + ff_h264_idct_add16intra_##depth##_rvv; \ if (__riscv_xlen == 64) { \ - dsp->h264_idct_add16 = 
ff_h264_idct_add16_##depth##_rvv; \ - dsp->h264_idct_add16intra = \ - ff_h264_idct_add16intra_##depth##_rvv; \ + if (chroma_format_idc <= 1) \ + dsp->h264_idct_add8 = \ + ff_h264_idct4_add8_##depth##_rvv; \ } \ } \ if (__riscv_xlen == 64 && (flags & AV_CPU_FLAG_RVB)) \ diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index f823346c8d..70b7cfac4d 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -57,7 +57,7 @@ endfunc func ff_h264_idct_add_8_rvv, zve32x lpad 0 csrwi vxrm, 0 -.Lidct_add4_8_rvv: +.Lidct4_add_8_rvv: vsetivli zero, 4, e16, mf2, ta, ma addi t1, a1, 1 * 4 * 2 vle16.v v0, (a1) @@ -111,7 +111,7 @@ endfunc func ff_h264_idct_add_16_rvv, zve32x csrwi vxrm, 0 -.Lidct_add4_16_rvv: +.Lidct4_add_16_rvv: vsetivli zero, 4, e32, m1, ta, ma addi t1, a1, 1 * 4 * 4 vle32.v v0, (a1) @@ -543,8 +543,12 @@ endfunc .endr const ff_h264_scan8 - .byte 014, 015, 024, 025, 016, 017, 026, 027 - .byte 034, 035, 044, 045, 036, 037, 046, 047 + .byte 014, 015, 024, 025, 016, 017, 026, 027 + .byte 034, 035, 044, 045, 036, 037, 046, 047 + .byte 064, 065, 074, 075, 066, 067, 076, 077 + .byte 0104, 0105, 0114, 0115, 0106, 0107, 0116, 0117 + .byte 0134, 0135, 0144, 0145, 0136, 0137, 0146, 0147 + .byte 0154, 0155, 0164, 0165, 0156, 0157, 0166, 0167 endconst .macro idct4_adds type, depth @@ -554,8 +558,11 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b .endif csrwi vxrm, 0 lla t0, ff_h264_scan8 - li t1, 32 * (\depth / 8) vsetivli zero, 16, e8, m1, ta, ma +.ifc \type, 16intra +.Lidct4_add4_\depth\()_rvv: +.endif + li t1, 32 * (\depth / 8) vle8.v v8, (t0) .if \depth == 8 vlse16.v v16, (a2), t1 @@ -587,7 +594,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b mv t5, a1 mv a1, a2 mv a2, a3 - li a3, 16 + csrr a3, vl mv a7, ra 1: andi t0, a4, 1 @@ -603,7 +610,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b .else beqz t0, 2f # if (nnzc[scan8[i]]) .endif - jal .Lidct_add4_\depth\()_rvv + jal 
.Lidct4_add_\depth\()_rvv j 3f 2: .ifnc \type, 16 @@ -702,6 +709,38 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x, b addi sp, sp, 48 ret endfunc + +func ff_h264_idct4_add8_\depth\()_rvv, zve32x +.if \depth == 8 + lpad 0 +.endif + csrwi vxrm, 0 + addi sp, sp, -32 + addi a2, a2, 16 * 16 * 2 * (\depth / 8) # &block[16 * 16] + lla t0, ff_h264_scan8 + 16 + sd s0, 0(sp) + sd ra, 8(sp) + mv s0, sp + sd a0, 16(sp) + sd a4, 24(sp) + ld a0, 0(a0) # dest[0] + addi a1, a1, 16 * 4 # &block_offset[16] + vsetivli zero, 4, e8, mf4, ta, ma + jal .Lidct4_add4_\depth\()_rvv + + ld a4, 24(sp) # nnzc + ld a0, 16(sp) + mv a3, a2 # stride + addi a2, a1, (16 - 4) * 16 * 2 * (\depth / 8) # &block[32 * 16] + addi a1, t5, (16 - 4)* 4 # &block_offset[32] + ld a0, 8(a0) # dest[1] + lla t0, ff_h264_scan8 + 32 + ld ra, 8(sp) + ld s0, 0(sp) + addi sp, sp, 32 + vsetivli zero, 4, e8, mf4, ta, ma + j .Lidct4_add4_\depth\()_rvv +endfunc #endif .endr @@ -724,5 +763,11 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 j ff_h264_idct8_add4_16_rvv endfunc + +func ff_h264_idct4_add8_\depth\()_rvv, zve32x + lpad 0 + li a5, (1 << \depth) - 1 + j ff_h264_idct4_add8_16_rvv +endfunc #endif .endr -- 2.45.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2024-07-31 19:07 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-07-31 19:06 Rémi Denis-Courmont [this message] 2024-07-31 20:05 ` [FFmpeg-devel] [PATCHv2] lavc/h264dsp: R-V V idct4_add8 (all depths) Rémi Denis-Courmont
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240731190650.636970-1-remi@remlab.net \ --to=remi@remlab.net \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git