From: "Rémi Denis-Courmont" <remi@remlab.net> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH 4/5] lavc/h264dsp: reuse the R-V V IDCT DC add functions Date: Thu, 18 Jul 2024 22:35:45 +0300 Message-ID: <20240718193546.18939-4-remi@remlab.net> (raw) In-Reply-To: <20240718193546.18939-1-remi@remlab.net> This reuses the DC bypass functions from the multiple IDCT functions, to leverage vector code. As an added bonus, the caller functions can now rely on the callee functions to preserve their parameters, thus cutting down on stack spills. --- libavcodec/riscv/h264idct_rvv.S | 76 +++++++-------------------------- 1 file changed, 16 insertions(+), 60 deletions(-) diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index 2648e06aeb..c42db6ef29 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -536,7 +536,7 @@ endconst .macro idct4_adds type, depth func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x csrwi vxrm, 0 - addi sp, sp, -96 + addi sp, sp, -64 lla t0, ff_h264_scan8 sd s0, (sp) li t1, 32 * (\depth / 8) @@ -547,14 +547,6 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x sd s3, 32(sp) sd s4, 40(sp) sd s5, 48(sp) - sd s6, 56(sp) - sd s7, 64(sp) -.if \depth > 8 - sd s8, 72(sp) - sd s9, 80(sp) - mv s8, a5 - mv s9, a6 -.endif vsetivli zero, 16, e8, m1, ta, ma vle8.v v8, (t0) .if \depth == 8 @@ -583,8 +575,8 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x li s1, 16 mv s4, a0 mv s5, a1 - mv s6, a2 - mv s7, a3 + mv a1, a2 + mv a2, a3 1: andi t0, s2, 1 addi s1, s1, -1 @@ -594,12 +586,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x .endif lw t2, (s5) # block_offset[i] andi t1, s3, 1 - mv a1, s6 - mv a2, s7 add a0, s4, t2 -.if \depth > 8 - mv a5, s8 -.endif .ifc \type, 16 bnez t1, 2f # if (nnz == 1 && block[i * 16]) .else @@ -611,23 +598,13 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x .ifnc \type, 16 beqz t1, 3f # if (block[i * 16]) .endif -.if \depth == 8 - call ff_h264_idct_dc_add_\depth\()_c -.else - jalr s9 -.endif + jal ff_h264_idct4_dc_add_\depth\()_rvv 3: srli s3, s3, 1 addi s5, s5, 4 - addi s6, s6, 16 * 2 * (\depth / 8) + addi a1, a1, 16 * 2 * (\depth / 8) bnez s1, 1b -.if \depth > 8 - ld s9, 80(sp) - ld s8, 72(sp) -.endif - ld s7, 64(sp) - ld s6, 56(sp) ld s5, 48(sp) ld s4, 40(sp) ld s3, 32(sp) @@ -635,7 +612,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x ld s1, 16(sp) ld ra, 8(sp) ld s0, 0(sp) - addi sp, sp, 96 + addi sp, sp, 64 ret endfunc .endm @@ -646,7 +623,7 @@ idct4_adds 16intra, \depth func ff_h264_idct8_add4_\depth\()_rvv, zve32x csrwi vxrm, 0 - addi sp, sp, -96 + addi sp, sp, -64 lla t0, ff_h264_scan8 sd s0, (sp) li t1, 4 * 32 * (\depth / 8) @@ -658,14 +635,6 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x sd s3, 32(sp) sd s4, 40(sp) sd s5, 48(sp) - sd s6, 56(sp) - sd s7, 64(sp) -.if \depth > 8 - sd s8, 72(sp) - sd s9, 80(sp) - mv s8, a5 - mv s9, a6 -.endif vsetivli zero, 4, e8, mf4, ta, ma vlse8.v v8, (t0), t2 .if \depth == 8 @@ -689,8 +658,8 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x li s1, 4 mv s4, a0 mv s5, a1 - mv s6, a2 - mv s7, a3 + mv a1, a2 + mv a2, a3 1: andi t0, s2, 1 addi s1, s1, -1 @@ -698,33 +667,23 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x beqz t0, 3f # if (nnz) lw t2, (s5) # block_offset[i] andi t1, s3, 1 - mv a1, s6 - mv a2, s7 add a0, s4, t2 -.if \depth > 8 - mv a5, s8 -.endif bnez t1, 2f # if (nnz == 1 && block[i * 16]) jal .Lidct8_add_\depth\()_rvv - j 3f -2: .if \depth == 8 - call ff_h264_idct8_dc_add_\depth\()_c + j 3f .else - jalr s9 + j 4f # idct8_add_16 updates a1 .endif +2: + jal ff_h264_idct8_dc_add_\depth\()_rvv 3: + addi a1, a1, 4 * 16 * 2 * (\depth / 8) +4: srli s3, s3, 1 addi s5, s5, 4 * 4 - addi s6, s6, 4 * 16 * 2 * (\depth / 8) bnez s1, 1b -.if \depth > 8 - ld s9, 80(sp) - ld s8, 72(sp) -.endif - ld s7, 64(sp) - ld s6, 56(sp) ld s5, 48(sp) ld s4, 40(sp) ld s3, 32(sp) @@ -732,7 +691,7 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x ld s1, 16(sp) ld ra, 8(sp) ld s0, 0(sp) - addi sp, sp, 96 + addi sp, sp, 64 ret endfunc .endr @@ -740,19 +699,16 @@ endfunc .irp depth, 9, 10, 12, 14 func ff_h264_idct_add16_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 - lla a6, ff_h264_idct_dc_add_\depth\()_c j ff_h264_idct_add16_16_rvv endfunc func ff_h264_idct_add16intra_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 - lla a6, ff_h264_idct_dc_add_\depth\()_c j ff_h264_idct_add16intra_16_rvv endfunc func ff_h264_idct8_add4_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 - lla a6, ff_h264_idct8_dc_add_\depth\()_c j ff_h264_idct8_add4_16_rvv endfunc .endr -- 2.45.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-07-18 19:36 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-07-18 19:35 [FFmpeg-devel] [PATCH 1/5] lavc/h264dsp: factor some mostly identical R-V V code Rémi Denis-Courmont 2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 2/5] lavc/h264dsp: move R-V V idct_dc_add Rémi Denis-Courmont 2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 3/5] lavc/h264dsp: correct VL and LMUL in idct_dc_add Rémi Denis-Courmont 2024-07-18 19:35 ` Rémi Denis-Courmont [this message] 2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 5/5] lavc/h264dsp: reduce spills in R-V V idct_add16 Rémi Denis-Courmont
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240718193546.18939-4-remi@remlab.net \ --to=remi@remlab.net \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git