Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH 4/5] lavc/h264dsp: reuse the R-V V IDCT DC add functions
Date: Thu, 18 Jul 2024 22:35:45 +0300
Message-ID: <20240718193546.18939-4-remi@remlab.net> (raw)
In-Reply-To: <20240718193546.18939-1-remi@remlab.net>

This calls the R-V V IDCT DC add functions from the multi-block IDCT
functions (idct_add16, idct_add16intra and idct8_add4) in place of the
C ones, to leverage vector code.

As an added bonus, the caller functions can now rely on the callee functions
to preserve their argument registers, so fewer values need to be kept in
saved registers and spilled to the stack (the frame shrinks from 96 to 64
bytes).
---
 libavcodec/riscv/h264idct_rvv.S | 76 +++++++--------------------------
 1 file changed, 16 insertions(+), 60 deletions(-)

diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 2648e06aeb..c42db6ef29 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -536,7 +536,7 @@ endconst
 .macro  idct4_adds type, depth
 func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
         csrwi   vxrm, 0
-        addi    sp, sp, -96
+        addi    sp, sp, -64
         lla     t0, ff_h264_scan8
         sd      s0,   (sp)
         li      t1, 32 * (\depth / 8)
@@ -547,14 +547,6 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
         sd      s3, 32(sp)
         sd      s4, 40(sp)
         sd      s5, 48(sp)
-        sd      s6, 56(sp)
-        sd      s7, 64(sp)
-.if \depth > 8
-        sd      s8, 72(sp)
-        sd      s9, 80(sp)
-        mv      s8, a5
-        mv      s9, a6
-.endif
         vsetivli  zero, 16, e8, m1, ta, ma
         vle8.v    v8, (t0)
 .if \depth == 8
@@ -583,8 +575,8 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
         li      s1, 16
         mv      s4, a0
         mv      s5, a1
-        mv      s6, a2
-        mv      s7, a3
+        mv      a1, a2
+        mv      a2, a3
 1:
         andi    t0, s2, 1
         addi    s1, s1, -1
@@ -594,12 +586,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
 .endif
         lw      t2, (s5)   # block_offset[i]
         andi    t1, s3, 1
-        mv      a1, s6
-        mv      a2, s7
         add     a0, s4, t2
-.if \depth > 8
-        mv      a5, s8
-.endif
 .ifc \type, 16
         bnez    t1, 2f     # if (nnz == 1 && block[i * 16])
 .else
@@ -611,23 +598,13 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
 .ifnc \type, 16
         beqz    t1, 3f    # if (block[i * 16])
 .endif
-.if \depth == 8
-        call    ff_h264_idct_dc_add_\depth\()_c
-.else
-        jalr    s9
-.endif
+        jal     ff_h264_idct4_dc_add_\depth\()_rvv
 3:
         srli    s3, s3, 1
         addi    s5, s5, 4
-        addi    s6, s6, 16 * 2 * (\depth / 8)
+        addi    a1, a1, 16 * 2 * (\depth / 8)
         bnez    s1, 1b
 
-.if \depth > 8
-        ld      s9, 80(sp)
-        ld      s8, 72(sp)
-.endif
-        ld      s7, 64(sp)
-        ld      s6, 56(sp)
         ld      s5, 48(sp)
         ld      s4, 40(sp)
         ld      s3, 32(sp)
@@ -635,7 +612,7 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
         ld      s1, 16(sp)
         ld      ra,  8(sp)
         ld      s0,  0(sp)
-        addi    sp, sp, 96
+        addi    sp, sp, 64
         ret
 endfunc
 .endm
@@ -646,7 +623,7 @@ idct4_adds 16intra, \depth
 
 func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         csrwi       vxrm, 0
-        addi    sp, sp, -96
+        addi    sp, sp, -64
         lla     t0, ff_h264_scan8
         sd      s0,   (sp)
         li      t1, 4 * 32 * (\depth / 8)
@@ -658,14 +635,6 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         sd      s3, 32(sp)
         sd      s4, 40(sp)
         sd      s5, 48(sp)
-        sd      s6, 56(sp)
-        sd      s7, 64(sp)
-.if \depth > 8
-        sd      s8, 72(sp)
-        sd      s9, 80(sp)
-        mv      s8, a5
-        mv      s9, a6
-.endif
         vsetivli  zero, 4, e8, mf4, ta, ma
         vlse8.v   v8, (t0), t2
 .if \depth == 8
@@ -689,8 +658,8 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         li      s1, 4
         mv      s4, a0
         mv      s5, a1
-        mv      s6, a2
-        mv      s7, a3
+        mv      a1, a2
+        mv      a2, a3
 1:
         andi    t0, s2, 1
         addi    s1, s1, -1
@@ -698,33 +667,23 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         beqz    t0, 3f     # if (nnz)
         lw      t2, (s5)   # block_offset[i]
         andi    t1, s3, 1
-        mv      a1, s6
-        mv      a2, s7
         add     a0, s4, t2
-.if \depth > 8
-        mv      a5, s8
-.endif
         bnez    t1, 2f    # if (nnz == 1 && block[i * 16])
         jal     .Lidct8_add_\depth\()_rvv
-        j       3f
-2:
 .if \depth == 8
-        call    ff_h264_idct8_dc_add_\depth\()_c
+        j       3f
 .else
-        jalr    s9
+        j       4f        # idct8_add_16 updates a1
 .endif
+2:
+        jal     ff_h264_idct8_dc_add_\depth\()_rvv
 3:
+        addi    a1, a1, 4 * 16 * 2 * (\depth / 8)
+4:
         srli    s3, s3, 1
         addi    s5, s5, 4 * 4
-        addi    s6, s6, 4 * 16 * 2 * (\depth / 8)
         bnez    s1, 1b
 
-.if \depth > 8
-        ld      s9, 80(sp)
-        ld      s8, 72(sp)
-.endif
-        ld      s7, 64(sp)
-        ld      s6, 56(sp)
         ld      s5, 48(sp)
         ld      s4, 40(sp)
         ld      s3, 32(sp)
@@ -732,7 +691,7 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         ld      s1, 16(sp)
         ld      ra,  8(sp)
         ld      s0,  0(sp)
-        addi    sp, sp, 96
+        addi    sp, sp, 64
         ret
 endfunc
 .endr
@@ -740,19 +699,16 @@ endfunc
 .irp    depth, 9, 10, 12, 14
 func ff_h264_idct_add16_\depth\()_rvv, zve32x
         li      a5, (1 << \depth) - 1
-        lla     a6, ff_h264_idct_dc_add_\depth\()_c
         j       ff_h264_idct_add16_16_rvv
 endfunc
 
 func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
         li      a5, (1 << \depth) - 1
-        lla     a6, ff_h264_idct_dc_add_\depth\()_c
         j       ff_h264_idct_add16intra_16_rvv
 endfunc
 
 func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         li      a5, (1 << \depth) - 1
-        lla     a6, ff_h264_idct8_dc_add_\depth\()_c
         j       ff_h264_idct8_add4_16_rvv
 endfunc
 .endr
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2024-07-18 19:36 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-18 19:35 [FFmpeg-devel] [PATCH 1/5] lavc/h264dsp: factor some mostly identical R-V V code Rémi Denis-Courmont
2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 2/5] lavc/h264dsp: move R-V V idct_dc_add Rémi Denis-Courmont
2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 3/5] lavc/h264dsp: correct VL and LMUL in idct_dc_add Rémi Denis-Courmont
2024-07-18 19:35 ` Rémi Denis-Courmont [this message]
2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 5/5] lavc/h264dsp: reduce spills in R-V V idct_add16 Rémi Denis-Courmont

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240718193546.18939-4-remi@remlab.net \
    --to=remi@remlab.net \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git