* [FFmpeg-devel] [PATCH 1/2] lavc/vc1dsp: unify R-V V DC bypass functions
@ 2024-07-27 19:39 Rémi Denis-Courmont
2024-07-27 19:39 ` [FFmpeg-devel] [PATCH 2/2] lavc/vc1dsp: use saturating arithmetic for RVV inv_trans_dc Rémi Denis-Courmont
0 siblings, 1 reply; 2+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-27 19:39 UTC (permalink / raw)
To: ffmpeg-devel
---
libavcodec/riscv/vc1dsp_rvv.S | 126 ++++++++++------------------------
1 file changed, 35 insertions(+), 91 deletions(-)
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index 5189d5e855..548ef9d3bf 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -21,101 +21,45 @@
#include "libavutil/riscv/asm.S"
-func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba
+.macro inv_trans_dc rows, cols, w, mat_lmul, row_lmul
+func ff_vc1_inv_trans_\cols\()x\rows\()_dc_rvv, zve64x, zba
lpad 0
- lh t2, (a2)
- vsetivli zero, 8, e8, mf2, ta, ma
- vlse64.v v0, (a0), a1
- sh1add t2, t2, t2
- addi t2, t2, 1
- srai t2, t2, 1
- sh1add t2, t2, t2
- addi t2, t2, 16
- srai t2, t2, 5
- li t0, 8*8
- vsetvli zero, t0, e16, m8, ta, ma
- vzext.vf2 v8, v0
- vadd.vx v8, v8, t2
- vmax.vx v8, v8, zero
- vsetvli zero, zero, e8, m4, ta, ma
- vnclipu.wi v0, v8, 0
- vsetivli zero, 8, e8, mf2, ta, ma
- vsse64.v v0, (a0), a1
- ret
-endfunc
-
-func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba
- lpad 0
- lh t2, (a2)
- vsetivli zero, 8, e8, mf2, ta, ma
- vlse32.v v0, (a0), a1
- slli t1, t2, 4
- add t2, t2, t1
- addi t2, t2, 4
- srai t2, t2, 3
- sh1add t2, t2, t2
- slli t2, t2, 2
- addi t2, t2, 64
- srai t2, t2, 7
- li t0, 4*8
- vsetvli zero, t0, e16, m4, ta, ma
- vzext.vf2 v4, v0
- vadd.vx v4, v4, t2
- vmax.vx v4, v4, zero
- vsetvli zero, zero, e8, m2, ta, ma
- vnclipu.wi v0, v4, 0
- vsetivli zero, 8, e8, mf2, ta, ma
- vsse32.v v0, (a0), a1
- ret
-endfunc
-
-func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba
- lpad 0
- lh t2, (a2)
- vsetivli zero, 4, e8, mf4, ta, ma
- vlse64.v v0, (a0), a1
- sh1add t2, t2, t2
- addi t2, t2, 1
- srai t2, t2, 1
- slli t1, t2, 4
- add t2, t2, t1
- addi t2, t2, 64
- srai t2, t2, 7
- li t0, 8*4
- vsetvli zero, t0, e16, m4, ta, ma
- vzext.vf2 v4, v0
- vadd.vx v4, v4, t2
- vmax.vx v4, v4, zero
- vsetvli zero, zero, e8, m2, ta, ma
- vnclipu.wi v0, v4, 0
- vsetivli zero, 4, e8, mf4, ta, ma
- vsse64.v v0, (a0), a1
+ lh t2, (a2)
+ li a4, 22 - (5 * \cols) / 4
+ mul t2, t2, a4
+ vsetivli zero, \rows, e8, m\row_lmul, ta, ma
+ vlse\w\().v v0, (a0), a1
+ addi t2, t2, 4
+ li a5, 22 - (5 * \rows) / 4
+ srai t2, t2, 3
+ mul t2, t2, a5
+.if \cols * \rows >= 32
+ li t0, \cols * \rows
+.endif
+ addi t2, t2, 64
+ srai t2, t2, 7
+.if \rows * \cols == 64
+ vsetvli zero, t0, e16, m8, ta, ma
+.elseif \rows * \cols == 32
+ vsetvli zero, t0, e16, m4, ta, ma
+.else
+ vsetivli zero, \rows * \cols, e16, m2, ta, ma
+.endif
+ vzext.vf2 v8, v0
+ vadd.vx v8, v8, t2
+ vmax.vx v8, v8, zero
+ vsetvli zero, zero, e8, m\mat_lmul, ta, ma
+ vnclipu.wi v0, v8, 0
+ vsetivli zero, \rows, e8, m\row_lmul, ta, ma
+ vsse\w\().v v0, (a0), a1
ret
endfunc
+.endm
-func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
- lpad 0
- lh t2, (a2)
- vsetivli zero, 4, e8, mf4, ta, ma
- vlse32.v v0, (a0), a1
- slli t1, t2, 4
- add t2, t2, t1
- addi t2, t2, 4
- srai t2, t2, 3
- slli t1, t2, 4
- add t2, t2, t1
- addi t2, t2, 64
- srai t2, t2, 7
- vsetivli zero, 4*4, e16, m2, ta, ma
- vzext.vf2 v2, v0
- vadd.vx v2, v2, t2
- vmax.vx v2, v2, zero
- vsetvli zero, zero, e8, m1, ta, ma
- vnclipu.wi v0, v2, 0
- vsetivli zero, 4, e8, mf4, ta, ma
- vsse32.v v0, (a0), a1
- ret
-endfunc
+inv_trans_dc 8, 8, 64, 4, f2
+inv_trans_dc 4, 8, 64, 2, f4
+inv_trans_dc 8, 4, 32, 2, f2
+inv_trans_dc 4, 4, 32, 1, f4
.variant_cc ff_vc1_inv_trans_8_rvv
func ff_vc1_inv_trans_8_rvv, zve32x
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
* [FFmpeg-devel] [PATCH 2/2] lavc/vc1dsp: use saturating arithmetic for RVV inv_trans_dc
2024-07-27 19:39 [FFmpeg-devel] [PATCH 1/2] lavc/vc1dsp: unify R-V V DC bypass functions Rémi Denis-Courmont
@ 2024-07-27 19:39 ` Rémi Denis-Courmont
0 siblings, 0 replies; 2+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-27 19:39 UTC (permalink / raw)
To: ffmpeg-devel
T-Head C908 (cycles):
vc1dsp.vc1_inv_trans_4x4_dc_c: 113.7
vc1dsp.vc1_inv_trans_4x4_dc_rvv_i32: 46.5 (before)
vc1dsp.vc1_inv_trans_4x4_dc_rvv_i32: 45.5 (after)
vc1dsp.vc1_inv_trans_4x8_dc_c: 230.7
vc1dsp.vc1_inv_trans_4x8_dc_rvv_i32: 65.7 (before)
vc1dsp.vc1_inv_trans_4x8_dc_rvv_i32: 52.5 (after)
vc1dsp.vc1_inv_trans_8x4_dc_c: 246.7
vc1dsp.vc1_inv_trans_8x4_dc_rvv_i64: 56.7 (before)
vc1dsp.vc1_inv_trans_8x4_dc_rvv_i64: 45.5 (after)
vc1dsp.vc1_inv_trans_8x8_dc_c: 419.7
vc1dsp.vc1_inv_trans_8x8_dc_rvv_i64: 81.2 (before)
vc1dsp.vc1_inv_trans_8x8_dc_rvv_i64: 53.5 (after)
---
libavcodec/riscv/vc1dsp_rvv.S | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index 548ef9d3bf..f9b59688ae 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -38,18 +38,20 @@ func ff_vc1_inv_trans_\cols\()x\rows\()_dc_rvv, zve64x, zba
.endif
addi t2, t2, 64
srai t2, t2, 7
-.if \rows * \cols == 64
- vsetvli zero, t0, e16, m8, ta, ma
-.elseif \rows * \cols == 32
- vsetvli zero, t0, e16, m4, ta, ma
+.if \rows * \cols >= 32
+ vsetvli zero, t0, e8, m\mat_lmul, ta, ma
.else
- vsetivli zero, \rows * \cols, e16, m2, ta, ma
+ vsetivli zero, \rows * \cols, e8, m\mat_lmul, ta, ma
.endif
- vzext.vf2 v8, v0
- vadd.vx v8, v8, t2
- vmax.vx v8, v8, zero
- vsetvli zero, zero, e8, m\mat_lmul, ta, ma
- vnclipu.wi v0, v8, 0
+ bgez t2, 1f
+
+ neg t2, t2
+ vssubu.vx v0, v0, t2
+ vsetivli zero, \rows, e8, m\row_lmul, ta, ma
+ vsse\w\().v v0, (a0), a1
+ ret
+1:
+ vsaddu.vx v0, v0, t2
vsetivli zero, \rows, e8, m\row_lmul, ta, ma
vsse\w\().v v0, (a0), a1
ret
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-07-27 19:39 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-27 19:39 [FFmpeg-devel] [PATCH 1/2] lavc/vc1dsp: unify R-V V DC bypass functions Rémi Denis-Courmont
2024-07-27 19:39 ` [FFmpeg-devel] [PATCH 2/2] lavc/vc1dsp: use saturating arithmetic for RVV inv_trans_dc Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git