* [FFmpeg-devel] [PATCH] RVV: fix and improve lavc/pixblockdsp (PR #20647)
@ 2025-10-05 13:04 Rémi Denis-Courmont via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: Rémi Denis-Courmont via ffmpeg-devel @ 2025-10-05 13:04 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Rémi Denis-Courmont
PR #20647 opened by Rémi Denis-Courmont (Courmisch)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20647
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20647.patch
From e94bf3d10d5531d115db5745690044b09495ccef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net>
Date: Sun, 5 Oct 2025 15:08:06 +0300
Subject: [PATCH 1/4] Revert "lavc/pixblockdsp: rework R-V V
get_pixels_unaligned"
The optimised version does not work if the stride is not a multiple of 8,
which can occur, as reproduced by the vsynth3-asv1 and vsynth3-asv2 tests.
This reverts commit 02594c8c011ca2cdc20334e9bb812ec8f6f37cf3.
Conflicts:
libavcodec/riscv/pixblockdsp_init.c
libavcodec/riscv/pixblockdsp_rvv.S
---
libavcodec/riscv/pixblockdsp_init.c | 11 +++++++----
libavcodec/riscv/pixblockdsp_rvv.S | 28 +++++++++++-----------------
2 files changed, 18 insertions(+), 21 deletions(-)
diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c
index e59fba63cc..dcd6bd2977 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -65,15 +65,18 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
c->diff_pixels = ff_diff_pixels_unaligned_rvv;
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
- }
- if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
if (!high_bit_depth) {
- c->get_pixels = ff_get_pixels_8_rvv;
+ c->get_pixels = ff_get_pixels_unaligned_8_rvv;
c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv;
}
- c->diff_pixels = ff_diff_pixels_rvv;
+ if (cpu_flags & AV_CPU_FLAG_RVV_I64) {
+ if (!high_bit_depth)
+ c->get_pixels = ff_get_pixels_8_rvv;
+
+ c->diff_pixels = ff_diff_pixels_rvv;
+ }
}
#endif
#endif
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
index 85233470cf..4425227c9b 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -24,7 +24,6 @@ func ff_get_pixels_8_rvv, zve64x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
-1:
vlse64.v v16, (a1), a2
vsetvli zero, t0, e8, m4, ta, ma
vwcvtu.x.x.v v8, v16
@@ -32,24 +31,19 @@ func ff_get_pixels_8_rvv, zve64x
ret
endfunc
-func ff_get_pixels_unaligned_8_rvv, zve64x
+func ff_get_pixels_unaligned_8_rvv, zve32x
lpad 0
- andi t1, a1, 7
- vsetivli zero, 8, e64, m4, ta, ma
- li t0, 8 * 8
- beqz t1, 1b
- andi a1, a1, -8
- slli t2, t1, 3
- addi t1, a1, 8
- sub t3, t0, t2
- vlse64.v v16, (a1), a2
- vlse64.v v24, (t1), a2
- vsrl.vx v16, v16, t2
- vsll.vx v24, v24, t3
- vor.vv v16, v16, v24
- vsetvli zero, t0, e8, m4, ta, ma
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vlsseg8e8.v v16, (a1), a2
vwcvtu.x.x.v v8, v16
- vse16.v v8, (a0)
+ vwcvtu.x.x.v v9, v17
+ vwcvtu.x.x.v v10, v18
+ vwcvtu.x.x.v v11, v19
+ vwcvtu.x.x.v v12, v20
+ vwcvtu.x.x.v v13, v21
+ vwcvtu.x.x.v v14, v22
+ vwcvtu.x.x.v v15, v23
+ vsseg8e16.v v8, (a0)
ret
endfunc
--
2.49.1
From 0e22318431c550426ed0c1e32883ff06c3e2c5e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net>
Date: Sun, 5 Oct 2025 15:16:03 +0300
Subject: [PATCH 2/4] pixblockdsp: avoid segments on R-V V get_pixels_unaligned
On SpacemiT X60, before:
get_pixels_unaligned_rvv_i32: 172.4 ( 0.37x)
...after:
get_pixels_unaligned_rvv_i32: 34.4 ( 1.84x)
---
libavcodec/riscv/pixblockdsp_rvv.S | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
index 4425227c9b..89023c3320 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -33,17 +33,22 @@ endfunc
func ff_get_pixels_unaligned_8_rvv, zve32x
lpad 0
+ li t2, 8
vsetivli zero, 8, e8, mf2, ta, ma
- vlsseg8e8.v v16, (a1), a2
+1:
+ add t1, a1, a2
+ vle8.v v16, (a1)
+ addi t0, a0, 2 * 8
+ vle8.v v17, (t1)
+ addi t2, t2, -2
vwcvtu.x.x.v v8, v16
vwcvtu.x.x.v v9, v17
- vwcvtu.x.x.v v10, v18
- vwcvtu.x.x.v v11, v19
- vwcvtu.x.x.v v12, v20
- vwcvtu.x.x.v v13, v21
- vwcvtu.x.x.v v14, v22
- vwcvtu.x.x.v v15, v23
- vsseg8e16.v v8, (a0)
+ vse16.v v8, (a0)
+ add a1, t1, a2
+ vse16.v v9, (t0)
+ addi a0, t0, 2 * 8
+ bnez t2, 1b
+
ret
endfunc
--
2.49.1
From e8cb07e6e61a7edca3551be6ca4a789f93db0c13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net>
Date: Sun, 5 Oct 2025 15:16:03 +0300
Subject: [PATCH 3/4] pixblockdsp: avoid segments on R-V V
diff_pixels_unaligned
On SpacemiT X60, before:
diff_pixels_unaligned_rvv_i32: 250.2 ( 0.59x)
...after:
diff_pixels_unaligned_rvv_i32: 56.9 ( 2.60x)
---
libavcodec/riscv/pixblockdsp_rvv.S | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
index 89023c3320..78dcddf33a 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -66,17 +66,18 @@ endfunc
func ff_diff_pixels_unaligned_rvv, zve32x
lpad 0
+ li t3, 8
vsetivli zero, 8, e8, mf2, ta, ma
- vlsseg8e8.v v16, (a1), a3
- vlsseg8e8.v v24, (a2), a3
+1:
+ vle8.v v16, (a1)
+ add a1, a1, a3
+ vle8.v v24, (a2)
+ add a2, a2, a3
vwsubu.vv v8, v16, v24
- vwsubu.vv v9, v17, v25
- vwsubu.vv v10, v18, v26
- vwsubu.vv v11, v19, v27
- vwsubu.vv v12, v20, v28
- vwsubu.vv v13, v21, v29
- vwsubu.vv v14, v22, v30
- vwsubu.vv v15, v23, v31
- vsseg8e16.v v8, (a0)
+ addi t3, t3, -1
+ vse16.v v8, (a0)
+ addi a0, a0, 2 * 8
+ bnez t3, 1b
+
ret
endfunc
--
2.49.1
From a30bb127777f508f6229fe0a090e6182e8f000f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net>
Date: Sun, 5 Oct 2025 16:03:04 +0300
Subject: [PATCH 4/4] riscv/cpu: add av_const for VLEN helpers
The vector length is read from a read-only, constant system register.
---
libavutil/riscv/cpu.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavutil/riscv/cpu.h b/libavutil/riscv/cpu.h
index f2e6b7b430..66e86a3527 100644
--- a/libavutil/riscv/cpu.h
+++ b/libavutil/riscv/cpu.h
@@ -45,7 +45,7 @@ static inline av_const bool ff_rv_zbb_support(void)
* Returns the vector size in bytes (always a power of two and at least 4).
* This is undefined behaviour if vectors are not implemented.
*/
-static inline size_t ff_get_rv_vlenb(void)
+static inline av_const size_t ff_get_rv_vlenb(void)
{
size_t vlenb;
@@ -61,7 +61,7 @@ static inline size_t ff_get_rv_vlenb(void)
* Checks that the vector bit-size is at least the given value.
* This is potentially undefined behaviour if vectors are not implemented.
*/
-static inline bool ff_rv_vlen_least(unsigned int bits)
+static inline av_const bool ff_rv_vlen_least(unsigned int bits)
{
#ifdef __riscv_v_min_vlen
if (bits <= __riscv_v_min_vlen)
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-10-05 13:05 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-05 13:04 [FFmpeg-devel] [PATCH] RVV: fix and improve lavc/pixblockdsp (PR #20647) Rémi Denis-Courmont via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror http://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ http://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git