From: "Rémi Denis-Courmont via ffmpeg-devel" <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: "Rémi Denis-Courmont" <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] RVV: fix and improve lavc/pixblockdsp (PR #20647) Date: Sun, 05 Oct 2025 13:04:42 -0000 Message-ID: <175966948285.65.12523957197350493448@bf249f23a2c8> (raw) PR #20647 opened by Rémi Denis-Courmont (Courmisch) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20647 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20647.patch From e94bf3d10d5531d115db5745690044b09495ccef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net> Date: Sun, 5 Oct 2025 15:08:06 +0300 Subject: [PATCH 1/4] Revert "lavc/pixblockdsp: rework R-V V get_pixels_unaligned" The optimised version does not work if the stride is not a multiple of 8, which can occur, as reproduced by the vsynth3-asv1 and vsynth3-asv2 tests. This reverts commit 02594c8c011ca2cdc20334e9bb812ec8f6f37cf3. Conflicts: libavcodec/riscv/pixblockdsp_init.c libavcodec/riscv/pixblockdsp_rvv.S --- libavcodec/riscv/pixblockdsp_init.c | 11 +++++++---- libavcodec/riscv/pixblockdsp_rvv.S | 28 +++++++++++----------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c index e59fba63cc..dcd6bd2977 100644 --- a/libavcodec/riscv/pixblockdsp_init.c +++ b/libavcodec/riscv/pixblockdsp_init.c @@ -65,15 +65,18 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) { c->diff_pixels = ff_diff_pixels_unaligned_rvv; c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv; - } - if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) { if (!high_bit_depth) { - c->get_pixels = ff_get_pixels_8_rvv; + c->get_pixels = ff_get_pixels_unaligned_8_rvv; c->get_pixels_unaligned = ff_get_pixels_unaligned_8_rvv; } - c->diff_pixels = ff_diff_pixels_rvv; + if (cpu_flags & 
AV_CPU_FLAG_RVV_I64) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_8_rvv; + + c->diff_pixels = ff_diff_pixels_rvv; + } } #endif #endif diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S index 85233470cf..4425227c9b 100644 --- a/libavcodec/riscv/pixblockdsp_rvv.S +++ b/libavcodec/riscv/pixblockdsp_rvv.S @@ -24,7 +24,6 @@ func ff_get_pixels_8_rvv, zve64x lpad 0 vsetivli zero, 8, e8, mf2, ta, ma li t0, 8 * 8 -1: vlse64.v v16, (a1), a2 vsetvli zero, t0, e8, m4, ta, ma vwcvtu.x.x.v v8, v16 @@ -32,24 +31,19 @@ func ff_get_pixels_8_rvv, zve64x ret endfunc -func ff_get_pixels_unaligned_8_rvv, zve64x +func ff_get_pixels_unaligned_8_rvv, zve32x lpad 0 - andi t1, a1, 7 - vsetivli zero, 8, e64, m4, ta, ma - li t0, 8 * 8 - beqz t1, 1b - andi a1, a1, -8 - slli t2, t1, 3 - addi t1, a1, 8 - sub t3, t0, t2 - vlse64.v v16, (a1), a2 - vlse64.v v24, (t1), a2 - vsrl.vx v16, v16, t2 - vsll.vx v24, v24, t3 - vor.vv v16, v16, v24 - vsetvli zero, t0, e8, m4, ta, ma + vsetivli zero, 8, e8, mf2, ta, ma + vlsseg8e8.v v16, (a1), a2 vwcvtu.x.x.v v8, v16 - vse16.v v8, (a0) + vwcvtu.x.x.v v9, v17 + vwcvtu.x.x.v v10, v18 + vwcvtu.x.x.v v11, v19 + vwcvtu.x.x.v v12, v20 + vwcvtu.x.x.v v13, v21 + vwcvtu.x.x.v v14, v22 + vwcvtu.x.x.v v15, v23 + vsseg8e16.v v8, (a0) ret endfunc -- 2.49.1 From 0e22318431c550426ed0c1e32883ff06c3e2c5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net> Date: Sun, 5 Oct 2025 15:16:03 +0300 Subject: [PATCH 2/4] pixblockdsp: avoid segments on R-V V get_pixels_unaligned On SpacemiT X86, before: get_pixels_unaligned_rvv_i32: 172.4 ( 0.37x) ...after: get_pixels_unaligned_rvv_i32: 34.4 ( 1.84x) --- libavcodec/riscv/pixblockdsp_rvv.S | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S index 4425227c9b..89023c3320 100644 --- a/libavcodec/riscv/pixblockdsp_rvv.S +++ 
b/libavcodec/riscv/pixblockdsp_rvv.S @@ -33,17 +33,22 @@ endfunc func ff_get_pixels_unaligned_8_rvv, zve32x lpad 0 + li t2, 8 vsetivli zero, 8, e8, mf2, ta, ma - vlsseg8e8.v v16, (a1), a2 +1: + add t1, a1, a2 + vle8.v v16, (a1) + addi t0, a0, 2 * 8 + vle8.v v17, (t1) + addi t2, t2, -2 vwcvtu.x.x.v v8, v16 vwcvtu.x.x.v v9, v17 - vwcvtu.x.x.v v10, v18 - vwcvtu.x.x.v v11, v19 - vwcvtu.x.x.v v12, v20 - vwcvtu.x.x.v v13, v21 - vwcvtu.x.x.v v14, v22 - vwcvtu.x.x.v v15, v23 - vsseg8e16.v v8, (a0) + vse16.v v8, (a0) + add a1, t1, a2 + vse16.v v9, (t0) + addi a0, t0, 2 * 8 + bnez t2, 1b + ret endfunc -- 2.49.1 From e8cb07e6e61a7edca3551be6ca4a789f93db0c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net> Date: Sun, 5 Oct 2025 15:16:03 +0300 Subject: [PATCH 3/4] pixblockdsp: avoid segments on R-V V diff_pixels_unaligned On SpacemiT X86, before: diff_pixels_unaligned_rvv_i32: 250.2 ( 0.59x) ...after: diff_pixels_unaligned_rvv_i32: 56.9 ( 2.60x) --- libavcodec/riscv/pixblockdsp_rvv.S | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S index 89023c3320..78dcddf33a 100644 --- a/libavcodec/riscv/pixblockdsp_rvv.S +++ b/libavcodec/riscv/pixblockdsp_rvv.S @@ -66,17 +66,18 @@ endfunc func ff_diff_pixels_unaligned_rvv, zve32x lpad 0 + li t3, 8 vsetivli zero, 8, e8, mf2, ta, ma - vlsseg8e8.v v16, (a1), a3 - vlsseg8e8.v v24, (a2), a3 +1: + vle8.v v16, (a1) + add a1, a1, a3 + vle8.v v24, (a2) + add a2, a2, a3 vwsubu.vv v8, v16, v24 - vwsubu.vv v9, v17, v25 - vwsubu.vv v10, v18, v26 - vwsubu.vv v11, v19, v27 - vwsubu.vv v12, v20, v28 - vwsubu.vv v13, v21, v29 - vwsubu.vv v14, v22, v30 - vwsubu.vv v15, v23, v31 - vsseg8e16.v v8, (a0) + addi t3, t3, -1 + vse16.v v8, (a0) + addi a0, a0, 2 * 8 + bnez t3, 1b + ret endfunc -- 2.49.1 From a30bb127777f508f6229fe0a090e6182e8f000f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= 
<remi@remlab.net> Date: Sun, 5 Oct 2025 16:03:04 +0300 Subject: [PATCH 4/4] riscv/cpu: add av_const for VLEN helpers This is read from a read-only constant system register. --- libavutil/riscv/cpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavutil/riscv/cpu.h b/libavutil/riscv/cpu.h index f2e6b7b430..66e86a3527 100644 --- a/libavutil/riscv/cpu.h +++ b/libavutil/riscv/cpu.h @@ -45,7 +45,7 @@ static inline av_const bool ff_rv_zbb_support(void) * Returns the vector size in bytes (always a power of two and at least 4). * This is undefined behaviour if vectors are not implemented. */ -static inline size_t ff_get_rv_vlenb(void) +static inline av_const size_t ff_get_rv_vlenb(void) { size_t vlenb; @@ -61,7 +61,7 @@ static inline size_t ff_get_rv_vlenb(void) * Checks that the vector bit-size is at least the given value. * This is potentially undefined behaviour if vectors are not implemented. */ -static inline bool ff_rv_vlen_least(unsigned int bits) +static inline av_const bool ff_rv_vlen_least(unsigned int bits) { #ifdef __riscv_v_min_vlen if (bits <= __riscv_v_min_vlen) -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-10-05 13:05 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=175966948285.65.12523957197350493448@bf249f23a2c8 \ --to=ffmpeg-devel@ffmpeg.org \ --cc=code@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror http://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ http://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git