* [FFmpeg-devel] [PATCH 1/3] swscale/rgb2rgb: rework RISC-V V uyvytoyuv422
@ 2023-09-30 9:01 Rémi Denis-Courmont
2023-09-30 9:01 ` [FFmpeg-devel] [PATCH 2/3] swscale/rgb2rgb: avoid S-regs in " Rémi Denis-Courmont
2023-09-30 9:01 ` [FFmpeg-devel] [PATCH 3/3] swscale/rgb2rgb: unroll " Rémi Denis-Courmont
0 siblings, 2 replies; 3+ messages in thread
From: Rémi Denis-Courmont @ 2023-09-30 9:01 UTC (permalink / raw)
To: ffmpeg-devel
This avoids relatively slow register-strided stores: the luma samples are now written with a unit-stride segment store instead.
---
libswscale/riscv/rgb2rgb_rvv.S | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 008f098bfe..3e7988ca01 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -101,34 +101,33 @@ func ff_interleave_bytes_rvv, zve32x
endfunc
#if (__riscv_xlen == 64)
-.macro yuy2_to_i422p v_y0, v_y1, v_u, v_v
+.macro yuy2_to_i422p y_shift
addi sp, sp, -16
sd s0, (sp)
- sd s1, 8(sp)
addi a4, a4, 1
lw s0, 16(sp)
srai a4, a4, 1 // pixel width -> chroma width
- li s1, 2
1:
mv t4, a4
mv t3, a3
mv t0, a0
- addi t6, a0, 1
mv t1, a1
mv t2, a2
addi a5, a5, -1
2:
vsetvli t5, t4, e8, m1, ta, ma
+ vlseg2e16.v v16, (t3)
sub t4, t4, t5
- vlseg4e8.v v8, (t3)
+ vnsrl.wi v24, v16, \y_shift // Y0
sh2add t3, t5, t3
- vsse8.v \v_y0, (t0), s1
+ vnsrl.wi v25, v18, \y_shift // Y1
+ vnsrl.wi v28, v16, 8 - \y_shift // U
+ vnsrl.wi v30, v18, 8 - \y_shift // V
+ vsseg2e8.v v24, (t0)
sh1add t0, t5, t0
- vsse8.v \v_y1, (t6), s1
- sh1add t6, t5, t6
- vse8.v \v_u, (t1)
+ vse8.v v28, (t1)
add t1, t5, t1
- vse8.v \v_v, (t2)
+ vse8.v v30, (t2)
add t2, t5, t2
bnez t4, 2b
@@ -138,17 +137,16 @@ endfunc
add a2, a2, a7
bnez a5, 1b
- ld s1, 8(sp)
ld s0, (sp)
addi sp, sp, 16
ret
.endm
func ff_uyvytoyuv422_rvv, zve32x
- yuy2_to_i422p v9, v11, v8, v10
+ yuy2_to_i422p 8
endfunc
func ff_yuyvtoyuv422_rvv, zve32x
- yuy2_to_i422p v8, v10, v9, v11
+ yuy2_to_i422p 0
endfunc
#endif
--
2.42.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* [FFmpeg-devel] [PATCH 2/3] swscale/rgb2rgb: avoid S-regs in RISC-V V uyvytoyuv422
2023-09-30 9:01 [FFmpeg-devel] [PATCH 1/3] swscale/rgb2rgb: rework RISC-V V uyvytoyuv422 Rémi Denis-Courmont
@ 2023-09-30 9:01 ` Rémi Denis-Courmont
2023-09-30 9:01 ` [FFmpeg-devel] [PATCH 3/3] swscale/rgb2rgb: unroll " Rémi Denis-Courmont
1 sibling, 0 replies; 3+ messages in thread
From: Rémi Denis-Courmont @ 2023-09-30 9:01 UTC (permalink / raw)
To: ffmpeg-devel
We can now make do with caller-saved (temporary) registers only, so nothing needs to be spilled to the stack.
As an added bonus, this makes the code XLEN-independent.
---
libswscale/riscv/rgb2rgb.c | 2 --
libswscale/riscv/rgb2rgb_rvv.S | 10 ++--------
2 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 162a4082b0..565f0b77f1 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -55,10 +55,8 @@ av_cold void rgb2rgb_init_riscv(void)
shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
interleaveBytes = ff_interleave_bytes_rvv;
-#if (__riscv_xlen == 64)
uyvytoyuv422 = ff_uyvytoyuv422_rvv;
yuyvtoyuv422 = ff_yuyvtoyuv422_rvv;
-#endif
}
#endif
}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 3e7988ca01..3200370224 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -100,12 +100,9 @@ func ff_interleave_bytes_rvv, zve32x
ret
endfunc
-#if (__riscv_xlen == 64)
.macro yuy2_to_i422p y_shift
- addi sp, sp, -16
- sd s0, (sp)
addi a4, a4, 1
- lw s0, 16(sp)
+ lw t6, (sp)
srai a4, a4, 1 // pixel width -> chroma width
1:
mv t4, a4
@@ -131,14 +128,12 @@ endfunc
add t2, t5, t2
bnez t4, 2b
- add a3, a3, s0
+ add a3, a3, t6
add a0, a0, a6
add a1, a1, a7
add a2, a2, a7
bnez a5, 1b
- ld s0, (sp)
- addi sp, sp, 16
ret
.endm
@@ -149,4 +144,3 @@ endfunc
func ff_yuyvtoyuv422_rvv, zve32x
yuy2_to_i422p 0
endfunc
-#endif
--
2.42.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* [FFmpeg-devel] [PATCH 3/3] swscale/rgb2rgb: unroll RISC-V V uyvytoyuv422
2023-09-30 9:01 [FFmpeg-devel] [PATCH 1/3] swscale/rgb2rgb: rework RISC-V V uyvytoyuv422 Rémi Denis-Courmont
2023-09-30 9:01 ` [FFmpeg-devel] [PATCH 2/3] swscale/rgb2rgb: avoid S-regs in " Rémi Denis-Courmont
@ 2023-09-30 9:01 ` Rémi Denis-Courmont
1 sibling, 0 replies; 3+ messages in thread
From: Rémi Denis-Courmont @ 2023-09-30 9:01 UTC (permalink / raw)
To: ffmpeg-devel
---
libswscale/riscv/rgb2rgb_rvv.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 3200370224..ff02eba9bf 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -112,14 +112,14 @@ endfunc
mv t2, a2
addi a5, a5, -1
2:
- vsetvli t5, t4, e8, m1, ta, ma
+ vsetvli t5, t4, e8, m2, ta, ma
vlseg2e16.v v16, (t3)
sub t4, t4, t5
vnsrl.wi v24, v16, \y_shift // Y0
sh2add t3, t5, t3
- vnsrl.wi v25, v18, \y_shift // Y1
+ vnsrl.wi v26, v20, \y_shift // Y1
vnsrl.wi v28, v16, 8 - \y_shift // U
- vnsrl.wi v30, v18, 8 - \y_shift // V
+ vnsrl.wi v30, v20, 8 - \y_shift // V
vsseg2e8.v v24, (t0)
sh1add t0, t5, t0
vse8.v v28, (t1)
--
2.42.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-09-30 9:01 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-30 9:01 [FFmpeg-devel] [PATCH 1/3] swscale/rgb2rgb: rework RISC-V V uyvytoyuv422 Rémi Denis-Courmont
2023-09-30 9:01 ` [FFmpeg-devel] [PATCH 2/3] swscale/rgb2rgb: avoid S-regs in " Rémi Denis-Courmont
2023-09-30 9:01 ` [FFmpeg-devel] [PATCH 3/3] swscale/rgb2rgb: unroll " Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git