* [FFmpeg-devel] [PATCH 1/3] sws/rgb2rgb: RISC-V V shuffle_bytes_xxxx functions
2022-09-28 15:29 [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions Rémi Denis-Courmont
@ 2022-09-28 15:29 ` remi
2022-09-28 15:30 ` [FFmpeg-devel] [PATCH 2/3] sws/rgb2rgb: RISC-V V interleaveBytes remi
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: remi @ 2022-09-28 15:29 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libswscale/rgb2rgb.c | 2 +
libswscale/rgb2rgb.h | 1 +
libswscale/riscv/Makefile | 2 +
libswscale/riscv/rgb2rgb.c | 47 ++++++++++++++++++++
libswscale/riscv/rgb2rgb_rvv.S | 78 ++++++++++++++++++++++++++++++++++
5 files changed, 130 insertions(+)
create mode 100644 libswscale/riscv/Makefile
create mode 100644 libswscale/riscv/rgb2rgb.c
create mode 100644 libswscale/riscv/rgb2rgb_rvv.S
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 3af775b389..e98fdac8ea 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -139,6 +139,8 @@ av_cold void ff_sws_rgb2rgb_init(void)
rgb2rgb_init_c();
#if ARCH_AARCH64
rgb2rgb_init_aarch64();
+#elif ARCH_RISCV
+ rgb2rgb_init_riscv();
#elif ARCH_X86
rgb2rgb_init_x86();
#elif ARCH_LOONGARCH64
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index db85bfc42f..f3951d523e 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -167,6 +167,7 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
void ff_sws_rgb2rgb_init(void);
void rgb2rgb_init_aarch64(void);
+void rgb2rgb_init_riscv(void);
void rgb2rgb_init_x86(void);
void rgb2rgb_init_loongarch(void);
diff --git a/libswscale/riscv/Makefile b/libswscale/riscv/Makefile
new file mode 100644
index 0000000000..214d877b62
--- /dev/null
+++ b/libswscale/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/rgb2rgb.o
+RVV-OBJS += riscv/rgb2rgb_rvv.o
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
new file mode 100644
index 0000000000..5654154494
--- /dev/null
+++ b/libswscale/riscv/rgb2rgb.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libswscale/rgb2rgb.h"
+
+void ff_shuffle_bytes_0321_rvv(const uint8_t *src, uint8_t *dst, int src_len);
+void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len);
+void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len);
+void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len);
+void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
+
+av_cold void rgb2rgb_init_riscv(void) /* point the shuffle_bytes_* function pointers at the RVV routines when usable */
+{
+#if HAVE_RVV /* the whole body is compiled out when HAVE_RVV is 0 */
+ int flags = av_get_cpu_flags(); /* CPU feature bits reported by libavutil */
+
+ if (flags & AV_CPU_FLAG_RVV_I32) { /* NOTE(review): the shuffle assembly also executes sh2add (Zba); confirm this flag is a sufficient guard */
+ shuffle_bytes_0321 = ff_shuffle_bytes_0321_rvv;
+ shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv;
+ shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
+ shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
+ shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
+ }
+#endif
+}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
new file mode 100644
index 0000000000..3eb11262c0
--- /dev/null
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_shuffle_bytes_0321_rvv, zve32x /* per the C prototype: a0 = src, a1 = dst, a2 = src_len in bytes */
+ addi t1, a0, 3 /* t1 -> src[3], source of output byte 1 */
+ addi t2, a0, 2 /* t2 -> src[2], source of output byte 2 */
+ addi t3, a0, 1 /* t3 -> src[1], source of output byte 3 */
+1: /* shared loop: the other shuffle variants set a0/t1/t2/t3 and jump here */
+ srai a2, a2, 2 /* byte count -> number of 4-byte groups */
+ li t4, 4 /* load stride: one byte out of every 4 */
+2:
+ vsetvli t0, a2, e8, m1, ta, ma /* t0 = groups handled in this pass */
+ sub a2, a2, t0 /* groups still to do */
+ vlse8.v v8, (a0), t4 /* v8 = output byte 0 of each group */
+ sh2add a0, t0, a0 /* a0 += 4 * t0 */
+ vlse8.v v9, (t1), t4 /* v9 = output byte 1 of each group */
+ sh2add t1, t0, t1
+ vlse8.v v10, (t2), t4 /* v10 = output byte 2 of each group */
+ sh2add t2, t0, t2
+ vlse8.v v11, (t3), t4 /* v11 = output byte 3 of each group */
+ sh2add t3, t0, t3
+ vsseg4e8.v v8, (a1) /* store v8..v11 interleaved, 4 bytes per group */
+ sh2add a1, t0, a1 /* dst += 4 * t0 */
+ bnez a2, 2b /* repeat until every group is done */
+
+ ret
+endfunc
+
+func ff_shuffle_bytes_2103_rvv, zve32x /* per 4-byte group: dst[0..3] = src[2], src[1], src[0], src[3] */
+ addi t1, a0, 1 /* output byte 1 <- src[1] */
+ addi t2, a0, 0 /* output byte 2 <- src[0] */
+ addi t3, a0, 3 /* output byte 3 <- src[3] */
+ addi a0, a0, 2 /* output byte 0 <- src[2]; done last because it overwrites a0 */
+ j 1b /* continue in the loop at the nearest preceding label 1 */
+endfunc
+
+func ff_shuffle_bytes_1230_rvv, zve32x /* per 4-byte group: dst[0..3] = src[1], src[2], src[3], src[0] */
+ addi t1, a0, 2 /* output byte 1 <- src[2] */
+ addi t2, a0, 3 /* output byte 2 <- src[3] */
+ addi t3, a0, 0 /* output byte 3 <- src[0] */
+ addi a0, a0, 1 /* output byte 0 <- src[1]; done last because it overwrites a0 */
+ j 1b /* continue in the loop at the nearest preceding label 1 */
+endfunc
+
+func ff_shuffle_bytes_3012_rvv, zve32x /* per 4-byte group: dst[0..3] = src[3], src[0], src[1], src[2] */
+ addi t1, a0, 0 /* output byte 1 <- src[0] */
+ addi t2, a0, 1 /* output byte 2 <- src[1] */
+ addi t3, a0, 2 /* output byte 3 <- src[2] */
+ addi a0, a0, 3 /* output byte 0 <- src[3]; done last because it overwrites a0 */
+ j 1b /* continue in the loop at the nearest preceding label 1 */
+endfunc
+
+func ff_shuffle_bytes_3210_rvv, zve32x /* per 4-byte group: dst[0..3] = src[3], src[2], src[1], src[0] (full reversal) */
+ addi t1, a0, 2 /* output byte 1 <- src[2] */
+ addi t2, a0, 1 /* output byte 2 <- src[1] */
+ addi t3, a0, 0 /* output byte 3 <- src[0] */
+ addi a0, a0, 3 /* output byte 0 <- src[3]; done last because it overwrites a0 */
+ j 1b /* continue in the loop at the nearest preceding label 1 */
+endfunc
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 2/3] sws/rgb2rgb: RISC-V V interleaveBytes
2022-09-28 15:29 [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions Rémi Denis-Courmont
2022-09-28 15:29 ` [FFmpeg-devel] [PATCH 1/3] sws/rgb2rgb: RISC-V V shuffle_bytes_xxxx functions remi
@ 2022-09-28 15:30 ` remi
2022-09-28 15:30 ` [FFmpeg-devel] [PATCH 3/3] sws/rgb2rgb: RISC-V 64-bit V packed YUYV/UYVY to planar 4:2:2 remi
2022-09-29 4:04 ` [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions Lynne
3 siblings, 0 replies; 5+ messages in thread
From: remi @ 2022-09-28 15:30 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
---
libswscale/riscv/rgb2rgb.c | 4 ++++
libswscale/riscv/rgb2rgb_rvv.S | 26 ++++++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 5654154494..32c1546827 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -30,6 +30,9 @@ void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
+void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
+ uint8_t *dst, int width, int height, int s1stride,
+ int s2stride, int dstride);
av_cold void rgb2rgb_init_riscv(void)
{
@@ -42,6 +45,7 @@ av_cold void rgb2rgb_init_riscv(void)
shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
+ interleaveBytes = ff_interleave_bytes_rvv;
}
#endif
}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 3eb11262c0..7f8c2efd80 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -76,3 +76,29 @@ func ff_shuffle_bytes_3210_rvv, zve32x
addi a0, a0, 3
j 1b
endfunc
+
+func ff_interleave_bytes_rvv, zve32x /* per the C prototype: a0 = src1, a1 = src2, a2 = dst, a3 = width, a4 = height, a5/a6/a7 = src1/src2/dst strides */
+1: /* per-row loop; the counter is decremented before it is tested, so height must be at least 1 */
+ mv t0, a0 /* t0/t1/t2 = row cursors for src1/src2/dst */
+ mv t1, a1
+ mv t2, a2
+ mv t3, a3 /* t3 = bytes left in this row, per source */
+ addi a4, a4, -1 /* one fewer row to go */
+2:
+ vsetvli t4, t3, e8, m1, ta, ma /* t4 = bytes taken in this pass; LMUL of 1 stated explicitly */
+ sub t3, t3, t4 /* bytes still to do in this row */
+ vle8.v v8, (t0)
+ add t0, t4, t0
+ vle8.v v9, (t1)
+ add t1, t4, t1
+ vsseg2e8.v v8, (t2) /* write v8/v9 bytes alternately: src1[i], src2[i] */
+ sh1add t2, t4, t2 /* dst cursor += 2 * t4 */
+ bnez t3, 2b /* test the remaining count, not the last vl, so no empty extra pass runs */
+
+ add a0, a0, a5 /* advance each base pointer by its own stride */
+ add a1, a1, a6
+ add a2, a2, a7
+ bnez a4, 1b
+
+ ret
+endfunc
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 3/3] sws/rgb2rgb: RISC-V 64-bit V packed YUYV/UYVY to planar 4:2:2
2022-09-28 15:29 [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions Rémi Denis-Courmont
2022-09-28 15:29 ` [FFmpeg-devel] [PATCH 1/3] sws/rgb2rgb: RISC-V V shuffle_bytes_xxxx functions remi
2022-09-28 15:30 ` [FFmpeg-devel] [PATCH 2/3] sws/rgb2rgb: RISC-V V interleaveBytes remi
@ 2022-09-28 15:30 ` remi
2022-09-29 4:04 ` [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions Lynne
3 siblings, 0 replies; 5+ messages in thread
From: remi @ 2022-09-28 15:30 UTC (permalink / raw)
To: ffmpeg-devel
From: Rémi Denis-Courmont <remi@remlab.net>
This is currently 64-bit only because the stack spilling code would not
assemble on RV32I (and it would corrupt s0 and s1 on RV128I, in theory).
This could be added later in the unlikely event that someone wants it.
---
libswscale/riscv/rgb2rgb.c | 10 +++++++
libswscale/riscv/rgb2rgb_rvv.S | 53 ++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+)
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 32c1546827..93bc6b6245 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -33,6 +33,12 @@ void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
uint8_t *dst, int width, int height, int s1stride,
int s2stride, int dstride);
+void ff_uyvytoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ const uint8_t *src, int width, int height,
+ int ystride, int uvstride, int src_stride);
+void ff_yuyvtoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ const uint8_t *src, int width, int height,
+ int ystride, int uvstride, int src_stride);
av_cold void rgb2rgb_init_riscv(void)
{
@@ -46,6 +52,10 @@ av_cold void rgb2rgb_init_riscv(void)
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
interleaveBytes = ff_interleave_bytes_rvv;
+# if (__riscv_xlen == 64)
+ uyvytoyuv422 = ff_uyvytoyuv422_rvv;
+ yuyvtoyuv422 = ff_yuyvtoyuv422_rvv;
+# endif
}
#endif
}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 7f8c2efd80..5626d906eb 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -102,3 +102,56 @@ func ff_interleave_bytes_rvv, zve32x
ret
endfunc
+
+#if (__riscv_xlen == 64)
+.macro yuy2_to_i422p v_y0, v_y1, v_u, v_v /* arguments name which of v8..v11 holds each component after the 4-field load */
+ addi sp, sp, -16 /* room to save s0 and s1 */
+ sd s0, (sp)
+ sd s1, 8(sp)
+ addi a4, a4, 1 /* round the width up before halving */
+ lw s0, 16(sp) /* word at the caller's sp: presumably the 9th C argument, src_stride (confirm against the ABI) */
+ srai a4, a4, 1 // pixel width -> chroma width
+ li s1, 2 /* store stride for the two luma streams: every other byte */
+1: /* per-row loop; a5 is decremented before it is tested */
+ mv t4, a4 /* t4 = 4-byte input groups left in this row */
+ mv t3, a3 /* t3 = input cursor */
+ mv t0, a0 /* t0 = even luma positions of the a0 plane */
+ addi t6, a0, 1 /* t6 = odd luma positions of the a0 plane */
+ mv t1, a1 /* t1/t2 = cursors into the a1/a2 planes */
+ mv t2, a2
+ addi a5, a5, -1
+2:
+ vsetvli t5, t4, e8, m1, ta, ma /* t5 = groups handled in this pass */
+ sub t4, t4, t5
+ vlseg4e8.v v8, (t3) /* de-interleave 4-byte groups into v8..v11 */
+ sh2add t3, t5, t3 /* input cursor += 4 * t5 */
+ vsse8.v \v_y0, (t0), s1 /* first luma of each group, stride 2 */
+ sh1add t0, t5, t0
+ vsse8.v \v_y1, (t6), s1 /* second luma of each group, stride 2 */
+ sh1add t6, t5, t6
+ vse8.v \v_u, (t1) /* one chroma byte per group, stored contiguously */
+ add t1, t5, t1
+ vse8.v \v_v, (t2)
+ add t2, t5, t2
+ bnez t4, 2b
+
+ add a3, a3, s0 /* next input row */
+ add a0, a0, a6 /* next row of the a0 plane */
+ add a1, a1, a7 /* both chroma planes advance by the same stride, a7 */
+ add a2, a2, a7
+ bnez a5, 1b
+
+ ld s1, 8(sp) /* restore the saved registers and release the frame */
+ ld s0, (sp)
+ addi sp, sp, 16
+ ret
+.endm
+
+func ff_uyvytoyuv422_rvv, zve32x /* input fields 0..3 are treated as U, Y0, V, Y1 */
+ yuy2_to_i422p v9, v11, v8, v10 /* y0 = v9, y1 = v11, u = v8, v = v10 */
+endfunc
+
+func ff_yuyvtoyuv422_rvv, zve32x /* input fields 0..3 are treated as Y0, U, Y1, V */
+ yuy2_to_i422p v8, v10, v9, v11 /* y0 = v8, y1 = v10, u = v9, v = v11 */
+endfunc
+#endif
--
2.37.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions
2022-09-28 15:29 [FFmpeg-devel] [PATCH 0/3] RISC-V V swscale pixel format conversions Rémi Denis-Courmont
` (2 preceding siblings ...)
2022-09-28 15:30 ` [FFmpeg-devel] [PATCH 3/3] sws/rgb2rgb: RISC-V 64-bit V packed YUYV/UYVY to planar 4:2:2 remi
@ 2022-09-29 4:04 ` Lynne
3 siblings, 0 replies; 5+ messages in thread
From: Lynne @ 2022-09-29 4:04 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Sep 28, 2022, 17:29 by remi@remlab.net:
> Hello,
>
> This adds the pixel format conversions that appear to be covered by checkasm
> (plus YUYV to I422 for which a patch was sent already).
>
> RVV has no register-register interleaving/deinterleaving instructions, so this
> uses strided loads or stores instead. Another option would be full register move then segmented store, but that is presumably slower.
>
> The following changes since commit d31013166ac3727ae7c7ebbb756e1e5800bc2b40:
>
> lavc/pixblockdsp: RISC-V diff_pixels & diff_pixels_unaligned (2022-09-28 11:46:11 +0200)
>
> are available in the Git repository at:
>
> git.remlab.net:git/ffmpeg.git rvv-swscale
>
> for you to fetch changes up to 18edd2c3108b126fc478635ac1048db60b9d7fc4:
>
> sws/rgb2rgb: RISC-V 64-bit V packed YUYV/UYVY to planar 4:2:2 (2022-09-28 18:23:53 +0300)
>
> ----------------------------------------------------------------
> Rémi Denis-Courmont (3):
> sws/rgb2rgb: RISC-V V shuffle_bytes_xxxx functions
> sws/rgb2rgb: RISC-V V interleaveBytes
> sws/rgb2rgb: RISC-V 64-bit V packed YUYV/UYVY to planar 4:2:2
>
> libswscale/rgb2rgb.c | 2 +
> libswscale/rgb2rgb.h | 1 +
> libswscale/riscv/Makefile | 2 +
> libswscale/riscv/rgb2rgb.c | 61 ++++++++++++++++
> libswscale/riscv/rgb2rgb_rvv.S | 157 +++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 223 insertions(+)
> create mode 100644 libswscale/riscv/Makefile
> create mode 100644 libswscale/riscv/rgb2rgb.c
> create mode 100644 libswscale/riscv/rgb2rgb_rvv.S
>
Patchset looks good. Will apply in a couple of hours.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread