* [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
@ 2023-12-29 11:57 flow gg
2023-12-30 12:15 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: flow gg @ 2023-12-29 11:57 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 64 bytes --]
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 28.0
[-- Attachment #2: 0003-lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5610 bytes --]
From 0fd1b7a34ab8794868d80233c35f70c8ad42b9fa Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 28.0
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 ++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 46 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 92 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 7f253bba12..4e14c3d094 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -46,6 +46,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..426bbe2c4a
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e8, m2, ta, ma
+ vle8.v v0, (a0)
+ sub a2, a2, t0
+ vsetvli zero, t0, e16, m4, ta, ma
+ vle16.v v8, (a1)
+ vsetvli zero, t0, e32, m8, ta, ma
+ vsext.vf4 v16, v0
+ vsext.vf2 v0, v8
+ vsub.vv v16, v16, v0
+ add a0, a0, t0
+ vmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2023-12-29 11:57 [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16 flow gg
@ 2023-12-30 12:15 ` Rémi Denis-Courmont
2023-12-30 14:00 ` flow gg
2023-12-30 16:20 ` flow gg
0 siblings, 2 replies; 15+ messages in thread
From: Rémi Denis-Courmont @ 2023-12-30 12:15 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg <hlefthleft@gmail.com> a écrit :
>C908
>ssd_int8_vs_int16_c: 207.7
>ssd_int8_vs_int16_rvv_i32: 28.0
At a quick glance, it won't work if the input length is not a multiple of the vector length.
Also do you really need to extend accumulators to 32 bits?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2023-12-30 12:15 ` Rémi Denis-Courmont
@ 2023-12-30 14:00 ` flow gg
2023-12-30 14:01 ` flow gg
2023-12-30 15:45 ` Rémi Denis-Courmont
2023-12-30 16:20 ` flow gg
1 sibling, 2 replies; 15+ messages in thread
From: flow gg @ 2023-12-30 14:00 UTC (permalink / raw)
To: FFmpeg development discussions and patches
> At a quick glance, it won't work if the input length is not a multiple of
the vector length.
Why? I tried 1024, 32*3, 32*7 and all passed the test.
> Also do you really need to extend accumulators to 32 bits?
It won't overflow after the test is changed, so it's not needed anymore.
I have modified it in this reply.
Rémi Denis-Courmont <remi@remlab.net> 于2023年12月30日周六 20:15写道:
>
>
> Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg <hlefthleft@gmail.com> a
> écrit :
> >C908
> >ssd_int8_vs_int16_c: 207.7
> >ssd_int8_vs_int16_rvv_i32: 28.0
>
> At a quick glance, it won't work if the input length is not a multiple of
> the vector length.
>
> Also do you really need to extend accumulators to 32 bits?
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2023-12-30 14:00 ` flow gg
@ 2023-12-30 14:01 ` flow gg
2023-12-30 15:45 ` Rémi Denis-Courmont
1 sibling, 0 replies; 15+ messages in thread
From: flow gg @ 2023-12-30 14:01 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1146 bytes --]
flow gg <hlefthleft@gmail.com> 于2023年12月30日周六 22:00写道:
> > At a quick glance, it won't work if the input length is not a multiple
> of the vector length.
>
> Why? I tried 1024, 32*3, 32*7 and all passed the test.
>
> > Also do you really need to extend accumulators to 32 bits?
>
> It won't overflow after the test is changed, so it's not needed anymore.
> I have modified it in this reply.
>
> Rémi Denis-Courmont <remi@remlab.net> 于2023年12月30日周六 20:15写道:
>
>>
>>
>> Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg <hlefthleft@gmail.com> a
>> écrit :
>> >C908
>> >ssd_int8_vs_int16_c: 207.7
>> >ssd_int8_vs_int16_rvv_i32: 28.0
>>
>> At a quick glance, it won't work if the input length is not a multiple of
>> the vector length.
>>
>> Also do you really need to extend accumulators to 32 bits?
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
[-- Attachment #2: 0003-lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5534 bytes --]
From 7bb35b54a4e578848660b1437b96333dd740cbd3 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 17.7
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 +++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 44 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 90 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 7f253bba12..4e14c3d094 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -46,6 +46,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..c52913c2e7
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e8, m2, ta, ma
+ vle8.v v0, (a0)
+ sub a2, a2, t0
+ vsetvli zero, t0, e16, m4, ta, ma
+ vle16.v v8, (a1)
+ vsext.vf2 v16, v0
+ vsub.vv v16, v16, v8
+ add a0, a0, t0
+ vwmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2023-12-30 14:00 ` flow gg
2023-12-30 14:01 ` flow gg
@ 2023-12-30 15:45 ` Rémi Denis-Courmont
1 sibling, 0 replies; 15+ messages in thread
From: Rémi Denis-Courmont @ 2023-12-30 15:45 UTC (permalink / raw)
To: FFmpeg development discussions and patches, flow gg
Le 30 décembre 2023 15:00:53 GMT+01:00, flow gg <hlefthleft@gmail.com> a écrit :
>> At a quick glance, it won't work if the input length is not a multiple of
>the vector length.
>
>Why?
You're not handling tails as far as I see.
> I tried 1024, 32*3, 32*7 and all passed the test.
They're all multiples of the vector length.
>> Also do you really need to extend accumulators to 32 bits?
>
>It won't overflow after the test is changed, so it's not needed anymore.
>I have modified it in this reply.
>
>Rémi Denis-Courmont <remi@remlab.net> 于2023年12月30日周六 20:15写道:
>
>>
>>
>> Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg <hlefthleft@gmail.com> a
>> écrit :
>> >C908
>> >ssd_int8_vs_int16_c: 207.7
>> >ssd_int8_vs_int16_rvv_i32: 28.0
>>
>> At a quick glance, it won't work if the input length is not a multiple of
>> the vector length.
>>
>> Also do you really need to extend accumulators to 32 bits?
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel@ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email
>ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2023-12-30 12:15 ` Rémi Denis-Courmont
2023-12-30 14:00 ` flow gg
@ 2023-12-30 16:20 ` flow gg
2024-01-04 16:00 ` Rémi Denis-Courmont
1 sibling, 1 reply; 15+ messages in thread
From: flow gg @ 2023-12-30 16:20 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 818 bytes --]
I mistook it, seeing the vector length as the length of the vector register
..
I have modified it in this reply.
Rémi Denis-Courmont <remi@remlab.net> 于2023年12月30日周六 20:15写道:
>
>
> Le 29 décembre 2023 12:57:20 GMT+01:00, flow gg <hlefthleft@gmail.com> a
> écrit :
> >C908
> >ssd_int8_vs_int16_c: 207.7
> >ssd_int8_vs_int16_rvv_i32: 28.0
>
> At a quick glance, it won't work if the input length is not a multiple of
> the vector length.
>
> Also do you really need to extend accumulators to 32 bits?
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: 0003-lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5534 bytes --]
From ffa495739e163a402bd580ddbdd54a42e1ef6712 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 17.7
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 +++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 44 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 90 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 7f253bba12..4e14c3d094 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -46,6 +46,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..985bab81a0
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e8, m2, tu, ma
+ vle8.v v0, (a0)
+ sub a2, a2, t0
+ vsetvli zero, t0, e16, m4, tu, ma
+ vle16.v v8, (a1)
+ vsext.vf2 v16, v0
+ vsub.vv v16, v16, v8
+ add a0, a0, t0
+ vwmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2023-12-30 16:20 ` flow gg
@ 2024-01-04 16:00 ` Rémi Denis-Courmont
2024-01-05 0:56 ` flow gg
0 siblings, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2024-01-04 16:00 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le lauantaina 30. joulukuuta 2023, 18.20.15 EET flow gg a écrit :
> I mistook it, seeing the vector length as the length of the vector register
> ..
> I have modified it in this reply.
Setting element size to 8-bit is unnecessary, and a widening subtraction can
presumably avoid the sign extension.
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-04 16:00 ` Rémi Denis-Courmont
@ 2024-01-05 0:56 ` flow gg
2024-01-06 15:04 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: flow gg @ 2024-01-05 0:56 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 863 bytes --]
One vset can be reduced, but vwsub should not be used in this case. I
modified it in this reply.
Rémi Denis-Courmont <remi@remlab.net> 于2024年1月5日周五 00:00写道:
> Le lauantaina 30. joulukuuta 2023, 18.20.15 EET flow gg a écrit :
> > I mistook it, seeing the vector length as the length of the vector
> register
> > ..
> > I have modified it in this reply.
>
> Setting element size to 8-bit is unnecessary, and a widening subtraction
> can
> presumably avoid the sign extension.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: 0003-lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5488 bytes --]
From c989e8711d98ecc9b814a83db29f781cbf26a416 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 17.7
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 ++++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 43 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 89 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 7f253bba12..4e14c3d094 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -46,6 +46,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..c513115fb4
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e16, m4, tu, ma
+ vle8.v v0, (a0)
+ sub a2, a2, t0
+ vle16.v v8, (a1)
+ add a0, a0, t0
+ vsext.vf2 v16, v0
+ vsub.vv v16, v16, v8
+ vwmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-05 0:56 ` flow gg
@ 2024-01-06 15:04 ` Rémi Denis-Courmont
2024-01-07 1:33 ` flow gg
0 siblings, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2024-01-06 15:04 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le perjantaina 5. tammikuuta 2024, 2.56.18 EET flow gg a écrit :
> One vset can be reduced, but vwsub should not be used in this case. I
> modified it in this reply.
Fair enough, but are you sure that that's faster than keeping the vsetvli and
removing the sign extension?
> Rémi Denis-Courmont <remi@remlab.net> 于2024年1月5日周五 00:00写道:
>
> > Le lauantaina 30. joulukuuta 2023, 18.20.15 EET flow gg a écrit :
> > > I mistook it, seeing the vector length as the length of the vector
> >
> > register
> >
> > > ..
> > > I have modified it in this reply.
> >
> > Setting element size to 8-bit is unnecessary, and a widening subtraction
> > can
> > presumably avoid the sign extension.
> >
> > --
> > レミ・デニ-クールモン
> > http://www.remlab.net/
> >
> >
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-06 15:04 ` Rémi Denis-Courmont
@ 2024-01-07 1:33 ` flow gg
2024-01-07 8:03 ` Rémi Denis-Courmont
2024-01-16 18:04 ` Rémi Denis-Courmont
0 siblings, 2 replies; 15+ messages in thread
From: flow gg @ 2024-01-07 1:33 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 2328 bytes --]
I tested it, and indeed using vwsub is faster. Updated it in the reply.
---
I have a question: if I tweak the load order a bit, using one less vset, it
leads to being slower (the patch I submitted is 13.2, if I make the
following change, the time would be 15.2).
But I thought it would be faster.
- vsetvli t0, a2, e8, m2, tu, ma
- vle8.v v0, (a0)
- sub a2, a2, t0
- vsetvli zero, t0, e16, m4, tu, ma
- vle16.v v8, (a1)
- vsetvli zero, t0, e8, m2, tu, ma
- vwsub.wv v16, v8, v0
+ vsetvli t0, a2, e16, m4, tu, ma
+ vle16.v v8, (a1)
+ sub a2, a2, t0
+ vsetvli zero, t0, e8, m2, tu, ma
+ vle8.v v0, (a0)
+ vwsub.wv v16, v8, v0
Rémi Denis-Courmont <remi@remlab.net> 于2024年1月6日周六 23:05写道:
> Le perjantaina 5. tammikuuta 2024, 2.56.18 EET flow gg a écrit :
> > One vset can be reduced, but vwsub should not be used in this case. I
> > modified it in this reply.
>
> Fair enough, but are you sure that that's faster than keeping the vsetvli
> and
> removing the sign extension?
>
> > Rémi Denis-Courmont <remi@remlab.net> 于2024年1月5日周五 00:00写道:
> >
> > > Le lauantaina 30. joulukuuta 2023, 18.20.15 EET flow gg a écrit :
> > > > I mistook it, seeing the vector length as the length of the vector
> > >
> > > register
> > >
> > > > ..
> > > > I have modified it in this reply.
> > >
> > > Setting element size to 8-bit is unnecessary, and a widening
> subtraction
> > > can
> > > presumably avoid the sign extension.
> > >
> > > --
> > > レミ・デニ-クールモン
> > > http://www.remlab.net/
> > >
> > >
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel@ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
>
> --
> Rémi Denis-Courmont
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: 0003-lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5598 bytes --]
From e028a42bf7a63eb7f0851d03dbca471bbfecbc7b Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 13.2
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 +++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 45 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 91 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 7f253bba12..4e14c3d094 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -46,6 +46,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..d5985cafa6
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e8, m2, tu, ma
+ vle8.v v0, (a0)
+ sub a2, a2, t0
+ vsetvli zero, t0, e16, m4, tu, ma
+ vle16.v v8, (a1)
+ vsetvli zero, t0, e8, m2, tu, ma
+ vwsub.wv v16, v8, v0
+ vsetvli zero, t0, e16, m4, tu, ma
+ add a0, a0, t0
+ vwmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-07 1:33 ` flow gg
@ 2024-01-07 8:03 ` Rémi Denis-Courmont
2024-01-07 8:36 ` flow gg
2024-01-16 18:04 ` Rémi Denis-Courmont
1 sibling, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2024-01-07 8:03 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le sunnuntaina 7. tammikuuta 2024, 3.33.39 EET flow gg a écrit :
> I tested it, and indeed using vwsub is faster. Updated it in the reply.
>
> ---
>
> I have a question: if I tweak the load order a bit, using one less vset, it
> leads to being slower (the patch I submitted is 13.2, if I make the
> following change, the time would be 15.2).
> But I thought it would be faster.
I would guess that v0 is needed before v8 in the internal implementation of
vwsub. This kind of makes sense as the element still need to be sign-extended.
Thus vwsub ends up stalling the pipeline in wait for vle8 to complete. That's
just a guess though, as I don't have internal cycle timing documentation.
> - vsetvli t0, a2, e8, m2, tu, ma
> - vle8.v v0, (a0)
> - sub a2, a2, t0
> - vsetvli zero, t0, e16, m4, tu, ma
> - vle16.v v8, (a1)
> - vsetvli zero, t0, e8, m2, tu, ma
> - vwsub.wv v16, v8, v0
>
> + vsetvli t0, a2, e16, m4, tu, ma
> + vle16.v v8, (a1)
> + sub a2, a2, t0
> + vsetvli zero, t0, e8, m2, tu, ma
> + vle8.v v0, (a0)
> + vwsub.wv v16, v8, v0
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-07 8:03 ` Rémi Denis-Courmont
@ 2024-01-07 8:36 ` flow gg
2024-01-16 18:00 ` Rémi Denis-Courmont
0 siblings, 1 reply; 15+ messages in thread
From: flow gg @ 2024-01-07 8:36 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1794 bytes --]
Alright, I learned a bit more, so should we not consider the internal
implementation?
I've added this version that reduces one vset in this reply.
Rémi Denis-Courmont <remi@remlab.net> 于2024年1月7日周日 16:03写道:
> Le sunnuntaina 7. tammikuuta 2024, 3.33.39 EET flow gg a écrit :
> > I tested it, and indeed using vwsub is faster. Updated it in the reply.
> >
> > ---
> >
> > I have a question: if I tweak the load order a bit, using one less vset,
> it
> > leads to being slower (the patch I submitted is 13.2, if I make the
> > following change, the time would be 15.2).
> > But I thought it would be faster.
>
> I would guess that v0 is needed before v8 in the internal implementation
> of
> vwsub. This kind of makes sense as the element still need to be
> sign-extended.
> Thus vwsub ends up stalling the pipeline in wait for vle8 to complete.
> That's
> just a guess though, as I don't have internal cycle timing documentation.
>
> > - vsetvli t0, a2, e8, m2, tu, ma
> > - vle8.v v0, (a0)
> > - sub a2, a2, t0
> > - vsetvli zero, t0, e16, m4, tu, ma
> > - vle16.v v8, (a1)
> > - vsetvli zero, t0, e8, m2, tu, ma
> > - vwsub.wv v16, v8, v0
> >
> > + vsetvli t0, a2, e16, m4, tu, ma
> > + vle16.v v8, (a1)
> > + sub a2, a2, t0
> > + vsetvli zero, t0, e8, m2, tu, ma
> > + vle8.v v0, (a0)
> > + vwsub.wv v16, v8, v0
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: 0003-lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5551 bytes --]
From 786c28b1c83b3cf033644c54ff32240f07b8082b Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 15.2
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 +++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 44 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 90 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 7f253bba12..4e14c3d094 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -46,6 +46,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..b632eb39a9
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e16, m4, tu, ma
+ vle16.v v8, (a1)
+ sub a2, a2, t0
+ vsetvli zero, t0, e8, m2, tu, ma
+ vle8.v v0, (a0)
+ vwsub.wv v16, v8, v0
+ vsetvli zero, t0, e16, m4, tu, ma
+ add a0, a0, t0
+ vwmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-07 8:36 ` flow gg
@ 2024-01-16 18:00 ` Rémi Denis-Courmont
0 siblings, 0 replies; 15+ messages in thread
From: Rémi Denis-Courmont @ 2024-01-16 18:00 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le sunnuntaina 7. tammikuuta 2024, 10.36.23 EET flow gg a écrit :
> Alright, I learned a bit more, so should we not consider the internal
> implementation?
You asked what the reason was for your counter-intuitive observations, and I
provided a plausible hypothesis. Nothing more ,nothing less.
Of course we should take performance characteristics of real hardware into
account, as is done on all other ISAs. The flip side however is that we might
have to make tradeoffs when design from other vendors come out exhibiting
different characteristics.
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-07 1:33 ` flow gg
2024-01-07 8:03 ` Rémi Denis-Courmont
@ 2024-01-16 18:04 ` Rémi Denis-Courmont
2024-01-16 18:46 ` flow gg
1 sibling, 1 reply; 15+ messages in thread
From: Rémi Denis-Courmont @ 2024-01-16 18:04 UTC (permalink / raw)
To: FFmpeg development discussions and patches
+ vsetvli t0, a2, e8, m2, tu, ma
+ vle8.v v0, (a0)
+ sub a2, a2, t0
+ vsetvli zero, t0, e16, m4, tu, ma
+ vle16.v v8, (a1)
+ vsetvli zero, t0, e8, m2, tu, ma
+ vwsub.wv v16, v8, v0
+ vsetvli zero, t0, e16, m4, tu, ma
It looks to me like the second vsetvli is unnecessary, and consequently the
third as well. As for the later ones, please use `vsetvli zero, zero` if you
intend to change SEW while preserving VL and the LMUL:SEW ratio.
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16
2024-01-16 18:04 ` Rémi Denis-Courmont
@ 2024-01-16 18:46 ` flow gg
0 siblings, 0 replies; 15+ messages in thread
From: flow gg @ 2024-01-16 18:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1042 bytes --]
Okay, I updated it in the reply
Rémi Denis-Courmont <remi@remlab.net> 于2024年1月17日周三 02:04写道:
> + vsetvli t0, a2, e8, m2, tu, ma
> + vle8.v v0, (a0)
> + sub a2, a2, t0
> + vsetvli zero, t0, e16, m4, tu, ma
> + vle16.v v8, (a1)
> + vsetvli zero, t0, e8, m2, tu, ma
> + vwsub.wv v16, v8, v0
> + vsetvli zero, t0, e16, m4, tu, ma
>
> It looks to me like the second vsetvli is unnecessary, and consequently
> the
> third as well. As for the later ones, please use `vsetvli zero, zero` if
> you
> intend to change SEW while preserving VL and the LMUL:SEW ratio.
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: lavc-svq1enc-R-V-V-ssd_int8_vs_int16.patch --]
[-- Type: text/x-patch, Size: 5506 bytes --]
From b3c3187bebfc3ccf180e840dafe4a0486ce5d429 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 29 Dec 2023 13:27:31 +0800
Subject: [PATCH] lavc/svq1enc: R-V V ssd_int8_vs_int16
C908
ssd_int8_vs_int16_c: 207.7
ssd_int8_vs_int16_rvv_i32: 14.2
---
libavcodec/riscv/Makefile | 2 ++
libavcodec/riscv/svqenc_init.c | 41 ++++++++++++++++++++++++++++++++
libavcodec/riscv/svqenc_rvv.S | 43 ++++++++++++++++++++++++++++++++++
libavcodec/svq1enc.c | 2 ++
libavcodec/svq1encdsp.h | 1 +
5 files changed, 89 insertions(+)
create mode 100644 libavcodec/riscv/svqenc_init.c
create mode 100644 libavcodec/riscv/svqenc_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 4c587ce4e0..1996f6bf37 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -48,6 +48,8 @@ RVV-OBJS-$(CONFIG_OPUS_DECODER) += riscv/opusdsp_rvv.o
OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o
RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
+RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
diff --git a/libavcodec/riscv/svqenc_init.c b/libavcodec/riscv/svqenc_init.c
new file mode 100644
index 0000000000..f4c398960c
--- /dev/null
+++ b/libavcodec/riscv/svqenc_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/svq1encdsp.h"
+
+int ff_ssd_int8_vs_int16_rvv(const int8_t *pix1, const int16_t *pix2,
+ intptr_t size);
+
+av_cold void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB_ADDR) {
+ c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_rvv;
+ }
+ }
+#endif
+}
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
new file mode 100644
index 0000000000..cfc27154dd
--- /dev/null
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_ssd_int8_vs_int16_rvv, zve32x
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v24, zero
+1:
+ vsetvli t0, a2, e8, m2, tu, ma
+ vle16.v v8, (a1)
+ sub a2, a2, t0
+ vle8.v v0, (a0)
+ vwsub.wv v16, v8, v0
+ vsetvli zero, zero, e16, m4, tu, ma
+ add a0, a0, t0
+ vwmacc.vv v24, v16, v16
+ sh1add a1, t0, a1
+ bnez a2, 1b
+ vsetvli zero, zero, e32, m8, ta, ma
+ vmv.s.x v0, zero
+ vredsum.vs v0, v24, v0
+ vmv.x.s a0, v0
+
+ ret
+endfunc
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 0dea405dec..6e7ea12aa7 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -766,6 +766,8 @@ void ff_svq1enc_init(SVQ1EncDSPContext *c)
#if ARCH_PPC
ff_svq1enc_init_ppc(c);
+#elif ARCH_RISCV
+ ff_svq1enc_init_riscv(c);
#elif ARCH_X86
ff_svq1enc_init_x86(c);
#endif
diff --git a/libavcodec/svq1encdsp.h b/libavcodec/svq1encdsp.h
index 618bf8463b..5dfa35cc62 100644
--- a/libavcodec/svq1encdsp.h
+++ b/libavcodec/svq1encdsp.h
@@ -30,6 +30,7 @@ typedef struct SVQ1EncDSPContext {
void ff_svq1enc_init(SVQ1EncDSPContext *c);
void ff_svq1enc_init_ppc(SVQ1EncDSPContext *c);
+void ff_svq1enc_init_riscv(SVQ1EncDSPContext *c);
void ff_svq1enc_init_x86(SVQ1EncDSPContext *c);
#endif /* AVCODEC_SVQ1ENCDSP_H */
--
2.43.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2024-01-16 18:46 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-29 11:57 [FFmpeg-devel] [PATCH 3/3] lavc/svq1enc: R-V V ssd_int8_vs_int16 flow gg
2023-12-30 12:15 ` Rémi Denis-Courmont
2023-12-30 14:00 ` flow gg
2023-12-30 14:01 ` flow gg
2023-12-30 15:45 ` Rémi Denis-Courmont
2023-12-30 16:20 ` flow gg
2024-01-04 16:00 ` Rémi Denis-Courmont
2024-01-05 0:56 ` flow gg
2024-01-06 15:04 ` Rémi Denis-Courmont
2024-01-07 1:33 ` flow gg
2024-01-07 8:03 ` Rémi Denis-Courmont
2024-01-07 8:36 ` flow gg
2024-01-16 18:00 ` Rémi Denis-Courmont
2024-01-16 18:04 ` Rémi Denis-Courmont
2024-01-16 18:46 ` flow gg
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git