* [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
@ 2023-07-14 10:08 Alan Kelly
2023-07-14 12:59 ` Kieran Kunhya
0 siblings, 1 reply; 7+ messages in thread
From: Alan Kelly @ 2023-07-14 10:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Alan Kelly
---
libswscale/x86/swscale.c | 7 +++++++
libswscale/x86/yuv2yuvX.asm | 19 ++++++++++++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 8c67bf4fab..52423a1199 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@ YUV2YUVX_FUNC(sse3, 32, mmxext)
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64, sse3)
#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128, avx2)
+#endif
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
#if HAVE_AVX2_EXTERNAL
if (EXTERNAL_AVX2_FAST(cpu_flags))
c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+ if (EXTERNAL_AVX512(cpu_flags))
+ c->yuv2planeX = yuv2yuvX_avx512;
#endif
}
#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA 64
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
%else
movq xm3, [ditherq]
%endif ; avx2
+
+%if cpuflag(avx512)
+ mova m15, [permutation]
+%endif
cmp offsetd, 0
jz .offset
@@ -109,7 +117,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
packuswb m6, m6, m1
%endif
mov srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+ vpermt2q m3, m15, m3
+ vpermt2q m6, m15, m6
+%elif cpuflag(avx2)
vpermq m3, m3, 216
vpermq m6, m6, 216
%endif
@@ -131,4 +142,10 @@ YUV2YUVX_FUNC
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL
+%if ARCH_X86_64
+INIT_ZMM avx512
+YUV2YUVX_FUNC
+%endif
+%endif
%endif
--
2.41.0.255.g8b1d071c50-goog
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
2023-07-14 10:08 [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512 Alan Kelly
@ 2023-07-14 12:59 ` Kieran Kunhya
2023-07-14 13:03 ` James Almer
0 siblings, 1 reply; 7+ messages in thread
From: Kieran Kunhya @ 2023-07-14 12:59 UTC (permalink / raw)
To: FFmpeg development discussions and patches; +Cc: Alan Kelly
> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> + if (EXTERNAL_AVX512(cpu_flags))
> + c->yuv2planeX = yuv2yuvX_avx512;
> #endif
>
You want EXTERNAL_AVX512ICL here.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
2023-07-14 12:59 ` Kieran Kunhya
@ 2023-07-14 13:03 ` James Almer
2023-07-14 14:57 ` Kieran Kunhya
0 siblings, 1 reply; 7+ messages in thread
From: James Almer @ 2023-07-14 13:03 UTC (permalink / raw)
To: ffmpeg-devel
On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>> + if (EXTERNAL_AVX512(cpu_flags))
>> + c->yuv2planeX = yuv2yuvX_avx512;
>> #endif
>>
>
> You want EXTERNAL_AVX512ICL here.
vpermt2q with zmm registers is avx512f and not any of the extensions, so
that check is fine.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
2023-07-14 13:03 ` James Almer
@ 2023-07-14 14:57 ` Kieran Kunhya
2023-07-14 14:59 ` James Almer
0 siblings, 1 reply; 7+ messages in thread
From: Kieran Kunhya @ 2023-07-14 14:57 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >> + if (EXTERNAL_AVX512(cpu_flags))
> >> + c->yuv2planeX = yuv2yuvX_avx512;
> >> #endif
> >>
> >
> > You want EXTERNAL_AVX512ICL here.
>
> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> that check is fine.
>
We still support Skylake and we don't want downclocking on that platform.
At least that was my understanding of the intention of AVX512 vs AVX512ICL.
It appears I'm the only one following this convention though.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
2023-07-14 14:57 ` Kieran Kunhya
@ 2023-07-14 14:59 ` James Almer
2023-07-17 9:23 ` Alan Kelly
0 siblings, 1 reply; 7+ messages in thread
From: James Almer @ 2023-07-14 14:59 UTC (permalink / raw)
To: ffmpeg-devel
On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
>
>> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>>>> + if (EXTERNAL_AVX512(cpu_flags))
>>>> + c->yuv2planeX = yuv2yuvX_avx512;
>>>> #endif
>>>>
>>>
>>> You want EXTERNAL_AVX512ICL here.
>>
>> vpermt2q with zmm registers is avx512f and not any of the extensions, so
>> that check is fine.
>>
>
> We still support Skylake and we don't want downclocking on that platform.
> At least that was my understanding of the intention of AVX512 vs AVX512ICL.
> It appears I'm the only one following this convention though.
Ah, no opinion in that regard. I was following the use of the checks in
the strict technical sense of instruction availability.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
2023-07-14 14:59 ` James Almer
@ 2023-07-17 9:23 ` Alan Kelly
2023-07-17 9:23 ` Alan Kelly
0 siblings, 1 reply; 7+ messages in thread
From: Alan Kelly @ 2023-07-17 9:23 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Happy to add the check.
Thanks,
Alan
On Fri, Jul 14, 2023 at 4:59 PM James Almer <jamrial@gmail.com> wrote:
> On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> > On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> >
> >> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >>>> + if (EXTERNAL_AVX512(cpu_flags))
> >>>> + c->yuv2planeX = yuv2yuvX_avx512;
> >>>> #endif
> >>>>
> >>>
> >>> You want EXTERNAL_AVX512ICL here.
> >>
> >> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> >> that check is fine.
> >>
> >
> > We still support Skylake and we don't want downclocking on that platform.
> > > At least that was my understanding of the intention of AVX512 vs AVX512ICL.
> > It appears I'm the only one following this convention though.
>
> Ah, no opinion in that regard. I was following the use of the checks in
> the strict technical sense of instruction availability.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
2023-07-17 9:23 ` Alan Kelly
@ 2023-07-17 9:23 ` Alan Kelly
0 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2023-07-17 9:23 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Alan Kelly
---
Changes since v1: check EXTERNAL_AVX512ICL instead of EXTERNAL_AVX512 so that the AVX-512 path is not selected on Skylake, where it would cause downclocking.
libswscale/x86/swscale.c | 7 +++++++
libswscale/x86/yuv2yuvX.asm | 19 ++++++++++++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 8c67bf4fab..600c7d6c91 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@ YUV2YUVX_FUNC(sse3, 32, mmxext)
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64, sse3)
#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128, avx2)
+#endif
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
#if HAVE_AVX2_EXTERNAL
if (EXTERNAL_AVX2_FAST(cpu_flags))
c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+ if (EXTERNAL_AVX512ICL(cpu_flags))
+ c->yuv2planeX = yuv2yuvX_avx512;
#endif
}
#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA 64
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
%else
movq xm3, [ditherq]
%endif ; avx2
+
+%if cpuflag(avx512)
+ mova m15, [permutation]
+%endif
cmp offsetd, 0
jz .offset
@@ -109,7 +117,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
packuswb m6, m6, m1
%endif
mov srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+ vpermt2q m3, m15, m3
+ vpermt2q m6, m15, m6
+%elif cpuflag(avx2)
vpermq m3, m3, 216
vpermq m6, m6, 216
%endif
@@ -131,4 +142,10 @@ YUV2YUVX_FUNC
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL
+%if ARCH_X86_64
+INIT_ZMM avx512
+YUV2YUVX_FUNC
+%endif
+%endif
%endif
--
2.41.0.255.g8b1d071c50-goog
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-07-17 9:23 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-14 10:08 [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512 Alan Kelly
2023-07-14 12:59 ` Kieran Kunhya
2023-07-14 13:03 ` James Almer
2023-07-14 14:57 ` Kieran Kunhya
2023-07-14 14:59 ` James Almer
2023-07-17 9:23 ` Alan Kelly
2023-07-17 9:23 ` Alan Kelly
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git