Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
@ 2023-07-14 10:08 Alan Kelly
  2023-07-14 12:59 ` Kieran Kunhya
  0 siblings, 1 reply; 7+ messages in thread
From: Alan Kelly @ 2023-07-14 10:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Alan Kelly

---
 libswscale/x86/swscale.c    |  7 +++++++
 libswscale/x86/yuv2yuvX.asm | 19 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 8c67bf4fab..52423a1199 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@ YUV2YUVX_FUNC(sse3, 32, mmxext)
 #if HAVE_AVX2_EXTERNAL
 YUV2YUVX_FUNC(avx2, 64, sse3)
 #endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128, avx2)
+#endif
 
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512(cpu_flags))
+            c->yuv2planeX = yuv2yuvX_avx512;
 #endif
     }
 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA 64
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %else
     movq                 xm3, [ditherq]
 %endif ; avx2
+
+%if cpuflag(avx512)
+    mova                 m15, [permutation]
+%endif
     cmp                  offsetd, 0
     jz                   .offset
 
@@ -109,7 +117,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     packuswb             m6, m6, m1
 %endif
     mov                  srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+    vpermt2q             m3, m15, m3
+    vpermt2q             m6, m15, m6
+%elif cpuflag(avx2)
     vpermq               m3, m3, 216
     vpermq               m6, m6, 216
 %endif
@@ -131,4 +142,10 @@ YUV2YUVX_FUNC
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
 YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL
+%if ARCH_X86_64
+INIT_ZMM avx512
+YUV2YUVX_FUNC
+%endif
+%endif
 %endif
-- 
2.41.0.255.g8b1d071c50-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
  2023-07-14 10:08 [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512 Alan Kelly
@ 2023-07-14 12:59 ` Kieran Kunhya
  2023-07-14 13:03   ` James Almer
  0 siblings, 1 reply; 7+ messages in thread
From: Kieran Kunhya @ 2023-07-14 12:59 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Alan Kelly

> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> +        if (EXTERNAL_AVX512(cpu_flags))
> +            c->yuv2planeX = yuv2yuvX_avx512;
>  #endif
>

 You want EXTERNAL_AVX512ICL here.

Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
  2023-07-14 12:59 ` Kieran Kunhya
@ 2023-07-14 13:03   ` James Almer
  2023-07-14 14:57     ` Kieran Kunhya
  0 siblings, 1 reply; 7+ messages in thread
From: James Almer @ 2023-07-14 13:03 UTC (permalink / raw)
  To: ffmpeg-devel

On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>> +        if (EXTERNAL_AVX512(cpu_flags))
>> +            c->yuv2planeX = yuv2yuvX_avx512;
>>   #endif
>>
> 
>   You want EXTERNAL_AVX512ICL here.

vpermt2q with zmm registers is part of AVX-512F (the foundation set), not
one of the extensions, so that check is fine.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
  2023-07-14 13:03   ` James Almer
@ 2023-07-14 14:57     ` Kieran Kunhya
  2023-07-14 14:59       ` James Almer
  0 siblings, 1 reply; 7+ messages in thread
From: Kieran Kunhya @ 2023-07-14 14:57 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:

> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >> +        if (EXTERNAL_AVX512(cpu_flags))
> >> +            c->yuv2planeX = yuv2yuvX_avx512;
> >>   #endif
> >>
> >
> >   You want EXTERNAL_AVX512ICL here.
>
> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> that check is fine.
>

We still support Skylake, and we don't want downclocking on that platform.
At least that was my understanding of the intention of AVX512 vs AVX512ICL.
It appears I'm the only one following this convention, though.

Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
  2023-07-14 14:57     ` Kieran Kunhya
@ 2023-07-14 14:59       ` James Almer
  2023-07-17  9:23         ` Alan Kelly
  0 siblings, 1 reply; 7+ messages in thread
From: James Almer @ 2023-07-14 14:59 UTC (permalink / raw)
  To: ffmpeg-devel

On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> 
>> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>>>> +        if (EXTERNAL_AVX512(cpu_flags))
>>>> +            c->yuv2planeX = yuv2yuvX_avx512;
>>>>    #endif
>>>>
>>>
>>>    You want EXTERNAL_AVX512ICL here.
>>
>> vpermt2q with zmm registers is avx512f and not any of the extensions, so
>> that check is fine.
>>
> 
> We still support Skylake and we don't want downclocking on that platform.
> At least that was my understanding of the intention of AVX512 vs AVX512ICL.
> It appears I'm the only one following this convention though.

Ah, no opinion in that regard. I was following the use of the checks in 
the strict technical sense of instruction availability.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
  2023-07-14 14:59       ` James Almer
@ 2023-07-17  9:23         ` Alan Kelly
  2023-07-17  9:23           ` Alan Kelly
  0 siblings, 1 reply; 7+ messages in thread
From: Alan Kelly @ 2023-07-17  9:23 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Happy to add the check.

Thanks,
Alan

On Fri, Jul 14, 2023 at 4:59 PM James Almer <jamrial@gmail.com> wrote:

> On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> > On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> >
> >> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >>>> +        if (EXTERNAL_AVX512(cpu_flags))
> >>>> +            c->yuv2planeX = yuv2yuvX_avx512;
> >>>>    #endif
> >>>>
> >>>
> >>>    You want EXTERNAL_AVX512ICL here.
> >>
> >> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> >> that check is fine.
> >>
> >
> > We still support Skylake and we don't want downclocking on that platform.
> > At least that was my understanding of the intention of AVX512 vs
> AVX512ICL.
> > It appears I'm the only one following this convention though.
>
> Ah, no opinion in that regard. I was following the use of the checks in
> the strict technical sense of instruction availability.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512
  2023-07-17  9:23         ` Alan Kelly
@ 2023-07-17  9:23           ` Alan Kelly
  0 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2023-07-17  9:23 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Alan Kelly

---
 Now checks EXTERNAL_AVX512ICL instead of EXTERNAL_AVX512 to prevent downclocking on Skylake.
 libswscale/x86/swscale.c    |  7 +++++++
 libswscale/x86/yuv2yuvX.asm | 19 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 8c67bf4fab..600c7d6c91 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@ YUV2YUVX_FUNC(sse3, 32, mmxext)
 #if HAVE_AVX2_EXTERNAL
 YUV2YUVX_FUNC(avx2, 64, sse3)
 #endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128, avx2)
+#endif
 
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512ICL(cpu_flags))
+            c->yuv2planeX = yuv2yuvX_avx512;
 #endif
     }
 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA 64
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %else
     movq                 xm3, [ditherq]
 %endif ; avx2
+
+%if cpuflag(avx512)
+    mova                 m15, [permutation]
+%endif
     cmp                  offsetd, 0
     jz                   .offset
 
@@ -109,7 +117,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     packuswb             m6, m6, m1
 %endif
     mov                  srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+    vpermt2q             m3, m15, m3
+    vpermt2q             m6, m15, m6
+%elif cpuflag(avx2)
     vpermq               m3, m3, 216
     vpermq               m6, m6, 216
 %endif
@@ -131,4 +142,10 @@ YUV2YUVX_FUNC
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
 YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL
+%if ARCH_X86_64
+INIT_ZMM avx512
+YUV2YUVX_FUNC
+%endif
+%endif
 %endif
-- 
2.41.0.255.g8b1d071c50-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-07-17  9:23 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-14 10:08 [FFmpeg-devel] [PATCH 2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512 Alan Kelly
2023-07-14 12:59 ` Kieran Kunhya
2023-07-14 13:03   ` James Almer
2023-07-14 14:57     ` Kieran Kunhya
2023-07-14 14:59       ` James Almer
2023-07-17  9:23         ` Alan Kelly
2023-07-17  9:23           ` Alan Kelly

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git