From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH 2/2] swscale/x86/input: add AVX2 optimized uyvytoyuv422
Date: Wed, 5 Jun 2024 23:00:57 +0200
Message-ID: <GV1P250MB07377FCA744EFA8DC8E501F58FF92@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> (raw)
In-Reply-To: <20240605202853.3135-2-jamrial@gmail.com>
James Almer:
> uyvytoyuv422_c: 23991.8
> uyvytoyuv422_sse2: 2817.8
> uyvytoyuv422_avx: 2819.3
The avx version is no faster than the sse2 one (2819.3 vs. 2817.8), so why don't you nuke the avx version in a follow-up patch?
> uyvytoyuv422_avx2: 1972.3
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> libswscale/x86/rgb2rgb.c | 6 ++++++
> libswscale/x86/rgb_2_rgb.asm | 32 ++++++++++++++++++++++++--------
> 2 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
> index b325e5dbd5..21ccfafe51 100644
> --- a/libswscale/x86/rgb2rgb.c
> +++ b/libswscale/x86/rgb2rgb.c
> @@ -136,6 +136,9 @@ void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
> void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
> const uint8_t *src, int width, int height,
> int lumStride, int chromStride, int srcStride);
> +void ff_uyvytoyuv422_avx2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
> + const uint8_t *src, int width, int height,
> + int lumStride, int chromStride, int srcStride);
> #endif
>
> av_cold void rgb2rgb_init_x86(void)
> @@ -177,5 +180,8 @@ av_cold void rgb2rgb_init_x86(void)
> if (EXTERNAL_AVX(cpu_flags)) {
> uyvytoyuv422 = ff_uyvytoyuv422_avx;
> }
> + if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> + uyvytoyuv422 = ff_uyvytoyuv422_avx2;
> + }
> #endif
> }
> diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
> index 76ca1eec03..0bf1278718 100644
> --- a/libswscale/x86/rgb_2_rgb.asm
> +++ b/libswscale/x86/rgb_2_rgb.asm
> @@ -34,13 +34,16 @@ pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
>
> SECTION .text
>
> -%macro RSHIFT_COPY 3
> +%macro RSHIFT_COPY 5
> ; %1 dst ; %2 src ; %3 shift
> -%if cpuflag(avx)
> - psrldq %1, %2, %3
> +%if mmsize == 32
> + vperm2i128 %1, %2, %3, %5
> + RSHIFT %1, %4
> +%elif cpuflag(avx)
> + psrldq %1, %2, %4
> %else
> mova %1, %2
> - RSHIFT %1, %3
> + RSHIFT %1, %4
> %endif
> %endmacro
>
> @@ -233,26 +236,37 @@ cglobal uyvytoyuv422, 9, 14, 8, ydst, udst, vdst, src, w, h, lum_stride, chrom_s
> jge .end_line
>
> .loop_simd:
> +%if mmsize == 32
> + movu xm2, [srcq + wtwoq ]
> + movu xm3, [srcq + wtwoq + 16 ]
> + movu xm4, [srcq + wtwoq + 16 * 2]
> + movu xm5, [srcq + wtwoq + 16 * 3]
> + vinserti128 m2, m2, [srcq + wtwoq + 16 * 4], 1
> + vinserti128 m3, m3, [srcq + wtwoq + 16 * 5], 1
> + vinserti128 m4, m4, [srcq + wtwoq + 16 * 6], 1
> + vinserti128 m5, m5, [srcq + wtwoq + 16 * 7], 1
> +%else
> movu m2, [srcq + wtwoq ]
> movu m3, [srcq + wtwoq + mmsize ]
> movu m4, [srcq + wtwoq + mmsize * 2]
> movu m5, [srcq + wtwoq + mmsize * 3]
> +%endif
>
> ; extract y part 1
> - RSHIFT_COPY m6, m2, 1 ; UYVY UYVY -> YVYU YVY...
> + RSHIFT_COPY m6, m2, m4, 1, 0x20 ; UYVY UYVY -> YVYU YVY...
> pand m6, m1; YxYx YxYx...
>
> - RSHIFT_COPY m7, m3, 1 ; UYVY UYVY -> YVYU YVY...
> + RSHIFT_COPY m7, m3, m5, 1, 0x20 ; UYVY UYVY -> YVYU YVY...
> pand m7, m1 ; YxYx YxYx...
>
> packuswb m6, m7 ; YYYY YYYY...
> movu [ydstq + wq], m6
>
> ; extract y part 2
> - RSHIFT_COPY m6, m4, 1 ; UYVY UYVY -> YVYU YVY...
> + RSHIFT_COPY m6, m4, m2, 1, 0x13 ; UYVY UYVY -> YVYU YVY...
> pand m6, m1; YxYx YxYx...
>
> - RSHIFT_COPY m7, m5, 1 ; UYVY UYVY -> YVYU YVY...
> + RSHIFT_COPY m7, m5, m3, 1, 0x13 ; UYVY UYVY -> YVYU YVY...
> pand m7, m1 ; YxYx YxYx...
>
> packuswb m6, m7 ; YYYY YYYY...
> @@ -309,4 +323,6 @@ UYVY_TO_YUV422
>
> INIT_XMM avx
> UYVY_TO_YUV422
> +INIT_YMM avx2
> +UYVY_TO_YUV422
> %endif
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-06-05 21:01 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-05 20:28 [FFmpeg-devel] [PATCH 1/2] swscale/x86/input: add AVX2 optimized RGB32 to YUV functions James Almer
2024-06-05 20:28 ` [FFmpeg-devel] [PATCH 2/2] swscale/x86/input: add AVX2 optimized uyvytoyuv422 James Almer
2024-06-05 21:00 ` Andreas Rheinhardt [this message]
2024-06-06 6:03 ` Rémi Denis-Courmont
2024-06-06 7:01 ` Christophe Gisquet
2024-06-07 7:24 ` Rémi Denis-Courmont
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=GV1P250MB07377FCA744EFA8DC8E501F58FF92@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM \
--to=andreas.rheinhardt@outlook.com \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git