From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> To: ffmpeg-devel@ffmpeg.org Cc: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Subject: [FFmpeg-devel] [PATCH 2/4] swscale/x86/rgb2rgb: Don't unnecessarily check for inline ASM Date: Wed, 5 Jun 2024 23:38:59 +0200 Message-ID: <GV1P250MB07373C098407F98494C1F8CC8FF92@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> (raw) In-Reply-To: <GV1P250MB073797DF5AC7A41CD8735E158FF92@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> The SSE2 and AVX versions of deinterleaveBytes are external ASM. Move them out of the inline ASM template. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswscale/x86/rgb2rgb.c | 48 +++++++++++++++++++++++-------- libswscale/x86/rgb2rgb_template.c | 30 ------------------- 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index b325e5dbd5..be6f5abc95 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -100,13 +100,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); #define RENAME(a) a ## _sse2 #include "rgb2rgb_template.c" -//AVX versions -#undef RENAME -#undef COMPILE_TEMPLATE_AVX -#define COMPILE_TEMPLATE_AVX 1 -#define RENAME(a) a ## _avx -#include "rgb2rgb_template.c" - /* RGB15->RGB16 original by Strepto/Astral ported to gcc & bugfixed : A'rpi @@ -138,6 +131,33 @@ void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride); #endif +#define DEINTERLEAVE_BYTES(cpuext) \ +void ff_nv12ToUV_ ## cpuext(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *unused, \ + const uint8_t *src1, \ + const uint8_t *src2, \ + int w, \ + uint32_t *unused2, \ + void *opq); \ +static void deinterleave_bytes_ ## cpuext(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, \ + int width, int height, int srcStride, \ + int dst1Stride, int dst2Stride) \ +{ \ + for (int h = 0; h < height; h++) { \ + ff_nv12ToUV_ ## cpuext(dst1, dst2, NULL, src, NULL, 
width, NULL, NULL); \ + src += srcStride; \ + dst1 += dst1Stride; \ + dst2 += dst2Stride; \ + } \ +} + +#if HAVE_SSE2_EXTERNAL +DEINTERLEAVE_BYTES(sse2) +#endif +#if HAVE_AVX_EXTERNAL +DEINTERLEAVE_BYTES(avx) +#endif + av_cold void rgb2rgb_init_x86(void) { int cpu_flags = av_get_cpu_flags(); @@ -147,18 +167,19 @@ av_cold void rgb2rgb_init_x86(void) rgb2rgb_init_mmxext(); if (INLINE_SSE2(cpu_flags)) rgb2rgb_init_sse2(); - if (INLINE_AVX(cpu_flags)) - rgb2rgb_init_avx(); #endif /* HAVE_INLINE_ASM */ if (EXTERNAL_MMXEXT(cpu_flags)) { shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext; } +#if HAVE_SSE2_EXTERNAL if (EXTERNAL_SSE2(cpu_flags)) { #if ARCH_X86_64 uyvytoyuv422 = ff_uyvytoyuv422_sse2; #endif + deinterleaveBytes = deinterleave_bytes_sse2; } +#endif if (EXTERNAL_SSSE3(cpu_flags)) { shuffle_bytes_0321 = ff_shuffle_bytes_0321_ssse3; shuffle_bytes_2103 = ff_shuffle_bytes_2103_ssse3; @@ -166,16 +187,19 @@ av_cold void rgb2rgb_init_x86(void) shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3; shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3; } +#if HAVE_AVX_EXTERNAL + if (EXTERNAL_AVX(cpu_flags)) { + deinterleaveBytes = deinterleave_bytes_avx; #if ARCH_X86_64 + uyvytoyuv422 = ff_uyvytoyuv422_avx; + } if (EXTERNAL_AVX2_FAST(cpu_flags)) { shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2; shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2; shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2; shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2; shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2; - } - if (EXTERNAL_AVX(cpu_flags)) { - uyvytoyuv422 = ff_uyvytoyuv422_avx; +#endif } #endif } diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index e4e884827c..5c73fa4e16 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -1816,31 +1816,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui } #endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AVX || 
HAVE_AVX_EXTERNAL -#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM -void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused, - const uint8_t *src1, - const uint8_t *src2, - int w, - uint32_t *unused2, - void *opq); -static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, - int width, int height, int srcStride, - int dst1Stride, int dst2Stride) -{ - int h; - - for (h = 0; h < height; h++) { - RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL, NULL); - src += srcStride; - dst1 += dst1Stride; - dst2 += dst2Stride; - } -} -#endif /* COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM */ -#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ - #if !COMPILE_TEMPLATE_SSE2 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, @@ -2441,9 +2416,4 @@ static av_cold void RENAME(rgb2rgb_init)(void) #if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 interleaveBytes = RENAME(interleaveBytes); #endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM - deinterleaveBytes = RENAME(deinterleaveBytes); -#endif -#endif } -- 2.40.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-06-05 21:39 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-05 21:37 [FFmpeg-devel] [PATCH 1/4] swscale/x86/rgb2rgb_template: Remove unnecessary SFENCE Andreas Rheinhardt
2024-06-05 21:38 ` Andreas Rheinhardt [this message]
2024-06-05 21:39 ` [FFmpeg-devel] [PATCH 3/4] swscale/x86/rgb2rgb_template: Remove unused uyvytoyv12 Andreas Rheinhardt
2024-06-05 21:39 ` [FFmpeg-devel] [PATCH 4/4] swscale/x86/rgb2rgb: Detemplatize Andreas Rheinhardt
2024-06-08 19:44 ` [FFmpeg-devel] [PATCH 1/4] swscale/x86/rgb2rgb_template: Remove unnecessary SFENCE Andreas Rheinhardt
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=GV1P250MB07373C098407F98494C1F8CC8FF92@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM \
    --to=andreas.rheinhardt@outlook.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

    git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

    # If you have public-inbox 1.1+ installed, you may
    # initialize and index your mirror using the following commands:
    public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
        ffmpegdev@gitmailbox.com
    public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git