From f039f3651a86f133c406f3d393d892f8daa31918 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Tue, 25 Mar 2025 03:07:55 +0100 Subject: [PATCH 02/11] avcodec/x86/mpegvideoenc_template: Remove remnants of MMX Forgotten in 7284ab789d5fe271b9d6a1666ab5ea6be8724cca. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/mpegvideoenc.c | 10 -- libavcodec/x86/mpegvideoenc_template.c | 135 ++++++++++--------------- 2 files changed, 53 insertions(+), 92 deletions(-) diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c index f3c8756d76..d81a8ef14d 100644 --- a/libavcodec/x86/mpegvideoenc.c +++ b/libavcodec/x86/mpegvideoenc.c @@ -43,26 +43,16 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = { #if HAVE_6REGS #if HAVE_SSE2_INLINE -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_SSSE3 -#define COMPILE_TEMPLATE_SSE2 1 #define COMPILE_TEMPLATE_SSSE3 0 -#undef RENAME -#undef RENAME_FDCT #define RENAME(a) a ## _sse2 -#define RENAME_FDCT(a) a ## _sse2 #include "mpegvideoenc_template.c" #endif /* HAVE_SSE2_INLINE */ #if HAVE_SSSE3_INLINE -#undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_SSSE3 -#define COMPILE_TEMPLATE_SSE2 1 #define COMPILE_TEMPLATE_SSSE3 1 #undef RENAME -#undef RENAME_FDCT #define RENAME(a) a ## _ssse3 -#define RENAME_FDCT(a) a ## _sse2 #include "mpegvideoenc_template.c" #endif /* HAVE_SSSE3_INLINE */ diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c index c5418a1b04..85e9159f91 100644 --- a/libavcodec/x86/mpegvideoenc_template.c +++ b/libavcodec/x86/mpegvideoenc_template.c @@ -29,49 +29,22 @@ #include "libavcodec/mpegvideoenc.h" #include "fdct.h" -#undef MMREG_WIDTH -#undef MM -#undef MOVQ #undef SPREADW #undef PMAXW #undef PMAX #undef SAVE_SIGN #undef RESTORE_SIGN -#if COMPILE_TEMPLATE_SSE2 -#define MMREG_WIDTH "16" -#define MM "%%xmm" -#define MOVQ "movdqa" #define SPREADW(a) \ "pshuflw $0, "a", "a" \n\t"\ "punpcklwd "a", "a" \n\t" -#define PMAXW(a,b) "pmaxsw "a", "b" \n\t" #define PMAX(a,b) \ "movhlps "a", "b" \n\t"\ - PMAXW(b, a)\ + "pmaxsw "b", "a" \n\t"\ "pshuflw $0x0E, "a", "b" \n\t"\ - PMAXW(b, a)\ + "pmaxsw "b", "a" \n\t"\ "pshuflw $0x01, "a", "b" \n\t"\ - PMAXW(b, a) -#else -#define MMREG_WIDTH "8" -#define MM "%%mm" -#define MOVQ "movq" -#define SPREADW(a) \ - "punpcklwd "a", "a" \n\t"\ - "punpcklwd "a", "a" \n\t" -#define PMAXW(a,b) \ - "psubusw "a", "b" \n\t"\ - "paddw "a", "b" \n\t" -#define PMAX(a,b) \ - "movq "a", "b" \n\t"\ - "psrlq $32, "a" \n\t"\ - PMAXW(b, a)\ - "movq "a", "b" \n\t"\ - "psrlq $16, "a" \n\t"\ - PMAXW(b, a) - -#endif + "pmaxsw "b", "a" \n\t" #if COMPILE_TEMPLATE_SSSE3 #define SAVE_SIGN(a,b) \ @@ -100,7 +73,7 @@ static int RENAME(dct_quantize)(MPVEncContext *const s, LOCAL_ALIGNED_16(int16_t, temp_block, [64]); //s->fdct (block); - RENAME_FDCT(ff_fdct)(block); // cannot be anything else ... + ff_fdct_sse2(block); // cannot be anything else ... if(s->dct_error_sum) s->denoise_dct(s, block); @@ -138,32 +111,32 @@ static int RENAME(dct_quantize)(MPVEncContext *const s, if ((s->c.out_format == FMT_H263 || s->c.out_format == FMT_H261) && !s->c.mpeg_quant) { __asm__ volatile( - "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 - SPREADW(MM"3") - "pxor "MM"7, "MM"7 \n\t" // 0 - "pxor "MM"4, "MM"4 \n\t" // 0 - MOVQ" (%2), "MM"5 \n\t" // qmat[0] - "pxor "MM"6, "MM"6 \n\t" - "psubw (%3), "MM"6 \n\t" // -bias[0] + "movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1 + SPREADW("%%xmm3") + "pxor %%xmm7, %%xmm7 \n\t" // 0 + "pxor %%xmm4, %%xmm4 \n\t" // 0 + "movdqa (%2), %%xmm5 \n\t" // qmat[0] + "pxor %%xmm6, %%xmm6 \n\t" + "psubw (%3), %%xmm6 \n\t" // -bias[0] "mov $-128, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] - SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) - "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] - "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 - "por "MM"0, "MM"4 \n\t" - RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) - MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" - "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 - MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" - MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 - "pandn "MM"1, "MM"0 \n\t" - PMAXW(MM"0", MM"3") - "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" + "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i] + SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i]) + "psubusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0] + "pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 + "por %%xmm0, %%xmm4 \n\t" + RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t" + "pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00 + "movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t" + "movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0 + "pandn %%xmm1, %%xmm0 \n\t" + "pmaxsw %%xmm0, %%xmm3 \n\t" + "add $16, %%"FF_REG_a" \n\t" " js 1b \n\t" - PMAX(MM"3", MM"0") - "movd "MM"3, %%"FF_REG_a" \n\t" + PMAX("%%xmm3", "%%xmm0") + "movd %%xmm3, %%"FF_REG_a" \n\t" "movzbl %%al, %%eax \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat), "r" (bias), @@ -173,31 +146,31 @@ static int RENAME(dct_quantize)(MPVEncContext *const s, ); }else{ // FMT_H263 __asm__ volatile( - "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 - SPREADW(MM"3") - "pxor "MM"7, "MM"7 \n\t" // 0 - "pxor "MM"4, "MM"4 \n\t" // 0 + "movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1 + SPREADW("%%xmm3") + "pxor %%xmm7, %%xmm7 \n\t" // 0 + "pxor %%xmm4, %%xmm4 \n\t" // 0 "mov $-128, %%"FF_REG_a" \n\t" ".p2align 4 \n\t" "1: \n\t" - MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] - SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) - MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0] - "paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] - MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i] - "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 - "por "MM"0, "MM"4 \n\t" - RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) - MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" - "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 - MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" - MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 - "pandn "MM"1, "MM"0 \n\t" - PMAXW(MM"0", MM"3") - "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" + "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i] + SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i]) + "movdqa (%3, %%"FF_REG_a"), %%xmm6 \n\t" // bias[0] + "paddusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0] + "movdqa (%2, %%"FF_REG_a"), %%xmm5 \n\t" // qmat[i] + "pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 + "por %%xmm0, %%xmm4 \n\t" + RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t" + "pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00 + "movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t" + "movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0 + "pandn %%xmm1, %%xmm0 \n\t" + "pmaxsw %%xmm0, %%xmm3 \n\t" + "add $16, %%"FF_REG_a" \n\t" " js 1b \n\t" - PMAX(MM"3", MM"0") - "movd "MM"3, %%"FF_REG_a" \n\t" + PMAX("%%xmm3", "%%xmm0") + "movd %%xmm3, %%"FF_REG_a" \n\t" "movzbl %%al, %%eax \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat+64), "r" (bias+64), @@ -207,14 +180,12 @@ static int RENAME(dct_quantize)(MPVEncContext *const s, ); } __asm__ volatile( - "movd %1, "MM"1 \n\t" // max_qcoeff - SPREADW(MM"1") - "psubusw "MM"1, "MM"4 \n\t" - "packuswb "MM"4, "MM"4 \n\t" -#if COMPILE_TEMPLATE_SSE2 - "packsswb "MM"4, "MM"4 \n\t" -#endif - "movd "MM"4, %0 \n\t" // *overflow + "movd %1, %%xmm1 \n\t" // max_qcoeff + SPREADW("%%xmm1") + "psubusw %%xmm1, %%xmm4 \n\t" + "packuswb %%xmm4, %%xmm4 \n\t" + "packsswb %%xmm4, %%xmm4 \n\t" + "movd %%xmm4, %0 \n\t" // *overflow : "=g" (*overflow) : "g" (s->max_qcoeff) ); -- 2.45.2