Re: [FFmpeg-devel] [PATCH] avutil/x86/intmath: remove inline asm implementations for clip functions

From: Niklas Haas <ffmpeg@haasn.xyz>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avutil/x86/intmath: remove inline asm implementations for clip functions
Date: Tue, 3 Jun 2025 18:15:57 +0200
Message-ID: <20250603181557.GB181379@haasn.xyz> (raw)
In-Reply-To: <20250602184133.2175-1-jamrial@gmail.com>

On Mon, 02 Jun 2025 15:41:33 -0300 James Almer <jamrial@gmail.com> wrote:
> GCC/Clang is smart enough to emit minss/maxss the same way as these functions.
> The only theoretical benefit was in x86_32, where x87 floats are used, but the
> penalty of making the clipping opaque to the compiler's scheduler plus moving
> values from mmx regs to xmm and back will offset any potential speedup.
> x86_32 builds targeting anything made in the last two and a half decades
> should use -msse -mfpmath=sse anyway.

As mentioned in another thread, x87 FPU usage causes non-bitexact results in
swscale. Should we at this point consider setting -mfpmath=sse by default for
x86_32 builds?

>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavutil/x86/intmath.h | 62 -----------------------------------------
>  1 file changed, 62 deletions(-)
>
> diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
> index 4893a1f1b4..735945ca95 100644
> --- a/libavutil/x86/intmath.h
> +++ b/libavutil/x86/intmath.h
> @@ -114,68 +114,6 @@ static av_always_inline av_const unsigned av_zero_extend_bmi2(unsigned a, unsign
>
>  #endif /* __BMI2__ */
>
> -#if defined(__SSE2__) && !defined(__INTEL_COMPILER)
> -
> -#define av_clipd av_clipd_sse2
> -static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax)
> -{
> -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> -    if (amin > amax) abort();
> -#endif
> -    __asm__ ("maxsd %1, %0 \n\t"
> -             "minsd %2, %0 \n\t"
> -             : "+&x"(a) : "xm"(amin), "xm"(amax));
> -    return a;
> -}
> -
> -#endif /* __SSE2__ */
> -
> -#if defined(__SSE__) && !defined(__INTEL_COMPILER)
> -
> -#define av_clipf av_clipf_sse
> -static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax)
> -{
> -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> -    if (amin > amax) abort();
> -#endif
> -    __asm__ ("maxss %1, %0 \n\t"
> -             "minss %2, %0 \n\t"
> -             : "+&x"(a) : "xm"(amin), "xm"(amax));
> -    return a;
> -}
> -
> -#endif /* __SSE__ */
> -
> -#if defined(__AVX__) && !defined(__INTEL_COMPILER)
> -
> -#undef av_clipd
> -#define av_clipd av_clipd_avx
> -static av_always_inline av_const double av_clipd_avx(double a, double amin, double amax)
> -{
> -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> -    if (amin > amax) abort();
> -#endif
> -    __asm__ ("vmaxsd %1, %0, %0 \n\t"
> -             "vminsd %2, %0, %0 \n\t"
> -             : "+&x"(a) : "xm"(amin), "xm"(amax));
> -    return a;
> -}
> -
> -#undef av_clipf
> -#define av_clipf av_clipf_avx
> -static av_always_inline av_const float av_clipf_avx(float a, float amin, float amax)
> -{
> -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
> -    if (amin > amax) abort();
> -#endif
> -    __asm__ ("vmaxss %1, %0, %0 \n\t"
> -             "vminss %2, %0, %0 \n\t"
> -             : "+&x"(a) : "xm"(amin), "xm"(amax));
> -    return a;
> -}
> -
> -#endif /* __AVX__ */
> -
>  #endif /* __GNUC__ */
>
>  #endif /* AVUTIL_X86_INTMATH_H */
> --
> 2.49.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".