From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 51FB84EAB1 for ; Wed, 14 May 2025 00:46:18 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 9100168AD83; Wed, 14 May 2025 03:46:14 +0300 (EEST) Received: from mail-qv1-f44.google.com (mail-qv1-f44.google.com [209.85.219.44]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 8783968A4EE for ; Wed, 14 May 2025 03:46:08 +0300 (EEST) Received: by mail-qv1-f44.google.com with SMTP id 6a1803df08f44-6f6e72cbbf4so4513996d6.0 for ; Tue, 13 May 2025 17:46:08 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1747183566; x=1747788366; darn=ffmpeg.org; h=content-transfer-encoding:mime-version:message-id:date:subject:to :from:from:to:cc:subject:date:message-id:reply-to; bh=+LWqf7IlnUrovMScui6yV5GPBY7fFiKy/qymXwl7VRA=; b=ZSaSofhEH2bLs8apI9IJBYu20610Guk0dpytzhxIpAuT7H7x6ovW0snzrvMvKY7Rpz gv4JvuByTWIx7xzN3fCubrE2ph0WZmZyF7JShjW3al+mIfrW9+w18WCWgGgg95ahvb/N nqnrm5eeQPGKfdtbBgNHiWmgVaBgIChiCU1VZaU/TWoj6/xzr/qGZzcf1BjxGJN6d8EG xNzZehyph58d4kZLcJpokwlF+2Dc7Knk5F79yRthCe4YwT6IzDKSwWTSIBMb7Gyxk4iJ /+gWzEmVjtfxej9UFgXmOAcYEDMZACYLAMKJYnSsAAbg7hK3I2Q9cV10diqzwERN8fEg DAhw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1747183566; x=1747788366; h=content-transfer-encoding:mime-version:message-id:date:subject:to :from:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=+LWqf7IlnUrovMScui6yV5GPBY7fFiKy/qymXwl7VRA=; b=a8v9AYori/q61z837RhVPVENysvfTqX7fsSaxE92lsS5t43azmrmutPAbjp5iNcsG7 K4d9Fx8vGBnjLwot6r2zhN/Ycz1/8zS95tWzOvUBVSPyMsfMdLLk9EJ1haU816iNA7a0 oqPvwyr4fz133u3mIfz04RACG+JryvSVqYJajRetk9Ym//HOtmHO7WWxHA8ObKmvCa9n WkUZk5k8OjZvuryCuJrly9Ouf2sM7VvlC5K3Vj7yXlguyvyJQoiUyfR/gWp0Mn+z3kOg ZiIx3vBJUqiv3wgdbfeWCZiPI7Rk4MdlPD5985IhgzDU4B/5ApH0wfIbW33VtJYwnoEN eFXQ== X-Gm-Message-State: AOJu0Yxc0XPEIYEiGSPRqE6MuMvKu+tyWfPgzEXXhlTqGW9FuvMXWS8i EYQxLSzL/t4hyP/t6jmFsq1uYNMsYRIbOSaeXaarK9E0prxhYDY5Wxmdug== X-Gm-Gg: ASbGncvTU3qs9PHwTHmW48vLcvpY9IdwpqlSc6sw8CgVfNXBHh469aLbOWQ1rZCxrRT FW40RyRLI2DMQrNn+anFFhAWMipj9C2t/usYxbTPq8I+tXESR48w+33j/YW3GSUNdS0RSrn+Hmb 2Y2x9/WlnYYqCuz7Ybq7efJ+hzBoaMyb3qIPpZuuKD0oNnQgcfpf3m11/Ld4frEGrozrVIDRKTH +QPlSW+1S5uBP07BLDDQy0wz6G86AGD/cDMDw6XKXxDAV3001E3A9rh2mtszJ0isDhBU8bZfcBB PdxinKeN2Y/Y09oDCMb8DLeIZERzlNWdV0CfJDGbZRW0rYDJR2mI7+Tw57EEVjRzcwjMmNI= X-Google-Smtp-Source: AGHT+IFetVUhIa5M8dngil2LLmhtPGPt/dzZq9T2ARDb7jcn6CEe+KFez7Jku5v89+AZs+ObrVySVQ== X-Received: by 2002:a05:6122:3207:b0:523:6eef:af62 with SMTP id 71dfb90a1353d-52c88afe8b8mr4903412e0c.4.1747183555453; Tue, 13 May 2025 17:45:55 -0700 (PDT) Received: from localhost.localdomain ([2800:2121:b000:82e:5dfc:d18e:7e38:2cec]) by smtp.gmail.com with ESMTPSA id 71dfb90a1353d-52c538a71a8sm8524797e0c.47.2025.05.13.17.45.54 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 13 May 2025 17:45:55 -0700 (PDT) From: James Almer To: ffmpeg-devel@ffmpeg.org Date: Tue, 13 May 2025 21:45:40 -0300 Message-ID: <20250514004541.5072-1-jamrial@gmail.com> X-Mailer: git-send-email 2.49.0 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] avutil/x86/intmath: remove inline assembly for av_clip{f, d} X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: It's only supported by GCC, and everything inside the __asm__() block is invisible to the compiler's scheduler. Signed-off-by: James Almer --- libavutil/x86/intmath.h | 55 ++++++++--------------------------------- 1 file changed, 10 insertions(+), 45 deletions(-) diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h index 4893a1f1b4..e6d8de5f67 100644 --- a/libavutil/x86/intmath.h +++ b/libavutil/x86/intmath.h @@ -23,6 +23,7 @@ #include #include +#include "config.h" #if HAVE_FAST_CLZ #if defined(_MSC_VER) #include @@ -30,7 +31,9 @@ #include #endif #endif -#include "config.h" +#if HAVE_INTRINSICS_SSE2 && defined(__SSE2__) +#include +#endif #if HAVE_FAST_CLZ #if (defined(__INTEL_COMPILER) && (__INTEL_COMPILER>=1216)) || defined(_MSC_VER) @@ -114,7 +117,7 @@ static av_always_inline av_const unsigned av_zero_extend_bmi2(unsigned a, unsign #endif /* __BMI2__ */ -#if defined(__SSE2__) && !defined(__INTEL_COMPILER) +#if HAVE_INTRINSICS_SSE2 && defined(__SSE2__) #define av_clipd av_clipd_sse2 static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax) @@ -122,59 +125,21 @@ static av_always_inline av_const double av_clipd_sse2(double a, double amin, dou #if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 if (amin > amax) abort(); #endif - __asm__ ("maxsd %1, %0 \n\t" - "minsd %2, %0 \n\t" - : "+&x"(a) : "xm"(amin), "xm"(amax)); - return a; + __m128d _a = _mm_min_sd(_mm_max_sd(_mm_set_sd(a), _mm_set_sd(amin)), _mm_set_sd(amax)); + return _mm_cvtsd_f64(_a); } -#endif /* __SSE2__ */ - -#if defined(__SSE__) && !defined(__INTEL_COMPILER) - #define av_clipf av_clipf_sse static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax) { #if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 if (amin > amax) abort(); #endif - __asm__ ("maxss %1, %0 \n\t" - "minss %2, %0 \n\t" - : "+&x"(a) : "xm"(amin), "xm"(amax)); - return a; -} - -#endif /* __SSE__ */ - -#if defined(__AVX__) && !defined(__INTEL_COMPILER) - -#undef av_clipd -#define av_clipd av_clipd_avx -static av_always_inline av_const double av_clipd_avx(double a, double amin, double amax) -{ -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 - if (amin > amax) abort(); -#endif - __asm__ ("vmaxsd %1, %0, %0 \n\t" - "vminsd %2, %0, %0 \n\t" - : "+&x"(a) : "xm"(amin), "xm"(amax)); - return a; -} - -#undef av_clipf -#define av_clipf av_clipf_avx -static av_always_inline av_const float av_clipf_avx(float a, float amin, float amax) -{ -#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2 - if (amin > amax) abort(); -#endif - __asm__ ("vmaxss %1, %0, %0 \n\t" - "vminss %2, %0, %0 \n\t" - : "+&x"(a) : "xm"(amin), "xm"(amax)); - return a; + __m128 _a = _mm_min_ss(_mm_max_ss(_mm_set_ss(a), _mm_set_ss(amin)), _mm_set_ss(amax)); + return _mm_cvtss_f32(_a); } -#endif /* __AVX__ */ +#endif /* HAVE_INTRINSICS_SSE2 && defined(__SSE2__) */ #endif /* __GNUC__ */ -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".