From 5a8ab5b948423e6cde7b59df0d21f38dc0235155 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Thu, 11 May 2023 01:11:42 +0200
Subject: [PATCH 1/2] swresample/x86: add float<->double paths

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libswresample/x86/audio_convert.asm    | 25 +++++++++++++++++++++++++
 libswresample/x86/audio_convert_init.c |  8 ++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index ad65008e23..82eda3758e 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -540,6 +540,26 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX:
     punpckhwd m1, m4
 %endmacro
 
+%macro FLOAT_TO_DOUBLE_N 6
+    shufps    %3, %1, %1, q3232
+    shufps    %4, %2, %2, q3232
+    cvtps2pd  %1, %1
+    cvtps2pd  %2, %2
+    cvtps2pd  %3, %3
+    cvtps2pd  %4, %4
+    SWAP 1,2
+%endmacro
+
+%macro DOUBLE_TO_FLOAT_N 6
+    cvtpd2ps  %1, %1
+    cvtpd2ps  %2, %2
+    cvtpd2ps  %3, %3
+    cvtpd2ps  %4, %4
+    shufps    %1, %2, q1010
+    shufps    %3, %4, q1010
+    SWAP 1,2
+%endmacro
+
 %macro INT32_TO_INT16_N 6
     psrad     m0, 16
     psrad     m1, 16
@@ -648,6 +668,11 @@ CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
 CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 
+CONV double, float, u, 3, 2, FLOAT_TO_DOUBLE_N, NOP_N
+CONV double, float, a, 3, 2, FLOAT_TO_DOUBLE_N, NOP_N
+CONV float, double, u, 2, 3, DOUBLE_TO_FLOAT_N, NOP_N
+CONV float, double, a, 2, 3, DOUBLE_TO_FLOAT_N, NOP_N
+
 PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index f6d36f9ca6..e10b978c68 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -24,8 +24,8 @@
 #include "libswresample/audioconvert.h"
 
 #define PROTO(pre, in, out, cap) void ff ## pre ## in## _to_ ##out## _a_ ##cap(uint8_t **dst, const uint8_t **src, int len);
-#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap)
-#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap)
+#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap) PROTO(pre, double, out,cap)
+#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap) PROTO2(pre, double, cap)
 #define PROTO4(pre) PROTO3(pre, sse) PROTO3(pre, sse2) PROTO3(pre, ssse3) PROTO3(pre, sse4) PROTO3(pre, avx) PROTO3(pre, avx2)
 PROTO4(_)
 PROTO4(_pack_2ch_)
@@ -72,6 +72,10 @@ MULTI_CAPS_FUNC(SSE2, sse2)
             ac->simd_f =  ff_float_to_int32_a_sse2;
         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
             ac->simd_f =  ff_float_to_int16_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_DBL  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_DBLP && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_double_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_DBL || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_DBLP)
+            ac->simd_f =  ff_double_to_float_a_sse2;
 
         if(channels == 2) {
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
-- 
2.39.1