[FFmpeg-devel] [PATCH] swresample: misc improvements

* [FFmpeg-devel] [PATCH] swresample: misc improvements
@ 2023-05-11 17:13 Paul B Mahol
  2023-05-12 23:36 ` Michael Niedermayer
  0 siblings, 1 reply; 4+ messages in thread
From: Paul B Mahol @ 2023-05-11 17:13 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 10 bytes --]

Attached.

[-- Attachment #2: 0002-swresample-add-paths-for-same-planar-packed-format-c.patch --]
[-- Type: text/x-patch, Size: 11364 bytes --]

From 23bde72fa9b15b16d0b3241652433967111229b5 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Thu, 11 May 2023 17:39:08 +0200
Subject: [PATCH 2/2] swresample: add paths for same planar/packed format
 conversion

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libswresample/audioconvert.c | 113 ++++++++++++++++++++++-------------
 libswresample/audioconvert.h |   2 +
 2 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
index 1d75ba1495..29af1ec119 100644
--- a/libswresample/audioconvert.c
+++ b/libswresample/audioconvert.c
@@ -33,64 +33,86 @@
 
 
 #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt
+#define CONVP_FUNC_NAME(dst_fmt, src_fmt) convp_ ## src_fmt ## _to_ ## dst_fmt
 
 //FIXME rounding ?
-#define CONV_FUNC(ofmt, otype, ifmt, expr)\
+#define CONV_FUNC(ofmt, otype, ifmt, itype, expr)\
+    /* Strided form: load one itype sample into x, store expr as otype, step pi/po by is/os bytes (4x unrolled + tail). */\
 static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end)\
 {\
     uint8_t *end2 = end - 3*os;\
     while(po < end2){\
+        itype x = *(const itype*)pi;\
         *(otype*)po = expr; pi += is; po += os;\
+        x = *(const itype*)pi;\
         *(otype*)po = expr; pi += is; po += os;\
+        x = *(const itype*)pi;\
         *(otype*)po = expr; pi += is; po += os;\
+        x = *(const itype*)pi;\
         *(otype*)po = expr; pi += is; po += os;\
     }\
     while(po < end){\
+        itype x = *(const itype*)pi;\
         *(otype*)po = expr; pi += is; po += os;\
     }\
+}\
+/* Contiguous form: convert len consecutive samples from ssrc into ddst using the same expr. */\
+static void CONVP_FUNC_NAME(ofmt, ifmt)(uint8_t *ddst, const uint8_t *ssrc, int len)\
+{\
+    const itype *src = (const itype *)ssrc;\
+    otype *dst = (otype *)ddst;\
+    for (int n = 0; n < len; n++){\
+        itype x = src[n];\
+        dst[n] = expr;\
+    }\
 }
 
 //FIXME put things below under ifdefs so we do not waste space for cases no codec will need
-CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const uint8_t*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<8)
-CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<24)
-CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , (uint64_t)((*(const uint8_t*)pi - 0x80U))<<56)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
-CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
-CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
-CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi * (1 << 16))
-CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, (uint64_t)(*(const int16_t*)pi)<<48)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0f/ (1<<15)))
-CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
-CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
-CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi>>16)
-CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, (uint64_t)(*(const int32_t*)pi)<<32)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0f/ (1U<<31)))
-CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
-CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, (*(const int64_t*)pi>>56) + 0x80)
-CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64,  *(const int64_t*)pi>>48)
-CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64,  *(const int64_t*)pi>>32)
-CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64,  *(const int64_t*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64,  *(const int64_t*)pi*(1.0f/ (UINT64_C(1)<<63)))
-CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64,  *(const int64_t*)pi*(1.0 / (UINT64_C(1)<<63)))
-CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
-CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
-CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
-CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float*)pi * (UINT64_C(1)<<63)))
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const float*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
-CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
-CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
-CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double*)pi * (UINT64_C(1)<<63)))
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const double*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
-
-#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = CONV_FUNC_NAME(out, in)
-
-static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , uint8_t, x)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80U)<<8)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80U)<<24)
+CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , uint8_t, (uint64_t)(x - 0x80U)<<56)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80)*(1.0f/ (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80)*(1.0 / (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, int16_t, (x>>8) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, int16_t, x)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, int16_t, x * (1 << 16))
+CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, int16_t, (uint64_t)(x)<<48)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16, int16_t, x*(1.0f/ (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, int16_t, x*(1.0 / (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, int32_t, (x>>24) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, int32_t, x>>16)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, int32_t, x)
+CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, int32_t, (uint64_t)(x)<<32)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32, int32_t,  x*(1.0f/ (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, int32_t,  x*(1.0 / (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, int64_t, (x>>56) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64, int64_t, x>>48)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64, int64_t, x>>32)
+CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64, int64_t, x)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S64, int64_t, x*(1.0f/ (UINT64_C(1)<<63)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64, int64_t, x*(1.0 / (UINT64_C(1)<<63)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(  lrintf(x * (1<<7)) + 0x80))
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(  lrintf(x * (1<<15))))
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(x * (1U<<31))))
+CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, float, llrintf(x * (UINT64_C(1)<<63)))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, float, x)
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, float, x)
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(  lrint(x * (1<<7)) + 0x80))
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(  lrint(x * (1<<15))))
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(x * (1U<<31))))
+CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, double, llrint(x * (UINT64_C(1)<<63)))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, double, x)
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, double, x)
+
+#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = &(conv_func_types){ CONV_FUNC_NAME(out, in), CONVP_FUNC_NAME(out, in) }
+
+typedef struct conv_func_types { // converter pair stored per (out, in) entry of fmt_pair_to_conv_functions
+    conv_func_type *conv_f;   ///< strided converter, signature (po, pi, is, os, end)
+    convp_func_type *convp_f; ///< contiguous converter, signature (dst, src, len)
+} conv_func_types;
+
+static conv_func_types * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
     FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_U8 ),
     FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8 ),
     FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8 ),
@@ -148,7 +170,8 @@ AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
                                        int flags)
 {
     AudioConvert *ctx;
-    conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)];
+    conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]->conv_f;
+    convp_func_type *fp = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]->convp_f;
 
     if (!f)
         return NULL;
@@ -163,6 +186,7 @@ AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
 
     ctx->channels = channels;
     ctx->conv_f   = f;
+    ctx->convp_f  = fp;
     ctx->ch_map   = ch_map;
     if (in_fmt == AV_SAMPLE_FMT_U8 || in_fmt == AV_SAMPLE_FMT_U8P)
         memset(ctx->silence, 0x80, sizeof(ctx->silence));
@@ -235,6 +259,11 @@ int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len
         }
         if(off == len)
             return 0;
+    } else if (out->planar == in->planar && !ctx->ch_map) {
+        int planes = out->planar ? out->ch_count : 1;
+        for (ch = 0; ch < planes; ch++)
+            ctx->convp_f(out->ch[ch], in->ch[ch], len * (out->planar ? 1 : out->ch_count));
+        return 0;
     }
 
     for(ch=0; ch<ctx->channels; ch++){
diff --git a/libswresample/audioconvert.h b/libswresample/audioconvert.h
index bb143a876d..ac0d5b3ada 100644
--- a/libswresample/audioconvert.h
+++ b/libswresample/audioconvert.h
@@ -33,6 +33,7 @@
 
 
 typedef void (conv_func_type)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end);
+typedef void (convp_func_type)(uint8_t *dst, const uint8_t *src, int len);
 typedef void (simd_func_type)(uint8_t **dst, const uint8_t **src, int len);
 
 typedef struct AudioConvert {
@@ -40,6 +41,7 @@ typedef struct AudioConvert {
     int  in_simd_align_mask;
     int out_simd_align_mask;
     conv_func_type *conv_f;
+    convp_func_type *convp_f;
     simd_func_type *simd_f;
     const int *ch_map;
     uint8_t silence[8]; ///< silence input sample
-- 
2.39.1


[-- Attachment #3: 0001-swresample-x86-add-float-double-paths.patch --]
[-- Type: text/x-patch, Size: 3795 bytes --]

From 5a8ab5b948423e6cde7b59df0d21f38dc0235155 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Thu, 11 May 2023 01:11:42 +0200
Subject: [PATCH 1/2] swresample/x86: add float<->double paths

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libswresample/x86/audio_convert.asm    | 25 +++++++++++++++++++++++++
 libswresample/x86/audio_convert_init.c |  8 ++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index ad65008e23..82eda3758e 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -540,6 +540,26 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX:
     punpckhwd m1, m4
 %endmacro
 
+%macro FLOAT_TO_DOUBLE_N 6 ; float -> double widening step; declared with 6 args, only the first four are referenced
+    shufps    %3, %1, %1, q3232 ; copy elements 2,3 of arg 1 into the low half of arg 3
+    shufps    %4, %2, %2, q3232 ; copy elements 2,3 of arg 2 into the low half of arg 4
+    cvtps2pd  %1, %1 ; convert the low two floats of each register to doubles
+    cvtps2pd  %2, %2
+    cvtps2pd  %3, %3
+    cvtps2pd  %4, %4
+    SWAP 1,2 ; NOTE(review): presumably renames m1/m2 so the results sit in consecutive registers for the caller's stores -- confirm against the CONV macro
+%endmacro
+
+%macro DOUBLE_TO_FLOAT_N 6 ; double -> float narrowing step; declared with 6 args, only the first four are referenced
+    cvtpd2ps  %1, %1 ; convert packed doubles to floats, results land in the low half of the register
+    cvtpd2ps  %2, %2
+    cvtpd2ps  %3, %3
+    cvtpd2ps  %4, %4
+    shufps    %1, %2, q1010 ; arg 1 = its own low two floats followed by the low two floats of arg 2
+    shufps    %3, %4, q1010 ; arg 3 = its own low two floats followed by the low two floats of arg 4
+    SWAP 1,2 ; NOTE(review): presumably renames m1/m2 so the two merged results are adjacent for the caller's stores -- confirm against the CONV macro
+%endmacro
+
 %macro INT32_TO_INT16_N 6
     psrad     m0, 16
     psrad     m1, 16
@@ -648,6 +668,11 @@ CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
 CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 
+CONV double, float, u, 3, 2, FLOAT_TO_DOUBLE_N, NOP_N
+CONV double, float, a, 3, 2, FLOAT_TO_DOUBLE_N, NOP_N
+CONV float, double, u, 2, 3, DOUBLE_TO_FLOAT_N, NOP_N
+CONV float, double, a, 2, 3, DOUBLE_TO_FLOAT_N, NOP_N
+
 PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
 PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index f6d36f9ca6..e10b978c68 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -24,8 +24,8 @@
 #include "libswresample/audioconvert.h"
 
 #define PROTO(pre, in, out, cap) void ff ## pre ## in## _to_ ##out## _a_ ##cap(uint8_t **dst, const uint8_t **src, int len);
-#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap)
-#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap)
+#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap) PROTO(pre, double, out,cap)
+#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap) PROTO2(pre, double, cap)
 #define PROTO4(pre) PROTO3(pre, sse) PROTO3(pre, sse2) PROTO3(pre, ssse3) PROTO3(pre, sse4) PROTO3(pre, avx) PROTO3(pre, avx2)
 PROTO4(_)
 PROTO4(_pack_2ch_)
@@ -72,6 +72,10 @@ MULTI_CAPS_FUNC(SSE2, sse2)
             ac->simd_f =  ff_float_to_int32_a_sse2;
         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
             ac->simd_f =  ff_float_to_int16_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_DBL  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_DBLP && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_double_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_DBL || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_DBLP)
+            ac->simd_f =  ff_double_to_float_a_sse2;
 
         if(channels == 2) {
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
-- 
2.39.1


[-- Attachment #4: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread