From 23bde72fa9b15b16d0b3241652433967111229b5 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 11 May 2023 17:39:08 +0200 Subject: [PATCH 2/2] swresample: add paths for same planar/packed format conversion Signed-off-by: Paul B Mahol --- libswresample/audioconvert.c | 113 ++++++++++++++++++++++------------- libswresample/audioconvert.h | 2 + 2 files changed, 73 insertions(+), 42 deletions(-) diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c index 1d75ba1495..29af1ec119 100644 --- a/libswresample/audioconvert.c +++ b/libswresample/audioconvert.c @@ -33,64 +33,86 @@ #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt +#define CONVP_FUNC_NAME(dst_fmt, src_fmt) convp_ ## src_fmt ## _to_ ## dst_fmt //FIXME rounding ? -#define CONV_FUNC(ofmt, otype, ifmt, expr)\ +#define CONV_FUNC(ofmt, otype, ifmt, itype, expr)\ + \ static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end)\ {\ uint8_t *end2 = end - 3*os;\ while(po < end2){\ + itype x = *(itype*)pi;\ *(otype*)po = expr; pi += is; po += os;\ + x = *(itype*)pi;\ *(otype*)po = expr; pi += is; po += os;\ + x = *(itype*)pi;\ *(otype*)po = expr; pi += is; po += os;\ + x = *(itype*)pi;\ *(otype*)po = expr; pi += is; po += os;\ }\ while(po < end){\ + itype x = *(itype*)pi;\ *(otype*)po = expr; pi += is; po += os;\ }\ +}\ +\ +static void CONVP_FUNC_NAME(ofmt, ifmt)(uint8_t *ddst, const uint8_t *ssrc, int len)\ +{\ + const itype *src = (const itype *)ssrc;\ + otype *dst = (otype *)ddst;\ + for (int n = 0; n < len; n++){\ + itype x = src[n];\ + dst[n] = expr;\ + }\ } //FIXME put things below under ifdefs so we do not waste space for cases no codec will need -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , *(const uint8_t*)pi) -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<8) -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<24) -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , (uint64_t)((*(const uint8_t*)pi - 0x80U))<<56) -CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0f/ (1<<7))) -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7))) -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80) -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi) -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi * (1 << 16)) -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, (uint64_t)(*(const int16_t*)pi)<<48) -CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1.0f/ (1<<15))) -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1.0 / (1<<15))) -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80) -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, *(const int32_t*)pi>>16) -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, *(const int32_t*)pi) -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, (uint64_t)(*(const int32_t*)pi)<<32) -CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S32, *(const int32_t*)pi*(1.0f/ (1U<<31))) -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, *(const int32_t*)pi*(1.0 / (1U<<31))) -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, (*(const int64_t*)pi>>56) + 0x80) -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64, *(const int64_t*)pi>>48) -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64, *(const int64_t*)pi>>32) -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64, *(const int64_t*)pi) -CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S64, *(const int64_t*)pi*(1.0f/ (UINT64_C(1)<<63))) -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64, *(const int64_t*)pi*(1.0 / (UINT64_C(1)<<63))) -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8( lrintf(*(const float*)pi * (1<<7)) + 0x80)) -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16( lrintf(*(const float*)pi * (1<<15)))) -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31)))) -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float*)pi * (UINT64_C(1)<<63))) -CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_FLT, *(const float*)pi) -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi) -CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8( lrint(*(const double*)pi * (1<<7)) + 0x80)) -CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16( lrint(*(const double*)pi * (1<<15)))) -CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31)))) -CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double*)pi * (UINT64_C(1)<<63))) -CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_DBL, *(const double*)pi) -CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi) - -#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = CONV_FUNC_NAME(out, in) - -static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = { +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , uint8_t, x) +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80U)<<8) +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80U)<<24) +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8 , uint8_t, (uint64_t)(x - 0x80U)<<56) +CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80)*(1.0f/ (1<<7))) +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , uint8_t, (x - 0x80)*(1.0 / (1<<7))) +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, int16_t, (x>>8) + 0x80) +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, int16_t, x) +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, int16_t, x * (1 << 16)) +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16, int16_t, (uint64_t)(x)<<48) +CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S16, int16_t, x*(1.0f/ (1<<15))) +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, int16_t, x*(1.0 / (1<<15))) +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, int32_t, (x>>24) + 0x80) +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, int32_t, x>>16) +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, int32_t, x) +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32, int32_t, (uint64_t)(x)<<32) +CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S32, int32_t, x*(1.0f/ (1U<<31))) +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, int32_t, x*(1.0 / (1U<<31))) +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S64, int64_t, (x>>56) + 0x80) +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S64, int64_t, x>>48) +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S64, int64_t, x>>32) +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S64, int64_t, x) +CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S64, int64_t, x*(1.0f/ (UINT64_C(1)<<63))) +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S64, int64_t, x*(1.0 / (UINT64_C(1)<<63))) +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8( lrintf(x * (1<<7)) + 0x80)) +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16( lrintf(x * (1<<15)))) +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(x * (1U<<31)))) +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, float, llrintf(x * (UINT64_C(1)<<63))) +CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_FLT, float, x) +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, float, x) +CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8( lrint(x * (1<<7)) + 0x80)) +CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16( lrint(x * (1<<15)))) +CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(x * (1U<<31)))) +CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, double, llrint(x * (UINT64_C(1)<<63))) +CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_DBL, double, x) +CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, double, x) + +#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = &(conv_func_types){ CONV_FUNC_NAME(out, in), CONVP_FUNC_NAME(out, in) } + +typedef struct conv_func_types { + conv_func_type *conv_f; + convp_func_type *convp_f; +} conv_func_types; + +static conv_func_types * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = { FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_U8 ), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8 ), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8 ), @@ -148,7 +170,8 @@ AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, int flags) { AudioConvert *ctx; - conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]; + conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]->conv_f; + convp_func_type *fp = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]->convp_f; if (!f) return NULL; @@ -163,6 +186,7 @@ AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, ctx->channels = channels; ctx->conv_f = f; + ctx->convp_f = fp; ctx->ch_map = ch_map; if (in_fmt == AV_SAMPLE_FMT_U8 || in_fmt == AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence)); @@ -235,6 +259,11 @@ int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len } if(off == len) return 0; + } else if (out->planar == in->planar && !ctx->ch_map) { + int planes = out->planar ? out->ch_count : 1; + for (ch = 0; ch < planes; ch++) + ctx->convp_f(out->ch[ch], in->ch[ch], len * (out->planar ? 1 : out->ch_count)); + return 0; } for(ch=0; chchannels; ch++){ diff --git a/libswresample/audioconvert.h b/libswresample/audioconvert.h index bb143a876d..ac0d5b3ada 100644 --- a/libswresample/audioconvert.h +++ b/libswresample/audioconvert.h @@ -33,6 +33,7 @@ typedef void (conv_func_type)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end); +typedef void (convp_func_type)(uint8_t *dst, const uint8_t *src, int len); typedef void (simd_func_type)(uint8_t **dst, const uint8_t **src, int len); typedef struct AudioConvert { @@ -40,6 +41,7 @@ typedef struct AudioConvert { int in_simd_align_mask; int out_simd_align_mask; conv_func_type *conv_f; + convp_func_type *convp_f; simd_func_type *simd_f; const int *ch_map; uint8_t silence[8]; ///< silence input sample -- 2.39.1