From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> To: ffmpeg-devel@ffmpeg.org Subject: Re: [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available Date: Wed, 10 Aug 2022 23:03:45 +0200 Message-ID: <DB6PR0101MB221435143D86ABEC32A3DA438F659@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com> (raw) In-Reply-To: <20220810204712.3123-9-timo@rothenpieler.org> Timo Rothenpieler: > _Float16 support was available on arm/aarch64 for a while, and with gcc > 12 was enabled on x86 as long as SSE2 is supported. > > If the target arch supports f16c, gcc emits fairly efficient assembly, > taking advantage of it. This is the case on x86-64-v3 or higher. > Without f16c, it emulates it in software using sse2 instructions. How is the performance of this emulation compared to our current code? And how is the native _Float16 performance compared to the current code? > --- > configure | 4 ++++ > libavutil/float2half.c | 2 ++ > libavutil/float2half.h | 16 ++++++++++++++++ > libavutil/half2float.c | 4 ++++ > libavutil/half2float.h | 16 ++++++++++++++++ > 5 files changed, 42 insertions(+) > > diff --git a/configure b/configure > index 6761d0cb32..2536ae012d 100755 > --- a/configure > +++ b/configure > @@ -2143,6 +2143,7 @@ ARCH_FEATURES=" > fast_64bit > fast_clz > fast_cmov > + float16 > local_aligned > simd_align_16 > simd_align_32 > @@ -5125,6 +5126,8 @@ elif enabled arm; then > ;; > esac > > + test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee > + > elif enabled avr32; then > > case $cpu in > @@ -6228,6 +6231,7 @@ check_builtin MemoryBarrier windows.h "MemoryBarrier()" > check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync_val_compare_and_swap(ptr, oldval, newval)" > check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)" > check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)" > +check_builtin float16 "" "_Float16 f16var" > > case "$custom_allocator" in > 
jemalloc) > diff --git a/libavutil/float2half.c b/libavutil/float2half.c > index dba14cef5d..1390d3acc0 100644 > --- a/libavutil/float2half.c > +++ b/libavutil/float2half.c > @@ -20,6 +20,7 @@ > > void ff_init_float2half_tables(float2half_tables *t) > { > +#if !HAVE_FLOAT16 > for (int i = 0; i < 256; i++) { > int e = i - 127; > > @@ -50,4 +51,5 @@ void ff_init_float2half_tables(float2half_tables *t) > t->shifttable[i|0x100] = 13; > } > } > +#endif > } > diff --git a/libavutil/float2half.h b/libavutil/float2half.h > index b8c9cdfc4f..8c1fb804b7 100644 > --- a/libavutil/float2half.h > +++ b/libavutil/float2half.h > @@ -20,21 +20,37 @@ > #define AVUTIL_FLOAT2HALF_H > > #include <stdint.h> > +#include "intfloat.h" > + > +#include "config.h" > > typedef struct float2half_tables { > +#if HAVE_FLOAT16 > + uint8_t dummy; > +#else > uint16_t basetable[512]; > uint8_t shifttable[512]; > +#endif > } float2half_tables; > > void ff_init_float2half_tables(float2half_tables *t); > > static inline uint16_t float2half(uint32_t f, const float2half_tables *t) > { > +#if HAVE_FLOAT16 > + union { > + _Float16 f; > + uint16_t i; > + } u; > + u.f = av_int2float(f); > + return u.i; > +#else > uint16_t h; > > h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]); > > return h; > +#endif > } > > #endif /* AVUTIL_FLOAT2HALF_H */ > diff --git a/libavutil/half2float.c b/libavutil/half2float.c > index baac8e4093..873226d3a0 100644 > --- a/libavutil/half2float.c > +++ b/libavutil/half2float.c > @@ -18,6 +18,7 @@ > > #include "libavutil/half2float.h" > > +#if !HAVE_FLOAT16 > static uint32_t convertmantissa(uint32_t i) > { > int32_t m = i << 13; // Zero pad mantissa bits > @@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i) > > return m | e; // Return combined number > } > +#endif > > void ff_init_half2float_tables(half2float_tables *t) > { > +#if !HAVE_FLOAT16 > t->mantissatable[0] = 0; > for (int i = 1; i < 1024; i++) > t->mantissatable[i] = 
convertmantissa(i); > @@ -60,4 +63,5 @@ void ff_init_half2float_tables(half2float_tables *t) > t->offsettable[31] = 2048; > t->offsettable[32] = 0; > t->offsettable[63] = 2048; > +#endif > } > diff --git a/libavutil/half2float.h b/libavutil/half2float.h > index cb58e44a1c..b2a7c934a6 100644 > --- a/libavutil/half2float.h > +++ b/libavutil/half2float.h > @@ -20,22 +20,38 @@ > #define AVUTIL_HALF2FLOAT_H > > #include <stdint.h> > +#include "intfloat.h" > + > +#include "config.h" > > typedef struct half2float_tables { > +#if HAVE_FLOAT16 > + uint8_t dummy; > +#else > uint32_t mantissatable[3072]; > uint32_t exponenttable[64]; > uint16_t offsettable[64]; > +#endif > } half2float_tables; > > void ff_init_half2float_tables(half2float_tables *t); > > static inline uint32_t half2float(uint16_t h, const half2float_tables *t) > { > +#if HAVE_FLOAT16 > + union { > + _Float16 f; > + uint16_t i; > + } u; > + u.i = h; > + return av_float2int(u.f); > +#else > uint32_t f; > > f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10]; > > return f; > +#endif > } > > #endif /* AVUTIL_HALF2FLOAT_H */ _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-08-10 21:04 UTC|newest] Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-08-10 20:47 [FFmpeg-devel] [PATCH 01/11] lavu/pixfmt: add packed RGBA float16 format Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 02/11] avutil/hwcontext_d3d11va: add support for rgbaf16 pixel format Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 03/11] avfilter/vsrc_ddagrab: add rgbaf16 output support Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 04/11] avfilter/vsrc_ddagrab: add options for more control over output format fallback Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 05/11] avutil: move half-precision float helper to avutil Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN Timo Rothenpieler 2022-08-10 21:24 ` Andreas Rheinhardt 2022-08-10 21:36 ` Timo Rothenpieler 2022-08-10 21:43 ` Andreas Rheinhardt 2022-08-10 21:53 ` Timo Rothenpieler 2022-08-10 22:14 ` Mark Reid 2022-08-10 22:18 ` James Almer 2022-08-10 22:28 ` Timo Rothenpieler 2022-08-10 22:37 ` Mark Reid 2022-08-10 22:55 ` Timo Rothenpieler 2022-08-11 2:18 ` Mark Reid 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 07/11] avutil/half2float: move tables to header-internal structs Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 08/11] avutil/half2float: move non-inline init code out of header Timo Rothenpieler 2022-08-11 20:46 ` Michael Niedermayer 2022-08-11 20:50 ` Andreas Rheinhardt 2022-08-11 21:16 ` Michael Niedermayer 2022-08-11 21:31 ` Andreas Rheinhardt 2022-08-14 19:32 ` Michael Niedermayer 2022-08-15 4:20 ` Andreas Rheinhardt 2022-08-15 18:09 ` Michael Niedermayer 2022-08-14 21:54 ` Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available Timo Rothenpieler 2022-08-10 21:03 ` Andreas Rheinhardt [this message] 2022-08-10 21:58 ` Timo Rothenpieler 2022-08-10 22:02 ` James 
Almer 2022-08-10 22:51 ` [FFmpeg-devel] [PATCH v2 " Timo Rothenpieler 2022-08-11 0:14 ` James Almer 2022-08-11 11:50 ` Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions Timo Rothenpieler 2022-08-10 20:52 ` Timo Rothenpieler 2022-08-10 21:55 ` Andreas Rheinhardt 2022-08-10 22:02 ` Timo Rothenpieler 2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 11/11] swscale/input: add rgbaf16 input support Timo Rothenpieler 2022-08-10 21:37 ` Timo Rothenpieler
Reply instructions:

You may reply publicly to this message via plain-text email using any one of the following methods:

* Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1):

  git send-email \
    --in-reply-to=DB6PR0101MB221435143D86ABEC32A3DA438F659@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com \
    --to=andreas.rheinhardt@outlook.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

  git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

  # If you have public-inbox 1.1+ installed, you may
  # initialize and index your mirror using the following commands:
  public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
    ffmpegdev@gitmailbox.com
  public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git