From: James Almer <jamrial@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH v2 09/11] avutil/half2float: use native _Float16 if available
Date: Wed, 10 Aug 2022 21:14:56 -0300
Message-ID: <314f36cb-5fe1-551b-81bc-b3b902dd6c79@gmail.com> (raw)
In-Reply-To: <20220810225154.8435-1-timo@rothenpieler.org>
On 8/10/2022 7:51 PM, Timo Rothenpieler wrote:
> _Float16 support has been available on arm/aarch64 for a while, and with gcc
> 12 it was enabled on x86 as long as SSE2 is supported.
>
> If the target arch supports f16c, gcc emits fairly efficient assembly,
> taking advantage of it. This is the case on x86-64-v3 or higher.
> The same goes for arm, which has native float16 support.
> On x86 without f16c, gcc emulates it in software using SSE2 instructions.
>
> This has been shown to perform rather poorly:
>
> _Float16 full SSE2 emulation:
> frame=50074 fps=848 q=-0.0 size=N/A time=00:33:22.96 bitrate=N/A speed=33.9x
>
> _Float16 f16c accelerated (Zen2, --cpu=znver2):
> frame=50636 fps=1965 q=-0.0 Lsize=N/A time=00:33:45.40 bitrate=N/A speed=78.6x
>
> classic half2float full software implementation:
> frame=49926 fps=1605 q=-0.0 Lsize=N/A time=00:33:17.00 bitrate=N/A speed=64.2x
>
> Hence an additional check was introduced, that only enables use of
> _Float16 on x86 if f16c is being utilized.
>
> On aarch64, a similar uplift in performance is seen:
>
> RPi4 half2float full software implementation:
> frame= 6088 fps=126 q=-0.0 Lsize=N/A time=00:04:03.48 bitrate=N/A speed=5.06x
>
> RPi4 _Float16:
> frame= 6103 fps=158 q=-0.0 Lsize=N/A time=00:04:04.08 bitrate=N/A speed=6.32x
>
> Since arm/aarch64 always natively support 16 bit floats, it can always
> be considered fast there.
>
> I'm not aware of any additional platforms that currently support
> _Float16. And if there are, they should be considered non-fast until
> proven fast.
> ---
> configure | 13 +++++++++++++
> libavutil/float2half.c | 2 ++
> libavutil/float2half.h | 16 ++++++++++++++++
> libavutil/half2float.c | 4 ++++
> libavutil/half2float.h | 16 ++++++++++++++++
> 5 files changed, 51 insertions(+)
>
> diff --git a/configure b/configure
> index 6761d0cb32..6ede9a5a8f 100755
> --- a/configure
> +++ b/configure
> @@ -2143,6 +2143,8 @@ ARCH_FEATURES="
> fast_64bit
> fast_clz
> fast_cmov
> + fast_float16
> + float16
If HAVE_FLOAT16 is not going to be used, then don't export it here.
Leave it as a configure internal variable.
> local_aligned
> simd_align_16
> simd_align_32
> @@ -5125,6 +5127,8 @@ elif enabled arm; then
> ;;
> esac
>
> + test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee
> +
> elif enabled avr32; then
>
> case $cpu in
> @@ -6229,6 +6233,15 @@ check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync
> check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
> check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)"
>
> +check_builtin float16 "" "_Float16 f16var"
> +if enabled float16; then
> + if enabled x86; then
> + test_cpp_condition stddef.h "defined(__F16C__)" && enable fast_float16
> + elif enabled arm || enabled aarch64; then
> + enable fast_float16
> + fi
> +fi
> +
> case "$custom_allocator" in
> jemalloc)
> # jemalloc by default does not use a prefix
> diff --git a/libavutil/float2half.c b/libavutil/float2half.c
> index dba14cef5d..7002612194 100644
> --- a/libavutil/float2half.c
> +++ b/libavutil/float2half.c
> @@ -20,6 +20,7 @@
>
> void ff_init_float2half_tables(float2half_tables *t)
> {
> +#if !HAVE_FAST_FLOAT16
> for (int i = 0; i < 256; i++) {
> int e = i - 127;
>
> @@ -50,4 +51,5 @@ void ff_init_float2half_tables(float2half_tables *t)
> t->shifttable[i|0x100] = 13;
> }
> }
> +#endif
> }
> diff --git a/libavutil/float2half.h b/libavutil/float2half.h
> index b8c9cdfc4f..437666966b 100644
> --- a/libavutil/float2half.h
> +++ b/libavutil/float2half.h
> @@ -20,21 +20,37 @@
> #define AVUTIL_FLOAT2HALF_H
>
> #include <stdint.h>
> +#include "intfloat.h"
> +
> +#include "config.h"
>
> typedef struct float2half_tables {
> +#if HAVE_FAST_FLOAT16
> + uint8_t dummy;
> +#else
> uint16_t basetable[512];
> uint8_t shifttable[512];
> +#endif
> } float2half_tables;
>
> void ff_init_float2half_tables(float2half_tables *t);
>
> static inline uint16_t float2half(uint32_t f, const float2half_tables *t)
> {
> +#if HAVE_FAST_FLOAT16
> + union {
> + _Float16 f;
> + uint16_t i;
> + } u;
> + u.f = av_int2float(f);
> + return u.i;
> +#else
> uint16_t h;
>
> h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]);
>
> return h;
> +#endif
> }
>
> #endif /* AVUTIL_FLOAT2HALF_H */
> diff --git a/libavutil/half2float.c b/libavutil/half2float.c
> index baac8e4093..ff198a8187 100644
> --- a/libavutil/half2float.c
> +++ b/libavutil/half2float.c
> @@ -18,6 +18,7 @@
>
> #include "libavutil/half2float.h"
>
> +#if !HAVE_FAST_FLOAT16
> static uint32_t convertmantissa(uint32_t i)
> {
> int32_t m = i << 13; // Zero pad mantissa bits
> @@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i)
>
> return m | e; // Return combined number
> }
> +#endif
>
> void ff_init_half2float_tables(half2float_tables *t)
> {
> +#if !HAVE_FAST_FLOAT16
> t->mantissatable[0] = 0;
> for (int i = 1; i < 1024; i++)
> t->mantissatable[i] = convertmantissa(i);
> @@ -60,4 +63,5 @@ void ff_init_half2float_tables(half2float_tables *t)
> t->offsettable[31] = 2048;
> t->offsettable[32] = 0;
> t->offsettable[63] = 2048;
> +#endif
> }
> diff --git a/libavutil/half2float.h b/libavutil/half2float.h
> index cb58e44a1c..57ee8372fe 100644
> --- a/libavutil/half2float.h
> +++ b/libavutil/half2float.h
> @@ -20,22 +20,38 @@
> #define AVUTIL_HALF2FLOAT_H
>
> #include <stdint.h>
> +#include "intfloat.h"
> +
> +#include "config.h"
>
> typedef struct half2float_tables {
> +#if HAVE_FAST_FLOAT16
> + uint8_t dummy;
> +#else
> uint32_t mantissatable[3072];
> uint32_t exponenttable[64];
> uint16_t offsettable[64];
> +#endif
> } half2float_tables;
>
> void ff_init_half2float_tables(half2float_tables *t);
>
> static inline uint32_t half2float(uint16_t h, const half2float_tables *t)
> {
> +#if HAVE_FAST_FLOAT16
> + union {
> + _Float16 f;
> + uint16_t i;
> + } u;
> + u.i = h;
> + return av_float2int(u.f);
> +#else
> uint32_t f;
>
> f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10];
>
> return f;
> +#endif
> }
>
> #endif /* AVUTIL_HALF2FLOAT_H */
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-08-11 0:15 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-10 20:47 [FFmpeg-devel] [PATCH 01/11] lavu/pixfmt: add packed RGBA float16 format Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 02/11] avutil/hwcontext_d3d11va: add support for rgbaf16 pixel format Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 03/11] avfilter/vsrc_ddagrab: add rgbaf16 output support Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 04/11] avfilter/vsrc_ddagrab: add options for more control over output format fallback Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 05/11] avutil: move half-precision float helper to avutil Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN Timo Rothenpieler
2022-08-10 21:24 ` Andreas Rheinhardt
2022-08-10 21:36 ` Timo Rothenpieler
2022-08-10 21:43 ` Andreas Rheinhardt
2022-08-10 21:53 ` Timo Rothenpieler
2022-08-10 22:14 ` Mark Reid
2022-08-10 22:18 ` James Almer
2022-08-10 22:28 ` Timo Rothenpieler
2022-08-10 22:37 ` Mark Reid
2022-08-10 22:55 ` Timo Rothenpieler
2022-08-11 2:18 ` Mark Reid
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 07/11] avutil/half2float: move tables to header-internal structs Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 08/11] avutil/half2float: move non-inline init code out of header Timo Rothenpieler
2022-08-11 20:46 ` Michael Niedermayer
2022-08-11 20:50 ` Andreas Rheinhardt
2022-08-11 21:16 ` Michael Niedermayer
2022-08-11 21:31 ` Andreas Rheinhardt
2022-08-14 19:32 ` Michael Niedermayer
2022-08-15 4:20 ` Andreas Rheinhardt
2022-08-15 18:09 ` Michael Niedermayer
2022-08-14 21:54 ` Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available Timo Rothenpieler
2022-08-10 21:03 ` Andreas Rheinhardt
2022-08-10 21:58 ` Timo Rothenpieler
2022-08-10 22:02 ` James Almer
2022-08-10 22:51 ` [FFmpeg-devel] [PATCH v2 " Timo Rothenpieler
2022-08-11 0:14 ` James Almer [this message]
2022-08-11 11:50 ` Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions Timo Rothenpieler
2022-08-10 20:52 ` Timo Rothenpieler
2022-08-10 21:55 ` Andreas Rheinhardt
2022-08-10 22:02 ` Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 11/11] swscale/input: add rgbaf16 input support Timo Rothenpieler
2022-08-10 21:37 ` Timo Rothenpieler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=314f36cb-5fe1-551b-81bc-b3b902dd6c79@gmail.com \
--to=jamrial@gmail.com \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git