Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available
Date: Wed, 10 Aug 2022 23:03:45 +0200
Message-ID: <DB6PR0101MB221435143D86ABEC32A3DA438F659@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com> (raw)
In-Reply-To: <20220810204712.3123-9-timo@rothenpieler.org>

Timo Rothenpieler:
> _Float16 support has been available on arm/aarch64 for a while, and with
> gcc 12 it was enabled on x86 as long as SSE2 is supported.
> 
> If the target arch supports f16c, gcc emits fairly efficient assembly,
> taking advantage of it. This is the case on x86-64-v3 or higher.
> Without f16c, it emulates it in software using sse2 instructions.

How does the performance of this emulation compare to our current code?
And how does the native _Float16 performance compare to the current code?

> ---
>  configure              |  4 ++++
>  libavutil/float2half.c |  2 ++
>  libavutil/float2half.h | 16 ++++++++++++++++
>  libavutil/half2float.c |  4 ++++
>  libavutil/half2float.h | 16 ++++++++++++++++
>  5 files changed, 42 insertions(+)
> 
> diff --git a/configure b/configure
> index 6761d0cb32..2536ae012d 100755
> --- a/configure
> +++ b/configure
> @@ -2143,6 +2143,7 @@ ARCH_FEATURES="
>      fast_64bit
>      fast_clz
>      fast_cmov
> +    float16
>      local_aligned
>      simd_align_16
>      simd_align_32
> @@ -5125,6 +5126,8 @@ elif enabled arm; then
>              ;;
>      esac
>  
> +    test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee
> +
>  elif enabled avr32; then
>  
>      case $cpu in
> @@ -6228,6 +6231,7 @@ check_builtin MemoryBarrier windows.h "MemoryBarrier()"
>  check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync_val_compare_and_swap(ptr, oldval, newval)"
>  check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
>  check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)"
> +check_builtin float16 "" "_Float16 f16var"
>  
>  case "$custom_allocator" in
>      jemalloc)
> diff --git a/libavutil/float2half.c b/libavutil/float2half.c
> index dba14cef5d..1390d3acc0 100644
> --- a/libavutil/float2half.c
> +++ b/libavutil/float2half.c
> @@ -20,6 +20,7 @@
>  
>  void ff_init_float2half_tables(float2half_tables *t)
>  {
> +#if !HAVE_FLOAT16
>      for (int i = 0; i < 256; i++) {
>          int e = i - 127;
>  
> @@ -50,4 +51,5 @@ void ff_init_float2half_tables(float2half_tables *t)
>              t->shifttable[i|0x100] = 13;
>          }
>      }
> +#endif
>  }
> diff --git a/libavutil/float2half.h b/libavutil/float2half.h
> index b8c9cdfc4f..8c1fb804b7 100644
> --- a/libavutil/float2half.h
> +++ b/libavutil/float2half.h
> @@ -20,21 +20,37 @@
>  #define AVUTIL_FLOAT2HALF_H
>  
>  #include <stdint.h>
> +#include "intfloat.h"
> +
> +#include "config.h"
>  
>  typedef struct float2half_tables {
> +#if HAVE_FLOAT16
> +    uint8_t dummy;
> +#else
>      uint16_t basetable[512];
>      uint8_t shifttable[512];
> +#endif
>  } float2half_tables;
>  
>  void ff_init_float2half_tables(float2half_tables *t);
>  
>  static inline uint16_t float2half(uint32_t f, const float2half_tables *t)
>  {
> +#if HAVE_FLOAT16
> +    union {
> +        _Float16 f;
> +        uint16_t i;
> +    } u;
> +    u.f = av_int2float(f);
> +    return u.i;
> +#else
>      uint16_t h;
>  
>      h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]);
>  
>      return h;
> +#endif
>  }
>  
>  #endif /* AVUTIL_FLOAT2HALF_H */
> diff --git a/libavutil/half2float.c b/libavutil/half2float.c
> index baac8e4093..873226d3a0 100644
> --- a/libavutil/half2float.c
> +++ b/libavutil/half2float.c
> @@ -18,6 +18,7 @@
>  
>  #include "libavutil/half2float.h"
>  
> +#if !HAVE_FLOAT16
>  static uint32_t convertmantissa(uint32_t i)
>  {
>      int32_t m = i << 13; // Zero pad mantissa bits
> @@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i)
>  
>      return m | e; // Return combined number
>  }
> +#endif
>  
>  void ff_init_half2float_tables(half2float_tables *t)
>  {
> +#if !HAVE_FLOAT16
>      t->mantissatable[0] = 0;
>      for (int i = 1; i < 1024; i++)
>          t->mantissatable[i] = convertmantissa(i);
> @@ -60,4 +63,5 @@ void ff_init_half2float_tables(half2float_tables *t)
>      t->offsettable[31] = 2048;
>      t->offsettable[32] = 0;
>      t->offsettable[63] = 2048;
> +#endif
>  }
> diff --git a/libavutil/half2float.h b/libavutil/half2float.h
> index cb58e44a1c..b2a7c934a6 100644
> --- a/libavutil/half2float.h
> +++ b/libavutil/half2float.h
> @@ -20,22 +20,38 @@
>  #define AVUTIL_HALF2FLOAT_H
>  
>  #include <stdint.h>
> +#include "intfloat.h"
> +
> +#include "config.h"
>  
>  typedef struct half2float_tables {
> +#if HAVE_FLOAT16
> +    uint8_t dummy;
> +#else
>      uint32_t mantissatable[3072];
>      uint32_t exponenttable[64];
>      uint16_t offsettable[64];
> +#endif
>  } half2float_tables;
>  
>  void ff_init_half2float_tables(half2float_tables *t);
>  
>  static inline uint32_t half2float(uint16_t h, const half2float_tables *t)
>  {
> +#if HAVE_FLOAT16
> +    union {
> +        _Float16 f;
> +        uint16_t i;
> +    } u;
> +    u.i = h;
> +    return av_float2int(u.f);
> +#else
>      uint32_t f;
>  
>      f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10];
>  
>      return f;
> +#endif
>  }
>  
>  #endif /* AVUTIL_HALF2FLOAT_H */

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2022-08-10 21:04 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-10 20:47 [FFmpeg-devel] [PATCH 01/11] lavu/pixfmt: add packed RGBA float16 format Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 02/11] avutil/hwcontext_d3d11va: add support for rgbaf16 pixel format Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 03/11] avfilter/vsrc_ddagrab: add rgbaf16 output support Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 04/11] avfilter/vsrc_ddagrab: add options for more control over output format fallback Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 05/11] avutil: move half-precision float helper to avutil Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 06/11] avutil/half2float: adjust conversion of NaN Timo Rothenpieler
2022-08-10 21:24   ` Andreas Rheinhardt
2022-08-10 21:36     ` Timo Rothenpieler
2022-08-10 21:43       ` Andreas Rheinhardt
2022-08-10 21:53         ` Timo Rothenpieler
2022-08-10 22:14           ` Mark Reid
2022-08-10 22:18             ` James Almer
2022-08-10 22:28               ` Timo Rothenpieler
2022-08-10 22:37                 ` Mark Reid
2022-08-10 22:55                   ` Timo Rothenpieler
2022-08-11  2:18                     ` Mark Reid
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 07/11] avutil/half2float: move tables to header-internal structs Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 08/11] avutil/half2float: move non-inline init code out of header Timo Rothenpieler
2022-08-11 20:46   ` Michael Niedermayer
2022-08-11 20:50     ` Andreas Rheinhardt
2022-08-11 21:16       ` Michael Niedermayer
2022-08-11 21:31         ` Andreas Rheinhardt
2022-08-14 19:32           ` Michael Niedermayer
2022-08-15  4:20             ` Andreas Rheinhardt
2022-08-15 18:09               ` Michael Niedermayer
2022-08-14 21:54           ` Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 09/11] avutil/half2float: use native _Float16 if available Timo Rothenpieler
2022-08-10 21:03   ` Andreas Rheinhardt [this message]
2022-08-10 21:58     ` Timo Rothenpieler
2022-08-10 22:02       ` James Almer
2022-08-10 22:51   ` [FFmpeg-devel] [PATCH v2 " Timo Rothenpieler
2022-08-11  0:14     ` James Almer
2022-08-11 11:50       ` Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 10/11] swscale: add SwsContext parameter to input functions Timo Rothenpieler
2022-08-10 20:52   ` Timo Rothenpieler
2022-08-10 21:55   ` Andreas Rheinhardt
2022-08-10 22:02     ` Timo Rothenpieler
2022-08-10 20:47 ` [FFmpeg-devel] [PATCH 11/11] swscale/input: add rgbaf16 input support Timo Rothenpieler
2022-08-10 21:37   ` Timo Rothenpieler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DB6PR0101MB221435143D86ABEC32A3DA438F659@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com \
    --to=andreas.rheinhardt@outlook.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git