* [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil @ 2022-08-14 16:48 Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN Timo Rothenpieler ` (6 more replies) 0 siblings, 7 replies; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler --- libavcodec/exr.c | 2 +- libavcodec/exrenc.c | 2 +- libavcodec/pnmdec.c | 3 ++- libavcodec/pnmenc.c | 2 +- {libavcodec => libavutil}/float2half.h | 6 +++--- {libavcodec => libavutil}/half2float.h | 6 +++--- 6 files changed, 11 insertions(+), 10 deletions(-) rename {libavcodec => libavutil}/float2half.h (96%) rename {libavcodec => libavutil}/half2float.h (96%) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 3a6b9c3014..5c6ca9adbf 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -41,6 +41,7 @@ #include "libavutil/avstring.h" #include "libavutil/opt.h" #include "libavutil/color_utils.h" +#include "libavutil/half2float.h" #include "avcodec.h" #include "bytestream.h" @@ -53,7 +54,6 @@ #include "exrdsp.h" #include "get_bits.h" #include "internal.h" -#include "half2float.h" #include "mathops.h" #include "thread.h" diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c index 8cf7827bb6..56c084d483 100644 --- a/libavcodec/exrenc.c +++ b/libavcodec/exrenc.c @@ -31,11 +31,11 @@ #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" #include "libavutil/pixdesc.h" +#include "libavutil/float2half.h" #include "avcodec.h" #include "bytestream.h" #include "codec_internal.h" #include "encode.h" -#include "float2half.h" enum ExrCompr { EXR_RAW, diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c index 130407df25..9383dc8e60 100644 --- a/libavcodec/pnmdec.c +++ b/libavcodec/pnmdec.c @@ -21,12 +21,13 @@ #include "config_components.h" +#include "libavutil/half2float.h" + #include "avcodec.h" #include "codec_internal.h" #include "internal.h" #include 
"put_bits.h" #include "pnm.h" -#include "half2float.h" static void samplecpy(uint8_t *dst, const uint8_t *src, int n, int maxval) { diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c index b16c93c88f..7ce534d06e 100644 --- a/libavcodec/pnmenc.c +++ b/libavcodec/pnmenc.c @@ -24,10 +24,10 @@ #include "libavutil/intreadwrite.h" #include "libavutil/imgutils.h" #include "libavutil/pixdesc.h" +#include "libavutil/float2half.h" #include "avcodec.h" #include "codec_internal.h" #include "encode.h" -#include "float2half.h" typedef struct PHMEncContext { uint16_t basetable[512]; diff --git a/libavcodec/float2half.h b/libavutil/float2half.h similarity index 96% rename from libavcodec/float2half.h rename to libavutil/float2half.h index e05125088c..d6aaab8278 100644 --- a/libavcodec/float2half.h +++ b/libavutil/float2half.h @@ -16,8 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVCODEC_FLOAT2HALF_H -#define AVCODEC_FLOAT2HALF_H +#ifndef AVUTIL_FLOAT2HALF_H +#define AVUTIL_FLOAT2HALF_H #include <stdint.h> @@ -64,4 +64,4 @@ static uint16_t float2half(uint32_t f, uint16_t *basetable, uint8_t *shifttable) return h; } -#endif /* AVCODEC_FLOAT2HALF_H */ +#endif /* AVUTIL_FLOAT2HALF_H */ diff --git a/libavcodec/half2float.h b/libavutil/half2float.h similarity index 96% rename from libavcodec/half2float.h rename to libavutil/half2float.h index 7df6747e50..1f6deade07 100644 --- a/libavcodec/half2float.h +++ b/libavutil/half2float.h @@ -16,8 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVCODEC_HALF2FLOAT_H -#define AVCODEC_HALF2FLOAT_H +#ifndef AVUTIL_HALF2FLOAT_H +#define AVUTIL_HALF2FLOAT_H #include <stdint.h> @@ -71,4 +71,4 @@ static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint return f; } -#endif /* AVCODEC_HALF2FLOAT_H */ +#endif /* AVUTIL_HALF2FLOAT_H */ -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list 
ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler @ 2022-08-14 16:48 ` Timo Rothenpieler 2022-08-18 8:51 ` Tomas Härdin 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 3/7] avutil/half2float: move tables to header-internal structs Timo Rothenpieler ` (5 subsequent siblings) 6 siblings, 1 reply; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler IEEE-754 differentiates two different kinds of NaNs: Quiet and Signaling ones. They are differentiated by the MSB of the mantissa. For whatever reason, actual hardware conversion of half to single always sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's 0. So our code has to follow suit or fate-testing hardware float16 will be impossible. --- libavcodec/exr.c | 2 +- libavcodec/pnm.h | 2 +- libavutil/half2float.h | 5 +++++ tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 5c6ca9adbf..47f4786491 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -191,7 +191,7 @@ typedef struct EXRContext { float gamma; union av_intfloat32 gamma_table[65536]; - uint32_t mantissatable[2048]; + uint32_t mantissatable[3072]; uint32_t exponenttable[64]; uint16_t offsettable[64]; } EXRContext; diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h index 5bf2eaa4d9..7e5445f529 100644 --- a/libavcodec/pnm.h +++ b/libavcodec/pnm.h @@ -34,7 +34,7 @@ typedef struct PNMContext { int half; float scale; - uint32_t mantissatable[2048]; + uint32_t mantissatable[3072]; uint32_t exponenttable[64]; uint16_t offsettable[64]; } PNMContext; diff --git a/libavutil/half2float.h b/libavutil/half2float.h index 1f6deade07..5af4690cfe 100644 --- a/libavutil/half2float.h +++ b/libavutil/half2float.h @@ -45,6 +45,9 @@ static void
half2float_table(uint32_t *mantissatable, uint32_t *exponenttable, mantissatable[i] = convertmantissa(i); for (int i = 1024; i < 2048; i++) mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); + for (int i = 2048; i < 3072; i++) + mantissatable[i] = mantissatable[i - 1024] | 0x400000UL; + mantissatable[2048] = mantissatable[1024]; exponenttable[0] = 0; for (int i = 1; i < 31; i++) @@ -58,7 +61,9 @@ static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable, offsettable[0] = 0; for (int i = 1; i < 64; i++) offsettable[i] = 1024; + offsettable[31] = 2048; offsettable[32] = 0; + offsettable[63] = 2048; } static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint32_t *exponenttable, diff --git a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF index b6201116fe..e45a40b498 100644 --- a/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF +++ b/tests/ref/fate/exr-rgb-scanline-zip-half-0x0-0xFFFF @@ -3,4 +3,4 @@ #codec_id 0: rawvideo #dimensions 0: 256x256 #sar 0: 1/1 -0, 0, 0, 1, 786432, 0x1445e411 +0, 0, 0, 1, 786432, 0xce9be2be -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN Timo Rothenpieler @ 2022-08-18 8:51 ` Tomas Härdin 0 siblings, 0 replies; 12+ messages in thread From: Tomas Härdin @ 2022-08-18 8:51 UTC (permalink / raw) To: FFmpeg development discussions and patches sön 2022-08-14 klockan 18:48 +0200 skrev Timo Rothenpieler: > IEEE-754 differentiates two different kind of NaNs. > Quiet and Signaling ones. They are differentiated by the MSB of the > mantissa. > > For whatever reason, actual hardware conversion of half to single > always > sets the signaling bit to 1 if the mantissa is != 0, and to 0 if it's > 0. > So our code has to follow suite or fate-testing hardware float16 will > be > impossible. Does IEEE-754 specify this behavior? /Tomas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 3/7] avutil/half2float: move tables to header-internal structs 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN Timo Rothenpieler @ 2022-08-14 16:48 ` Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 4/7] avutil/half2float: move non-inline init code out of header Timo Rothenpieler ` (4 subsequent siblings) 6 siblings, 0 replies; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler Having to put the knowledge of the size of those arrays into a multitude of places is rather smelly. --- libavcodec/exr.c | 27 ++++++++-------------- libavcodec/exrenc.c | 11 +++++---- libavcodec/pnm.h | 5 ++--- libavcodec/pnmdec.c | 42 ++++++++-------------------------- libavcodec/pnmenc.c | 13 +++++------ libavutil/float2half.h | 51 +++++++++++++++++++++++------------------- libavutil/half2float.h | 46 ++++++++++++++++++++----------------- 7 files changed, 84 insertions(+), 111 deletions(-) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index 47f4786491..b0e2e85024 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -191,9 +191,7 @@ typedef struct EXRContext { float gamma; union av_intfloat32 gamma_table[65536]; - uint32_t mantissatable[3072]; - uint32_t exponenttable[64]; - uint16_t offsettable[64]; + Half2FloatTables h2f_tables; } EXRContext; static int zip_uncompress(const EXRContext *s, const uint8_t *src, int compressed_size, @@ -899,10 +897,7 @@ static int ac_uncompress(const EXRContext *s, GetByteContext *gb, float *block) n += val & 0xff; } else { ret = n; - block[ff_zigzag_direct[n]] = av_int2float(half2float(val, - s->mantissatable, - s->exponenttable, - s->offsettable)); + block[ff_zigzag_direct[n]] = av_int2float(half2float(val, &s->h2f_tables)); n++; } } @@ -1120,8 +1115,7 @@ static int 
dwa_uncompress(const EXRContext *s, const uint8_t *src, int compresse uint16_t *dc = (uint16_t *)td->dc_data; union av_intfloat32 dc_val; - dc_val.i = half2float(dc[idx], s->mantissatable, - s->exponenttable, s->offsettable); + dc_val.i = half2float(dc[idx], &s->h2f_tables); block[0] = dc_val.f; ac_uncompress(s, &agb, block); @@ -1171,7 +1165,7 @@ static int dwa_uncompress(const EXRContext *s, const uint8_t *src, int compresse for (int x = 0; x < td->xsize; x++) { uint16_t ha = ai0[x] | (ai1[x] << 8); - ao[x] = half2float(ha, s->mantissatable, s->exponenttable, s->offsettable); + ao[x] = half2float(ha, &s->h2f_tables); } } @@ -1427,10 +1421,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata, } } else { for (x = 0; x < xsize; x++) { - ptr_x[0].i = half2float(bytestream_get_le16(&src), - s->mantissatable, - s->exponenttable, - s->offsettable); + ptr_x[0].i = half2float(bytestream_get_le16(&src), &s->h2f_tables); ptr_x++; } } @@ -2217,7 +2208,7 @@ static av_cold int decode_init(AVCodecContext *avctx) float one_gamma = 1.0f / s->gamma; avpriv_trc_function trc_func = NULL; - half2float_table(s->mantissatable, s->exponenttable, s->offsettable); + init_half2float_tables(&s->h2f_tables); s->avctx = avctx; @@ -2230,18 +2221,18 @@ static av_cold int decode_init(AVCodecContext *avctx) trc_func = avpriv_get_trc_function_from_trc(s->apply_trc_type); if (trc_func) { for (i = 0; i < 65536; ++i) { - t.i = half2float(i, s->mantissatable, s->exponenttable, s->offsettable); + t.i = half2float(i, &s->h2f_tables); t.f = trc_func(t.f); s->gamma_table[i] = t; } } else { if (one_gamma > 0.9999f && one_gamma < 1.0001f) { for (i = 0; i < 65536; ++i) { - s->gamma_table[i].i = half2float(i, s->mantissatable, s->exponenttable, s->offsettable); + s->gamma_table[i].i = half2float(i, &s->h2f_tables); } } else { for (i = 0; i < 65536; ++i) { - t.i = half2float(i, s->mantissatable, s->exponenttable, s->offsettable); + t.i = half2float(i, &s->h2f_tables); /* If negative value we reuse 
half value */ if (t.f <= 0.0f) { s->gamma_table[i] = t; diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c index 56c084d483..356bd11543 100644 --- a/libavcodec/exrenc.c +++ b/libavcodec/exrenc.c @@ -87,15 +87,14 @@ typedef struct EXRContext { EXRScanlineData *scanline; - uint16_t basetable[512]; - uint8_t shifttable[512]; + Float2HalfTables f2h_tables; } EXRContext; static av_cold int encode_init(AVCodecContext *avctx) { EXRContext *s = avctx->priv_data; - float2half_tables(s->basetable, s->shifttable); + init_float2half_tables(&s->f2h_tables); switch (avctx->pix_fmt) { case AV_PIX_FMT_GBRPF32: @@ -256,7 +255,7 @@ static int encode_scanline_rle(EXRContext *s, const AVFrame *frame) const uint32_t *src = (const uint32_t *)(frame->data[ch] + y * frame->linesize[ch]); for (int x = 0; x < frame->width; x++) - dst[x] = float2half(src[x], s->basetable, s->shifttable); + dst[x] = float2half(src[x], &s->f2h_tables); } break; } @@ -324,7 +323,7 @@ static int encode_scanline_zip(EXRContext *s, const AVFrame *frame) const uint32_t *src = (const uint32_t *)(frame->data[ch] + (y * s->scanline_height + l) * frame->linesize[ch]); for (int x = 0; x < frame->width; x++) - dst[x] = float2half(src[x], s->basetable, s->shifttable); + dst[x] = float2half(src[x], &s->f2h_tables); } } break; @@ -482,7 +481,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, const uint32_t *src = (const uint32_t *)(frame->data[ch] + y * frame->linesize[ch]); for (int x = 0; x < frame->width; x++) - bytestream2_put_le16(pb, float2half(src[x], s->basetable, s->shifttable)); + bytestream2_put_le16(pb, float2half(src[x], &s->f2h_tables)); } } } diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h index 7e5445f529..4906eeb2b9 100644 --- a/libavcodec/pnm.h +++ b/libavcodec/pnm.h @@ -22,6 +22,7 @@ #ifndef AVCODEC_PNM_H #define AVCODEC_PNM_H +#include "libavutil/half2float.h" #include "avcodec.h" typedef struct PNMContext { @@ -34,9 +35,7 @@ typedef struct PNMContext { int half; float scale; - 
uint32_t mantissatable[3072]; - uint32_t exponenttable[64]; - uint16_t offsettable[64]; + Half2FloatTables h2f_tables; } PNMContext; int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s); diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c index 9383dc8e60..6adc348ec8 100644 --- a/libavcodec/pnmdec.c +++ b/libavcodec/pnmdec.c @@ -313,18 +313,9 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, b = (float *)p->data[1]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - r[j] = av_int2float(half2float(AV_RL16(s->bytestream+0), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - g[j] = av_int2float(half2float(AV_RL16(s->bytestream+2), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - b[j] = av_int2float(half2float(AV_RL16(s->bytestream+4), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + r[j] = av_int2float(half2float(AV_RL16(s->bytestream+0), &s->h2f_tables)) * scale; + g[j] = av_int2float(half2float(AV_RL16(s->bytestream+2), &s->h2f_tables)) * scale; + b[j] = av_int2float(half2float(AV_RL16(s->bytestream+4), &s->h2f_tables)) * scale; s->bytestream += 6; } @@ -340,18 +331,9 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, b = (float *)p->data[1]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - r[j] = av_int2float(half2float(AV_RB16(s->bytestream+0), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - g[j] = av_int2float(half2float(AV_RB16(s->bytestream+2), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; - b[j] = av_int2float(half2float(AV_RB16(s->bytestream+4), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + r[j] = av_int2float(half2float(AV_RB16(s->bytestream+0), &s->h2f_tables)) * scale; + g[j] = av_int2float(half2float(AV_RB16(s->bytestream+2), &s->h2f_tables)) * scale; + b[j] = av_int2float(half2float(AV_RB16(s->bytestream+4), 
&s->h2f_tables)) * scale; s->bytestream += 6; } @@ -394,10 +376,7 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, float *g = (float *)p->data[0]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - g[j] = av_int2float(half2float(AV_RL16(s->bytestream), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + g[j] = av_int2float(half2float(AV_RL16(s->bytestream), &s->h2f_tables)) * scale; s->bytestream += 2; } g += p->linesize[0] / 4; @@ -406,10 +385,7 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p, float *g = (float *)p->data[0]; for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - g[j] = av_int2float(half2float(AV_RB16(s->bytestream), - s->mantissatable, - s->exponenttable, - s->offsettable)) * scale; + g[j] = av_int2float(half2float(AV_RB16(s->bytestream), &s->h2f_tables)) * scale; s->bytestream += 2; } g += p->linesize[0] / 4; @@ -501,7 +477,7 @@ static av_cold int phm_dec_init(AVCodecContext *avctx) { PNMContext *s = avctx->priv_data; - half2float_table(s->mantissatable, s->exponenttable, s->offsettable); + init_half2float_tables(&s->h2f_tables); return 0; } diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c index 7ce534d06e..38a5d8172d 100644 --- a/libavcodec/pnmenc.c +++ b/libavcodec/pnmenc.c @@ -30,8 +30,7 @@ #include "encode.h" typedef struct PHMEncContext { - uint16_t basetable[512]; - uint8_t shifttable[512]; + Float2HalfTables f2h_tables; } PHMEncContext; static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt, @@ -169,9 +168,9 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt, for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - AV_WN16(bytestream + 0, float2half(av_float2int(r[j]), s->basetable, s->shifttable)); - AV_WN16(bytestream + 2, float2half(av_float2int(g[j]), s->basetable, s->shifttable)); - AV_WN16(bytestream + 4, float2half(av_float2int(b[j]), s->basetable, 
s->shifttable)); + AV_WN16(bytestream + 0, float2half(av_float2int(r[j]), &s->f2h_tables)); + AV_WN16(bytestream + 2, float2half(av_float2int(g[j]), &s->f2h_tables)); + AV_WN16(bytestream + 4, float2half(av_float2int(b[j]), &s->f2h_tables)); bytestream += 6; } @@ -184,7 +183,7 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt, for (int i = 0; i < avctx->height; i++) { for (int j = 0; j < avctx->width; j++) { - AV_WN16(bytestream, float2half(av_float2int(g[j]), s->basetable, s->shifttable)); + AV_WN16(bytestream, float2half(av_float2int(g[j]), &s->f2h_tables)); bytestream += 2; } @@ -295,7 +294,7 @@ static av_cold int phm_enc_init(AVCodecContext *avctx) { PHMEncContext *s = avctx->priv_data; - float2half_tables(s->basetable, s->shifttable); + init_float2half_tables(&s->f2h_tables); return 0; } diff --git a/libavutil/float2half.h b/libavutil/float2half.h index d6aaab8278..3548aa1d45 100644 --- a/libavutil/float2half.h +++ b/libavutil/float2half.h @@ -21,45 +21,50 @@ #include <stdint.h> -static void float2half_tables(uint16_t *basetable, uint8_t *shifttable) +typedef struct Float2HalfTables { + uint16_t basetable[512]; + uint8_t shifttable[512]; +} Float2HalfTables; + +static void init_float2half_tables(Float2HalfTables *t) { for (int i = 0; i < 256; i++) { int e = i - 127; if (e < -24) { // Very small numbers map to zero - basetable[i|0x000] = 0x0000; - basetable[i|0x100] = 0x8000; - shifttable[i|0x000] = 24; - shifttable[i|0x100] = 24; + t->basetable[i|0x000] = 0x0000; + t->basetable[i|0x100] = 0x8000; + t->shifttable[i|0x000] = 24; + t->shifttable[i|0x100] = 24; } else if (e < -14) { // Small numbers map to denorms - basetable[i|0x000] = (0x0400>>(-e-14)); - basetable[i|0x100] = (0x0400>>(-e-14)) | 0x8000; - shifttable[i|0x000] = -e-1; - shifttable[i|0x100] = -e-1; + t->basetable[i|0x000] = (0x0400>>(-e-14)); + t->basetable[i|0x100] = (0x0400>>(-e-14)) | 0x8000; + t->shifttable[i|0x000] = -e-1; + t->shifttable[i|0x100] = -e-1; } else if (e <= 15) 
{ // Normal numbers just lose precision - basetable[i|0x000] = ((e + 15) << 10); - basetable[i|0x100] = ((e + 15) << 10) | 0x8000; - shifttable[i|0x000] = 13; - shifttable[i|0x100] = 13; + t->basetable[i|0x000] = ((e + 15) << 10); + t->basetable[i|0x100] = ((e + 15) << 10) | 0x8000; + t->shifttable[i|0x000] = 13; + t->shifttable[i|0x100] = 13; } else if (e < 128) { // Large numbers map to Infinity - basetable[i|0x000] = 0x7C00; - basetable[i|0x100] = 0xFC00; - shifttable[i|0x000] = 24; - shifttable[i|0x100] = 24; + t->basetable[i|0x000] = 0x7C00; + t->basetable[i|0x100] = 0xFC00; + t->shifttable[i|0x000] = 24; + t->shifttable[i|0x100] = 24; } else { // Infinity and NaN's stay Infinity and NaN's - basetable[i|0x000] = 0x7C00; - basetable[i|0x100] = 0xFC00; - shifttable[i|0x000] = 13; - shifttable[i|0x100] = 13; + t->basetable[i|0x000] = 0x7C00; + t->basetable[i|0x100] = 0xFC00; + t->shifttable[i|0x000] = 13; + t->shifttable[i|0x100] = 13; } } } -static uint16_t float2half(uint32_t f, uint16_t *basetable, uint8_t *shifttable) +static uint16_t float2half(uint32_t f, const Float2HalfTables *t) { uint16_t h; - h = basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> shifttable[(f >> 23) & 0x1ff]); + h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]); return h; } diff --git a/libavutil/half2float.h b/libavutil/half2float.h index 5af4690cfe..5696567a8c 100644 --- a/libavutil/half2float.h +++ b/libavutil/half2float.h @@ -21,6 +21,12 @@ #include <stdint.h> +typedef struct Half2FloatTables { + uint32_t mantissatable[3072]; + uint32_t exponenttable[64]; + uint16_t offsettable[64]; +} Half2FloatTables; + static uint32_t convertmantissa(uint32_t i) { int32_t m = i << 13; // Zero pad mantissa bits @@ -37,41 +43,39 @@ static uint32_t convertmantissa(uint32_t i) return m | e; // Return combined number } -static void half2float_table(uint32_t *mantissatable, uint32_t *exponenttable, - uint16_t *offsettable) +static void 
init_half2float_tables(Half2FloatTables *t) { - mantissatable[0] = 0; + t->mantissatable[0] = 0; for (int i = 1; i < 1024; i++) - mantissatable[i] = convertmantissa(i); + t->mantissatable[i] = convertmantissa(i); for (int i = 1024; i < 2048; i++) - mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); + t->mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); for (int i = 2048; i < 3072; i++) - mantissatable[i] = mantissatable[i - 1024] | 0x400000UL; - mantissatable[2048] = mantissatable[1024]; + t->mantissatable[i] = t->mantissatable[i - 1024] | 0x400000UL; + t->mantissatable[2048] = t->mantissatable[1024]; - exponenttable[0] = 0; + t->exponenttable[0] = 0; for (int i = 1; i < 31; i++) - exponenttable[i] = i << 23; + t->exponenttable[i] = i << 23; for (int i = 33; i < 63; i++) - exponenttable[i] = 0x80000000UL + ((i - 32) << 23UL); - exponenttable[31]= 0x47800000UL; - exponenttable[32]= 0x80000000UL; - exponenttable[63]= 0xC7800000UL; + t->exponenttable[i] = 0x80000000UL + ((i - 32) << 23UL); + t->exponenttable[31]= 0x47800000UL; + t->exponenttable[32]= 0x80000000UL; + t->exponenttable[63]= 0xC7800000UL; - offsettable[0] = 0; + t->offsettable[0] = 0; for (int i = 1; i < 64; i++) - offsettable[i] = 1024; - offsettable[31] = 2048; - offsettable[32] = 0; - offsettable[63] = 2048; + t->offsettable[i] = 1024; + t->offsettable[31] = 2048; + t->offsettable[32] = 0; + t->offsettable[63] = 2048; } -static uint32_t half2float(uint16_t h, const uint32_t *mantissatable, const uint32_t *exponenttable, - const uint16_t *offsettable) +static uint32_t half2float(uint16_t h, const Half2FloatTables *t) { uint32_t f; - f = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10]; + f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10]; return f; } -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link 
above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 4/7] avutil/half2float: move non-inline init code out of header 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 3/7] avutil/half2float: move tables to header-internal structs Timo Rothenpieler @ 2022-08-14 16:48 ` Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 5/7] avutil/half2float: use native _Float16 if available Timo Rothenpieler ` (3 subsequent siblings) 6 siblings, 0 replies; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler --- libavcodec/Makefile | 8 +++--- libavcodec/exr.c | 2 +- libavcodec/exrenc.c | 2 +- libavcodec/float2half.c | 19 +++++++++++++ libavcodec/half2float.c | 19 +++++++++++++ libavcodec/pnmdec.c | 2 +- libavcodec/pnmenc.c | 2 +- libavutil/float2half.c | 53 ++++++++++++++++++++++++++++++++++ libavutil/float2half.h | 36 ++--------------------- libavutil/half2float.c | 63 +++++++++++++++++++++++++++++++++++++++++ libavutil/half2float.h | 46 ++---------------------------- 11 files changed, 166 insertions(+), 86 deletions(-) create mode 100644 libavcodec/float2half.c create mode 100644 libavcodec/half2float.c create mode 100644 libavutil/float2half.c create mode 100644 libavutil/half2float.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 029f1bad3d..cb80f73d99 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -337,8 +337,8 @@ OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER) += 8svx.o OBJS-$(CONFIG_ESCAPE124_DECODER) += escape124.o OBJS-$(CONFIG_ESCAPE130_DECODER) += escape130.o OBJS-$(CONFIG_EVRC_DECODER) += evrcdec.o acelp_vectors.o lsp.o -OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o -OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o +OBJS-$(CONFIG_EXR_DECODER) += exr.o exrdsp.o half2float.o 
+OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o @@ -570,8 +570,8 @@ OBJS-$(CONFIG_PGMYUV_DECODER) += pnmdec.o pnm.o OBJS-$(CONFIG_PGMYUV_ENCODER) += pnmenc.o OBJS-$(CONFIG_PGSSUB_DECODER) += pgssubdec.o OBJS-$(CONFIG_PGX_DECODER) += pgxdec.o -OBJS-$(CONFIG_PHM_DECODER) += pnmdec.o pnm.o -OBJS-$(CONFIG_PHM_ENCODER) += pnmenc.o +OBJS-$(CONFIG_PHM_DECODER) += pnmdec.o pnm.o half2float.o +OBJS-$(CONFIG_PHM_ENCODER) += pnmenc.o float2half.o OBJS-$(CONFIG_PHOTOCD_DECODER) += photocd.o OBJS-$(CONFIG_PICTOR_DECODER) += pictordec.o cga_data.o OBJS-$(CONFIG_PIXLET_DECODER) += pixlet.o diff --git a/libavcodec/exr.c b/libavcodec/exr.c index b0e2e85024..859dd6fedd 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -2208,7 +2208,7 @@ static av_cold int decode_init(AVCodecContext *avctx) float one_gamma = 1.0f / s->gamma; avpriv_trc_function trc_func = NULL; - init_half2float_tables(&s->h2f_tables); + ff_init_half2float_tables(&s->h2f_tables); s->avctx = avctx; diff --git a/libavcodec/exrenc.c b/libavcodec/exrenc.c index 356bd11543..3dad107d62 100644 --- a/libavcodec/exrenc.c +++ b/libavcodec/exrenc.c @@ -94,7 +94,7 @@ static av_cold int encode_init(AVCodecContext *avctx) { EXRContext *s = avctx->priv_data; - init_float2half_tables(&s->f2h_tables); + ff_init_float2half_tables(&s->f2h_tables); switch (avctx->pix_fmt) { case AV_PIX_FMT_GBRPF32: diff --git a/libavcodec/float2half.c b/libavcodec/float2half.c new file mode 100644 index 0000000000..90a6f63fac --- /dev/null +++ b/libavcodec/float2half.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/float2half.c" diff --git a/libavcodec/half2float.c b/libavcodec/half2float.c new file mode 100644 index 0000000000..1b023f96a5 --- /dev/null +++ b/libavcodec/half2float.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/half2float.c" diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c index 6adc348ec8..fbed282e93 100644 --- a/libavcodec/pnmdec.c +++ b/libavcodec/pnmdec.c @@ -477,7 +477,7 @@ static av_cold int phm_dec_init(AVCodecContext *avctx) { PNMContext *s = avctx->priv_data; - init_half2float_tables(&s->h2f_tables); + ff_init_half2float_tables(&s->h2f_tables); return 0; } diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c index 38a5d8172d..b052c03b21 100644 --- a/libavcodec/pnmenc.c +++ b/libavcodec/pnmenc.c @@ -294,7 +294,7 @@ static av_cold int phm_enc_init(AVCodecContext *avctx) { PHMEncContext *s = avctx->priv_data; - init_float2half_tables(&s->f2h_tables); + ff_init_float2half_tables(&s->f2h_tables); return 0; } diff --git a/libavutil/float2half.c b/libavutil/float2half.c new file mode 100644 index 0000000000..c79a3abfa1 --- /dev/null +++ b/libavutil/float2half.c @@ -0,0 +1,53 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/float2half.h" + +void ff_init_float2half_tables(Float2HalfTables *t) +{ + for (int i = 0; i < 256; i++) { + int e = i - 127; + + if (e < -24) { // Very small numbers map to zero + t->basetable[i|0x000] = 0x0000; + t->basetable[i|0x100] = 0x8000; + t->shifttable[i|0x000] = 24; + t->shifttable[i|0x100] = 24; + } else if (e < -14) { // Small numbers map to denorms + t->basetable[i|0x000] = (0x0400>>(-e-14)); + t->basetable[i|0x100] = (0x0400>>(-e-14)) | 0x8000; + t->shifttable[i|0x000] = -e-1; + t->shifttable[i|0x100] = -e-1; + } else if (e <= 15) { // Normal numbers just lose precision + t->basetable[i|0x000] = ((e + 15) << 10); + t->basetable[i|0x100] = ((e + 15) << 10) | 0x8000; + t->shifttable[i|0x000] = 13; + t->shifttable[i|0x100] = 13; + } else if (e < 128) { // Large numbers map to Infinity + t->basetable[i|0x000] = 0x7C00; + t->basetable[i|0x100] = 0xFC00; + t->shifttable[i|0x000] = 24; + t->shifttable[i|0x100] = 24; + } else { // Infinity and NaN's stay Infinity and NaN's + t->basetable[i|0x000] = 0x7C00; + t->basetable[i|0x100] = 0xFC00; + t->shifttable[i|0x000] = 13; + t->shifttable[i|0x100] = 13; + } + } +} diff --git a/libavutil/float2half.h b/libavutil/float2half.h index 3548aa1d45..20fdc2a36b 100644 --- a/libavutil/float2half.h +++ b/libavutil/float2half.h @@ -26,41 +26,9 @@ typedef struct Float2HalfTables { uint8_t shifttable[512]; } Float2HalfTables; -static void init_float2half_tables(Float2HalfTables *t) -{ - for (int i = 0; i < 256; i++) { - int e = i - 127; - - if (e < -24) { // Very small numbers map to zero - t->basetable[i|0x000] = 0x0000; - t->basetable[i|0x100] = 0x8000; - t->shifttable[i|0x000] = 24; - t->shifttable[i|0x100] = 24; - } else if (e < -14) { // Small numbers map to denorms - 
t->basetable[i|0x000] = (0x0400>>(-e-14)); - t->basetable[i|0x100] = (0x0400>>(-e-14)) | 0x8000; - t->shifttable[i|0x000] = -e-1; - t->shifttable[i|0x100] = -e-1; - } else if (e <= 15) { // Normal numbers just lose precision - t->basetable[i|0x000] = ((e + 15) << 10); - t->basetable[i|0x100] = ((e + 15) << 10) | 0x8000; - t->shifttable[i|0x000] = 13; - t->shifttable[i|0x100] = 13; - } else if (e < 128) { // Large numbers map to Infinity - t->basetable[i|0x000] = 0x7C00; - t->basetable[i|0x100] = 0xFC00; - t->shifttable[i|0x000] = 24; - t->shifttable[i|0x100] = 24; - } else { // Infinity and NaN's stay Infinity and NaN's - t->basetable[i|0x000] = 0x7C00; - t->basetable[i|0x100] = 0xFC00; - t->shifttable[i|0x000] = 13; - t->shifttable[i|0x100] = 13; - } - } -} +void ff_init_float2half_tables(Float2HalfTables *t); -static uint16_t float2half(uint32_t f, const Float2HalfTables *t) +static inline uint16_t float2half(uint32_t f, const Float2HalfTables *t) { uint16_t h; diff --git a/libavutil/half2float.c b/libavutil/half2float.c new file mode 100644 index 0000000000..1967126f76 --- /dev/null +++ b/libavutil/half2float.c @@ -0,0 +1,63 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/half2float.h" + +static uint32_t convertmantissa(uint32_t i) +{ + int32_t m = i << 13; // Zero pad mantissa bits + int32_t e = 0; // Zero exponent + + while (!(m & 0x00800000)) { // While not normalized + e -= 0x00800000; // Decrement exponent (1<<23) + m <<= 1; // Shift mantissa + } + + m &= ~0x00800000; // Clear leading 1 bit + e += 0x38800000; // Adjust bias ((127-14)<<23) + + return m | e; // Return combined number +} + +void ff_init_half2float_tables(Half2FloatTables *t) +{ + t->mantissatable[0] = 0; + for (int i = 1; i < 1024; i++) + t->mantissatable[i] = convertmantissa(i); + for (int i = 1024; i < 2048; i++) + t->mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); + for (int i = 2048; i < 3072; i++) + t->mantissatable[i] = t->mantissatable[i - 1024] | 0x400000UL; + t->mantissatable[2048] = t->mantissatable[1024]; + + t->exponenttable[0] = 0; + for (int i = 1; i < 31; i++) + t->exponenttable[i] = i << 23; + for (int i = 33; i < 63; i++) + t->exponenttable[i] = 0x80000000UL + ((i - 32) << 23UL); + t->exponenttable[31]= 0x47800000UL; + t->exponenttable[32]= 0x80000000UL; + t->exponenttable[63]= 0xC7800000UL; + + t->offsettable[0] = 0; + for (int i = 1; i < 64; i++) + t->offsettable[i] = 1024; + t->offsettable[31] = 2048; + t->offsettable[32] = 0; + t->offsettable[63] = 2048; +} diff --git a/libavutil/half2float.h b/libavutil/half2float.h index 5696567a8c..428a27a19f 100644 --- a/libavutil/half2float.h +++ b/libavutil/half2float.h @@ -27,51 +27,9 @@ typedef struct Half2FloatTables { uint16_t offsettable[64]; } Half2FloatTables; -static uint32_t convertmantissa(uint32_t i) -{ - int32_t m = i << 13; // Zero pad mantissa bits - int32_t e = 0; // Zero exponent - - while (!(m & 0x00800000)) { // While not normalized - 
e -= 0x00800000; // Decrement exponent (1<<23) - m <<= 1; // Shift mantissa - } - - m &= ~0x00800000; // Clear leading 1 bit - e += 0x38800000; // Adjust bias ((127-14)<<23) - - return m | e; // Return combined number -} - -static void init_half2float_tables(Half2FloatTables *t) -{ - t->mantissatable[0] = 0; - for (int i = 1; i < 1024; i++) - t->mantissatable[i] = convertmantissa(i); - for (int i = 1024; i < 2048; i++) - t->mantissatable[i] = 0x38000000UL + ((i - 1024) << 13UL); - for (int i = 2048; i < 3072; i++) - t->mantissatable[i] = t->mantissatable[i - 1024] | 0x400000UL; - t->mantissatable[2048] = t->mantissatable[1024]; - - t->exponenttable[0] = 0; - for (int i = 1; i < 31; i++) - t->exponenttable[i] = i << 23; - for (int i = 33; i < 63; i++) - t->exponenttable[i] = 0x80000000UL + ((i - 32) << 23UL); - t->exponenttable[31]= 0x47800000UL; - t->exponenttable[32]= 0x80000000UL; - t->exponenttable[63]= 0xC7800000UL; - - t->offsettable[0] = 0; - for (int i = 1; i < 64; i++) - t->offsettable[i] = 1024; - t->offsettable[31] = 2048; - t->offsettable[32] = 0; - t->offsettable[63] = 2048; -} +void ff_init_half2float_tables(Half2FloatTables *t); -static uint32_t half2float(uint16_t h, const Half2FloatTables *t) +static inline uint32_t half2float(uint16_t h, const Half2FloatTables *t) { uint32_t f; -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 5/7] avutil/half2float: use native _Float16 if available 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler ` (2 preceding siblings ...) 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 4/7] avutil/half2float: move non-inline init code out of header Timo Rothenpieler @ 2022-08-14 16:48 ` Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 6/7] swscale: add opaque parameter to input functions Timo Rothenpieler ` (2 subsequent siblings) 6 siblings, 0 replies; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler _Float16 support has been available on arm/aarch64 for a while, and with gcc 12 was enabled on x86 as long as SSE2 is supported. If the target arch supports f16c, gcc emits fairly efficient assembly, taking advantage of it. This is the case on x86-64-v3 or higher. The same goes for arm, which has native float16 support. On x86, without f16c, it emulates it in software using sse2 instructions. This has been shown to perform rather poorly: _Float16 full SSE2 emulation: frame=50074 fps=848 q=-0.0 size=N/A time=00:33:22.96 bitrate=N/A speed=33.9x _Float16 f16c accelerated (Zen2, --cpu=znver2): frame=50636 fps=1965 q=-0.0 Lsize=N/A time=00:33:45.40 bitrate=N/A speed=78.6x classic half2float full software implementation: frame=49926 fps=1605 q=-0.0 Lsize=N/A time=00:33:17.00 bitrate=N/A speed=64.2x Hence an additional check was introduced that only enables use of _Float16 on x86 if f16c is being utilized. On aarch64, a similar uplift in performance is seen: RPi4 half2float full software implementation: frame= 6088 fps=126 q=-0.0 Lsize=N/A time=00:04:03.48 bitrate=N/A speed=5.06x RPi4 _Float16: frame= 6103 fps=158 q=-0.0 Lsize=N/A time=00:04:04.08 bitrate=N/A speed=6.32x Since arm/aarch64 always natively support 16-bit floats, it can always be considered fast there.
I'm not aware of any additional platforms that currently support _Float16. And if there are, they should be considered non-fast until proven fast. --- configure | 12 ++++++++++++ libavutil/float2half.c | 2 ++ libavutil/float2half.h | 16 ++++++++++++++++ libavutil/half2float.c | 4 ++++ libavutil/half2float.h | 16 ++++++++++++++++ 5 files changed, 50 insertions(+) diff --git a/configure b/configure index fe94941a03..ea50c94002 100755 --- a/configure +++ b/configure @@ -2145,6 +2145,7 @@ ARCH_FEATURES=" fast_64bit fast_clz fast_cmov + fast_float16 local_aligned simd_align_16 simd_align_32 @@ -5127,6 +5128,8 @@ elif enabled arm; then ;; esac + test_cflags -mfp16-format=ieee && add_cflags -mfp16-format=ieee + elif enabled avr32; then case $cpu in @@ -6231,6 +6234,15 @@ check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)" check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)" +check_builtin float16 "" "_Float16 f16var" +if enabled float16; then + if enabled x86; then + test_cpp_condition stddef.h "defined(__F16C__)" && enable fast_float16 + elif enabled arm || enabled aarch64; then + enable fast_float16 + fi +fi + case "$custom_allocator" in jemalloc) # jemalloc by default does not use a prefix diff --git a/libavutil/float2half.c b/libavutil/float2half.c index c79a3abfa1..1a283956e7 100644 --- a/libavutil/float2half.c +++ b/libavutil/float2half.c @@ -20,6 +20,7 @@ void ff_init_float2half_tables(Float2HalfTables *t) { +#if !HAVE_FAST_FLOAT16 for (int i = 0; i < 256; i++) { int e = i - 127; @@ -50,4 +51,5 @@ void ff_init_float2half_tables(Float2HalfTables *t) t->shifttable[i|0x100] = 13; } } +#endif } diff --git a/libavutil/float2half.h b/libavutil/float2half.h index 20fdc2a36b..e619046911 100644 --- a/libavutil/float2half.h +++ b/libavutil/float2half.h @@ -20,21 +20,37 @@ #define AVUTIL_FLOAT2HALF_H #include <stdint.h> +#include 
"intfloat.h" + +#include "config.h" typedef struct Float2HalfTables { +#if HAVE_FAST_FLOAT16 + uint8_t dummy; +#else uint16_t basetable[512]; uint8_t shifttable[512]; +#endif } Float2HalfTables; void ff_init_float2half_tables(Float2HalfTables *t); static inline uint16_t float2half(uint32_t f, const Float2HalfTables *t) { +#if HAVE_FAST_FLOAT16 + union { + _Float16 f; + uint16_t i; + } u; + u.f = av_int2float(f); + return u.i; +#else uint16_t h; h = t->basetable[(f >> 23) & 0x1ff] + ((f & 0x007fffff) >> t->shifttable[(f >> 23) & 0x1ff]); return h; +#endif } #endif /* AVUTIL_FLOAT2HALF_H */ diff --git a/libavutil/half2float.c b/libavutil/half2float.c index 1967126f76..4de2180a19 100644 --- a/libavutil/half2float.c +++ b/libavutil/half2float.c @@ -18,6 +18,7 @@ #include "libavutil/half2float.h" +#if !HAVE_FAST_FLOAT16 static uint32_t convertmantissa(uint32_t i) { int32_t m = i << 13; // Zero pad mantissa bits @@ -33,9 +34,11 @@ static uint32_t convertmantissa(uint32_t i) return m | e; // Return combined number } +#endif void ff_init_half2float_tables(Half2FloatTables *t) { +#if !HAVE_FAST_FLOAT16 t->mantissatable[0] = 0; for (int i = 1; i < 1024; i++) t->mantissatable[i] = convertmantissa(i); @@ -60,4 +63,5 @@ void ff_init_half2float_tables(Half2FloatTables *t) t->offsettable[31] = 2048; t->offsettable[32] = 0; t->offsettable[63] = 2048; +#endif } diff --git a/libavutil/half2float.h b/libavutil/half2float.h index 428a27a19f..dbd5e7150f 100644 --- a/libavutil/half2float.h +++ b/libavutil/half2float.h @@ -20,22 +20,38 @@ #define AVUTIL_HALF2FLOAT_H #include <stdint.h> +#include "intfloat.h" + +#include "config.h" typedef struct Half2FloatTables { +#if HAVE_FAST_FLOAT16 + uint8_t dummy; +#else uint32_t mantissatable[3072]; uint32_t exponenttable[64]; uint16_t offsettable[64]; +#endif } Half2FloatTables; void ff_init_half2float_tables(Half2FloatTables *t); static inline uint32_t half2float(uint16_t h, const Half2FloatTables *t) { +#if HAVE_FAST_FLOAT16 + union { + 
_Float16 f; + uint16_t i; + } u; + u.i = h; + return av_float2int(u.f); +#else uint32_t f; f = t->mantissatable[t->offsettable[h >> 10] + (h & 0x3ff)] + t->exponenttable[h >> 10]; return f; +#endif } #endif /* AVUTIL_HALF2FLOAT_H */ -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 6/7] swscale: add opaque parameter to input functions 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler ` (3 preceding siblings ...) 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 5/7] avutil/half2float: use native _Float16 if available Timo Rothenpieler @ 2022-08-14 16:48 ` Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support Timo Rothenpieler 2022-08-18 15:37 ` [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler 6 siblings, 0 replies; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler --- libswscale/hscale.c | 12 +-- libswscale/input.c | 149 ++++++++++++++++++---------------- libswscale/swscale_internal.h | 17 ++-- libswscale/x86/swscale.c | 13 +-- 4 files changed, 106 insertions(+), 85 deletions(-) diff --git a/libswscale/hscale.c b/libswscale/hscale.c index eca0635338..6789ce7540 100644 --- a/libswscale/hscale.c +++ b/libswscale/hscale.c @@ -105,18 +105,18 @@ static int lum_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int uint8_t * dst = desc->dst->plane[0].line[i]; if (c->lumToYV12) { - c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal); + c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal, c->input_opaque); } else if (c->readLumPlanar) { - c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table); + c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table, c->input_opaque); } if (desc->alpha) { dst = desc->dst->plane[3].line[i]; if (c->alpToYV12) { - c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal); + c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal, c->input_opaque); } else if (c->readAlpPlanar) { - c->readAlpPlanar(dst, src, srcW, NULL); + c->readAlpPlanar(dst, src, srcW, NULL, c->input_opaque); } } } @@ -224,9 +224,9 @@ static int chr_convert(SwsContext *c, 
SwsFilterDescriptor *desc, int sliceY, int uint8_t * dst1 = desc->dst->plane[1].line[i]; uint8_t * dst2 = desc->dst->plane[2].line[i]; if (c->chrToYV12) { - c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal); + c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal, c->input_opaque); } else if (c->readChrPlanar) { - c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table); + c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table, c->input_opaque); } } return sliceH; diff --git a/libswscale/input.c b/libswscale/input.c index 68abc4d62c..36ef1e43ac 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -88,7 +88,7 @@ rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, #define rgb64funcs(pattern, BE_LE, origin) \ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ - int width, uint32_t *rgb2yuv) \ + int width, uint32_t *rgb2yuv, void *opq) \ { \ const uint16_t *src = (const uint16_t *) _src; \ uint16_t *dst = (uint16_t *) _dst; \ @@ -97,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *rgb2yuv) \ + int width, uint32_t *rgb2yuv, void *opq) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ @@ -107,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ \ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ - int width, uint32_t *rgb2yuv) \ + int width, uint32_t *rgb2yuv, void *opq) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ @@ -192,7 +192,8 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ const uint8_t 
*_src, \ const uint8_t *unused0, const uint8_t *unused1,\ int width, \ - uint32_t *rgb2yuv) \ + uint32_t *rgb2yuv, \ + void *opq) \ { \ const uint16_t *src = (const uint16_t *)_src; \ uint16_t *dst = (uint16_t *)_dst; \ @@ -205,7 +206,8 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ - uint32_t *rgb2yuv) \ + uint32_t *rgb2yuv, \ + void *opq) \ { \ const uint16_t *src1 = (const uint16_t *)_src1, \ *src2 = (const uint16_t *)_src2; \ @@ -220,7 +222,8 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ - uint32_t *rgb2yuv) \ + uint32_t *rgb2yuv, \ + void *opq) \ { \ const uint16_t *src1 = (const uint16_t *)_src1, \ *src2 = (const uint16_t *)_src2; \ @@ -345,7 +348,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ maskg, maskb, rsh, gsh, bsh, S) \ static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ - int width, uint32_t *tab) \ + int width, uint32_t *tab, void *opq) \ { \ rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \ @@ -353,7 +356,7 @@ static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unuse \ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ - int width, uint32_t *tab) \ + int width, uint32_t *tab, void *opq) \ { \ rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ @@ -363,7 +366,7 @@ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ const uint8_t *unused0, const uint8_t *src, \ const uint8_t *dummy, \ - int width, uint32_t *tab) \ + int width, uint32_t *tab, void *opq) \ 
{ \ rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ @@ -392,7 +395,7 @@ rgb16_32_wrapper(AV_PIX_FMT_X2BGR10LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3 static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, - int width, uint32_t *rgb2yuv) + int width, uint32_t *rgb2yuv, void *opq) { uint16_t *dstU = (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; @@ -411,7 +414,7 @@ static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, } static void rgba64leToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int16_t *dst = (int16_t *)_dst; const uint16_t *src = (const uint16_t *)_src; @@ -421,7 +424,7 @@ static void rgba64leToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unu } static void rgba64beToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int16_t *dst = (int16_t *)_dst; const uint16_t *src = (const uint16_t *)_src; @@ -430,7 +433,8 @@ static void rgba64beToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unu dst[i] = AV_RB16(src + 4 * i + 3); } -static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int16_t *dst = (int16_t *)_dst; int i; @@ -439,7 +443,8 @@ static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, } } -static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +static void rgbaToA_c(uint8_t *_dst, const uint8_t 
*src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int16_t *dst = (int16_t *)_dst; int i; @@ -448,7 +453,8 @@ static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, } } -static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) +static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *pal, void *opq) { int16_t *dst = (int16_t *)_dst; int i; @@ -459,7 +465,8 @@ static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, } } -static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) +static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *pal, void *opq) { int16_t *dst = (int16_t *)_dst; int i; @@ -471,8 +478,8 @@ static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, } static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *pal) + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *pal, void *opq) { uint16_t *dstU = (uint16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; @@ -486,7 +493,8 @@ static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV, } } -static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int16_t *dst = (int16_t *)_dst; int i, j; @@ -503,7 +511,8 @@ static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unus } } -static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const 
uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int16_t *dst = (int16_t *)_dst; int i, j; @@ -520,8 +529,8 @@ static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unus } } -static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) +static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, + uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) @@ -529,7 +538,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, } static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -540,7 +549,7 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, con } static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -551,7 +560,7 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, con } static void y210le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, - const uint8_t *unused1, int width, uint32_t *unused2) + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) { @@ -561,7 +570,7 @@ static void y210le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, co } static void y210le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, - const uint8_t *unused1, int width, uint32_t 
*unused2) + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) @@ -569,7 +578,7 @@ static void y210le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, } static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *unused, void *opq) { int i; const uint16_t *src = (const uint16_t *)_src; @@ -579,7 +588,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused } static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1, - const uint8_t *_src2, int width, uint32_t *unused) + const uint8_t *_src2, int width, uint32_t *unused, void *opq) { int i; const uint16_t *src1 = (const uint16_t *)_src1, @@ -592,7 +601,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, } static void read_ya16le_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) @@ -600,7 +609,7 @@ static void read_ya16le_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t * } static void read_ya16le_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) @@ -608,7 +617,7 @@ static void read_ya16le_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t } static void read_ya16be_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) @@ -616,7 +625,7 @@ static void read_ya16be_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t * } static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - 
uint32_t *unused) + uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) @@ -624,7 +633,7 @@ static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t } static void read_ayuv64le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, - uint32_t *unused2) + uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) @@ -633,7 +642,7 @@ static void read_ayuv64le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *u static void read_ayuv64le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, - const uint8_t *unused1, int width, uint32_t *unused2) + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) { @@ -643,7 +652,7 @@ static void read_ayuv64le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unus } static void read_ayuv64le_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, - uint32_t *unused2) + uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) @@ -651,7 +660,7 @@ static void read_ayuv64le_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *u } static void read_vuya_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src, - const uint8_t *unused1, int width, uint32_t *unused2) + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) { @@ -661,7 +670,7 @@ static void read_vuya_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, } static void read_vuya_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, - uint32_t *unused2) + uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) @@ -669,7 +678,7 @@ static void read_vuya_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unuse } static void read_vuya_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width, - uint32_t 
*unused2) + uint32_t *unused2, void *opq) { int i; for (i = 0; i < width; i++) @@ -679,7 +688,7 @@ static void read_vuya_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unuse /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src + 1, ...) would have 100% unaligned accesses. */ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *unused) + uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) @@ -687,7 +696,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, } static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) + const uint8_t *src2, int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -709,20 +718,20 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) + int width, uint32_t *unused, void *opq) { nvXXtoUV_c(dstU, dstV, src1, width); } static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) + int width, uint32_t *unused, void *opq) { nvXXtoUV_c(dstV, dstU, src1, width); } static void p010LEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -731,7 +740,7 @@ static void p010LEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1 } static void p010BEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) + const uint8_t *unused2, int width, uint32_t *unused, void *opq) { int i; for (i 
= 0; i < width; i++) { @@ -741,7 +750,7 @@ static void p010BEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1 static void p010LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) + int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -751,8 +760,8 @@ static void p010LEToUV_c(uint8_t *dstU, uint8_t *dstV, } static void p010BEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -762,8 +771,8 @@ static void p010BEToUV_c(uint8_t *dstU, uint8_t *dstV, } static void p016LEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -773,8 +782,8 @@ static void p016LEToUV_c(uint8_t *dstU, uint8_t *dstV, } static void p016BEToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused, void *opq) { int i; for (i = 0; i < width; i++) { @@ -786,7 +795,7 @@ static void p016BEToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? 
AV_RB16(pos) : AV_RL16(pos)) static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, - int width, uint32_t *rgb2yuv) + int width, uint32_t *rgb2yuv, void *opq) { int16_t *dst = (int16_t *)_dst; int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; @@ -801,7 +810,7 @@ static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1 } static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) + const uint8_t *src2, int width, uint32_t *rgb2yuv, void *opq) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; @@ -820,7 +829,7 @@ static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, } static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) + const uint8_t *src2, int width, uint32_t *rgb2yuv, void *opq) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; @@ -839,7 +848,7 @@ static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unus } static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, - uint32_t *rgb2yuv) + uint32_t *rgb2yuv, void *opq) { int16_t *dst = (int16_t *)_dst; int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; @@ -854,7 +863,7 @@ static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1 } static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) + const uint8_t *src2, int width, uint32_t *rgb2yuv, void *opq) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; @@ -873,7 +882,7 @@ static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, } static void 
rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *rgb2yuv) + const uint8_t *src2, int width, uint32_t *rgb2yuv, void *opq) { int16_t *dstU = (int16_t *)_dstU; int16_t *dstV = (int16_t *)_dstV; @@ -891,7 +900,7 @@ static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unus } } -static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv) +static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv, void *opq) { uint16_t *dst = (uint16_t *)_dst; int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; @@ -905,7 +914,7 @@ static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int } } -static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused) +static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused, void *opq) { uint16_t *dst = (uint16_t *)_dst; int i; @@ -913,7 +922,7 @@ static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int dst[i] = src[3][i] << 6; } -static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv) +static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv, void *opq) { uint16_t *dstU = (uint16_t *)_dstU; uint16_t *dstV = (uint16_t *)_dstV; @@ -1049,24 +1058,27 @@ static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ - int w, int32_t *rgb2yuv) \ + int w, int32_t *rgb2yuv, void *opq) \ { \ planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv); \ } \ static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src[4], int w, 
int32_t *rgb2yuv) \ + const uint8_t *src[4], int w, int32_t *rgb2yuv, \ + void *opq) \ { \ planar_rgb16_to_uv(dstU, dstV, src, w, nbits, endian, rgb2yuv); \ } \ #define rgb9plus_planar_transparency_funcs(nbits) \ static void planar_rgb##nbits##le_to_a(uint8_t *dst, const uint8_t *src[4], \ - int w, int32_t *rgb2yuv) \ + int w, int32_t *rgb2yuv, \ + void *opq) \ { \ planar_rgb16_to_a(dst, src, w, nbits, 0, rgb2yuv); \ } \ static void planar_rgb##nbits##be_to_a(uint8_t *dst, const uint8_t *src[4], \ - int w, int32_t *rgb2yuv) \ + int w, int32_t *rgb2yuv, \ + void *opq) \ { \ planar_rgb16_to_a(dst, src, w, nbits, 1, rgb2yuv); \ } @@ -1087,23 +1099,24 @@ rgb9plus_planar_transparency_funcs(16) #define rgbf32_planar_funcs_endian(endian_name, endian) \ static void planar_rgbf32##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ - int w, int32_t *rgb2yuv) \ + int w, int32_t *rgb2yuv, void *opq) \ { \ planar_rgbf32_to_y(dst, src, w, endian, rgb2yuv); \ } \ static void planar_rgbf32##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src[4], int w, int32_t *rgb2yuv) \ + const uint8_t *src[4], int w, int32_t *rgb2yuv, \ + void *opq) \ { \ planar_rgbf32_to_uv(dstU, dstV, src, w, endian, rgb2yuv); \ } \ static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4], \ - int w, int32_t *rgb2yuv) \ + int w, int32_t *rgb2yuv, void *opq) \ { \ planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \ } \ static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, \ const uint8_t *unused1, const uint8_t *unused2, \ - int width, uint32_t *unused) \ + int width, uint32_t *unused, void *opq) \ { \ grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); \ } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index e118b54457..9ab542933f 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -559,26 +559,31 @@ typedef struct SwsContext { yuv2packedX_fn yuv2packedX; 
yuv2anyX_fn yuv2anyX; + /// Opaque data pointer passed to all input functions. + void *input_opaque; + /// Unscaled conversion of luma plane to YV12 for horizontal scaler. void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); + int width, uint32_t *pal, void *opq); /// Unscaled conversion of alpha plane to YV12 for horizontal scaler. void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); + int width, uint32_t *pal, void *opq); /// Unscaled conversion of chroma planes to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); + int width, uint32_t *pal, void *opq); /** * Functions to read planar input, such as planar RGB, and convert * internally to Y/UV/A. */ /** @{ */ - void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); + void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv, + void *opq); void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], - int width, int32_t *rgb2yuv); - void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv); + int width, int32_t *rgb2yuv, void *opq); + void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv, + void *opq); /** @} */ /** diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 628f12137c..270798ba3d 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -299,13 +299,13 @@ VSCALE_FUNCS(avx, avx); #define INPUT_Y_FUNC(fmt, opt) \ void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ const uint8_t *unused1, const uint8_t *unused2, \ - int w, uint32_t *unused) + int w, uint32_t *unused, void *opq) #define INPUT_UV_FUNC(fmt, opt) \ void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t 
*dstV, \ const uint8_t *unused0, \ const uint8_t *src1, \ const uint8_t *src2, \ - int w, uint32_t *unused) + int w, uint32_t *unused, void *opq) #define INPUT_FUNC(fmt, opt) \ INPUT_Y_FUNC(fmt, opt); \ INPUT_UV_FUNC(fmt, opt) @@ -373,15 +373,18 @@ YUV2GBRP_DECL(avx2); #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \ void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \ - const uint8_t *src[4], int w, int32_t *rgb2yuv) + const uint8_t *src[4], int w, int32_t *rgb2yuv, \ + void *opq) #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \ void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src[4], int w, int32_t *rgb2yuv) + const uint8_t *src[4], int w, int32_t *rgb2yuv, \ + void *opq) #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \ void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \ - const uint8_t *src[4], int w, int32_t *rgb2yuv) + const uint8_t *src[4], int w, int32_t *rgb2yuv, \ + void *opq) #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \ -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler ` (4 preceding siblings ...) 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 6/7] swscale: add opaque parameter to input functions Timo Rothenpieler @ 2022-08-14 16:48 ` Timo Rothenpieler 2022-08-14 23:15 ` Leo Izen 2022-08-18 15:37 ` [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler 6 siblings, 1 reply; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-14 16:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Timo Rothenpieler This is by no means perfect, since at least ddagrab will return scRGB data with values outside of 0.0f to 1.0f for HDR values. Its primary purpose is to be able to work with the format at all. --- libavutil/Makefile | 1 + libswscale/half2float.c | 19 +++++ libswscale/input.c | 130 ++++++++++++++++++++++++++++++++++ libswscale/slice.c | 9 ++- libswscale/swscale_internal.h | 10 +++ libswscale/utils.c | 2 + libswscale/version.h | 2 +- 7 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 libswscale/half2float.c diff --git a/libavutil/Makefile b/libavutil/Makefile index 3d9c07aea8..1aac1a4cc0 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -131,6 +131,7 @@ OBJS = adler32.o \ float_dsp.o \ fixed_dsp.o \ frame.o \ + half2float.o \ hash.o \ hdr_dynamic_metadata.o \ hdr_dynamic_vivid_metadata.o \ diff --git a/libswscale/half2float.c b/libswscale/half2float.c new file mode 100644 index 0000000000..1b023f96a5 --- /dev/null +++ b/libswscale/half2float.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/half2float.c" diff --git a/libswscale/input.c b/libswscale/input.c index 36ef1e43ac..1077d01e91 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1124,6 +1124,112 @@ static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, rgbf32_planar_funcs_endian(le, 0) rgbf32_planar_funcs_endian(be, 1) +#define rdpx(src) av_int2float(half2float(is_be ? AV_RB16(&src) : AV_RL16(&src), h2f_tbl)) + +static av_always_inline void rgbaf16ToUV_half_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + const uint16_t *src, int width, + int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+0]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+4]), 0.0f, 65535.0f))) >> 1; + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+1]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+5]), 0.0f, 65535.0f))) >> 1; + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*8+2]), 0.0f, 65535.0f)) + + lrintf(av_clipf(65535.0f * rdpx(src[i*8+6]), 0.0f, 65535.0f))) >> 1; + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToUV_endian(uint16_t *dstU, uint16_t *dstV, int is_be, + 
const uint16_t *src, int width, + int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f)); + + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToY_endian(uint16_t *dst, const uint16_t *src, int is_be, + int width, int32_t *rgb2yuv, Half2FloatTables *h2f_tbl) +{ + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; + int i; + for (i = 0; i < width; i++) { + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*4+0]), 0.0f, 65535.0f)); + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*4+1]), 0.0f, 65535.0f)); + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*4+2]), 0.0f, 65535.0f)); + + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void rgbaf16ToA_endian(uint16_t *dst, const uint16_t *src, int is_be, + int width, Half2FloatTables *h2f_tbl) +{ + int i; + for (i=0; i<width; i++) { + dst[i] = lrintf(av_clipf(65535.0f * rdpx(src[i*4+3]), 0.0f, 65535.0f)); + } +} + +#undef rdpx + +#define rgbaf16_funcs_endian(endian_name, endian) \ +static void rgbaf16##endian_name##ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *_rgb2yuv, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + int32_t *rgb2yuv = (int32_t*)_rgb2yuv; \ + av_assert1(src1==src2); \ + 
rgbaf16ToUV_half_endian(dstU, dstV, endian, src, width, rgb2yuv, opq); \ +} \ +static void rgbaf16##endian_name##ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *_rgb2yuv, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)src1; \ + uint16_t *dstU = (uint16_t*)_dstU; \ + uint16_t *dstV = (uint16_t*)_dstV; \ + int32_t *rgb2yuv = (int32_t*)_rgb2yuv; \ + av_assert1(src1==src2); \ + rgbaf16ToUV_endian(dstU, dstV, endian, src, width, rgb2yuv, opq); \ +} \ +static void rgbaf16##endian_name##ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t *unused1, int width, uint32_t *_rgb2yuv, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + int32_t *rgb2yuv = (int32_t*)_rgb2yuv; \ + rgbaf16ToY_endian(dst, src, endian, width, rgb2yuv, opq); \ +} \ +static void rgbaf16##endian_name##ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, \ + const uint8_t *unused1, int width, uint32_t *unused2, void *opq) \ +{ \ + const uint16_t *src = (const uint16_t*)_src; \ + uint16_t *dst = (uint16_t*)_dst; \ + rgbaf16ToA_endian(dst, src, endian, width, opq); \ +} + +rgbaf16_funcs_endian(le, 0) +rgbaf16_funcs_endian(be, 1) + av_cold void ff_sws_init_input_funcs(SwsContext *c) { enum AVPixelFormat srcFormat = c->srcFormat; @@ -1388,6 +1494,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->chrToYV12 = bgr30leToUV_half_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->chrToYV12 = rgbaf16beToUV_half_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->chrToYV12 = rgbaf16leToUV_half_c; + break; } } else { switch (srcFormat) { @@ -1475,6 +1587,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->chrToYV12 = bgr30leToUV_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->chrToYV12 = rgbaf16beToUV_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->chrToYV12 = 
rgbaf16leToUV_c; + break; } } @@ -1763,6 +1881,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_X2BGR10LE: c->lumToYV12 = bgr30leToY_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->lumToYV12 = rgbaf16beToY_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->lumToYV12 = rgbaf16leToY_c; + break; } if (c->needAlpha) { if (is16BPS(srcFormat) || isNBPS(srcFormat)) { @@ -1782,6 +1906,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case AV_PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; + case AV_PIX_FMT_RGBAF16BE: + c->alpToYV12 = rgbaf16beToA_c; + break; + case AV_PIX_FMT_RGBAF16LE: + c->alpToYV12 = rgbaf16leToA_c; + break; case AV_PIX_FMT_YA8: c->alpToYV12 = uyvyToY_c; break; diff --git a/libswscale/slice.c b/libswscale/slice.c index b3ee06d632..db1c696727 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ -282,7 +282,13 @@ int ff_init_filters(SwsContext * c) c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0); c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 
1 : 0); - + if (isFloat16(c->srcFormat)) { + c->h2f_tables = av_malloc(sizeof(*c->h2f_tables)); + if (!c->h2f_tables) + return AVERROR(ENOMEM); + ff_init_half2float_tables(c->h2f_tables); + c->input_opaque = c->h2f_tables; + } c->desc = av_calloc(c->numDesc, sizeof(*c->desc)); if (!c->desc) @@ -393,5 +399,6 @@ int ff_free_filters(SwsContext *c) free_slice(&c->slice[i]); av_freep(&c->slice); } + av_freep(&c->h2f_tables); return 0; } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 9ab542933f..6c14ce8536 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -35,6 +35,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/slicethread.h" #include "libavutil/ppc/util_altivec.h" +#include "libavutil/half2float.h" #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long @@ -679,6 +680,8 @@ typedef struct SwsContext { unsigned int dst_slice_align; atomic_int stride_unaligned_warned; atomic_int data_unaligned_warned; + + Half2FloatTables *h2f_tables; } SwsContext; //FIXME check init (where 0) @@ -840,6 +843,13 @@ static av_always_inline int isFloat(enum AVPixelFormat pix_fmt) return desc->flags & AV_PIX_FMT_FLAG_FLOAT; } +static av_always_inline int isFloat16(enum AVPixelFormat pix_fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + av_assert0(desc); + return (desc->flags & AV_PIX_FMT_FLAG_FLOAT) && desc->comp[0].depth == 16; +} + static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); diff --git a/libswscale/utils.c b/libswscale/utils.c index 34503e57f4..81646c0d73 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -259,6 +259,8 @@ static const FormatEntry format_entries[] = { [AV_PIX_FMT_P416LE] = { 1, 1 }, [AV_PIX_FMT_NV16] = { 1, 1 }, [AV_PIX_FMT_VUYA] = { 1, 1 }, + [AV_PIX_FMT_RGBAF16BE] = { 1, 0 }, + [AV_PIX_FMT_RGBAF16LE] = { 1, 0 }, }; int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, 
diff --git a/libswscale/version.h b/libswscale/version.h index 3193562d18..d8694bb5c0 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 102 +#define LIBSWSCALE_VERSION_MICRO 103 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support Timo Rothenpieler @ 2022-08-14 23:15 ` Leo Izen 2022-08-15 4:15 ` Andreas Rheinhardt 0 siblings, 1 reply; 12+ messages in thread From: Leo Izen @ 2022-08-14 23:15 UTC (permalink / raw) To: ffmpeg-devel On 8/14/22 12:48, Timo Rothenpieler wrote: > This is by no means perfect, since at least ddagrab will return scRGB > data with values outside of 0.0f to 1.0f for HDR values. > Its primary purpose is to be able to work with the format at all. > --- > libavutil/Makefile | 1 + > libswscale/half2float.c | 19 +++++ > libswscale/input.c | 130 ++++++++++++++++++++++++++++++++++ > libswscale/slice.c | 9 ++- > libswscale/swscale_internal.h | 10 +++ > libswscale/utils.c | 2 + > libswscale/version.h | 2 +- > 7 files changed, 171 insertions(+), 2 deletions(-) > create mode 100644 libswscale/half2float.c > > diff --git a/libavutil/Makefile b/libavutil/Makefile > index 3d9c07aea8..1aac1a4cc0 100644 > --- a/libavutil/Makefile > +++ b/libavutil/Makefile > @@ -131,6 +131,7 @@ OBJS = adler32.o \ > float_dsp.o \ > fixed_dsp.o \ > frame.o \ > + half2float.o \ > hash.o \ > hdr_dynamic_metadata.o \ > hdr_dynamic_vivid_metadata.o \ This .o Makefile addition appears in patch 7/7, but the actual creation of the .c file is in patch 4/7. Is this intentional? Because if it is, I am a bit confused. - Leo Izen (thebombzen) _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support 2022-08-14 23:15 ` Leo Izen @ 2022-08-15 4:15 ` Andreas Rheinhardt 0 siblings, 0 replies; 12+ messages in thread From: Andreas Rheinhardt @ 2022-08-15 4:15 UTC (permalink / raw) To: ffmpeg-devel Leo Izen: > On 8/14/22 12:48, Timo Rothenpieler wrote: >> This is by no means perfect, since at least ddagrab will return scRGB >> data with values outside of 0.0f to 1.0f for HDR values. >> Its primary purpose is to be able to work with the format at all. >> --- >> libavutil/Makefile | 1 + >> libswscale/half2float.c | 19 +++++ >> libswscale/input.c | 130 ++++++++++++++++++++++++++++++++++ >> libswscale/slice.c | 9 ++- >> libswscale/swscale_internal.h | 10 +++ >> libswscale/utils.c | 2 + >> libswscale/version.h | 2 +- >> 7 files changed, 171 insertions(+), 2 deletions(-) >> create mode 100644 libswscale/half2float.c >> >> diff --git a/libavutil/Makefile b/libavutil/Makefile >> index 3d9c07aea8..1aac1a4cc0 100644 >> --- a/libavutil/Makefile >> +++ b/libavutil/Makefile >> @@ -131,6 +131,7 @@ OBJS = >> adler32.o \ >> >> float_dsp.o \ >> >> fixed_dsp.o \ >> >> frame.o \ >> + >> half2float.o \ >> >> hash.o \ >> >> hdr_dynamic_metadata.o \ >> >> hdr_dynamic_vivid_metadata.o \ > > This .o Makefile addition appears in patch 7/7, but the actual creation > of the .c file is in patch 4/7. Is this intentional? Because if it is, I > am a bit confused. > If I am not mistaken, libavutil itself does not make use of these; so it should not be added to the libavutil Makefile at all; instead it should be added to the libswscale one, because it is libswscale that (with this patch) makes use of it. Because of this, this patchset should lead to linking errors when creating shared libs. 
- Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler ` (5 preceding siblings ...) 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support Timo Rothenpieler @ 2022-08-18 15:37 ` Timo Rothenpieler 2022-08-19 21:21 ` Timo Rothenpieler 6 siblings, 1 reply; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-18 15:37 UTC (permalink / raw) To: ffmpeg-devel I plan to push this soon. The half2float.o entry has been moved to the correct Makefile (libswscale's rather than libavutil's); otherwise, nothing else has changed in the meantime. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil 2022-08-18 15:37 ` [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler @ 2022-08-19 21:21 ` Timo Rothenpieler 0 siblings, 0 replies; 12+ messages in thread From: Timo Rothenpieler @ 2022-08-19 21:21 UTC (permalink / raw) To: ffmpeg-devel On 18.08.2022 17:37, Timo Rothenpieler wrote: > I plan to push this soon. > The half2float.o entry has been moved to the correct Makefile, otherwise > nothing else changed in the meantime. applied _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2022-08-19 21:21 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-08-14 16:48 [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 2/7] avutil/half2float: adjust conversion of NaN Timo Rothenpieler 2022-08-18 8:51 ` Tomas Härdin 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 3/7] avutil/half2float: move tables to header-internal structs Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 4/7] avutil/half2float: move non-inline init code out of header Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 5/7] avutil/half2float: use native _Float16 if available Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 6/7] swscale: add opaque parameter to input functions Timo Rothenpieler 2022-08-14 16:48 ` [FFmpeg-devel] [PATCH v2 7/7] swscale/input: add rgbaf16 input support Timo Rothenpieler 2022-08-14 23:15 ` Leo Izen 2022-08-15 4:15 ` Andreas Rheinhardt 2022-08-18 15:37 ` [FFmpeg-devel] [PATCH v2 1/7] avutil: move half-precision float helper to avutil Timo Rothenpieler 2022-08-19 21:21 ` Timo Rothenpieler
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git