* [FFmpeg-devel] [PATCH v2 2/2] swscale/output: Don't call av_pix_fmt_desc_get() in a loop
2022-09-16 14:52 [FFmpeg-devel] [PATCH v2 1/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
@ 2022-09-16 14:55 ` Andreas Rheinhardt
2022-09-19 14:34 ` Michael Niedermayer
2022-09-19 14:32 ` [FFmpeg-devel] [PATCH v2 1/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
1 sibling, 1 reply; 5+ messages in thread
From: Andreas Rheinhardt @ 2022-09-16 14:55 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Andreas Rheinhardt
Up until now, libswscale/output.c used a macro to write
an output pixel which involved a call to av_pix_fmt_desc_get()
to find out whether the output pixel format is BE or LE
despite this being known at compile-time (there are templates
per pixfmt). Even worse, these calls are made in a loop,
so that e.g. there are eight calls to av_pix_fmt_desc_get()
for every pixel processed in yuv2rgba64_X_c_template()
for 64bit RGB formats.
This commit modifies these macros to ensure that isBE()
is evaluated at compile-time. This saved 41184B of .text
for me (GCC 11.2, -O3). Of course, it also improved performance.
E.g. for ffmpeg_g -f lavfi -i testsrc2,format=yuva420p -pix_fmt rgba64le \
-threads 1 -t 1:00 -f null - (which uses yuv2rgba64le_X_c,
which is an invocation of yuv2rgba64_X_c_template() mentioned above),
performance improved from 95589 to 41387 decicycles for one call
to yuv2packedX; for the be variant the numbers went down from
76087 to 43024 decicycles.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswscale/output.c | 100 +++++++++++++++++++++++++-------------------
1 file changed, 58 insertions(+), 42 deletions(-)
diff --git a/libswscale/output.c b/libswscale/output.c
index 2f599698e9..0e1c1225a0 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -133,6 +133,11 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
};
#endif
+#define IS_BE_LE 0
+#define IS_BE_BE 1
+/* ENDIAN_IDENTIFIER needs to be "BE" or "LE". */
+#define IS_BE(ENDIAN_IDENTIFIER) IS_BE_ ## ENDIAN_IDENTIFIER
+
#define output_pixel(pos, val, bias, signedness) \
if (big_endian) { \
AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
@@ -935,7 +940,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
#define output_pixel(pos, val) \
- if (isBE(target)) { \
+ if (is_be) { \
AV_WB16(pos, val); \
} else { \
AV_WL16(pos, val); \
@@ -947,7 +952,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int32_t **unused_chrUSrc,
const int32_t **unused_chrVSrc, int unused_chrFilterSize,
const int32_t **alpSrc, uint16_t *dest, int dstW,
- int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+ int y, enum AVPixelFormat target,
+ int unused_hasAlpha, int unused_eightbytes, int is_be)
{
int hasAlpha = !!alpSrc;
int i;
@@ -984,7 +990,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
const int32_t *abuf[2], uint16_t *dest, int dstW,
int yalpha, int unused_uvalpha, int y,
- enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+ enum AVPixelFormat target, int unused_hasAlpha,
+ int unused_eightbytes, int is_be)
{
int hasAlpha = abuf && abuf[0] && abuf[1];
const int32_t *buf0 = buf[0], *buf1 = buf[1],
@@ -1015,7 +1022,8 @@ static av_always_inline void
yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
const int32_t *abuf0, uint16_t *dest, int dstW,
- int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+ int unused_uvalpha, int y, enum AVPixelFormat target,
+ int unused_hasAlpha, int unused_eightbytes, int is_be)
{
int hasAlpha = !!abuf0;
int i;
@@ -1043,7 +1051,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int32_t **chrUSrc,
const int32_t **chrVSrc, int chrFilterSize,
const int32_t **alpSrc, uint16_t *dest, int dstW,
- int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+ int y, enum AVPixelFormat target, int hasAlpha, int eightbytes,
+ int is_be)
{
int i;
int A1 = 0xffff<<14, A2 = 0xffff<<14;
@@ -1124,7 +1133,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
const int32_t *ubuf[2], const int32_t *vbuf[2],
const int32_t *abuf[2], uint16_t *dest, int dstW,
int yalpha, int uvalpha, int y,
- enum AVPixelFormat target, int hasAlpha, int eightbytes)
+ enum AVPixelFormat target, int hasAlpha, int eightbytes,
+ int is_be)
{
const int32_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -1188,7 +1198,8 @@ static av_always_inline void
yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
const int32_t *ubuf[2], const int32_t *vbuf[2],
const int32_t *abuf0, uint16_t *dest, int dstW,
- int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+ int uvalpha, int y, enum AVPixelFormat target,
+ int hasAlpha, int eightbytes, int is_be)
{
const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
@@ -1293,7 +1304,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int32_t **chrUSrc,
const int32_t **chrVSrc, int chrFilterSize,
const int32_t **alpSrc, uint16_t *dest, int dstW,
- int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+ int y, enum AVPixelFormat target, int hasAlpha,
+ int eightbytes, int is_be)
{
int i;
int A = 0xffff<<14;
@@ -1356,7 +1368,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
const int32_t *ubuf[2], const int32_t *vbuf[2],
const int32_t *abuf[2], uint16_t *dest, int dstW,
int yalpha, int uvalpha, int y,
- enum AVPixelFormat target, int hasAlpha, int eightbytes)
+ enum AVPixelFormat target, int hasAlpha, int eightbytes,
+ int is_be)
{
const int32_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -1407,7 +1420,8 @@ static av_always_inline void
yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
const int32_t *ubuf[2], const int32_t *vbuf[2],
const int32_t *abuf0, uint16_t *dest, int dstW,
- int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+ int uvalpha, int y, enum AVPixelFormat target,
+ int hasAlpha, int eightbytes, int is_be)
{
const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
@@ -1484,7 +1498,7 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
#undef r_b
#undef b_r
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
+#define YUV2PACKED16WRAPPER_EXT(name, base, ext, fmt, is_be, hasAlpha, eightbytes) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
const int16_t **_lumSrc, int lumFilterSize, \
const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -1499,7 +1513,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
uint16_t *dest = (uint16_t *) _dest; \
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
- alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
+ alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes, is_be); \
} \
\
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
@@ -1513,7 +1527,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
**abuf = (const int32_t **) _abuf; \
uint16_t *dest = (uint16_t *) _dest; \
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
- dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
+ dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \
} \
\
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
@@ -1527,36 +1541,38 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
*abuf0 = (const int32_t *) _abuf0; \
uint16_t *dest = (uint16_t *) _dest; \
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
- dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
+ dstW, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \
}
-
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
-YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
-
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
+#define YUV2PACKED16WRAPPER(name, base, ext, base_fmt, endianness, hasAlpha, eightbytes) \
+ YUV2PACKED16WRAPPER_EXT(name, base, ext, base_fmt ## endianness, IS_BE(endianness), hasAlpha, eightbytes)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64, LE, 0, 1)
/*
* Write out 2 RGB pixels in the target pixel format. This function takes a
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 1/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
2022-09-16 14:52 [FFmpeg-devel] [PATCH v2 1/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
2022-09-16 14:55 ` [FFmpeg-devel] [PATCH v2 2/2] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
@ 2022-09-19 14:32 ` Andreas Rheinhardt
1 sibling, 0 replies; 5+ messages in thread
From: Andreas Rheinhardt @ 2022-09-19 14:32 UTC (permalink / raw)
To: ffmpeg-devel
Andreas Rheinhardt:
> Up until now, libswscale/input.c used a macro to read
> an input pixel which involved a call to av_pix_fmt_desc_get()
> to find out whether the input pixel format is BE or LE
> despite this being known at compile-time (there are templates
> per pixfmt). Even worse, these calls are made in a loop,
> so that e.g. there are six calls to av_pix_fmt_desc_get()
> for every pair of UV pixels processed in
> rgb64ToUV_half_c_template().
>
> This commit modifies these macros to ensure that isBE()
> is evaluated at compile-time. This saved 9743B of .text
> for me (GCC 11.2, -O3). For a simple RGB64LE->YUV420P
> transformation like
> ffmpeg -f lavfi -i haldclutsrc,format=rgba64le -pix_fmt yuv420p \
> -threads 1 -t 1:00 -f null -
> the amount of decicycles spent in rgb64LEToUV_half_c
> (which is created via the template mentioned above)
> decreases from 19751 to 5341; for RGBA64BE the number
> went down from 11945 to 5393. For shared builds (where
> the call to av_pix_fmt_desc_get() is indirect) the old numbers
> are 15230 for RGBA64BE and 27502 for RGBA64LE, whereas
> the numbers with this patch are indistinguishable from
> the numbers from a static build.
>
> Also make the macros that are touched conform to the
> usual convention of using uppercase names while at it.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> libswscale/input.c | 122 +++++++++++++++++++++++++--------------------
> 1 file changed, 68 insertions(+), 54 deletions(-)
>
> diff --git a/libswscale/input.c b/libswscale/input.c
> index 88e318e664..7ff7bfaa01 100644
> --- a/libswscale/input.c
> +++ b/libswscale/input.c
> @@ -28,14 +28,21 @@
> #include "config.h"
> #include "swscale_internal.h"
>
> -#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
> +#define input_pixel(pos) (is_be ? AV_RB16(pos) : AV_RL16(pos))
> +
> +#define IS_BE_LE 0
> +#define IS_BE_BE 1
> +#define IS_BE_ 0
> +/* ENDIAN_IDENTIFIER needs to be "BE", "LE" or "". The latter is intended
> + * for single-byte cases where the concept of endianness does not apply. */
> +#define IS_BE(ENDIAN_IDENTIFIER) IS_BE_ ## ENDIAN_IDENTIFIER
>
> #define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b)
> #define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r)
>
> static av_always_inline void
> rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
> - enum AVPixelFormat origin, int32_t *rgb2yuv)
> + enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
> {
> int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
> int i;
> @@ -51,7 +58,7 @@ rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
> static av_always_inline void
> rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
> const uint16_t *src1, const uint16_t *src2,
> - int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
> + int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
> {
> int i;
> int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
> @@ -70,7 +77,7 @@ rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
> static av_always_inline void
> rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
> const uint16_t *src1, const uint16_t *src2,
> - int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
> + int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
> {
> int i;
> int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
> @@ -86,13 +93,13 @@ rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
> }
> }
>
> -#define rgb64funcs(pattern, BE_LE, origin) \
> +#define RGB64FUNCS_EXT(pattern, BE_LE, origin, is_be) \
> static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
> int width, uint32_t *rgb2yuv, void *opq) \
> { \
> const uint16_t *src = (const uint16_t *) _src; \
> uint16_t *dst = (uint16_t *) _dst; \
> - rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
> + rgb64ToY_c_template(dst, src, width, origin, rgb2yuv, is_be); \
> } \
> \
> static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
> @@ -102,7 +109,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
> const uint16_t *src1 = (const uint16_t *) _src1, \
> *src2 = (const uint16_t *) _src2; \
> uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
> - rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
> + rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, is_be); \
> } \
> \
> static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
> @@ -112,18 +119,20 @@ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV
> const uint16_t *src1 = (const uint16_t *) _src1, \
> *src2 = (const uint16_t *) _src2; \
> uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
> - rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
> + rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, is_be); \
> }
> +#define RGB64FUNCS(pattern, endianness, base_fmt) \
> + RGB64FUNCS_EXT(pattern, endianness, base_fmt ## endianness, IS_BE(endianness))
>
> -rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
> -rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE)
> -rgb64funcs(bgr, LE, AV_PIX_FMT_BGRA64LE)
> -rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE)
> +RGB64FUNCS(rgb, LE, AV_PIX_FMT_RGBA64)
> +RGB64FUNCS(rgb, BE, AV_PIX_FMT_RGBA64)
> +RGB64FUNCS(bgr, LE, AV_PIX_FMT_BGRA64)
> +RGB64FUNCS(bgr, BE, AV_PIX_FMT_BGRA64)
>
> static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
> const uint16_t *src, int width,
> enum AVPixelFormat origin,
> - int32_t *rgb2yuv)
> + int32_t *rgb2yuv, int is_be)
> {
> int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
> int i;
> @@ -142,7 +151,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
> const uint16_t *src2,
> int width,
> enum AVPixelFormat origin,
> - int32_t *rgb2yuv)
> + int32_t *rgb2yuv, int is_be)
> {
> int i;
> int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
> @@ -164,7 +173,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
> const uint16_t *src2,
> int width,
> enum AVPixelFormat origin,
> - int32_t *rgb2yuv)
> + int32_t *rgb2yuv, int is_be)
> {
> int i;
> int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
> @@ -187,7 +196,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
> #undef b
> #undef input_pixel
>
> -#define rgb48funcs(pattern, BE_LE, origin) \
> +#define RGB48FUNCS_EXT(pattern, BE_LE, origin, is_be) \
> static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
> const uint8_t *_src, \
> const uint8_t *unused0, const uint8_t *unused1,\
> @@ -197,7 +206,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
> { \
> const uint16_t *src = (const uint16_t *)_src; \
> uint16_t *dst = (uint16_t *)_dst; \
> - rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \
> + rgb48ToY_c_template(dst, src, width, origin, rgb2yuv, is_be); \
> } \
> \
> static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
> @@ -213,7 +222,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
> *src2 = (const uint16_t *)_src2; \
> uint16_t *dstU = (uint16_t *)_dstU, \
> *dstV = (uint16_t *)_dstV; \
> - rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
> + rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, is_be); \
> } \
> \
> static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
> @@ -229,13 +238,15 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
> *src2 = (const uint16_t *)_src2; \
> uint16_t *dstU = (uint16_t *)_dstU, \
> *dstV = (uint16_t *)_dstV; \
> - rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
> + rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, is_be); \
> }
> +#define RGB48FUNCS(pattern, endianness, base_fmt) \
> + RGB48FUNCS_EXT(pattern, endianness, base_fmt ## endianness, IS_BE(endianness))
>
> -rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE)
> -rgb48funcs(rgb, BE, AV_PIX_FMT_RGB48BE)
> -rgb48funcs(bgr, LE, AV_PIX_FMT_BGR48LE)
> -rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
> +RGB48FUNCS(rgb, LE, AV_PIX_FMT_RGB48)
> +RGB48FUNCS(rgb, BE, AV_PIX_FMT_RGB48)
> +RGB48FUNCS(bgr, LE, AV_PIX_FMT_BGR48)
> +RGB48FUNCS(bgr, BE, AV_PIX_FMT_BGR48)
>
> #define input_pixel(i) ((origin == AV_PIX_FMT_RGBA || \
> origin == AV_PIX_FMT_BGRA || \
> @@ -245,7 +256,7 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
> : ((origin == AV_PIX_FMT_X2RGB10LE || \
> origin == AV_PIX_FMT_X2BGR10LE) \
> ? AV_RL32(&src[(i) * 4]) \
> - : (isBE(origin) ? AV_RB16(&src[(i) * 2]) \
> + : (is_be ? AV_RB16(&src[(i) * 2]) \
> : AV_RL16(&src[(i) * 2]))))
>
> static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
> @@ -257,7 +268,7 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
> int maskr, int maskg,
> int maskb, int rsh,
> int gsh, int bsh, int S,
> - int32_t *rgb2yuv)
> + int32_t *rgb2yuv, int is_be)
> {
> const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh;
> const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
> @@ -283,7 +294,7 @@ static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
> int maskr, int maskg,
> int maskb, int rsh,
> int gsh, int bsh, int S,
> - int32_t *rgb2yuv)
> + int32_t *rgb2yuv, int is_be)
> {
> const int ru = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
> rv = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh);
> @@ -311,7 +322,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
> int maskr, int maskg,
> int maskb, int rsh,
> int gsh, int bsh, int S,
> - int32_t *rgb2yuv)
> + int32_t *rgb2yuv, int is_be)
> {
> const int ru = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
> rv = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh),
> @@ -345,13 +356,13 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
>
> #undef input_pixel
>
> -#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
> - maskg, maskb, rsh, gsh, bsh, S) \
> +#define RGB16_32FUNCS_EXT(fmt, name, shr, shg, shb, shp, maskr, \
> + maskg, maskb, rsh, gsh, bsh, S, is_be) \
> static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
> int width, uint32_t *tab, void *opq) \
> { \
> rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \
> - maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \
> + maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
> } \
> \
> static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
> @@ -360,7 +371,7 @@ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
> { \
> rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
> shr, shg, shb, shp, \
> - maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\
> + maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
> } \
> \
> static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
> @@ -371,27 +382,32 @@ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
> rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
> shr, shg, shb, shp, \
> maskr, maskg, maskb, \
> - rsh, gsh, bsh, S, tab); \
> -}
> -
> -rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
> -rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
> -rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
> -rgb16_32_wrapper(AV_PIX_FMT_X2RGB10LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
> -rgb16_32_wrapper(AV_PIX_FMT_X2BGR10LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
> + rsh, gsh, bsh, S, tab, is_be); \
> +}
> +
> +#define RGB16_32FUNCS(base_fmt, endianness, name, shr, shg, shb, shp, maskr, \
> + maskg, maskb, rsh, gsh, bsh, S) \
> + RGB16_32FUNCS_EXT(base_fmt ## endianness, name, shr, shg, shb, shp, maskr, \
> + maskg, maskb, rsh, gsh, bsh, S, IS_BE(endianness))
> +
> +RGB16_32FUNCS(AV_PIX_FMT_BGR32, , bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR32_1, , bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB32, , rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB32_1, , rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR565, LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR555, LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR444, LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB565, LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB555, LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB444, LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR565, BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR555, BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7)
> +RGB16_32FUNCS(AV_PIX_FMT_BGR444, BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB565, BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB555, BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
> +RGB16_32FUNCS(AV_PIX_FMT_RGB444, BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
> +RGB16_32FUNCS(AV_PIX_FMT_X2RGB10, LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
> +RGB16_32FUNCS(AV_PIX_FMT_X2BGR10, LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
>
> static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
> const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
> @@ -832,8 +848,6 @@ p01x_wrapper(10, 6)
> p01x_wrapper(12, 4)
> p01x_uv_wrapper(16, 0)
>
> -#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
> -
> static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
> int width, uint32_t *rgb2yuv, void *opq)
> {
Will apply this patchset tomorrow unless there are objections.
- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread