* [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' @ 2022-09-08 2:31 Andreas Rheinhardt 2022-09-08 2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt ` (2 more replies) 0 siblings, 3 replies; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-08 2:31 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Andreas Rheinhardt These macros are definitions, not only declarations and therefore should not contain a semicolon. Such a semicolon is actually spec-incompliant, but compilers happen to accept them. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libswscale/input.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index be8f3940e1..88e318e664 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -583,8 +583,8 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, con AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> shift); \ } -y21xle_wrapper(10, 6); -y21xle_wrapper(12, 4); +y21xle_wrapper(10, 6) +y21xle_wrapper(12, 4) static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused, void *opq) @@ -828,9 +828,9 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, } \ p01x_uv_wrapper(bits, shift) -p01x_wrapper(10, 6); -p01x_wrapper(12, 4); -p01x_uv_wrapper(16, 0); +p01x_wrapper(10, 6) +p01x_wrapper(12, 4) +p01x_uv_wrapper(16, 0) #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt @ 2022-09-08 2:38 ` Andreas Rheinhardt 2022-09-08 17:39 ` Michael Niedermayer 2022-09-08 3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt 2022-09-08 3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale 2 siblings, 1 reply; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-08 2:38 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Andreas Rheinhardt Up until now, libswscale/input.c used a macro to read an input pixel which involved a call to av_pix_fmt_desc_get() to find out whether the input pixel format is BE or LE despite this being known at compile-time (there are templates per pixfmt). Even worse, these calls are made in a loop, so that e.g. there are six calls to av_pix_fmt_desc_get() for every pair of UV pixel processed in rgb64ToUV_half_c_template(). This commit modifies these macros to ensure that isBE() is evaluated at compile-time. This saved 9743B of .text for me (GCC 11.2, -O3). Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- output.c suffers from the same issue (and according to coverage is responsible for quite a large chunk of the calls to av_pix_fmt_desc_get() (144699832 of 321342234)), so I will be looking into this tomorrow. libswscale/input.c | 96 +++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 88e318e664..4144ff81a9 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -28,14 +28,19 @@ #include "config.h" #include "swscale_internal.h" -#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) +#define input_pixel(pos) (is_be ? AV_RB16(pos) : AV_RL16(pos)) + +#define IS_BE_LE 0 +#define IS_BE_BE 1 +#define IS_BE_ 0 +#define IS_BE(BE_LE) IS_BE_ ## BE_LE #define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b) #define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r) static av_always_inline void rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, - enum AVPixelFormat origin, int32_t *rgb2yuv) + enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be) { int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; @@ -51,7 +56,7 @@ rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, static av_always_inline void rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin, int32_t *rgb2yuv) + int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be) { int i; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; @@ -70,7 +75,7 @@ rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, static av_always_inline void rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, const uint16_t *src1, const uint16_t *src2, - int width, enum AVPixelFormat origin, int32_t *rgb2yuv) + int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be) { int i; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; @@ -92,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, { \ const uint16_t *src = (const uint16_t *) _src; \ uint16_t *dst = (uint16_t *) _dst; \ - rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \ + rgb64ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE)); \ } \ \ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ @@ -102,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ + rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \ } \ \ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ @@ -112,7 +117,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV const uint16_t *src1 = (const uint16_t *) _src1, \ *src2 = (const uint16_t *) _src2; \ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ + rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \ } rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE) @@ -123,7 +128,7 @@ rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE) static av_always_inline void rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width, enum AVPixelFormat origin, - int32_t *rgb2yuv) + int32_t *rgb2yuv, int is_be) { int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; int i; @@ -142,7 +147,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU, const uint16_t *src2, int width, enum AVPixelFormat origin, - int32_t *rgb2yuv) + int32_t *rgb2yuv, int is_be) { int i; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; @@ -164,7 +169,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, const uint16_t *src2, int width, enum AVPixelFormat origin, - int32_t *rgb2yuv) + int32_t *rgb2yuv, int is_be) { int i; int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; @@ -197,7 +202,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ { \ const uint16_t *src = (const uint16_t *)_src; \ uint16_t *dst = (uint16_t *)_dst; \ - rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \ + rgb48ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE));\ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ @@ -213,7 +218,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ *src2 = (const uint16_t *)_src2; \ uint16_t *dstU = (uint16_t *)_dstU, \ *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ + rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ @@ -229,7 +234,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ *src2 = (const uint16_t *)_src2; \ uint16_t *dstU = (uint16_t *)_dstU, \ *dstV = (uint16_t *)_dstV; \ - rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \ + rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \ } rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE) @@ -245,7 +250,7 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE) : ((origin == AV_PIX_FMT_X2RGB10LE || \ origin == AV_PIX_FMT_X2BGR10LE) \ ? AV_RL32(&src[(i) * 4]) \ - : (isBE(origin) ? AV_RB16(&src[(i) * 2]) \ + : (is_be ? AV_RB16(&src[(i) * 2]) \ : AV_RL16(&src[(i) * 2])))) static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, @@ -257,7 +262,7 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, int maskr, int maskg, int maskb, int rsh, int gsh, int bsh, int S, - int32_t *rgb2yuv) + int32_t *rgb2yuv, int is_be) { const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh; const unsigned rnd = (32<<((S)-1)) + (1<<(S-7)); @@ -283,7 +288,7 @@ static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU, int maskr, int maskg, int maskb, int rsh, int gsh, int bsh, int S, - int32_t *rgb2yuv) + int32_t *rgb2yuv, int is_be) { const int ru = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh), rv = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh); @@ -311,7 +316,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, int maskr, int maskg, int maskb, int rsh, int gsh, int bsh, int S, - int32_t *rgb2yuv) + int32_t *rgb2yuv, int is_be) { const int ru = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh), rv = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh), @@ -345,13 +350,13 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, #undef input_pixel -#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ - maskg, maskb, rsh, gsh, bsh, S) \ +#define rgb16_32_wrapper_0(fmt, name, shr, shg, shb, shp, maskr, \ + maskg, maskb, rsh, gsh, bsh, S, is_be) \ static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ int width, uint32_t *tab, void *opq) \ { \ rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \ - maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \ + maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \ } \ \ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ @@ -360,7 +365,7 @@ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ { \ rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ - maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\ + maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \ } \ \ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ @@ -371,27 +376,32 @@ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ maskr, maskg, maskb, \ - rsh, gsh, bsh, S, tab); \ -} - -rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) -rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) -rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) -rgb16_32_wrapper(AV_PIX_FMT_X2RGB10LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6) -rgb16_32_wrapper(AV_PIX_FMT_X2BGR10LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6) + rsh, gsh, bsh, S, tab, is_be); \ +} + +#define rgb16_32_wrapper(base_fmt, endianness, name, shr, shg, shb, shp, maskr, \ + maskg, maskb, rsh, gsh, bsh, S) \ + rgb16_32_wrapper_0(base_fmt ## endianness, name, shr, shg, shb, shp, maskr, \ + maskg, maskb, rsh, gsh, bsh, S, IS_BE(endianness)) + +rgb16_32_wrapper(AV_PIX_FMT_BGR32, , bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_BGR32_1, , bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_RGB32, , rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_RGB32_1, , rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_BGR565, LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_BGR555, LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) +rgb16_32_wrapper(AV_PIX_FMT_BGR444, LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) +rgb16_32_wrapper(AV_PIX_FMT_RGB565, LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_RGB555, LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) +rgb16_32_wrapper(AV_PIX_FMT_RGB444, LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) +rgb16_32_wrapper(AV_PIX_FMT_BGR565, BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_BGR555, BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT + 7) +rgb16_32_wrapper(AV_PIX_FMT_BGR444, BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT + 4) +rgb16_32_wrapper(AV_PIX_FMT_RGB565, BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT + 8) +rgb16_32_wrapper(AV_PIX_FMT_RGB555, BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) +rgb16_32_wrapper(AV_PIX_FMT_RGB444, BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) +rgb16_32_wrapper(AV_PIX_FMT_X2RGB10, LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6) +rgb16_32_wrapper(AV_PIX_FMT_X2BGR10, LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6) static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, @@ -832,8 +842,6 @@ p01x_wrapper(10, 6) p01x_wrapper(12, 4) p01x_uv_wrapper(16, 0) -#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) - static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *rgb2yuv, void *opq) { -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt @ 2022-09-08 17:39 ` Michael Niedermayer 2022-09-08 19:38 ` Andreas Rheinhardt 0 siblings, 1 reply; 18+ messages in thread From: Michael Niedermayer @ 2022-09-08 17:39 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 1033 bytes --] Hi On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote: > Up until now, libswscale/input.c used a macro to read > an input pixel which involved a call to av_pix_fmt_desc_get() > to find out whether the input pixel format is BE or LE > despite this being known at compile-time (there are templates > per pixfmt). Even worse, these calls are made in a loop, > so that e.g. there are six calls to av_pix_fmt_desc_get() > for every pair of UV pixel processed in > rgb64ToUV_half_c_template(). > > This commit modifies these macros to ensure that isBE() > is evaluated at compile-time. This saved 9743B of .text > for me (GCC 11.2, -O3). hmm, all these functions where supposed to be optimized out why where they not ? iam asking as the code is simpler before your patch if that "optimization out" thing would work thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Everything should be made as simple as possible, but not simpler. -- Albert Einstein [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 17:39 ` Michael Niedermayer @ 2022-09-08 19:38 ` Andreas Rheinhardt 2022-09-08 20:25 ` Michael Niedermayer 0 siblings, 1 reply; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-08 19:38 UTC (permalink / raw) To: ffmpeg-devel Michael Niedermayer: > Hi > > On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote: >> Up until now, libswscale/input.c used a macro to read >> an input pixel which involved a call to av_pix_fmt_desc_get() >> to find out whether the input pixel format is BE or LE >> despite this being known at compile-time (there are templates >> per pixfmt). Even worse, these calls are made in a loop, >> so that e.g. there are six calls to av_pix_fmt_desc_get() >> for every pair of UV pixel processed in >> rgb64ToUV_half_c_template(). >> >> This commit modifies these macros to ensure that isBE() >> is evaluated at compile-time. This saved 9743B of .text >> for me (GCC 11.2, -O3). > > hmm, all these functions where supposed to be optimized out > why where they not ? > > iam asking as the code is simpler before your patch if that > "optimization out" thing would work > Why should these functions be optimized out? What would enable the compiler to optimize them out? (And I really don't see why this patch would make the code more complicated.) - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 19:38 ` Andreas Rheinhardt @ 2022-09-08 20:25 ` Michael Niedermayer 2022-09-08 21:44 ` Andreas Rheinhardt 0 siblings, 1 reply; 18+ messages in thread From: Michael Niedermayer @ 2022-09-08 20:25 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 2688 bytes --] On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > Hi > > > > On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote: > >> Up until now, libswscale/input.c used a macro to read > >> an input pixel which involved a call to av_pix_fmt_desc_get() > >> to find out whether the input pixel format is BE or LE > >> despite this being known at compile-time (there are templates > >> per pixfmt). Even worse, these calls are made in a loop, > >> so that e.g. there are six calls to av_pix_fmt_desc_get() > >> for every pair of UV pixel processed in > >> rgb64ToUV_half_c_template(). > >> > >> This commit modifies these macros to ensure that isBE() > >> is evaluated at compile-time. This saved 9743B of .text > >> for me (GCC 11.2, -O3). > > > > hmm, all these functions where supposed to be optimized out > > why where they not ? > > > > iam asking as the code is simpler before your patch if that > > "optimization out" thing would work > > > > Why should these functions be optimized out? What would enable the > compiler to optimize them out? Going back into the past, there was 6b0768e2021b90215a2ab55ed427bce91d148148 before this the code certainly did get optimized out, it was just #define isBE(x) ((x)&1) thats simple and clean code btw after this it became #define isBE(x) \ + (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE) thats still really good, and very readable, its a const array so one would assume that a compiler can figure that out at compile time well, i try not to think of linking and seperate objects here ;) next it got then replaced by a function and a call that i suspect people thought would be inlined > (And I really don't see why this patch would make the code more > complicated.) the code historically was capable to lookup any flag and detail of a pixel format at compile time now your code works around that not working. Introducing a 2nd system to do this in parallel. To me if i look at the evolution of isBE() / code checking BE-ness it become more messy over time I think it would be interresting to think about if we can make av_pix_fmt_desc_get(compile time constant) work at compile time. or if we maybe can return to a simpler implementation thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Awnsering whenever a program halts or runs forever is On a turing machine, in general impossible (turings halting problem). On any real computer, always possible as a real computer has a finite number of states N, and will either halt in less than N cycles or never halt. [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 20:25 ` Michael Niedermayer @ 2022-09-08 21:44 ` Andreas Rheinhardt 2022-09-09 14:55 ` Michael Niedermayer 2022-09-09 15:52 ` Michael Niedermayer 0 siblings, 2 replies; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-08 21:44 UTC (permalink / raw) To: ffmpeg-devel Michael Niedermayer: > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: >> Michael Niedermayer: >>> Hi >>> >>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote: >>>> Up until now, libswscale/input.c used a macro to read >>>> an input pixel which involved a call to av_pix_fmt_desc_get() >>>> to find out whether the input pixel format is BE or LE >>>> despite this being known at compile-time (there are templates >>>> per pixfmt). Even worse, these calls are made in a loop, >>>> so that e.g. there are six calls to av_pix_fmt_desc_get() >>>> for every pair of UV pixel processed in >>>> rgb64ToUV_half_c_template(). >>>> >>>> This commit modifies these macros to ensure that isBE() >>>> is evaluated at compile-time. This saved 9743B of .text >>>> for me (GCC 11.2, -O3). >>> >>> hmm, all these functions where supposed to be optimized out >>> why where they not ? >>> >>> iam asking as the code is simpler before your patch if that >>> "optimization out" thing would work >>> >> >> Why should these functions be optimized out? What would enable the >> compiler to optimize them out? > > Going back into the past, there was > 6b0768e2021b90215a2ab55ed427bce91d148148 > > before this the code certainly did get optimized out, it was just > #define isBE(x) ((x)&1) > > thats simple and clean code btw I don't really consider such magic numbers to be clean. > after this it became > > #define isBE(x) \ > + (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE) > > thats still really good, and very readable, its a const array so > one would assume that a compiler can figure that out at compile time > well, i try not to think of linking and seperate objects here ;) > > next it got then replaced by a function and a call that i suspect > people thought would be inlined > > >> (And I really don't see why this patch would make the code more >> complicated.) > > the code historically was capable to lookup any flag and detail > of a pixel format at compile time > now your code works around that not working. Introducing a 2nd > system to do this in parallel. I am not introducing a second system, I am reusing the existing system, namely our existing naming system (the fact that we use BE/LE in the name of BE/LE pixel formats). > To me if i look at the evolution > of isBE() / code checking BE-ness it become more messy over time > > I think it would be interresting to think about if we can make > av_pix_fmt_desc_get(compile time constant) work at compile time. > or if we maybe can return to a simpler implementation > We could put the av_pix_fmt_descriptors array into an internal header and use something like static av_always_inline const AVPixFmtDescriptor *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) { if (av_builtin_constant_p(fmt)) return &av_pix_fmt_descriptors[fmt]; return av_pix_fmt_desc_get(fmt); } - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 21:44 ` Andreas Rheinhardt @ 2022-09-09 14:55 ` Michael Niedermayer 2022-09-09 18:15 ` Andreas Rheinhardt 2022-09-09 15:52 ` Michael Niedermayer 1 sibling, 1 reply; 18+ messages in thread From: Michael Niedermayer @ 2022-09-09 14:55 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 1113 bytes --] On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: > >> Michael Niedermayer: [...] > > To me if i look at the evolution > > of isBE() / code checking BE-ness it become more messy over time > > > > I think it would be interresting to think about if we can make > > av_pix_fmt_desc_get(compile time constant) work at compile time. > > or if we maybe can return to a simpler implementation > > > > We could put the av_pix_fmt_descriptors array into an internal header > and use something like > > static av_always_inline const AVPixFmtDescriptor > *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) > { > if (av_builtin_constant_p(fmt)) > return &av_pix_fmt_descriptors[fmt]; > return av_pix_fmt_desc_get(fmt); > } yes thats what i was thinking of too. thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB I do not agree with what you have to say, but I'll defend to the death your right to say it. -- Voltaire [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-09 14:55 ` Michael Niedermayer @ 2022-09-09 18:15 ` Andreas Rheinhardt 2022-09-10 15:29 ` Michael Niedermayer 0 siblings, 1 reply; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-09 18:15 UTC (permalink / raw) To: ffmpeg-devel Michael Niedermayer: > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: >> Michael Niedermayer: >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: >>>> Michael Niedermayer: > [...] >>> To me if i look at the evolution >>> of isBE() / code checking BE-ness it become more messy over time >>> >>> I think it would be interresting to think about if we can make >>> av_pix_fmt_desc_get(compile time constant) work at compile time. >>> or if we maybe can return to a simpler implementation >>> >> >> We could put the av_pix_fmt_descriptors array into an internal header >> and use something like >> >> static av_always_inline const AVPixFmtDescriptor >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) >> { >> if (av_builtin_constant_p(fmt)) >> return &av_pix_fmt_descriptors[fmt]; >> return av_pix_fmt_desc_get(fmt); >> } > > yes thats what i was thinking of too. > Seems like Anton is away for a week or so. I am sure he has an opinion on the above approach. I think we will wait for him or shall I apply the patches as they are given that they do not block any later alternative solution? (There is one thing I already don't like about the alternative solution: It relies on av_builtin_constant_p, which not every compiler supports.) - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-09 18:15 ` Andreas Rheinhardt @ 2022-09-10 15:29 ` Michael Niedermayer 2022-09-10 16:34 ` Andreas Rheinhardt 0 siblings, 1 reply; 18+ messages in thread From: Michael Niedermayer @ 2022-09-10 15:29 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 2371 bytes --] On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: > >> Michael Niedermayer: > >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: > >>>> Michael Niedermayer: > > [...] > >>> To me if i look at the evolution > >>> of isBE() / code checking BE-ness it become more messy over time > >>> > >>> I think it would be interresting to think about if we can make > >>> av_pix_fmt_desc_get(compile time constant) work at compile time. > >>> or if we maybe can return to a simpler implementation > >>> > >> > >> We could put the av_pix_fmt_descriptors array into an internal header > >> and use something like > >> > >> static av_always_inline const AVPixFmtDescriptor > >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) > >> { > >> if (av_builtin_constant_p(fmt)) > >> return &av_pix_fmt_descriptors[fmt]; > >> return av_pix_fmt_desc_get(fmt); > >> } > > > > yes thats what i was thinking of too. > > > > Seems like Anton is away for a week or so. I am sure he has an opinion > on the above approach. I think we will wait for him or shall I apply the > patches as they are given that they do not block any later alternative > solution? please dont apply code like "IS_BE(BE_LE)" iam sure it makes sense to you today but it requires an additional step for the reader to understand simplest is a seperate endianness and isbe variable. on the wrapper that should be less code too but quite possibly you see a better and cleaner way > (There is one thing I already don't like about the alternative solution: > It relies on av_builtin_constant_p, which not every compiler supports.) Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you saw a better way to achieve it. But this is a problem which occurs more than once in the codebase. mapping some identifer to some value just depending on the identifer, something that is compile time constant, yet it calls to a function to do a lookup thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB The bravest are surely those who have the clearest vision of what is before them, glory and danger alike, and yet notwithstanding go out to meet it. -- Thucydides [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-10 15:29 ` Michael Niedermayer @ 2022-09-10 16:34 ` Andreas Rheinhardt 2022-09-10 17:06 ` Michael Niedermayer 0 siblings, 1 reply; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-10 16:34 UTC (permalink / raw) To: ffmpeg-devel Michael Niedermayer: > On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote: >> Michael Niedermayer: >>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: >>>> Michael Niedermayer: >>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: >>>>>> Michael Niedermayer: >>> [...] >>>>> To me if i look at the evolution >>>>> of isBE() / code checking BE-ness it become more messy over time >>>>> >>>>> I think it would be interresting to think about if we can make >>>>> av_pix_fmt_desc_get(compile time constant) work at compile time. >>>>> or if we maybe can return to a simpler implementation >>>>> >>>> >>>> We could put the av_pix_fmt_descriptors array into an internal header >>>> and use something like >>>> >>>> static av_always_inline const AVPixFmtDescriptor >>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) >>>> { >>>> if (av_builtin_constant_p(fmt)) >>>> return &av_pix_fmt_descriptors[fmt]; >>>> return av_pix_fmt_desc_get(fmt); >>>> } >>> >>> yes thats what i was thinking of too. >>> >> >> Seems like Anton is away for a week or so. I am sure he has an opinion >> on the above approach. I think we will wait for him or shall I apply the >> patches as they are given that they do not block any later alternative >> solution? > > please dont apply code like "IS_BE(BE_LE)" > iam sure it makes sense to you today but it requires an additional step > for the reader to understand > simplest is a seperate endianness and isbe variable. on the wrapper > that should be less code too but quite possibly you see a better and > cleaner way > I actually thought that my solution is superior to the one you seem to have in mind; after all, the parameter for isbe is redundant: It can also be derived from the pixfmt-name. > >> (There is one thing I already don't like about the alternative solution: >> It relies on av_builtin_constant_p, which not every compiler supports.) > > Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you > saw a better way to achieve it. > Well, without av_builtin_constant_p() the only other way for this would be to add two systems to get the information, one for compile-time constants and one for not-constants. Ensuring that the former is only used for compile-time constants will be a challenge, to say the least. > But this is a problem which occurs more than once in the codebase. > mapping some identifer to some value just depending on the identifer, > something that is compile time constant, yet it calls to a function to > do a lookup > Another idea from the top of my head: - We could provide some of the info contained in the descriptors via separate defines/enums that parallel the actual enum, like enum AVPixelFormatFlags { AV_PIX_FMT_YUV420P_FLAGS = AV_PIX_FMT_FLAG_PLANAR, AV_PIX_FMT_YUYV422_FLAGS = 0, ... }; The list in pixdesc.c would then use these constants instead of defining the flags twice (to avoid inconsistencies). These constants can be used from macros via fmt ## _FLAGS, avoiding the av_builtin_constant_p() issue. This could even be made public if we add a big warning that new flags may be added in the future. Other such enums for other values may be added, too, but honestly I don't really like this approach. We could also use make an xmacro out of the list in lavu/pixdesc.c and use this xmacro to query the values. E.g. isBE() would then in effect become one big switch: isBE(fmt) { switch (fmt) { case AV_PIX_FMT_YUV420P: return 0; .... } } This could be made to handle non-compile-time constants as well (by having a default that uses av_pix_fmt_desc_get()), but it has a downside: There would be runtime checks for whether we are in the default branch or not. So once again this function should not be used with non-compile time constants. - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-10 16:34 ` Andreas Rheinhardt @ 2022-09-10 17:06 ` Michael Niedermayer 0 siblings, 0 replies; 18+ messages in thread From: Michael Niedermayer @ 2022-09-10 17:06 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 3818 bytes --] On Sat, Sep 10, 2022 at 06:34:43PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote: > >> Michael Niedermayer: > >>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: > >>>> Michael Niedermayer: > >>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: > >>>>>> Michael Niedermayer: > >>> [...] > >>>>> To me if i look at the evolution > >>>>> of isBE() / code checking BE-ness it become more messy over time > >>>>> > >>>>> I think it would be interresting to think about if we can make > >>>>> av_pix_fmt_desc_get(compile time constant) work at compile time. > >>>>> or if we maybe can return to a simpler implementation > >>>>> > >>>> > >>>> We could put the av_pix_fmt_descriptors array into an internal header > >>>> and use something like > >>>> > >>>> static av_always_inline const AVPixFmtDescriptor > >>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) > >>>> { > >>>> if (av_builtin_constant_p(fmt)) > >>>> return &av_pix_fmt_descriptors[fmt]; > >>>> return av_pix_fmt_desc_get(fmt); > >>>> } > >>> > >>> yes thats what i was thinking of too. > >>> > >> > >> Seems like Anton is away for a week or so. I am sure he has an opinion > >> on the above approach. I think we will wait for him or shall I apply the > >> patches as they are given that they do not block any later alternative > >> solution? > > > > please dont apply code like "IS_BE(BE_LE)" > > iam sure it makes sense to you today but it requires an additional step > > for the reader to understand > > simplest is a seperate endianness and isbe variable. on the wrapper > > that should be less code too but quite possibly you see a better and > > cleaner way > > > > I actually thought that my solution is superior to the one you seem to > have in mind; after all, the parameter for isbe is redundant: It can > also be derived from the pixfmt-name. your solution is supperior in some sense. But iam concerned that it is less readable for a small gain. Maybe BE_LE and others can be renamed to make it immedeatly clear > > > > >> (There is one thing I already don't like about the alternative solution: > >> It relies on av_builtin_constant_p, which not every compiler supports.) > > > > Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you > > saw a better way to achieve it. > > > > Well, without av_builtin_constant_p() the only other way for this would > be to add two systems to get the information, one for compile-time > constants and one for not-constants. Ensuring that the former is only > used for compile-time constants will be a challenge, to say the least. I dont think its a challenge, but iam still quite unsure if this would lead to other issues. For example, if we add a constant lookup function that has direct access to the table. We assue that will only be used where constants are the arguzment. Now its easy to add a extra field to that table and then grep the binary for that. If its found it was either used with a non constant or the compiler failed to optimize it out. This could be a fate test. If it works 100% thats simple but if it works 99% it will be a pain and is not a direction i would suggest, it would be more pain than its worth. So really this is just for discussion at this point, far from a plan with understood consequences thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User questions about the command line tools should be sent to the ffmpeg-user ML. And questions about how to use libav* should be sent to the libav-user ML. [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-08 21:44 ` Andreas Rheinhardt 2022-09-09 14:55 ` Michael Niedermayer @ 2022-09-09 15:52 ` Michael Niedermayer 1 sibling, 0 replies; 18+ messages in thread From: Michael Niedermayer @ 2022-09-09 15:52 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 2179 bytes --] On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: > Michael Niedermayer: > > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: > >> Michael Niedermayer: > >>> Hi > >>> > >>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote: > >>>> Up until now, libswscale/input.c used a macro to read > >>>> an input pixel which involved a call to av_pix_fmt_desc_get() > >>>> to find out whether the input pixel format is BE or LE > >>>> despite this being known at compile-time (there are templates > >>>> per pixfmt). Even worse, these calls are made in a loop, > >>>> so that e.g. there are six calls to av_pix_fmt_desc_get() > >>>> for every pair of UV pixel processed in > >>>> rgb64ToUV_half_c_template(). > >>>> > >>>> This commit modifies these macros to ensure that isBE() > >>>> is evaluated at compile-time. This saved 9743B of .text > >>>> for me (GCC 11.2, -O3). > >>> > >>> hmm, all these functions where supposed to be optimized out > >>> why where they not ? > >>> > >>> iam asking as the code is simpler before your patch if that > >>> "optimization out" thing would work > >>> > >> > >> Why should these functions be optimized out? What would enable the > >> compiler to optimize them out? > > > > Going back into the past, there was > > 6b0768e2021b90215a2ab55ed427bce91d148148 > > > > before this the code certainly did get optimized out, it was just > > #define isBE(x) ((x)&1) > > > > thats simple and clean code btw > > I don't really consider such magic numbers to be clean. no, but that wasnt what i meant what i meant was more that if you can structure an identifer so that it itself can provide alot of the used information from within its structure, that has advanatges over requiring a descriptor table lookup. I didnt mean to suggest that this should use hardcoded litteral numbers and be undocumented and untested. thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB No human being will ever know the Truth, for even if they happen to say it by chance, they would not even known they had done so. -- Xenophanes [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop 2022-09-08 2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt 2022-09-08 2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt @ 2022-09-08 3:31 ` Andreas Rheinhardt 2022-09-16 8:39 ` Paul B Mahol 2022-09-08 3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale 2 siblings, 1 reply; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-08 3:31 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Andreas Rheinhardt Up until now, libswscale/output.c used a macro to write an output pixel which involved a call to av_pix_fmt_desc_get() to find out whether the input pixel format is BE or LE despite this being known at compile-time (there are templates per pixfmt). Even worse, these calls are made in a loop, so that e.g. there are eight calls to av_pix_fmt_desc_get() for every pixel processed in yuv2rgba64_X_c_template() for 64bit RGB formats. This commit modifies these macros to ensure that isBE() is evaluated at compile-time. This saved 41184B of .text for me (GCC 11.2, -O3). Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- This must be the lowest-hanging fruit in the whole codebase. Two other question: Why do all these functions in swscale_internal.h take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor? And would making av_pix_fmt_desc_get() av_const be beneficial? libswscale/output.c | 101 +++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 40a4476c6d..590334eb57 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422) #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B) #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R) #define output_pixel(pos, val) \ - if (isBE(target)) { \ + if (is_be) { \ AV_WB16(pos, val); \ } else { \ AV_WL16(pos, val); \ @@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **unused_chrUSrc, const int32_t **unused_chrVSrc, int unused_chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) + int y, enum AVPixelFormat target, + int unused_hasAlpha, int unused_eightbytes, int is_be) { int hasAlpha = !!alpSrc; int i; @@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int unused_uvalpha, int y, - enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) + enum AVPixelFormat target, int unused_hasAlpha, + int unused_eightbytes, int is_be) { int hasAlpha = abuf && abuf[0] && abuf[1]; const int32_t *buf0 = buf[0], *buf1 = buf[1], @@ -999,7 +1001,8 @@ static av_always_inline void yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) + int unused_uvalpha, int y, enum AVPixelFormat target, + int unused_hasAlpha, int unused_eightbytes, int is_be) { int hasAlpha = !!abuf0; int i; @@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **chrUSrc, const int32_t **chrVSrc, int chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int y, enum AVPixelFormat target, int hasAlpha, int eightbytes, + int is_be) { int i; int A1 = 0xffff<<14, A2 = 0xffff<<14; @@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha, int eightbytes) + enum AVPixelFormat target, int hasAlpha, int eightbytes, + int is_be) { const int32_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], @@ -1172,7 +1177,8 @@ static av_always_inline void yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int uvalpha, int y, enum AVPixelFormat target, + int hasAlpha, int eightbytes, int is_be) { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; @@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **chrUSrc, const int32_t **chrVSrc, int chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int y, enum AVPixelFormat target, int hasAlpha, + int eightbytes, int is_be) { int i; int A = 0xffff<<14; @@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha, int eightbytes) + enum AVPixelFormat target, int hasAlpha, int eightbytes, + int is_be) { const int32_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], @@ -1391,7 +1399,8 @@ static av_always_inline void yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int uvalpha, int y, enum AVPixelFormat target, + int hasAlpha, int eightbytes, int is_be) { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; @@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, #undef r_b #undef b_r -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \ +#define IS_BE_LE 0 +#define IS_BE_BE 1 +#define IS_BE(BE_LE) IS_BE_ ## BE_LE + +#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha, eightbytes) \ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ const int16_t **_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \ + alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes, is_be); \ } \ \ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ @@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ **abuf = (const int32_t **) _abuf; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \ + dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \ } \ \ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ @@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ *abuf0 = (const int32_t *) _abuf0; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ - dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \ -} - -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0) -YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0) - -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1) + dstW, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \ +} +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha, eightbytes) \ + YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness, IS_BE(endianness), hasAlpha, eightbytes) + +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0) +YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0) + +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64, LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64, LE, 0, 1) /* * Write out 2 RGB pixels in the target pixel format. This function takes a -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop 2022-09-08 3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt @ 2022-09-16 8:39 ` Paul B Mahol 0 siblings, 0 replies; 18+ messages in thread From: Paul B Mahol @ 2022-09-16 8:39 UTC (permalink / raw) To: FFmpeg development discussions and patches; +Cc: Andreas Rheinhardt On 9/8/22, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote: > Up until now, libswscale/output.c used a macro to write > an output pixel which involved a call to av_pix_fmt_desc_get() > to find out whether the input pixel format is BE or LE > despite this being known at compile-time (there are templates > per pixfmt). Even worse, these calls are made in a loop, > so that e.g. there are eight calls to av_pix_fmt_desc_get() > for every pixel processed in yuv2rgba64_X_c_template() > for 64bit RGB formats. > LGTM for whole set. Got nice speed boost for not SIMD optimized conversions in swscale. > This commit modifies these macros to ensure that isBE() > is evaluated at compile-time. This saved 41184B of .text > for me (GCC 11.2, -O3). > > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> > --- > This must be the lowest-hanging fruit in the whole codebase. > Two other question: Why do all these functions in swscale_internal.h > take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor? > And would making av_pix_fmt_desc_get() av_const be beneficial? > > libswscale/output.c | 101 +++++++++++++++++++++++++------------------- > 1 file changed, 58 insertions(+), 43 deletions(-) > > diff --git a/libswscale/output.c b/libswscale/output.c > index 40a4476c6d..590334eb57 100644 > --- a/libswscale/output.c > +++ b/libswscale/output.c > @@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, > AV_PIX_FMT_UYVY422) > #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE > || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B) > #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE > || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R) > #define output_pixel(pos, val) \ > - if (isBE(target)) { \ > + if (is_be) { \ > AV_WB16(pos, val); \ > } else { \ > AV_WL16(pos, val); \ > @@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t > *lumFilter, > const int16_t *chrFilter, const int32_t > **unused_chrUSrc, > const int32_t **unused_chrVSrc, int > unused_chrFilterSize, > const int32_t **alpSrc, uint16_t *dest, int dstW, > - int y, enum AVPixelFormat target, int > unused_hasAlpha, int unused_eightbytes) > + int y, enum AVPixelFormat target, > + int unused_hasAlpha, int unused_eightbytes, int > is_be) > { > int hasAlpha = !!alpSrc; > int i; > @@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t > *buf[2], > const int32_t *unused_ubuf[2], const int32_t > *unused_vbuf[2], > const int32_t *abuf[2], uint16_t *dest, int dstW, > int yalpha, int unused_uvalpha, int y, > - enum AVPixelFormat target, int unused_hasAlpha, int > unused_eightbytes) > + enum AVPixelFormat target, int unused_hasAlpha, > + int unused_eightbytes, int is_be) > { > int hasAlpha = abuf && abuf[0] && abuf[1]; > const int32_t *buf0 = buf[0], *buf1 = buf[1], > @@ -999,7 +1001,8 @@ static av_always_inline void > yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0, > const int32_t *unused_ubuf[2], const int32_t > *unused_vbuf[2], > const int32_t *abuf0, uint16_t *dest, int dstW, > - int unused_uvalpha, int y, enum AVPixelFormat > target, int unused_hasAlpha, int unused_eightbytes) > + int unused_uvalpha, int y, enum AVPixelFormat > target, > + int unused_hasAlpha, int unused_eightbytes, int > is_be) > { > int hasAlpha = !!abuf0; > int i; > @@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t > *lumFilter, > const int16_t *chrFilter, const int32_t **chrUSrc, > const int32_t **chrVSrc, int chrFilterSize, > const int32_t **alpSrc, uint16_t *dest, int dstW, > - int y, enum AVPixelFormat target, int hasAlpha, int > eightbytes) > + int y, enum AVPixelFormat target, int hasAlpha, int > eightbytes, > + int is_be) > { > int i; > int A1 = 0xffff<<14, A2 = 0xffff<<14; > @@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t > *buf[2], > const int32_t *ubuf[2], const int32_t *vbuf[2], > const int32_t *abuf[2], uint16_t *dest, int dstW, > int yalpha, int uvalpha, int y, > - enum AVPixelFormat target, int hasAlpha, int > eightbytes) > + enum AVPixelFormat target, int hasAlpha, int > eightbytes, > + int is_be) > { > const int32_t *buf0 = buf[0], *buf1 = buf[1], > *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], > @@ -1172,7 +1177,8 @@ static av_always_inline void > yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, > const int32_t *ubuf[2], const int32_t *vbuf[2], > const int32_t *abuf0, uint16_t *dest, int dstW, > - int uvalpha, int y, enum AVPixelFormat target, int > hasAlpha, int eightbytes) > + int uvalpha, int y, enum AVPixelFormat target, > + int hasAlpha, int eightbytes, int is_be) > { > const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; > int i; > @@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const > int16_t *lumFilter, > const int16_t *chrFilter, const int32_t **chrUSrc, > const int32_t **chrVSrc, int chrFilterSize, > const int32_t **alpSrc, uint16_t *dest, int dstW, > - int y, enum AVPixelFormat target, int hasAlpha, int > eightbytes) > + int y, enum AVPixelFormat target, int hasAlpha, > + int eightbytes, int is_be) > { > int i; > int A = 0xffff<<14; > @@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const > int32_t *buf[2], > const int32_t *ubuf[2], const int32_t *vbuf[2], > const int32_t *abuf[2], uint16_t *dest, int dstW, > int yalpha, int uvalpha, int y, > - enum AVPixelFormat target, int hasAlpha, int > eightbytes) > + enum AVPixelFormat target, int hasAlpha, int > eightbytes, > + int is_be) > { > const int32_t *buf0 = buf[0], *buf1 = buf[1], > *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], > @@ -1391,7 +1399,8 @@ static av_always_inline void > yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, > const int32_t *ubuf[2], const int32_t *vbuf[2], > const int32_t *abuf0, uint16_t *dest, int dstW, > - int uvalpha, int y, enum AVPixelFormat target, int > hasAlpha, int eightbytes) > + int uvalpha, int y, enum AVPixelFormat target, > + int hasAlpha, int eightbytes, int is_be) > { > const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; > int i; > @@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const > int32_t *buf0, > #undef r_b > #undef b_r > > -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \ > +#define IS_BE_LE 0 > +#define IS_BE_BE 1 > +#define IS_BE(BE_LE) IS_BE_ ## BE_LE > + > +#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha, > eightbytes) \ > static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ > const int16_t **_lumSrc, int lumFilterSize, \ > const int16_t *chrFilter, const int16_t **_chrUSrc, > \ > @@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const > int16_t *lumFilter, \ > uint16_t *dest = (uint16_t *) _dest; \ > name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ > chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ > - alpSrc, dest, dstW, y, fmt, hasAlpha, > eightbytes); \ > + alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes, > is_be); \ > } \ > \ > static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ > @@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const > int16_t *_buf[2], \ > **abuf = (const int32_t **) _abuf; \ > uint16_t *dest = (uint16_t *) _dest; \ > name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ > - dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, > eightbytes); \ > + dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, > eightbytes, is_be); \ > } \ > \ > static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ > @@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const > int16_t *_buf0, \ > *abuf0 = (const int32_t *) _abuf0; \ > uint16_t *dest = (uint16_t *) _dest; \ > name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ > - dstW, uvalpha, y, fmt, hasAlpha, > eightbytes); \ > -} > - > -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1) > -YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0) > -YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0) > - > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, > 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, > 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, > 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, > 0) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, > 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, > 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, > 0, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, > 0, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, > 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, > 1, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, > 0, 1) > -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, > 0, 1) > + dstW, uvalpha, y, fmt, hasAlpha, > eightbytes, is_be); \ > +} > +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha, > eightbytes) \ > + YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness, > IS_BE(endianness), hasAlpha, eightbytes) > + > +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0) > +YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0) > + > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE, > 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE, > 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE, > 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE, > 0, 0) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64, > BE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64, > LE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64, > BE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64, > LE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64, > BE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64, > LE, 1, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64, > BE, 0, 1) > +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64, > LE, 0, 1) > > /* > * Write out 2 RGB pixels in the target pixel format. This function takes > a > -- > 2.34.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' 2022-09-08 2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt 2022-09-08 2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt 2022-09-08 3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt @ 2022-09-08 3:44 ` Philip Langdale 2 siblings, 0 replies; 18+ messages in thread From: Philip Langdale @ 2022-09-08 3:44 UTC (permalink / raw) To: ffmpeg-devel On Thu, 8 Sep 2022 04:31:55 +0200 Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote: > These macros are definitions, not only declarations and therefore > should not contain a semicolon. Such a semicolon is actually > spec-incompliant, but compilers happen to accept them. > > Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> > --- > libswscale/input.c | 10 +++++----- > 1 file changed, 5 insertions(+), 5 deletions(-) > > diff --git a/libswscale/input.c b/libswscale/input.c > index be8f3940e1..88e318e664 100644 > --- a/libswscale/input.c > +++ b/libswscale/input.c > @@ -583,8 +583,8 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t > *dstV, const uint8_t *unused0, con AV_WN16(dst + i * 2, AV_RL16(src + > i * 4) >> shift); \ } > > -y21xle_wrapper(10, 6); > -y21xle_wrapper(12, 4); > +y21xle_wrapper(10, 6) > +y21xle_wrapper(12, 4) > > static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const > uint8_t *unused1, const uint8_t *unused2, int width, uint32_t > *unused, void *opq) @@ -828,9 +828,9 @@ static void > nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, } > \ p01x_uv_wrapper(bits, shift) > > -p01x_wrapper(10, 6); > -p01x_wrapper(12, 4); > -p01x_uv_wrapper(16, 0); > +p01x_wrapper(10, 6) > +p01x_wrapper(12, 4) > +p01x_uv_wrapper(16, 0) > > #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) > Thanks for catching this. LGTM. --phil _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
@ 2022-09-16 7:00 Anton Khirnov
2022-09-16 10:57 ` Michael Niedermayer
0 siblings, 1 reply; 18+ messages in thread
From: Anton Khirnov @ 2022-09-16 7:00 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Andreas Rheinhardt (2022-09-09 20:15:22)
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >>>> Michael Niedermayer:
> > [...]
> >>> To me if i look at the evolution
> >>> of isBE() / code checking BE-ness it become more messy over time
> >>>
> >>> I think it would be interresting to think about if we can make
> >>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> >>> or if we maybe can return to a simpler implementation
> >>>
> >>
> >> We could put the av_pix_fmt_descriptors array into an internal header
> >> and use something like
> >>
> >> static av_always_inline const AVPixFmtDescriptor
> >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> >> {
> >> if (av_builtin_constant_p(fmt))
> >> return &av_pix_fmt_descriptors[fmt];
> >> return av_pix_fmt_desc_get(fmt);
> >> }
> >
> > yes thats what i was thinking of too.
> >
>
> Seems like Anton is away for a week or so. I am sure he has an opinion
> on the above approach. I think we will wait for him or shall I apply the
> patches as they are given that they do not block any later alternative
> solution?
> (There is one thing I already don't like about the alternative solution:
> It relies on av_builtin_constant_p, which not every compiler supports.)
For what my opinion is worth, the patch as is with some extra
explanatory comments for the new IS_BE* macros seems like the best
solution to me. They are indeed slightly confusing at first glance, but
quite obvious if you look at the code for two minutes (less for people
smarter than me). And I think few people will need to understand how
precisely they work anyway.
The other proposals seem significantly more complex and fragile than
this one.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-16 7:00 [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Anton Khirnov @ 2022-09-16 10:57 ` Michael Niedermayer 2022-09-18 20:58 ` Andreas Rheinhardt 0 siblings, 1 reply; 18+ messages in thread From: Michael Niedermayer @ 2022-09-16 10:57 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 2743 bytes --] On Fri, Sep 16, 2022 at 09:00:24AM +0200, Anton Khirnov wrote: > Quoting Andreas Rheinhardt (2022-09-09 20:15:22) > > Michael Niedermayer: > > > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: > > >> Michael Niedermayer: > > >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: > > >>>> Michael Niedermayer: > > > [...] > > >>> To me if i look at the evolution > > >>> of isBE() / code checking BE-ness it become more messy over time > > >>> > > >>> I think it would be interresting to think about if we can make > > >>> av_pix_fmt_desc_get(compile time constant) work at compile time. > > >>> or if we maybe can return to a simpler implementation > > >>> > > >> > > >> We could put the av_pix_fmt_descriptors array into an internal header > > >> and use something like > > >> > > >> static av_always_inline const AVPixFmtDescriptor > > >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) > > >> { > > >> if (av_builtin_constant_p(fmt)) > > >> return &av_pix_fmt_descriptors[fmt]; > > >> return av_pix_fmt_desc_get(fmt); > > >> } > > > > > > yes thats what i was thinking of too. > > > > > > > Seems like Anton is away for a week or so. I am sure he has an opinion > > on the above approach. I think we will wait for him or shall I apply the > > patches as they are given that they do not block any later alternative > > solution? > > (There is one thing I already don't like about the alternative solution: > > It relies on av_builtin_constant_p, which not every compiler supports.) > > For what my opinion is worth, the patch as is with some extra > explanatory comments for the new IS_BE* macros seems like the best > solution to me. They are indeed slightly confusing at first glance, but > quite obvious if you look at the code for two minutes (less for people > smarter than me). And I think few people will need to understand how > precisely they work anyway. i agree, maybe we can solve this by changing the IS_BE* names instead of adding comments. IS_BE, ENDIAN_IDENTIFER, (0,1) , (LE, BE) or something like that, may reduce the need for comments > > The other proposals seem significantly more complex and fragile than > this one. i agree too, though allowing av_pix_fmt_desc_get() to be done at compile time would allow its use in inlined functions and macros much broader so these are not 1:1 comparable as they serve only overlapping goals thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB The bravest are surely those who have the clearest vision of what is before them, glory and danger alike, and yet notwithstanding go out to meet it. -- Thucydides [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() 2022-09-16 10:57 ` Michael Niedermayer @ 2022-09-18 20:58 ` Andreas Rheinhardt 0 siblings, 0 replies; 18+ messages in thread From: Andreas Rheinhardt @ 2022-09-18 20:58 UTC (permalink / raw) To: ffmpeg-devel Michael Niedermayer: > On Fri, Sep 16, 2022 at 09:00:24AM +0200, Anton Khirnov wrote: >> Quoting Andreas Rheinhardt (2022-09-09 20:15:22) >>> Michael Niedermayer: >>>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote: >>>>> Michael Niedermayer: >>>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote: >>>>>>> Michael Niedermayer: >>>> [...] >>>>>> To me if i look at the evolution >>>>>> of isBE() / code checking BE-ness it become more messy over time >>>>>> >>>>>> I think it would be interresting to think about if we can make >>>>>> av_pix_fmt_desc_get(compile time constant) work at compile time. >>>>>> or if we maybe can return to a simpler implementation >>>>>> >>>>> >>>>> We could put the av_pix_fmt_descriptors array into an internal header >>>>> and use something like >>>>> >>>>> static av_always_inline const AVPixFmtDescriptor >>>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt) >>>>> { >>>>> if (av_builtin_constant_p(fmt)) >>>>> return &av_pix_fmt_descriptors[fmt]; >>>>> return av_pix_fmt_desc_get(fmt); >>>>> } >>>> >>>> yes thats what i was thinking of too. >>>> >>> >>> Seems like Anton is away for a week or so. I am sure he has an opinion >>> on the above approach. I think we will wait for him or shall I apply the >>> patches as they are given that they do not block any later alternative >>> solution? >>> (There is one thing I already don't like about the alternative solution: >>> It relies on av_builtin_constant_p, which not every compiler supports.) >> >> For what my opinion is worth, the patch as is with some extra >> explanatory comments for the new IS_BE* macros seems like the best >> solution to me. They are indeed slightly confusing at first glance, but >> quite obvious if you look at the code for two minutes (less for people >> smarter than me). And I think few people will need to understand how >> precisely they work anyway. > > i agree, maybe we can solve this by changing the IS_BE* names instead > of adding comments. > IS_BE, ENDIAN_IDENTIFER, > (0,1) , (LE, BE) > > or something like that, may reduce the need for comments > > I sent a version 2 here: https://ffmpeg.org/pipermail/ffmpeg-devel/2022-September/301535.html Can you take a look at it, please? - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2022-09-18 20:58 UTC | newest] Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-09-08 2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt 2022-09-08 2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt 2022-09-08 17:39 ` Michael Niedermayer 2022-09-08 19:38 ` Andreas Rheinhardt 2022-09-08 20:25 ` Michael Niedermayer 2022-09-08 21:44 ` Andreas Rheinhardt 2022-09-09 14:55 ` Michael Niedermayer 2022-09-09 18:15 ` Andreas Rheinhardt 2022-09-10 15:29 ` Michael Niedermayer 2022-09-10 16:34 ` Andreas Rheinhardt 2022-09-10 17:06 ` Michael Niedermayer 2022-09-09 15:52 ` Michael Niedermayer 2022-09-08 3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt 2022-09-16 8:39 ` Paul B Mahol 2022-09-08 3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale 2022-09-16 7:00 [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Anton Khirnov 2022-09-16 10:57 ` Michael Niedermayer 2022-09-18 20:58 ` Andreas Rheinhardt
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git