[FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; '

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; '
@ 2022-09-08  2:31 Andreas Rheinhardt
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08  2:31 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

These macros are definitions, not only declarations and therefore
should not contain a semicolon. Such a semicolon is actually
spec-incompliant, but compilers happen to accept them.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswscale/input.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index be8f3940e1..88e318e664 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -583,8 +583,8 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, con
             AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> shift);         \
     }
 
-y21xle_wrapper(10, 6);
-y21xle_wrapper(12, 4);
+y21xle_wrapper(10, 6)
+y21xle_wrapper(12, 4)
 
 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
                        uint32_t *unused, void *opq)
@@ -828,9 +828,9 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
     }                                                                    \
     p01x_uv_wrapper(bits, shift)
 
-p01x_wrapper(10, 6);
-p01x_wrapper(12, 4);
-p01x_uv_wrapper(16, 0);
+p01x_wrapper(10, 6)
+p01x_wrapper(12, 4)
+p01x_uv_wrapper(16, 0)
 
 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
 
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
@ 2022-09-08  2:38 ` Andreas Rheinhardt
  2022-09-08 17:39   ` Michael Niedermayer
  2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
  2022-09-08  3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale
  2 siblings, 1 reply; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08  2:38 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Up until now, libswscale/input.c used a macro to read
an input pixel which involved a call to av_pix_fmt_desc_get()
to find out whether the input pixel format is BE or LE
despite this being known at compile-time (there are templates
per pixfmt). Even worse, these calls are made in a loop,
so that e.g. there are six calls to av_pix_fmt_desc_get()
for every pair of UV pixel processed in
rgb64ToUV_half_c_template().

This commit modifies these macros to ensure that isBE()
is evaluated at compile-time. This saved 9743B of .text
for me (GCC 11.2, -O3).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
output.c suffers from the same issue (and according to coverage
is responsible for quite a large chunk of the calls to
av_pix_fmt_desc_get() (144699832 of 321342234)), so I will
be looking into this tomorrow.

 libswscale/input.c | 96 +++++++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 44 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 88e318e664..4144ff81a9 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -28,14 +28,19 @@
 #include "config.h"
 #include "swscale_internal.h"
 
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+#define input_pixel(pos) (is_be ? AV_RB16(pos) : AV_RL16(pos))
+
+#define IS_BE_LE 0
+#define IS_BE_BE 1
+#define IS_BE_   0
+#define IS_BE(BE_LE) IS_BE_ ## BE_LE
 
 #define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b)
 #define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r)
 
 static av_always_inline void
 rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
-                    enum AVPixelFormat origin, int32_t *rgb2yuv)
+                    enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
 {
     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
     int i;
@@ -51,7 +56,7 @@ rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
 static av_always_inline void
 rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
-                    int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+                    int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -70,7 +75,7 @@ rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
 static av_always_inline void
 rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                           const uint16_t *src1, const uint16_t *src2,
-                          int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+                          int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -92,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src,
 { \
     const uint16_t *src = (const uint16_t *) _src; \
     uint16_t *dst = (uint16_t *) _dst; \
-    rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
+    rgb64ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 } \
  \
 static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
@@ -102,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
     const uint16_t *src1 = (const uint16_t *) _src1, \
                    *src2 = (const uint16_t *) _src2; \
     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+    rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 } \
  \
 static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
@@ -112,7 +117,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV
     const uint16_t *src1 = (const uint16_t *) _src1, \
                    *src2 = (const uint16_t *) _src2; \
     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+    rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 }
 
 rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
@@ -123,7 +128,7 @@ rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE)
 static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
                                                  const uint16_t *src, int width,
                                                  enum AVPixelFormat origin,
-                                                 int32_t *rgb2yuv)
+                                                 int32_t *rgb2yuv, int is_be)
 {
     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
     int i;
@@ -142,7 +147,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
                                                   const uint16_t *src2,
                                                   int width,
                                                   enum AVPixelFormat origin,
-                                                  int32_t *rgb2yuv)
+                                                  int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -164,7 +169,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
                                                        const uint16_t *src2,
                                                        int width,
                                                        enum AVPixelFormat origin,
-                                                       int32_t *rgb2yuv)
+                                                       int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -197,7 +202,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst,              \
 {                                                                       \
     const uint16_t *src = (const uint16_t *)_src;                       \
     uint16_t *dst       = (uint16_t *)_dst;                             \
-    rgb48ToY_c_template(dst, src, width, origin, rgb2yuv);              \
+    rgb48ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE));\
 }                                                                       \
                                                                         \
 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU,            \
@@ -213,7 +218,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU,            \
                    *src2 = (const uint16_t *)_src2;                     \
     uint16_t *dstU = (uint16_t *)_dstU,                                 \
              *dstV = (uint16_t *)_dstV;                                 \
-    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv);        \
+    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 }                                                                       \
                                                                         \
 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU,       \
@@ -229,7 +234,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU,       \
                    *src2 = (const uint16_t *)_src2;                     \
     uint16_t *dstU = (uint16_t *)_dstU,                                 \
              *dstV = (uint16_t *)_dstV;                                 \
-    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv);   \
+    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 }
 
 rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE)
@@ -245,7 +250,7 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
                         : ((origin == AV_PIX_FMT_X2RGB10LE ||              \
                             origin == AV_PIX_FMT_X2BGR10LE)                \
                            ? AV_RL32(&src[(i) * 4])                        \
-                           : (isBE(origin) ? AV_RB16(&src[(i) * 2])        \
+                           : (is_be ? AV_RB16(&src[(i) * 2])               \
                               : AV_RL16(&src[(i) * 2]))))
 
 static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
@@ -257,7 +262,7 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
                                                     int maskr, int maskg,
                                                     int maskb, int rsh,
                                                     int gsh, int bsh, int S,
-                                                    int32_t *rgb2yuv)
+                                                    int32_t *rgb2yuv, int is_be)
 {
     const int ry       = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh;
     const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
@@ -283,7 +288,7 @@ static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
                                                      int maskr, int maskg,
                                                      int maskb, int rsh,
                                                      int gsh, int bsh, int S,
-                                                     int32_t *rgb2yuv)
+                                                     int32_t *rgb2yuv, int is_be)
 {
     const int ru       = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
               rv       = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh);
@@ -311,7 +316,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
                                                           int maskr, int maskg,
                                                           int maskb, int rsh,
                                                           int gsh, int bsh, int S,
-                                                          int32_t *rgb2yuv)
+                                                          int32_t *rgb2yuv, int is_be)
 {
     const int ru       = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
               rv       = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh),
@@ -345,13 +350,13 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
 
 #undef input_pixel
 
-#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr,          \
-                         maskg, maskb, rsh, gsh, bsh, S)                \
+#define rgb16_32_wrapper_0(fmt, name, shr, shg, shb, shp, maskr,        \
+                           maskg, maskb, rsh, gsh, bsh, S, is_be)       \
 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,            \
                           int width, uint32_t *tab, void *opq)          \
 {                                                                       \
     rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp,    \
-                           maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \
+                           maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
 }                                                                       \
                                                                         \
 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV,                \
@@ -360,7 +365,7 @@ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV,                \
 {                                                                       \
     rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,                \
                             shr, shg, shb, shp,                         \
-                            maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\
+                            maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
 }                                                                       \
                                                                         \
 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV,           \
@@ -371,27 +376,32 @@ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV,           \
     rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,           \
                                  shr, shg, shb, shp,                    \
                                  maskr, maskg, maskb,                   \
-                                 rsh, gsh, bsh, S, tab);                \
-}
-
-rgb16_32_wrapper(AV_PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_X2RGB10LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
-rgb16_32_wrapper(AV_PIX_FMT_X2BGR10LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
+                                 rsh, gsh, bsh, S, tab, is_be);         \
+}
+
+#define rgb16_32_wrapper(base_fmt, endianness, name, shr, shg, shb, shp, maskr, \
+                         maskg, maskb, rsh, gsh, bsh, S) \
+    rgb16_32_wrapper_0(base_fmt ## endianness, name, shr, shg, shb, shp, maskr, \
+                       maskg, maskb, rsh, gsh, bsh, S, IS_BE(endianness))
+
+rgb16_32_wrapper(AV_PIX_FMT_BGR32,     , bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR32_1,   , bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB32,     , rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB32_1,   , rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR565,  LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555,  LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444,  LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565,  LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555,  LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444,  LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_BGR565,  BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555,  BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444,  BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565,  BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555,  BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444,  BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_X2RGB10, LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
+rgb16_32_wrapper(AV_PIX_FMT_X2BGR10, LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
 
 static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
                          const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
@@ -832,8 +842,6 @@ p01x_wrapper(10, 6)
 p01x_wrapper(12, 4)
 p01x_uv_wrapper(16, 0)
 
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-
 static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                        int width, uint32_t *rgb2yuv, void *opq)
 {
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
@ 2022-09-08 17:39   ` Michael Niedermayer
  2022-09-08 19:38     ` Andreas Rheinhardt
  0 siblings, 1 reply; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-08 17:39 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1033 bytes --]

Hi

On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> Up until now, libswscale/input.c used a macro to read
> an input pixel which involved a call to av_pix_fmt_desc_get()
> to find out whether the input pixel format is BE or LE
> despite this being known at compile-time (there are templates
> per pixfmt). Even worse, these calls are made in a loop,
> so that e.g. there are six calls to av_pix_fmt_desc_get()
> for every pair of UV pixel processed in
> rgb64ToUV_half_c_template().
> 
> This commit modifies these macros to ensure that isBE()
> is evaluated at compile-time. This saved 9743B of .text
> for me (GCC 11.2, -O3).

hmm, all these functions where supposed to be optimized out
why where they not ?

iam asking as the code is simpler before your patch if that
"optimization out" thing would work

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 17:39   ` Michael Niedermayer
@ 2022-09-08 19:38     ` Andreas Rheinhardt
  2022-09-08 20:25       ` Michael Niedermayer
  0 siblings, 1 reply; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08 19:38 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> Hi
> 
> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
>> Up until now, libswscale/input.c used a macro to read
>> an input pixel which involved a call to av_pix_fmt_desc_get()
>> to find out whether the input pixel format is BE or LE
>> despite this being known at compile-time (there are templates
>> per pixfmt). Even worse, these calls are made in a loop,
>> so that e.g. there are six calls to av_pix_fmt_desc_get()
>> for every pair of UV pixel processed in
>> rgb64ToUV_half_c_template().
>>
>> This commit modifies these macros to ensure that isBE()
>> is evaluated at compile-time. This saved 9743B of .text
>> for me (GCC 11.2, -O3).
> 
> hmm, all these functions where supposed to be optimized out
> why where they not ?
> 
> iam asking as the code is simpler before your patch if that
> "optimization out" thing would work
> 

Why should these functions be optimized out? What would enable the
compiler to optimize them out?
(And I really don't see why this patch would make the code more
complicated.)

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 19:38     ` Andreas Rheinhardt
@ 2022-09-08 20:25       ` Michael Niedermayer
  2022-09-08 21:44         ` Andreas Rheinhardt
  0 siblings, 1 reply; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-08 20:25 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2688 bytes --]

On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > Hi
> > 
> > On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> >> Up until now, libswscale/input.c used a macro to read
> >> an input pixel which involved a call to av_pix_fmt_desc_get()
> >> to find out whether the input pixel format is BE or LE
> >> despite this being known at compile-time (there are templates
> >> per pixfmt). Even worse, these calls are made in a loop,
> >> so that e.g. there are six calls to av_pix_fmt_desc_get()
> >> for every pair of UV pixel processed in
> >> rgb64ToUV_half_c_template().
> >>
> >> This commit modifies these macros to ensure that isBE()
> >> is evaluated at compile-time. This saved 9743B of .text
> >> for me (GCC 11.2, -O3).
> > 
> > hmm, all these functions where supposed to be optimized out
> > why where they not ?
> > 
> > iam asking as the code is simpler before your patch if that
> > "optimization out" thing would work
> > 
> 
> Why should these functions be optimized out? What would enable the
> compiler to optimize them out?

Going back into the past, there was
6b0768e2021b90215a2ab55ed427bce91d148148

before this the code certainly did get optimized out, it was just
#define isBE(x) ((x)&1)

thats simple and clean code btw
after this it became

#define isBE(x) \
+    (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)

thats still really good, and very readable, its a const array so
one would assume that a compiler can figure that out at compile time
well, i try not to think of linking and seperate objects here ;)

next it got then replaced by a function and a call that i suspect
people thought would be inlined


> (And I really don't see why this patch would make the code more
> complicated.)

the code historically was capable to lookup any flag and detail
of a pixel format at compile time
now your code works around that not working. Introducing a 2nd
system to do this in parallel. To me if i look at the evolution
of isBE() / code checking BE-ness it become more messy over time

I think it would be interresting to think about if we can make
av_pix_fmt_desc_get(compile time constant) work at compile time.
or if we maybe can return to a simpler implementation


thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Awnsering whenever a program halts or runs forever is
On a turing machine, in general impossible (turings halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 20:25       ` Michael Niedermayer
@ 2022-09-08 21:44         ` Andreas Rheinhardt
  2022-09-09 14:55           ` Michael Niedermayer
  2022-09-09 15:52           ` Michael Niedermayer
  0 siblings, 2 replies; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08 21:44 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> Hi
>>>
>>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
>>>> Up until now, libswscale/input.c used a macro to read
>>>> an input pixel which involved a call to av_pix_fmt_desc_get()
>>>> to find out whether the input pixel format is BE or LE
>>>> despite this being known at compile-time (there are templates
>>>> per pixfmt). Even worse, these calls are made in a loop,
>>>> so that e.g. there are six calls to av_pix_fmt_desc_get()
>>>> for every pair of UV pixel processed in
>>>> rgb64ToUV_half_c_template().
>>>>
>>>> This commit modifies these macros to ensure that isBE()
>>>> is evaluated at compile-time. This saved 9743B of .text
>>>> for me (GCC 11.2, -O3).
>>>
>>> hmm, all these functions where supposed to be optimized out
>>> why where they not ?
>>>
>>> iam asking as the code is simpler before your patch if that
>>> "optimization out" thing would work
>>>
>>
>> Why should these functions be optimized out? What would enable the
>> compiler to optimize them out?
> 
> Going back into the past, there was
> 6b0768e2021b90215a2ab55ed427bce91d148148
> 
> before this the code certainly did get optimized out, it was just
> #define isBE(x) ((x)&1)
> 
> thats simple and clean code btw

I don't really consider such magic numbers to be clean.

> after this it became
> 
> #define isBE(x) \
> +    (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)
> 
> thats still really good, and very readable, its a const array so
> one would assume that a compiler can figure that out at compile time
> well, i try not to think of linking and seperate objects here ;)
> 
> next it got then replaced by a function and a call that i suspect
> people thought would be inlined
> 
> 
>> (And I really don't see why this patch would make the code more
>> complicated.)
> 
> the code historically was capable to lookup any flag and detail
> of a pixel format at compile time
> now your code works around that not working. Introducing a 2nd
> system to do this in parallel. 

I am not introducing a second system, I am reusing the existing system,
namely our existing naming system (the fact that we use BE/LE in the
name of BE/LE pixel formats).

> To me if i look at the evolution
> of isBE() / code checking BE-ness it become more messy over time
> 
> I think it would be interresting to think about if we can make
> av_pix_fmt_desc_get(compile time constant) work at compile time.
> or if we maybe can return to a simpler implementation
> 

We could put the av_pix_fmt_descriptors array into an internal header
and use something like

static av_always_inline const AVPixFmtDescriptor
*ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
{
    if (av_builtin_constant_p(fmt))
        return &av_pix_fmt_descriptors[fmt];
    return av_pix_fmt_desc_get(fmt);
}

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 21:44         ` Andreas Rheinhardt
@ 2022-09-09 14:55           ` Michael Niedermayer
  2022-09-09 18:15             ` Andreas Rheinhardt
  2022-09-09 15:52           ` Michael Niedermayer
  1 sibling, 1 reply; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-09 14:55 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1113 bytes --]

On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
[...]
> > To me if i look at the evolution
> > of isBE() / code checking BE-ness it become more messy over time
> > 
> > I think it would be interresting to think about if we can make
> > av_pix_fmt_desc_get(compile time constant) work at compile time.
> > or if we maybe can return to a simpler implementation
> > 
> 
> We could put the av_pix_fmt_descriptors array into an internal header
> and use something like
> 
> static av_always_inline const AVPixFmtDescriptor
> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> {
>     if (av_builtin_constant_p(fmt))
>         return &av_pix_fmt_descriptors[fmt];
>     return av_pix_fmt_desc_get(fmt);
> }

yes thats what i was thinking of too.

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I do not agree with what you have to say, but I'll defend to the death your
right to say it. -- Voltaire

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-09 14:55           ` Michael Niedermayer
@ 2022-09-09 18:15             ` Andreas Rheinhardt
  2022-09-10 15:29               ` Michael Niedermayer
  0 siblings, 1 reply; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-09 18:15 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>>>> Michael Niedermayer:
> [...]
>>> To me if i look at the evolution
>>> of isBE() / code checking BE-ness it become more messy over time
>>>
>>> I think it would be interresting to think about if we can make
>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
>>> or if we maybe can return to a simpler implementation
>>>
>>
>> We could put the av_pix_fmt_descriptors array into an internal header
>> and use something like
>>
>> static av_always_inline const AVPixFmtDescriptor
>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
>> {
>>     if (av_builtin_constant_p(fmt))
>>         return &av_pix_fmt_descriptors[fmt];
>>     return av_pix_fmt_desc_get(fmt);
>> }
> 
> yes thats what i was thinking of too.
> 

Seems like Anton is away for a week or so. I am sure he has an opinion
on the above approach. I think we will wait for him or shall I apply the
patches as they are given that they do not block any later alternative
solution?
(There is one thing I already don't like about the alternative solution:
It relies on av_builtin_constant_p, which not every compiler supports.)

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-09 18:15             ` Andreas Rheinhardt
@ 2022-09-10 15:29               ` Michael Niedermayer
  2022-09-10 16:34                 ` Andreas Rheinhardt
  0 siblings, 1 reply; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-10 15:29 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2371 bytes --]

On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >>>> Michael Niedermayer:
> > [...]
> >>> To me if i look at the evolution
> >>> of isBE() / code checking BE-ness it become more messy over time
> >>>
> >>> I think it would be interresting to think about if we can make
> >>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> >>> or if we maybe can return to a simpler implementation
> >>>
> >>
> >> We could put the av_pix_fmt_descriptors array into an internal header
> >> and use something like
> >>
> >> static av_always_inline const AVPixFmtDescriptor
> >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> >> {
> >>     if (av_builtin_constant_p(fmt))
> >>         return &av_pix_fmt_descriptors[fmt];
> >>     return av_pix_fmt_desc_get(fmt);
> >> }
> > 
> > yes thats what i was thinking of too.
> > 
> 
> Seems like Anton is away for a week or so. I am sure he has an opinion
> on the above approach. I think we will wait for him or shall I apply the
> patches as they are given that they do not block any later alternative
> solution?

please dont apply code like "IS_BE(BE_LE)" 
iam sure it makes sense to you today but it requires an additional step
for the reader to understand
simplest is a seperate endianness and isbe variable. on the wrapper
that should be less code too but quite possibly you see a better and
cleaner way


> (There is one thing I already don't like about the alternative solution:
> It relies on av_builtin_constant_p, which not every compiler supports.)

Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you
saw a better way to achieve it.

But this is a problem which occurs more than once in the codebase.
mapping some identifer to some value just depending on the identifer,
something that is compile time constant, yet it calls to a function to
do a lookup

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The bravest are surely those who have the clearest vision
of what is before them, glory and danger alike, and yet
notwithstanding go out to meet it. -- Thucydides

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-10 15:29               ` Michael Niedermayer
@ 2022-09-10 16:34                 ` Andreas Rheinhardt
  2022-09-10 17:06                   ` Michael Niedermayer
  0 siblings, 1 reply; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-10 16:34 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
>>>> Michael Niedermayer:
>>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>>>>>> Michael Niedermayer:
>>> [...]
>>>>> To me if i look at the evolution
>>>>> of isBE() / code checking BE-ness it become more messy over time
>>>>>
>>>>> I think it would be interresting to think about if we can make
>>>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
>>>>> or if we maybe can return to a simpler implementation
>>>>>
>>>>
>>>> We could put the av_pix_fmt_descriptors array into an internal header
>>>> and use something like
>>>>
>>>> static av_always_inline const AVPixFmtDescriptor
>>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
>>>> {
>>>>     if (av_builtin_constant_p(fmt))
>>>>         return &av_pix_fmt_descriptors[fmt];
>>>>     return av_pix_fmt_desc_get(fmt);
>>>> }
>>>
>>> yes thats what i was thinking of too.
>>>
>>
>> Seems like Anton is away for a week or so. I am sure he has an opinion
>> on the above approach. I think we will wait for him or shall I apply the
>> patches as they are given that they do not block any later alternative
>> solution?
> 
> please dont apply code like "IS_BE(BE_LE)" 
> iam sure it makes sense to you today but it requires an additional step
> for the reader to understand
> simplest is a seperate endianness and isbe variable. on the wrapper
> that should be less code too but quite possibly you see a better and
> cleaner way
> 

I actually thought that my solution is superior to the one you seem to
have in mind; after all, the parameter for isbe is redundant: It can
also be derived from the pixfmt-name.

> 
>> (There is one thing I already don't like about the alternative solution:
>> It relies on av_builtin_constant_p, which not every compiler supports.)
> 
> Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you
> saw a better way to achieve it.
> 

Well, without av_builtin_constant_p() the only other way for this would
be to add two systems to get the information, one for compile-time
constants and one for not-constants. Ensuring that the former is only
used for compile-time constants will be a challenge, to say the least.

> But this is a problem which occurs more than once in the codebase.
> mapping some identifer to some value just depending on the identifer,
> something that is compile time constant, yet it calls to a function to
> do a lookup
> 

Another idea from the top of my head:
- We could provide some of the info contained in the descriptors via
separate defines/enums that parallel the actual enum, like
enum AVPixelFormatFlags {
    AV_PIX_FMT_YUV420P_FLAGS = AV_PIX_FMT_FLAG_PLANAR,
    AV_PIX_FMT_YUYV422_FLAGS = 0,
...
};
The list in pixdesc.c would then use these constants instead of defining
the flags twice (to avoid inconsistencies). These constants can be used
from macros via fmt ## _FLAGS, avoiding the av_builtin_constant_p()
issue. This could even be made public if we add a big warning that new
flags may be added in the future. Other such enums for other values may
be added, too, but honestly I don't really like this approach.

We could also use make an xmacro out of the list in lavu/pixdesc.c and
use this xmacro to query the values. E.g. isBE() would then in effect
become one big switch:
isBE(fmt)
{
    switch (fmt) {
    case AV_PIX_FMT_YUV420P: return 0;
....
    }
}
This could be made to handle non-compile-time constants as well (by
having a default that uses av_pix_fmt_desc_get()), but it has a
downside: There would be runtime checks for whether we are in the
default branch or not. So once again this function should not be used
with non-compile time constants.

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-10 16:34                 ` Andreas Rheinhardt
@ 2022-09-10 17:06                   ` Michael Niedermayer
  0 siblings, 0 replies; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-10 17:06 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 3818 bytes --]

On Sat, Sep 10, 2022 at 06:34:43PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> >>>> Michael Niedermayer:
> >>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >>>>>> Michael Niedermayer:
> >>> [...]
> >>>>> To me if i look at the evolution
> >>>>> of isBE() / code checking BE-ness it become more messy over time
> >>>>>
> >>>>> I think it would be interresting to think about if we can make
> >>>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> >>>>> or if we maybe can return to a simpler implementation
> >>>>>
> >>>>
> >>>> We could put the av_pix_fmt_descriptors array into an internal header
> >>>> and use something like
> >>>>
> >>>> static av_always_inline const AVPixFmtDescriptor
> >>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> >>>> {
> >>>>     if (av_builtin_constant_p(fmt))
> >>>>         return &av_pix_fmt_descriptors[fmt];
> >>>>     return av_pix_fmt_desc_get(fmt);
> >>>> }
> >>>
> >>> yes thats what i was thinking of too.
> >>>
> >>
> >> Seems like Anton is away for a week or so. I am sure he has an opinion
> >> on the above approach. I think we will wait for him or shall I apply the
> >> patches as they are given that they do not block any later alternative
> >> solution?
> > 
> > please dont apply code like "IS_BE(BE_LE)" 
> > iam sure it makes sense to you today but it requires an additional step
> > for the reader to understand
> > simplest is a seperate endianness and isbe variable. on the wrapper
> > that should be less code too but quite possibly you see a better and
> > cleaner way
> > 
> 
> I actually thought that my solution is superior to the one you seem to
> have in mind; after all, the parameter for isbe is redundant: It can
> also be derived from the pixfmt-name.

your solution is supperior in some sense. But iam concerned that it is
less readable for a small gain.
Maybe BE_LE and others can be renamed to make it immedeatly clear


> 
> > 
> >> (There is one thing I already don't like about the alternative solution:
> >> It relies on av_builtin_constant_p, which not every compiler supports.)
> > 
> > Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you
> > saw a better way to achieve it.
> > 
> 
> Well, without av_builtin_constant_p() the only other way for this would
> be to add two systems to get the information, one for compile-time
> constants and one for not-constants. Ensuring that the former is only
> used for compile-time constants will be a challenge, to say the least.

I dont think its a challenge, but iam still quite unsure if this would
lead to other issues.

For example, if we add a constant lookup function that has direct access
to the table. We assue that will only be used where constants are the
arguzment. Now its easy to add a extra field to that table and then
grep the binary for that. If its found it was either used with a non
constant or the compiler failed to optimize it out.
This could be a fate test. If it works 100% thats simple but if it works
99% it will be a pain and is not a direction i would suggest, it would
be more pain than its worth.

So really this is just for discussion at this point, far from a plan
with understood consequences

thx
[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 21:44         ` Andreas Rheinhardt
  2022-09-09 14:55           ` Michael Niedermayer
@ 2022-09-09 15:52           ` Michael Niedermayer
  1 sibling, 0 replies; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-09 15:52 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2179 bytes --]

On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> Hi
> >>>
> >>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> >>>> Up until now, libswscale/input.c used a macro to read
> >>>> an input pixel which involved a call to av_pix_fmt_desc_get()
> >>>> to find out whether the input pixel format is BE or LE
> >>>> despite this being known at compile-time (there are templates
> >>>> per pixfmt). Even worse, these calls are made in a loop,
> >>>> so that e.g. there are six calls to av_pix_fmt_desc_get()
> >>>> for every pair of UV pixel processed in
> >>>> rgb64ToUV_half_c_template().
> >>>>
> >>>> This commit modifies these macros to ensure that isBE()
> >>>> is evaluated at compile-time. This saved 9743B of .text
> >>>> for me (GCC 11.2, -O3).
> >>>
> >>> hmm, all these functions where supposed to be optimized out
> >>> why where they not ?
> >>>
> >>> iam asking as the code is simpler before your patch if that
> >>> "optimization out" thing would work
> >>>
> >>
> >> Why should these functions be optimized out? What would enable the
> >> compiler to optimize them out?
> > 
> > Going back into the past, there was
> > 6b0768e2021b90215a2ab55ed427bce91d148148
> > 
> > before this the code certainly did get optimized out, it was just
> > #define isBE(x) ((x)&1)
> > 
> > thats simple and clean code btw
> 
> I don't really consider such magic numbers to be clean.

no, but that wasnt what i meant
what i meant was more that if you can structure an identifer so that it
itself can provide alot of the used information from within its structure, 
that has advanatges over requiring a descriptor table lookup.
I didnt mean to suggest that this should use hardcoded litteral numbers
and be undocumented and untested.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No human being will ever know the Truth, for even if they happen to say it
by chance, they would not even known they had done so. -- Xenophanes

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop
  2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
@ 2022-09-08  3:31 ` Andreas Rheinhardt
  2022-09-16  8:39   ` Paul B Mahol
  2022-09-08  3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale
  2 siblings, 1 reply; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08  3:31 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Up until now, libswscale/output.c used a macro to write
an output pixel which involved a call to av_pix_fmt_desc_get()
to find out whether the input pixel format is BE or LE
despite this being known at compile-time (there are templates
per pixfmt). Even worse, these calls are made in a loop,
so that e.g. there are eight calls to av_pix_fmt_desc_get()
for every pixel processed in yuv2rgba64_X_c_template()
for 64bit RGB formats.

This commit modifies these macros to ensure that isBE()
is evaluated at compile-time. This saved 41184B of .text
for me (GCC 11.2, -O3).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
This must be the lowest-hanging fruit in the whole codebase.
Two other question: Why do all these functions in swscale_internal.h
take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor?
And would making av_pix_fmt_desc_get() av_const be beneficial?

 libswscale/output.c | 101 +++++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 43 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 40a4476c6d..590334eb57 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
 #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
 #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
 #define output_pixel(pos, val) \
-    if (isBE(target)) { \
+    if (is_be) { \
         AV_WB16(pos, val); \
     } else { \
         AV_WL16(pos, val); \
@@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                         const int16_t *chrFilter, const int32_t **unused_chrUSrc,
                         const int32_t **unused_chrVSrc, int unused_chrFilterSize,
                         const int32_t **alpSrc, uint16_t *dest, int dstW,
-                        int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+                        int y, enum AVPixelFormat target,
+                        int unused_hasAlpha, int unused_eightbytes, int is_be)
 {
     int hasAlpha = !!alpSrc;
     int i;
@@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
                         const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
                         const int32_t *abuf[2], uint16_t *dest, int dstW,
                         int yalpha, int unused_uvalpha, int y,
-                        enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+                        enum AVPixelFormat target, int unused_hasAlpha,
+                        int unused_eightbytes, int is_be)
 {
     int hasAlpha = abuf && abuf[0] && abuf[1];
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
@@ -999,7 +1001,8 @@ static av_always_inline void
 yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
                         const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
                         const int32_t *abuf0, uint16_t *dest, int dstW,
-                        int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+                        int unused_uvalpha, int y, enum AVPixelFormat target,
+                        int unused_hasAlpha, int unused_eightbytes, int is_be)
 {
     int hasAlpha = !!abuf0;
     int i;
@@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes,
+                       int is_be)
 {
     int i;
     int A1 = 0xffff<<14, A2 = 0xffff<<14;
@@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes,
+                       int is_be)
 {
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -1172,7 +1177,8 @@ static av_always_inline void
 yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int uvalpha, int y, enum AVPixelFormat target,
+                       int hasAlpha, int eightbytes, int is_be)
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
@@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int y, enum AVPixelFormat target, int hasAlpha,
+                       int eightbytes, int is_be)
 {
     int i;
     int A = 0xffff<<14;
@@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes,
+                       int is_be)
 {
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -1391,7 +1399,8 @@ static av_always_inline void
 yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int uvalpha, int y, enum AVPixelFormat target,
+                       int hasAlpha, int eightbytes, int is_be)
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
@@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
 #undef r_b
 #undef b_r
 
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
+#define IS_BE_LE 0
+#define IS_BE_BE 1
+#define IS_BE(BE_LE) IS_BE_ ## BE_LE
+
+#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha, eightbytes) \
 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                         const int16_t **_lumSrc, int lumFilterSize, \
                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
+                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes, is_be); \
 } \
  \
 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
@@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                   **abuf = (const int32_t **) _abuf; \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
-                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
+                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \
 } \
  \
 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
@@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                   *abuf0 = (const int32_t *)  _abuf0; \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
-}
-
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
-YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
-
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
+                                  dstW, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \
+}
+#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha, eightbytes) \
+    YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness, IS_BE(endianness), hasAlpha, eightbytes)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64, LE, 0, 1)
 
 /*
  * Write out 2 RGB pixels in the target pixel format. This function takes a
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop
  2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
@ 2022-09-16  8:39   ` Paul B Mahol
  0 siblings, 0 replies; 18+ messages in thread
From: Paul B Mahol @ 2022-09-16  8:39 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Andreas Rheinhardt

On 9/8/22, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> Up until now, libswscale/output.c used a macro to write
> an output pixel which involved a call to av_pix_fmt_desc_get()
> to find out whether the input pixel format is BE or LE
> despite this being known at compile-time (there are templates
> per pixfmt). Even worse, these calls are made in a loop,
> so that e.g. there are eight calls to av_pix_fmt_desc_get()
> for every pixel processed in yuv2rgba64_X_c_template()
> for 64bit RGB formats.
>

LGTM for whole set.

Got nice speed boost for not SIMD optimized conversions in swscale.

> This commit modifies these macros to ensure that isBE()
> is evaluated at compile-time. This saved 41184B of .text
> for me (GCC 11.2, -O3).
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> This must be the lowest-hanging fruit in the whole codebase.
> Two other question: Why do all these functions in swscale_internal.h
> take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor?
> And would making av_pix_fmt_desc_get() av_const be beneficial?
>
>  libswscale/output.c | 101 +++++++++++++++++++++++++-------------------
>  1 file changed, 58 insertions(+), 43 deletions(-)
>
> diff --git a/libswscale/output.c b/libswscale/output.c
> index 40a4476c6d..590334eb57 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422,
> AV_PIX_FMT_UYVY422)
>  #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE
> || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
>  #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE
> || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
>  #define output_pixel(pos, val) \
> -    if (isBE(target)) { \
> +    if (is_be) { \
>          AV_WB16(pos, val); \
>      } else { \
>          AV_WL16(pos, val); \
> @@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t
> *lumFilter,
>                          const int16_t *chrFilter, const int32_t
> **unused_chrUSrc,
>                          const int32_t **unused_chrVSrc, int
> unused_chrFilterSize,
>                          const int32_t **alpSrc, uint16_t *dest, int dstW,
> -                        int y, enum AVPixelFormat target, int
> unused_hasAlpha, int unused_eightbytes)
> +                        int y, enum AVPixelFormat target,
> +                        int unused_hasAlpha, int unused_eightbytes, int
> is_be)
>  {
>      int hasAlpha = !!alpSrc;
>      int i;
> @@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t
> *buf[2],
>                          const int32_t *unused_ubuf[2], const int32_t
> *unused_vbuf[2],
>                          const int32_t *abuf[2], uint16_t *dest, int dstW,
>                          int yalpha, int unused_uvalpha, int y,
> -                        enum AVPixelFormat target, int unused_hasAlpha, int
> unused_eightbytes)
> +                        enum AVPixelFormat target, int unused_hasAlpha,
> +                        int unused_eightbytes, int is_be)
>  {
>      int hasAlpha = abuf && abuf[0] && abuf[1];
>      const int32_t *buf0  = buf[0],  *buf1  = buf[1],
> @@ -999,7 +1001,8 @@ static av_always_inline void
>  yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
>                          const int32_t *unused_ubuf[2], const int32_t
> *unused_vbuf[2],
>                          const int32_t *abuf0, uint16_t *dest, int dstW,
> -                        int unused_uvalpha, int y, enum AVPixelFormat
> target, int unused_hasAlpha, int unused_eightbytes)
> +                        int unused_uvalpha, int y, enum AVPixelFormat
> target,
> +                        int unused_hasAlpha, int unused_eightbytes, int
> is_be)
>  {
>      int hasAlpha = !!abuf0;
>      int i;
> @@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t
> *lumFilter,
>                         const int16_t *chrFilter, const int32_t **chrUSrc,
>                         const int32_t **chrVSrc, int chrFilterSize,
>                         const int32_t **alpSrc, uint16_t *dest, int dstW,
> -                       int y, enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       int y, enum AVPixelFormat target, int hasAlpha, int
> eightbytes,
> +                       int is_be)
>  {
>      int i;
>      int A1 = 0xffff<<14, A2 = 0xffff<<14;
> @@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t
> *buf[2],
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf[2], uint16_t *dest, int dstW,
>                         int yalpha, int uvalpha, int y,
> -                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes,
> +                       int is_be)
>  {
>      const int32_t *buf0  = buf[0],  *buf1  = buf[1],
>                    *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
> @@ -1172,7 +1177,8 @@ static av_always_inline void
>  yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf0, uint16_t *dest, int dstW,
> -                       int uvalpha, int y, enum AVPixelFormat target, int
> hasAlpha, int eightbytes)
> +                       int uvalpha, int y, enum AVPixelFormat target,
> +                       int hasAlpha, int eightbytes, int is_be)
>  {
>      const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
>      int i;
> @@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const
> int16_t *lumFilter,
>                         const int16_t *chrFilter, const int32_t **chrUSrc,
>                         const int32_t **chrVSrc, int chrFilterSize,
>                         const int32_t **alpSrc, uint16_t *dest, int dstW,
> -                       int y, enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       int y, enum AVPixelFormat target, int hasAlpha,
> +                       int eightbytes, int is_be)
>  {
>      int i;
>      int A = 0xffff<<14;
> @@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const
> int32_t *buf[2],
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf[2], uint16_t *dest, int dstW,
>                         int yalpha, int uvalpha, int y,
> -                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes,
> +                       int is_be)
>  {
>      const int32_t *buf0  = buf[0],  *buf1  = buf[1],
>                    *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
> @@ -1391,7 +1399,8 @@ static av_always_inline void
>  yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf0, uint16_t *dest, int dstW,
> -                       int uvalpha, int y, enum AVPixelFormat target, int
> hasAlpha, int eightbytes)
> +                       int uvalpha, int y, enum AVPixelFormat target,
> +                       int hasAlpha, int eightbytes, int is_be)
>  {
>      const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
>      int i;
> @@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const
> int32_t *buf0,
>  #undef r_b
>  #undef b_r
>
> -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
> +#define IS_BE_LE 0
> +#define IS_BE_BE 1
> +#define IS_BE(BE_LE) IS_BE_ ## BE_LE
> +
> +#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha,
> eightbytes) \
>  static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
>                          const int16_t **_lumSrc, int lumFilterSize, \
>                          const int16_t *chrFilter, const int16_t **_chrUSrc,
> \
> @@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const
> int16_t *lumFilter, \
>      uint16_t *dest = (uint16_t *) _dest; \
>      name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
>                            chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
> -                          alpSrc, dest, dstW, y, fmt, hasAlpha,
> eightbytes); \
> +                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes,
> is_be); \
>  } \
>   \
>  static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
> @@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const
> int16_t *_buf[2], \
>                    **abuf = (const int32_t **) _abuf; \
>      uint16_t *dest = (uint16_t *) _dest; \
>      name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
> -                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha,
> eightbytes); \
> +                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha,
> eightbytes, is_be); \
>  } \
>   \
>  static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
> @@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const
> int16_t *_buf0, \
>                    *abuf0 = (const int32_t *)  _abuf0; \
>      uint16_t *dest = (uint16_t *) _dest; \
>      name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
> -                                  dstW, uvalpha, y, fmt, hasAlpha,
> eightbytes); \
> -}
> -
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
> -YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
> -
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE,
> 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE,
> 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE,
> 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE,
> 0, 1)
> +                                  dstW, uvalpha, y, fmt, hasAlpha,
> eightbytes, is_be); \
> +}
> +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha,
> eightbytes) \
> +    YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness,
> IS_BE(endianness), hasAlpha, eightbytes)
> +
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0)
> +YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0)
> +
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64,
> BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64,
> LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64,
> BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64,
> LE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64,
> BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64,
> LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64,
> BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64,
> LE, 0, 1)
>
>  /*
>   * Write out 2 RGB pixels in the target pixel format. This function takes
> a
> --
> 2.34.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; '
  2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
  2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
@ 2022-09-08  3:44 ` Philip Langdale
  2 siblings, 0 replies; 18+ messages in thread
From: Philip Langdale @ 2022-09-08  3:44 UTC (permalink / raw)
  To: ffmpeg-devel

On Thu,  8 Sep 2022 04:31:55 +0200
Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:

> These macros are definitions, not only declarations and therefore
> should not contain a semicolon. Such a semicolon is actually
> spec-incompliant, but compilers happen to accept them.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
>  libswscale/input.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/libswscale/input.c b/libswscale/input.c
> index be8f3940e1..88e318e664 100644
> --- a/libswscale/input.c
> +++ b/libswscale/input.c
> @@ -583,8 +583,8 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t
> *dstV, const uint8_t *unused0, con AV_WN16(dst + i * 2, AV_RL16(src +
> i * 4) >> shift);         \ }
>  
> -y21xle_wrapper(10, 6);
> -y21xle_wrapper(12, 4);
> +y21xle_wrapper(10, 6)
> +y21xle_wrapper(12, 4)
>  
>  static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const
> uint8_t *unused1, const uint8_t *unused2, int width, uint32_t
> *unused, void *opq) @@ -828,9 +828,9 @@ static void
> nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, }
>                                         \ p01x_uv_wrapper(bits, shift)
>  
> -p01x_wrapper(10, 6);
> -p01x_wrapper(12, 4);
> -p01x_uv_wrapper(16, 0);
> +p01x_wrapper(10, 6)
> +p01x_wrapper(12, 4)
> +p01x_uv_wrapper(16, 0)
>  
>  #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
>  

Thanks for catching this. LGTM.

--phil
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
@ 2022-09-16  7:00 Anton Khirnov
  2022-09-16 10:57 ` Michael Niedermayer
  0 siblings, 1 reply; 18+ messages in thread
From: Anton Khirnov @ 2022-09-16  7:00 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Quoting Andreas Rheinhardt (2022-09-09 20:15:22)
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >>>> Michael Niedermayer:
> > [...]
> >>> To me if i look at the evolution
> >>> of isBE() / code checking BE-ness it become more messy over time
> >>>
> >>> I think it would be interresting to think about if we can make
> >>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> >>> or if we maybe can return to a simpler implementation
> >>>
> >>
> >> We could put the av_pix_fmt_descriptors array into an internal header
> >> and use something like
> >>
> >> static av_always_inline const AVPixFmtDescriptor
> >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> >> {
> >>     if (av_builtin_constant_p(fmt))
> >>         return &av_pix_fmt_descriptors[fmt];
> >>     return av_pix_fmt_desc_get(fmt);
> >> }
> > 
> > yes thats what i was thinking of too.
> > 
> 
> Seems like Anton is away for a week or so. I am sure he has an opinion
> on the above approach. I think we will wait for him or shall I apply the
> patches as they are given that they do not block any later alternative
> solution?
> (There is one thing I already don't like about the alternative solution:
> It relies on av_builtin_constant_p, which not every compiler supports.)

For what my opinion is worth, the patch as is with some extra
explanatory comments for the new IS_BE* macros seems like the best
solution to me. They are indeed slightly confusing at first glance, but
quite obvious if you look at the code for two minutes (less for people
smarter than me). And I think few people will need to understand how
precisely they work anyway.

The other proposals seem significantly more complex and fragile than
this one.

-- 
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-16  7:00 [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Anton Khirnov
@ 2022-09-16 10:57 ` Michael Niedermayer
  2022-09-18 20:58   ` Andreas Rheinhardt
  0 siblings, 1 reply; 18+ messages in thread
From: Michael Niedermayer @ 2022-09-16 10:57 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2743 bytes --]

On Fri, Sep 16, 2022 at 09:00:24AM +0200, Anton Khirnov wrote:
> Quoting Andreas Rheinhardt (2022-09-09 20:15:22)
> > Michael Niedermayer:
> > > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> > >> Michael Niedermayer:
> > >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> > >>>> Michael Niedermayer:
> > > [...]
> > >>> To me if i look at the evolution
> > >>> of isBE() / code checking BE-ness it become more messy over time
> > >>>
> > >>> I think it would be interresting to think about if we can make
> > >>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> > >>> or if we maybe can return to a simpler implementation
> > >>>
> > >>
> > >> We could put the av_pix_fmt_descriptors array into an internal header
> > >> and use something like
> > >>
> > >> static av_always_inline const AVPixFmtDescriptor
> > >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> > >> {
> > >>     if (av_builtin_constant_p(fmt))
> > >>         return &av_pix_fmt_descriptors[fmt];
> > >>     return av_pix_fmt_desc_get(fmt);
> > >> }
> > > 
> > > yes thats what i was thinking of too.
> > > 
> > 
> > Seems like Anton is away for a week or so. I am sure he has an opinion
> > on the above approach. I think we will wait for him or shall I apply the
> > patches as they are given that they do not block any later alternative
> > solution?
> > (There is one thing I already don't like about the alternative solution:
> > It relies on av_builtin_constant_p, which not every compiler supports.)
> 
> For what my opinion is worth, the patch as is with some extra
> explanatory comments for the new IS_BE* macros seems like the best
> solution to me. They are indeed slightly confusing at first glance, but
> quite obvious if you look at the code for two minutes (less for people
> smarter than me). And I think few people will need to understand how
> precisely they work anyway.

i agree, maybe we can solve this by changing the IS_BE* names instead
of adding comments.
IS_BE, ENDIAN_IDENTIFER, 
(0,1)  , (LE, BE)

or something like that, may reduce the need for comments


> 
> The other proposals seem significantly more complex and fragile than
> this one.

i agree too, though allowing av_pix_fmt_desc_get() to be done at compile
time would allow its use in inlined functions and macros much broader
so these are not 1:1 comparable as they serve only overlapping goals

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The bravest are surely those who have the clearest vision
of what is before them, glory and danger alike, and yet
notwithstanding go out to meet it. -- Thucydides

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-16 10:57 ` Michael Niedermayer
@ 2022-09-18 20:58   ` Andreas Rheinhardt
  0 siblings, 0 replies; 18+ messages in thread
From: Andreas Rheinhardt @ 2022-09-18 20:58 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Fri, Sep 16, 2022 at 09:00:24AM +0200, Anton Khirnov wrote:
>> Quoting Andreas Rheinhardt (2022-09-09 20:15:22)
>>> Michael Niedermayer:
>>>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
>>>>> Michael Niedermayer:
>>>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>>>>>>> Michael Niedermayer:
>>>> [...]
>>>>>> To me if i look at the evolution
>>>>>> of isBE() / code checking BE-ness it become more messy over time
>>>>>>
>>>>>> I think it would be interresting to think about if we can make
>>>>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
>>>>>> or if we maybe can return to a simpler implementation
>>>>>>
>>>>>
>>>>> We could put the av_pix_fmt_descriptors array into an internal header
>>>>> and use something like
>>>>>
>>>>> static av_always_inline const AVPixFmtDescriptor
>>>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
>>>>> {
>>>>>     if (av_builtin_constant_p(fmt))
>>>>>         return &av_pix_fmt_descriptors[fmt];
>>>>>     return av_pix_fmt_desc_get(fmt);
>>>>> }
>>>>
>>>> yes thats what i was thinking of too.
>>>>
>>>
>>> Seems like Anton is away for a week or so. I am sure he has an opinion
>>> on the above approach. I think we will wait for him or shall I apply the
>>> patches as they are given that they do not block any later alternative
>>> solution?
>>> (There is one thing I already don't like about the alternative solution:
>>> It relies on av_builtin_constant_p, which not every compiler supports.)
>>
>> For what my opinion is worth, the patch as is with some extra
>> explanatory comments for the new IS_BE* macros seems like the best
>> solution to me. They are indeed slightly confusing at first glance, but
>> quite obvious if you look at the code for two minutes (less for people
>> smarter than me). And I think few people will need to understand how
>> precisely they work anyway.
> 
> i agree, maybe we can solve this by changing the IS_BE* names instead
> of adding comments.
> IS_BE, ENDIAN_IDENTIFER, 
> (0,1)  , (LE, BE)
> 
> or something like that, may reduce the need for comments
> 
> 

I sent a version 2 here:
https://ffmpeg.org/pipermail/ffmpeg-devel/2022-September/301535.html Can
you take a look at it, please?

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-09-18 20:58 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
2022-09-08 17:39   ` Michael Niedermayer
2022-09-08 19:38     ` Andreas Rheinhardt
2022-09-08 20:25       ` Michael Niedermayer
2022-09-08 21:44         ` Andreas Rheinhardt
2022-09-09 14:55           ` Michael Niedermayer
2022-09-09 18:15             ` Andreas Rheinhardt
2022-09-10 15:29               ` Michael Niedermayer
2022-09-10 16:34                 ` Andreas Rheinhardt
2022-09-10 17:06                   ` Michael Niedermayer
2022-09-09 15:52           ` Michael Niedermayer
2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
2022-09-16  8:39   ` Paul B Mahol
2022-09-08  3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale
2022-09-16  7:00 [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Anton Khirnov
2022-09-16 10:57 ` Michael Niedermayer
2022-09-18 20:58   ` Andreas Rheinhardt

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git