[FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; '

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; '
@ 2022-09-08  2:31 Andreas Rheinhardt
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08  2:31 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

These macros are definitions, not only declarations and therefore
should not contain a semicolon. Such a semicolon is actually
spec-incompliant, but compilers happen to accept them.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswscale/input.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index be8f3940e1..88e318e664 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -583,8 +583,8 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, con
             AV_WN16(dst + i * 2, AV_RL16(src + i * 4) >> shift);         \
     }
 
-y21xle_wrapper(10, 6);
-y21xle_wrapper(12, 4);
+y21xle_wrapper(10, 6)
+y21xle_wrapper(12, 4)
 
 static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
                        uint32_t *unused, void *opq)
@@ -828,9 +828,9 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
     }                                                                    \
     p01x_uv_wrapper(bits, shift)
 
-p01x_wrapper(10, 6);
-p01x_wrapper(12, 4);
-p01x_uv_wrapper(16, 0);
+p01x_wrapper(10, 6)
+p01x_wrapper(12, 4)
+p01x_uv_wrapper(16, 0)
 
 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
 
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
@ 2022-09-08  2:38 ` Andreas Rheinhardt
  2022-09-08 17:39   ` Michael Niedermayer
  2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
  2022-09-08  3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale
  2 siblings, 1 reply; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08  2:38 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Up until now, libswscale/input.c used a macro to read
an input pixel which involved a call to av_pix_fmt_desc_get()
to find out whether the input pixel format is BE or LE
despite this being known at compile-time (there are templates
per pixfmt). Even worse, these calls are made in a loop,
so that e.g. there are six calls to av_pix_fmt_desc_get()
for every pair of UV pixel processed in
rgb64ToUV_half_c_template().

This commit modifies these macros to ensure that isBE()
is evaluated at compile-time. This saved 9743B of .text
for me (GCC 11.2, -O3).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
output.c suffers from the same issue (and according to coverage
is responsible for quite a large chunk of the calls to
av_pix_fmt_desc_get() (144699832 of 321342234)), so I will
be looking into this tomorrow.

 libswscale/input.c | 96 +++++++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 44 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 88e318e664..4144ff81a9 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -28,14 +28,19 @@
 #include "config.h"
 #include "swscale_internal.h"
 
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
+#define input_pixel(pos) (is_be ? AV_RB16(pos) : AV_RL16(pos))
+
+#define IS_BE_LE 0
+#define IS_BE_BE 1
+#define IS_BE_   0
+#define IS_BE(BE_LE) IS_BE_ ## BE_LE
 
 #define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b)
 #define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r)
 
 static av_always_inline void
 rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
-                    enum AVPixelFormat origin, int32_t *rgb2yuv)
+                    enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
 {
     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
     int i;
@@ -51,7 +56,7 @@ rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
 static av_always_inline void
 rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
-                    int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+                    int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -70,7 +75,7 @@ rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
 static av_always_inline void
 rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                           const uint16_t *src1, const uint16_t *src2,
-                          int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+                          int width, enum AVPixelFormat origin, int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -92,7 +97,7 @@ static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src,
 { \
     const uint16_t *src = (const uint16_t *) _src; \
     uint16_t *dst = (uint16_t *) _dst; \
-    rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
+    rgb64ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 } \
  \
 static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
@@ -102,7 +107,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
     const uint16_t *src1 = (const uint16_t *) _src1, \
                    *src2 = (const uint16_t *) _src2; \
     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+    rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 } \
  \
 static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
@@ -112,7 +117,7 @@ static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV
     const uint16_t *src1 = (const uint16_t *) _src1, \
                    *src2 = (const uint16_t *) _src2; \
     uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
-    rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+    rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 }
 
 rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
@@ -123,7 +128,7 @@ rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE)
 static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
                                                  const uint16_t *src, int width,
                                                  enum AVPixelFormat origin,
-                                                 int32_t *rgb2yuv)
+                                                 int32_t *rgb2yuv, int is_be)
 {
     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
     int i;
@@ -142,7 +147,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
                                                   const uint16_t *src2,
                                                   int width,
                                                   enum AVPixelFormat origin,
-                                                  int32_t *rgb2yuv)
+                                                  int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -164,7 +169,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
                                                        const uint16_t *src2,
                                                        int width,
                                                        enum AVPixelFormat origin,
-                                                       int32_t *rgb2yuv)
+                                                       int32_t *rgb2yuv, int is_be)
 {
     int i;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
@@ -197,7 +202,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst,              \
 {                                                                       \
     const uint16_t *src = (const uint16_t *)_src;                       \
     uint16_t *dst       = (uint16_t *)_dst;                             \
-    rgb48ToY_c_template(dst, src, width, origin, rgb2yuv);              \
+    rgb48ToY_c_template(dst, src, width, origin, rgb2yuv, IS_BE(BE_LE));\
 }                                                                       \
                                                                         \
 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU,            \
@@ -213,7 +218,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU,            \
                    *src2 = (const uint16_t *)_src2;                     \
     uint16_t *dstU = (uint16_t *)_dstU,                                 \
              *dstV = (uint16_t *)_dstV;                                 \
-    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv);        \
+    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 }                                                                       \
                                                                         \
 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU,       \
@@ -229,7 +234,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU,       \
                    *src2 = (const uint16_t *)_src2;                     \
     uint16_t *dstU = (uint16_t *)_dstU,                                 \
              *dstV = (uint16_t *)_dstV;                                 \
-    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv);   \
+    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv, IS_BE(BE_LE)); \
 }
 
 rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE)
@@ -245,7 +250,7 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
                         : ((origin == AV_PIX_FMT_X2RGB10LE ||              \
                             origin == AV_PIX_FMT_X2BGR10LE)                \
                            ? AV_RL32(&src[(i) * 4])                        \
-                           : (isBE(origin) ? AV_RB16(&src[(i) * 2])        \
+                           : (is_be ? AV_RB16(&src[(i) * 2])               \
                               : AV_RL16(&src[(i) * 2]))))
 
 static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
@@ -257,7 +262,7 @@ static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
                                                     int maskr, int maskg,
                                                     int maskb, int rsh,
                                                     int gsh, int bsh, int S,
-                                                    int32_t *rgb2yuv)
+                                                    int32_t *rgb2yuv, int is_be)
 {
     const int ry       = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh;
     const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
@@ -283,7 +288,7 @@ static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
                                                      int maskr, int maskg,
                                                      int maskb, int rsh,
                                                      int gsh, int bsh, int S,
-                                                     int32_t *rgb2yuv)
+                                                     int32_t *rgb2yuv, int is_be)
 {
     const int ru       = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
               rv       = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh);
@@ -311,7 +316,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
                                                           int maskr, int maskg,
                                                           int maskb, int rsh,
                                                           int gsh, int bsh, int S,
-                                                          int32_t *rgb2yuv)
+                                                          int32_t *rgb2yuv, int is_be)
 {
     const int ru       = rgb2yuv[RU_IDX] * (1 << rsh), gu = rgb2yuv[GU_IDX] * (1 << gsh), bu = rgb2yuv[BU_IDX] * (1 << bsh),
               rv       = rgb2yuv[RV_IDX] * (1 << rsh), gv = rgb2yuv[GV_IDX] * (1 << gsh), bv = rgb2yuv[BV_IDX] * (1 << bsh),
@@ -345,13 +350,13 @@ static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
 
 #undef input_pixel
 
-#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr,          \
-                         maskg, maskb, rsh, gsh, bsh, S)                \
+#define rgb16_32_wrapper_0(fmt, name, shr, shg, shb, shp, maskr,        \
+                           maskg, maskb, rsh, gsh, bsh, S, is_be)       \
 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,            \
                           int width, uint32_t *tab, void *opq)          \
 {                                                                       \
     rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp,    \
-                           maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \
+                           maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
 }                                                                       \
                                                                         \
 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV,                \
@@ -360,7 +365,7 @@ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV,                \
 {                                                                       \
     rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,                \
                             shr, shg, shb, shp,                         \
-                            maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\
+                            maskr, maskg, maskb, rsh, gsh, bsh, S, tab, is_be); \
 }                                                                       \
                                                                         \
 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV,           \
@@ -371,27 +376,32 @@ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV,           \
     rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,           \
                                  shr, shg, shb, shp,                    \
                                  maskr, maskg, maskb,                   \
-                                 rsh, gsh, bsh, S, tab);                \
-}
-
-rgb16_32_wrapper(AV_PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
-rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
-rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
-rgb16_32_wrapper(AV_PIX_FMT_X2RGB10LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
-rgb16_32_wrapper(AV_PIX_FMT_X2BGR10LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
+                                 rsh, gsh, bsh, S, tab, is_be);         \
+}
+
+#define rgb16_32_wrapper(base_fmt, endianness, name, shr, shg, shb, shp, maskr, \
+                         maskg, maskb, rsh, gsh, bsh, S) \
+    rgb16_32_wrapper_0(base_fmt ## endianness, name, shr, shg, shb, shp, maskr, \
+                       maskg, maskb, rsh, gsh, bsh, S, IS_BE(endianness))
+
+rgb16_32_wrapper(AV_PIX_FMT_BGR32,     , bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR32_1,   , bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB32,     , rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB32_1,   , rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR565,  LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555,  LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444,  LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565,  LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555,  LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444,  LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_BGR565,  BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_BGR555,  BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_BGR444,  BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_RGB565,  BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT + 8)
+rgb16_32_wrapper(AV_PIX_FMT_RGB555,  BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT + 7)
+rgb16_32_wrapper(AV_PIX_FMT_RGB444,  BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT + 4)
+rgb16_32_wrapper(AV_PIX_FMT_X2RGB10, LE, rgb30le, 16, 6, 0, 0, 0x3FF00000, 0xFFC00, 0x3FF, 0, 0, 4, RGB2YUV_SHIFT + 6)
+rgb16_32_wrapper(AV_PIX_FMT_X2BGR10, LE, bgr30le, 0, 6, 16, 0, 0x3FF, 0xFFC00, 0x3FF00000, 4, 0, 0, RGB2YUV_SHIFT + 6)
 
 static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
                          const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
@@ -832,8 +842,6 @@ p01x_wrapper(10, 6)
 p01x_wrapper(12, 4)
 p01x_uv_wrapper(16, 0)
 
-#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-
 static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                        int width, uint32_t *rgb2yuv, void *opq)
 {
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
@ 2022-09-08 17:39   ` Michael Niedermayer
  2022-09-08 19:38     ` Andreas Rheinhardt
  0 siblings, 1 reply; 15+ messages in thread
From: Michael Niedermayer @ 2022-09-08 17:39 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1033 bytes --]

Hi

On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> Up until now, libswscale/input.c used a macro to read
> an input pixel which involved a call to av_pix_fmt_desc_get()
> to find out whether the input pixel format is BE or LE
> despite this being known at compile-time (there are templates
> per pixfmt). Even worse, these calls are made in a loop,
> so that e.g. there are six calls to av_pix_fmt_desc_get()
> for every pair of UV pixel processed in
> rgb64ToUV_half_c_template().
> 
> This commit modifies these macros to ensure that isBE()
> is evaluated at compile-time. This saved 9743B of .text
> for me (GCC 11.2, -O3).

hmm, all these functions where supposed to be optimized out
why where they not ?

iam asking as the code is simpler before your patch if that
"optimization out" thing would work

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 17:39   ` Michael Niedermayer
@ 2022-09-08 19:38     ` Andreas Rheinhardt
  2022-09-08 20:25       ` Michael Niedermayer
  0 siblings, 1 reply; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08 19:38 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> Hi
> 
> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
>> Up until now, libswscale/input.c used a macro to read
>> an input pixel which involved a call to av_pix_fmt_desc_get()
>> to find out whether the input pixel format is BE or LE
>> despite this being known at compile-time (there are templates
>> per pixfmt). Even worse, these calls are made in a loop,
>> so that e.g. there are six calls to av_pix_fmt_desc_get()
>> for every pair of UV pixel processed in
>> rgb64ToUV_half_c_template().
>>
>> This commit modifies these macros to ensure that isBE()
>> is evaluated at compile-time. This saved 9743B of .text
>> for me (GCC 11.2, -O3).
> 
> hmm, all these functions where supposed to be optimized out
> why where they not ?
> 
> iam asking as the code is simpler before your patch if that
> "optimization out" thing would work
> 

Why should these functions be optimized out? What would enable the
compiler to optimize them out?
(And I really don't see why this patch would make the code more
complicated.)

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 19:38     ` Andreas Rheinhardt
@ 2022-09-08 20:25       ` Michael Niedermayer
  2022-09-08 21:44         ` Andreas Rheinhardt
  0 siblings, 1 reply; 15+ messages in thread
From: Michael Niedermayer @ 2022-09-08 20:25 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2688 bytes --]

On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > Hi
> > 
> > On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> >> Up until now, libswscale/input.c used a macro to read
> >> an input pixel which involved a call to av_pix_fmt_desc_get()
> >> to find out whether the input pixel format is BE or LE
> >> despite this being known at compile-time (there are templates
> >> per pixfmt). Even worse, these calls are made in a loop,
> >> so that e.g. there are six calls to av_pix_fmt_desc_get()
> >> for every pair of UV pixel processed in
> >> rgb64ToUV_half_c_template().
> >>
> >> This commit modifies these macros to ensure that isBE()
> >> is evaluated at compile-time. This saved 9743B of .text
> >> for me (GCC 11.2, -O3).
> > 
> > hmm, all these functions where supposed to be optimized out
> > why where they not ?
> > 
> > iam asking as the code is simpler before your patch if that
> > "optimization out" thing would work
> > 
> 
> Why should these functions be optimized out? What would enable the
> compiler to optimize them out?

Going back into the past, there was
6b0768e2021b90215a2ab55ed427bce91d148148

before this the code certainly did get optimized out, it was just
#define isBE(x) ((x)&1)

thats simple and clean code btw
after this it became

#define isBE(x) \
+    (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)

thats still really good, and very readable, its a const array so
one would assume that a compiler can figure that out at compile time
well, i try not to think of linking and seperate objects here ;)

next it got then replaced by a function and a call that i suspect
people thought would be inlined


> (And I really don't see why this patch would make the code more
> complicated.)

the code historically was capable to lookup any flag and detail
of a pixel format at compile time
now your code works around that not working. Introducing a 2nd
system to do this in parallel. To me if i look at the evolution
of isBE() / code checking BE-ness it become more messy over time

I think it would be interresting to think about if we can make
av_pix_fmt_desc_get(compile time constant) work at compile time.
or if we maybe can return to a simpler implementation


thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Awnsering whenever a program halts or runs forever is
On a turing machine, in general impossible (turings halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 20:25       ` Michael Niedermayer
@ 2022-09-08 21:44         ` Andreas Rheinhardt
  2022-09-09 14:55           ` Michael Niedermayer
  2022-09-09 15:52           ` Michael Niedermayer
  0 siblings, 2 replies; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08 21:44 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> Hi
>>>
>>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
>>>> Up until now, libswscale/input.c used a macro to read
>>>> an input pixel which involved a call to av_pix_fmt_desc_get()
>>>> to find out whether the input pixel format is BE or LE
>>>> despite this being known at compile-time (there are templates
>>>> per pixfmt). Even worse, these calls are made in a loop,
>>>> so that e.g. there are six calls to av_pix_fmt_desc_get()
>>>> for every pair of UV pixel processed in
>>>> rgb64ToUV_half_c_template().
>>>>
>>>> This commit modifies these macros to ensure that isBE()
>>>> is evaluated at compile-time. This saved 9743B of .text
>>>> for me (GCC 11.2, -O3).
>>>
>>> hmm, all these functions where supposed to be optimized out
>>> why where they not ?
>>>
>>> iam asking as the code is simpler before your patch if that
>>> "optimization out" thing would work
>>>
>>
>> Why should these functions be optimized out? What would enable the
>> compiler to optimize them out?
> 
> Going back into the past, there was
> 6b0768e2021b90215a2ab55ed427bce91d148148
> 
> before this the code certainly did get optimized out, it was just
> #define isBE(x) ((x)&1)
> 
> thats simple and clean code btw

I don't really consider such magic numbers to be clean.

> after this it became
> 
> #define isBE(x) \
> +    (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)
> 
> thats still really good, and very readable, its a const array so
> one would assume that a compiler can figure that out at compile time
> well, i try not to think of linking and seperate objects here ;)
> 
> next it got then replaced by a function and a call that i suspect
> people thought would be inlined
> 
> 
>> (And I really don't see why this patch would make the code more
>> complicated.)
> 
> the code historically was capable to lookup any flag and detail
> of a pixel format at compile time
> now your code works around that not working. Introducing a 2nd
> system to do this in parallel. 

I am not introducing a second system, I am reusing the existing system,
namely our existing naming system (the fact that we use BE/LE in the
name of BE/LE pixel formats).

> To me if i look at the evolution
> of isBE() / code checking BE-ness it become more messy over time
> 
> I think it would be interresting to think about if we can make
> av_pix_fmt_desc_get(compile time constant) work at compile time.
> or if we maybe can return to a simpler implementation
> 

We could put the av_pix_fmt_descriptors array into an internal header
and use something like

static av_always_inline const AVPixFmtDescriptor
*ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
{
    if (av_builtin_constant_p(fmt))
        return &av_pix_fmt_descriptors[fmt];
    return av_pix_fmt_desc_get(fmt);
}

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 21:44         ` Andreas Rheinhardt
@ 2022-09-09 14:55           ` Michael Niedermayer
  2022-09-09 18:15             ` Andreas Rheinhardt
  2022-09-09 15:52           ` Michael Niedermayer
  1 sibling, 1 reply; 15+ messages in thread
From: Michael Niedermayer @ 2022-09-09 14:55 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1113 bytes --]

On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
[...]
> > To me if i look at the evolution
> > of isBE() / code checking BE-ness it become more messy over time
> > 
> > I think it would be interresting to think about if we can make
> > av_pix_fmt_desc_get(compile time constant) work at compile time.
> > or if we maybe can return to a simpler implementation
> > 
> 
> We could put the av_pix_fmt_descriptors array into an internal header
> and use something like
> 
> static av_always_inline const AVPixFmtDescriptor
> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> {
>     if (av_builtin_constant_p(fmt))
>         return &av_pix_fmt_descriptors[fmt];
>     return av_pix_fmt_desc_get(fmt);
> }

yes thats what i was thinking of too.

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

I do not agree with what you have to say, but I'll defend to the death your
right to say it. -- Voltaire

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-09 14:55           ` Michael Niedermayer
@ 2022-09-09 18:15             ` Andreas Rheinhardt
  2022-09-10 15:29               ` Michael Niedermayer
  0 siblings, 1 reply; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-09 18:15 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>>>> Michael Niedermayer:
> [...]
>>> To me if i look at the evolution
>>> of isBE() / code checking BE-ness it become more messy over time
>>>
>>> I think it would be interresting to think about if we can make
>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
>>> or if we maybe can return to a simpler implementation
>>>
>>
>> We could put the av_pix_fmt_descriptors array into an internal header
>> and use something like
>>
>> static av_always_inline const AVPixFmtDescriptor
>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
>> {
>>     if (av_builtin_constant_p(fmt))
>>         return &av_pix_fmt_descriptors[fmt];
>>     return av_pix_fmt_desc_get(fmt);
>> }
> 
> yes thats what i was thinking of too.
> 

Seems like Anton is away for a week or so. I am sure he has an opinion
on the above approach. I think we will wait for him or shall I apply the
patches as they are given that they do not block any later alternative
solution?
(There is one thing I already don't like about the alternative solution:
It relies on av_builtin_constant_p, which not every compiler supports.)

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-09 18:15             ` Andreas Rheinhardt
@ 2022-09-10 15:29               ` Michael Niedermayer
  2022-09-10 16:34                 ` Andreas Rheinhardt
  0 siblings, 1 reply; 15+ messages in thread
From: Michael Niedermayer @ 2022-09-10 15:29 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2371 bytes --]

On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >>>> Michael Niedermayer:
> > [...]
> >>> To me if i look at the evolution
> >>> of isBE() / code checking BE-ness it become more messy over time
> >>>
> >>> I think it would be interresting to think about if we can make
> >>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> >>> or if we maybe can return to a simpler implementation
> >>>
> >>
> >> We could put the av_pix_fmt_descriptors array into an internal header
> >> and use something like
> >>
> >> static av_always_inline const AVPixFmtDescriptor
> >> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> >> {
> >>     if (av_builtin_constant_p(fmt))
> >>         return &av_pix_fmt_descriptors[fmt];
> >>     return av_pix_fmt_desc_get(fmt);
> >> }
> > 
> > yes thats what i was thinking of too.
> > 
> 
> Seems like Anton is away for a week or so. I am sure he has an opinion
> on the above approach. I think we will wait for him or shall I apply the
> patches as they are given that they do not block any later alternative
> solution?

please dont apply code like "IS_BE(BE_LE)" 
iam sure it makes sense to you today but it requires an additional step
for the reader to understand
simplest is a seperate endianness and isbe variable. on the wrapper
that should be less code too but quite possibly you see a better and
cleaner way


> (There is one thing I already don't like about the alternative solution:
> It relies on av_builtin_constant_p, which not every compiler supports.)

Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you
saw a better way to achieve it.

But this is a problem which occurs more than once in the codebase.
mapping some identifer to some value just depending on the identifer,
something that is compile time constant, yet it calls to a function to
do a lookup

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The bravest are surely those who have the clearest vision
of what is before them, glory and danger alike, and yet
notwithstanding go out to meet it. -- Thucydides

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-10 15:29               ` Michael Niedermayer
@ 2022-09-10 16:34                 ` Andreas Rheinhardt
  2022-09-10 17:06                   ` Michael Niedermayer
  0 siblings, 1 reply; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-10 16:34 UTC (permalink / raw)
  To: ffmpeg-devel

Michael Niedermayer:
> On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
>>>> Michael Niedermayer:
>>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>>>>>> Michael Niedermayer:
>>> [...]
>>>>> To me if i look at the evolution
>>>>> of isBE() / code checking BE-ness it become more messy over time
>>>>>
>>>>> I think it would be interresting to think about if we can make
>>>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
>>>>> or if we maybe can return to a simpler implementation
>>>>>
>>>>
>>>> We could put the av_pix_fmt_descriptors array into an internal header
>>>> and use something like
>>>>
>>>> static av_always_inline const AVPixFmtDescriptor
>>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
>>>> {
>>>>     if (av_builtin_constant_p(fmt))
>>>>         return &av_pix_fmt_descriptors[fmt];
>>>>     return av_pix_fmt_desc_get(fmt);
>>>> }
>>>
>>> yes thats what i was thinking of too.
>>>
>>
>> Seems like Anton is away for a week or so. I am sure he has an opinion
>> on the above approach. I think we will wait for him or shall I apply the
>> patches as they are given that they do not block any later alternative
>> solution?
> 
> please dont apply code like "IS_BE(BE_LE)" 
> iam sure it makes sense to you today but it requires an additional step
> for the reader to understand
> simplest is a seperate endianness and isbe variable. on the wrapper
> that should be less code too but quite possibly you see a better and
> cleaner way
> 

I actually thought that my solution is superior to the one you seem to
have in mind; after all, the parameter for isbe is redundant: It can
also be derived from the pixfmt-name.

> 
>> (There is one thing I already don't like about the alternative solution:
>> It relies on av_builtin_constant_p, which not every compiler supports.)
> 
> Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you
> saw a better way to achieve it.
> 

Well, without av_builtin_constant_p() the only other way for this would
be to add two systems to get the information, one for compile-time
constants and one for not-constants. Ensuring that the former is only
used for compile-time constants will be a challenge, to say the least.

> But this is a problem which occurs more than once in the codebase.
> mapping some identifer to some value just depending on the identifer,
> something that is compile time constant, yet it calls to a function to
> do a lookup
> 

Another idea from the top of my head:
- We could provide some of the info contained in the descriptors via
separate defines/enums that parallel the actual enum, like
enum AVPixelFormatFlags {
    AV_PIX_FMT_YUV420P_FLAGS = AV_PIX_FMT_FLAG_PLANAR,
    AV_PIX_FMT_YUYV422_FLAGS = 0,
...
};
The list in pixdesc.c would then use these constants instead of defining
the flags twice (to avoid inconsistencies). These constants can be used
from macros via fmt ## _FLAGS, avoiding the av_builtin_constant_p()
issue. This could even be made public if we add a big warning that new
flags may be added in the future. Other such enums for other values may
be added, too, but honestly I don't really like this approach.

We could also use make an xmacro out of the list in lavu/pixdesc.c and
use this xmacro to query the values. E.g. isBE() would then in effect
become one big switch:
isBE(fmt)
{
    switch (fmt) {
    case AV_PIX_FMT_YUV420P: return 0;
....
    }
}
This could be made to handle non-compile-time constants as well (by
having a default that uses av_pix_fmt_desc_get()), but it has a
downside: There would be runtime checks for whether we are in the
default branch or not. So once again this function should not be used
with non-compile time constants.

- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-10 16:34                 ` Andreas Rheinhardt
@ 2022-09-10 17:06                   ` Michael Niedermayer
  0 siblings, 0 replies; 15+ messages in thread
From: Michael Niedermayer @ 2022-09-10 17:06 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 3818 bytes --]

On Sat, Sep 10, 2022 at 06:34:43PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Fri, Sep 09, 2022 at 08:15:22PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> >>>> Michael Niedermayer:
> >>>>> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >>>>>> Michael Niedermayer:
> >>> [...]
> >>>>> To me if i look at the evolution
> >>>>> of isBE() / code checking BE-ness it become more messy over time
> >>>>>
> >>>>> I think it would be interresting to think about if we can make
> >>>>> av_pix_fmt_desc_get(compile time constant) work at compile time.
> >>>>> or if we maybe can return to a simpler implementation
> >>>>>
> >>>>
> >>>> We could put the av_pix_fmt_descriptors array into an internal header
> >>>> and use something like
> >>>>
> >>>> static av_always_inline const AVPixFmtDescriptor
> >>>> *ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
> >>>> {
> >>>>     if (av_builtin_constant_p(fmt))
> >>>>         return &av_pix_fmt_descriptors[fmt];
> >>>>     return av_pix_fmt_desc_get(fmt);
> >>>> }
> >>>
> >>> yes thats what i was thinking of too.
> >>>
> >>
> >> Seems like Anton is away for a week or so. I am sure he has an opinion
> >> on the above approach. I think we will wait for him or shall I apply the
> >> patches as they are given that they do not block any later alternative
> >> solution?
> > 
> > please dont apply code like "IS_BE(BE_LE)" 
> > iam sure it makes sense to you today but it requires an additional step
> > for the reader to understand
> > simplest is a seperate endianness and isbe variable. on the wrapper
> > that should be less code too but quite possibly you see a better and
> > cleaner way
> > 
> 
> I actually thought that my solution is superior to the one you seem to
> have in mind; after all, the parameter for isbe is redundant: It can
> also be derived from the pixfmt-name.

your solution is supperior in some sense. But iam concerned that it is
less readable for a small gain.
Maybe BE_LE and others can be renamed to make it immedeatly clear


> 
> > 
> >> (There is one thing I already don't like about the alternative solution:
> >> It relies on av_builtin_constant_p, which not every compiler supports.)
> > 
> > Thats why i didnt suggets to use av_builtin_constant_p() i was hoping you
> > saw a better way to achieve it.
> > 
> 
> Well, without av_builtin_constant_p() the only other way for this would
> be to add two systems to get the information, one for compile-time
> constants and one for not-constants. Ensuring that the former is only
> used for compile-time constants will be a challenge, to say the least.

I dont think its a challenge, but iam still quite unsure if this would
lead to other issues.

For example, if we add a constant lookup function that has direct access
to the table. We assue that will only be used where constants are the
arguzment. Now its easy to add a extra field to that table and then
grep the binary for that. If its found it was either used with a non
constant or the compiler failed to optimize it out.
This could be a fate test. If it works 100% thats simple but if it works
99% it will be a pain and is not a direction i would suggest, it would
be more pain than its worth.

So really this is just for discussion at this point, far from a plan
with understood consequences

thx
[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()
  2022-09-08 21:44         ` Andreas Rheinhardt
  2022-09-09 14:55           ` Michael Niedermayer
@ 2022-09-09 15:52           ` Michael Niedermayer
  1 sibling, 0 replies; 15+ messages in thread
From: Michael Niedermayer @ 2022-09-09 15:52 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2179 bytes --]

On Thu, Sep 08, 2022 at 11:44:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> >> Michael Niedermayer:
> >>> Hi
> >>>
> >>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> >>>> Up until now, libswscale/input.c used a macro to read
> >>>> an input pixel which involved a call to av_pix_fmt_desc_get()
> >>>> to find out whether the input pixel format is BE or LE
> >>>> despite this being known at compile-time (there are templates
> >>>> per pixfmt). Even worse, these calls are made in a loop,
> >>>> so that e.g. there are six calls to av_pix_fmt_desc_get()
> >>>> for every pair of UV pixel processed in
> >>>> rgb64ToUV_half_c_template().
> >>>>
> >>>> This commit modifies these macros to ensure that isBE()
> >>>> is evaluated at compile-time. This saved 9743B of .text
> >>>> for me (GCC 11.2, -O3).
> >>>
> >>> hmm, all these functions where supposed to be optimized out
> >>> why where they not ?
> >>>
> >>> iam asking as the code is simpler before your patch if that
> >>> "optimization out" thing would work
> >>>
> >>
> >> Why should these functions be optimized out? What would enable the
> >> compiler to optimize them out?
> > 
> > Going back into the past, there was
> > 6b0768e2021b90215a2ab55ed427bce91d148148
> > 
> > before this the code certainly did get optimized out, it was just
> > #define isBE(x) ((x)&1)
> > 
> > thats simple and clean code btw
> 
> I don't really consider such magic numbers to be clean.

no, but that wasnt what i meant
what i meant was more that if you can structure an identifer so that it
itself can provide alot of the used information from within its structure, 
that has advanatges over requiring a descriptor table lookup.
I didnt mean to suggest that this should use hardcoded litteral numbers
and be undocumented and untested.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No human being will ever know the Truth, for even if they happen to say it
by chance, they would not even known they had done so. -- Xenophanes

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop
  2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
@ 2022-09-08  3:31 ` Andreas Rheinhardt
  2022-09-16  8:39   ` Paul B Mahol
  2022-09-08  3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale
  2 siblings, 1 reply; 15+ messages in thread
From: Andreas Rheinhardt @ 2022-09-08  3:31 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Andreas Rheinhardt

Up until now, libswscale/output.c used a macro to write
an output pixel which involved a call to av_pix_fmt_desc_get()
to find out whether the input pixel format is BE or LE
despite this being known at compile-time (there are templates
per pixfmt). Even worse, these calls are made in a loop,
so that e.g. there are eight calls to av_pix_fmt_desc_get()
for every pixel processed in yuv2rgba64_X_c_template()
for 64bit RGB formats.

This commit modifies these macros to ensure that isBE()
is evaluated at compile-time. This saved 41184B of .text
for me (GCC 11.2, -O3).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
This must be the lowest-hanging fruit in the whole codebase.
Two other question: Why do all these functions in swscale_internal.h
take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor?
And would making av_pix_fmt_desc_get() av_const be beneficial?

 libswscale/output.c | 101 +++++++++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 43 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 40a4476c6d..590334eb57 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
 #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
 #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
 #define output_pixel(pos, val) \
-    if (isBE(target)) { \
+    if (is_be) { \
         AV_WB16(pos, val); \
     } else { \
         AV_WL16(pos, val); \
@@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                         const int16_t *chrFilter, const int32_t **unused_chrUSrc,
                         const int32_t **unused_chrVSrc, int unused_chrFilterSize,
                         const int32_t **alpSrc, uint16_t *dest, int dstW,
-                        int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+                        int y, enum AVPixelFormat target,
+                        int unused_hasAlpha, int unused_eightbytes, int is_be)
 {
     int hasAlpha = !!alpSrc;
     int i;
@@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
                         const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
                         const int32_t *abuf[2], uint16_t *dest, int dstW,
                         int yalpha, int unused_uvalpha, int y,
-                        enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+                        enum AVPixelFormat target, int unused_hasAlpha,
+                        int unused_eightbytes, int is_be)
 {
     int hasAlpha = abuf && abuf[0] && abuf[1];
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
@@ -999,7 +1001,8 @@ static av_always_inline void
 yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
                         const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
                         const int32_t *abuf0, uint16_t *dest, int dstW,
-                        int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+                        int unused_uvalpha, int y, enum AVPixelFormat target,
+                        int unused_hasAlpha, int unused_eightbytes, int is_be)
 {
     int hasAlpha = !!abuf0;
     int i;
@@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes,
+                       int is_be)
 {
     int i;
     int A1 = 0xffff<<14, A2 = 0xffff<<14;
@@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes,
+                       int is_be)
 {
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -1172,7 +1177,8 @@ static av_always_inline void
 yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int uvalpha, int y, enum AVPixelFormat target,
+                       int hasAlpha, int eightbytes, int is_be)
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
@@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int y, enum AVPixelFormat target, int hasAlpha,
+                       int eightbytes, int is_be)
 {
     int i;
     int A = 0xffff<<14;
@@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes,
+                       int is_be)
 {
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -1391,7 +1399,8 @@ static av_always_inline void
 yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+                       int uvalpha, int y, enum AVPixelFormat target,
+                       int hasAlpha, int eightbytes, int is_be)
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
@@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
 #undef r_b
 #undef b_r
 
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
+#define IS_BE_LE 0
+#define IS_BE_BE 1
+#define IS_BE(BE_LE) IS_BE_ ## BE_LE
+
+#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha, eightbytes) \
 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                         const int16_t **_lumSrc, int lumFilterSize, \
                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
+                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes, is_be); \
 } \
  \
 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
@@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                   **abuf = (const int32_t **) _abuf; \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
-                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
+                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \
 } \
  \
 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
@@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                   *abuf0 = (const int32_t *)  _abuf0; \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
-}
-
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
-YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
-
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
+                                  dstW, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \
+}
+#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha, eightbytes) \
+    YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness, IS_BE(endianness), hasAlpha, eightbytes)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64, LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64, BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64, LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64, BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64, LE, 0, 1)
 
 /*
  * Write out 2 RGB pixels in the target pixel format. This function takes a
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop
  2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
@ 2022-09-16  8:39   ` Paul B Mahol
  0 siblings, 0 replies; 15+ messages in thread
From: Paul B Mahol @ 2022-09-16  8:39 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Andreas Rheinhardt

On 9/8/22, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> Up until now, libswscale/output.c used a macro to write
> an output pixel which involved a call to av_pix_fmt_desc_get()
> to find out whether the input pixel format is BE or LE
> despite this being known at compile-time (there are templates
> per pixfmt). Even worse, these calls are made in a loop,
> so that e.g. there are eight calls to av_pix_fmt_desc_get()
> for every pixel processed in yuv2rgba64_X_c_template()
> for 64bit RGB formats.
>

LGTM for whole set.

Got nice speed boost for not SIMD optimized conversions in swscale.

> This commit modifies these macros to ensure that isBE()
> is evaluated at compile-time. This saved 41184B of .text
> for me (GCC 11.2, -O3).
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> This must be the lowest-hanging fruit in the whole codebase.
> Two other question: Why do all these functions in swscale_internal.h
> take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor?
> And would making av_pix_fmt_desc_get() av_const be beneficial?
>
>  libswscale/output.c | 101 +++++++++++++++++++++++++-------------------
>  1 file changed, 58 insertions(+), 43 deletions(-)
>
> diff --git a/libswscale/output.c b/libswscale/output.c
> index 40a4476c6d..590334eb57 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422,
> AV_PIX_FMT_UYVY422)
>  #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE
> || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
>  #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE
> || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
>  #define output_pixel(pos, val) \
> -    if (isBE(target)) { \
> +    if (is_be) { \
>          AV_WB16(pos, val); \
>      } else { \
>          AV_WL16(pos, val); \
> @@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t
> *lumFilter,
>                          const int16_t *chrFilter, const int32_t
> **unused_chrUSrc,
>                          const int32_t **unused_chrVSrc, int
> unused_chrFilterSize,
>                          const int32_t **alpSrc, uint16_t *dest, int dstW,
> -                        int y, enum AVPixelFormat target, int
> unused_hasAlpha, int unused_eightbytes)
> +                        int y, enum AVPixelFormat target,
> +                        int unused_hasAlpha, int unused_eightbytes, int
> is_be)
>  {
>      int hasAlpha = !!alpSrc;
>      int i;
> @@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t
> *buf[2],
>                          const int32_t *unused_ubuf[2], const int32_t
> *unused_vbuf[2],
>                          const int32_t *abuf[2], uint16_t *dest, int dstW,
>                          int yalpha, int unused_uvalpha, int y,
> -                        enum AVPixelFormat target, int unused_hasAlpha, int
> unused_eightbytes)
> +                        enum AVPixelFormat target, int unused_hasAlpha,
> +                        int unused_eightbytes, int is_be)
>  {
>      int hasAlpha = abuf && abuf[0] && abuf[1];
>      const int32_t *buf0  = buf[0],  *buf1  = buf[1],
> @@ -999,7 +1001,8 @@ static av_always_inline void
>  yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
>                          const int32_t *unused_ubuf[2], const int32_t
> *unused_vbuf[2],
>                          const int32_t *abuf0, uint16_t *dest, int dstW,
> -                        int unused_uvalpha, int y, enum AVPixelFormat
> target, int unused_hasAlpha, int unused_eightbytes)
> +                        int unused_uvalpha, int y, enum AVPixelFormat
> target,
> +                        int unused_hasAlpha, int unused_eightbytes, int
> is_be)
>  {
>      int hasAlpha = !!abuf0;
>      int i;
> @@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t
> *lumFilter,
>                         const int16_t *chrFilter, const int32_t **chrUSrc,
>                         const int32_t **chrVSrc, int chrFilterSize,
>                         const int32_t **alpSrc, uint16_t *dest, int dstW,
> -                       int y, enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       int y, enum AVPixelFormat target, int hasAlpha, int
> eightbytes,
> +                       int is_be)
>  {
>      int i;
>      int A1 = 0xffff<<14, A2 = 0xffff<<14;
> @@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t
> *buf[2],
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf[2], uint16_t *dest, int dstW,
>                         int yalpha, int uvalpha, int y,
> -                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes,
> +                       int is_be)
>  {
>      const int32_t *buf0  = buf[0],  *buf1  = buf[1],
>                    *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
> @@ -1172,7 +1177,8 @@ static av_always_inline void
>  yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf0, uint16_t *dest, int dstW,
> -                       int uvalpha, int y, enum AVPixelFormat target, int
> hasAlpha, int eightbytes)
> +                       int uvalpha, int y, enum AVPixelFormat target,
> +                       int hasAlpha, int eightbytes, int is_be)
>  {
>      const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
>      int i;
> @@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const
> int16_t *lumFilter,
>                         const int16_t *chrFilter, const int32_t **chrUSrc,
>                         const int32_t **chrVSrc, int chrFilterSize,
>                         const int32_t **alpSrc, uint16_t *dest, int dstW,
> -                       int y, enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       int y, enum AVPixelFormat target, int hasAlpha,
> +                       int eightbytes, int is_be)
>  {
>      int i;
>      int A = 0xffff<<14;
> @@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const
> int32_t *buf[2],
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf[2], uint16_t *dest, int dstW,
>                         int yalpha, int uvalpha, int y,
> -                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes)
> +                       enum AVPixelFormat target, int hasAlpha, int
> eightbytes,
> +                       int is_be)
>  {
>      const int32_t *buf0  = buf[0],  *buf1  = buf[1],
>                    *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
> @@ -1391,7 +1399,8 @@ static av_always_inline void
>  yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
>                         const int32_t *ubuf[2], const int32_t *vbuf[2],
>                         const int32_t *abuf0, uint16_t *dest, int dstW,
> -                       int uvalpha, int y, enum AVPixelFormat target, int
> hasAlpha, int eightbytes)
> +                       int uvalpha, int y, enum AVPixelFormat target,
> +                       int hasAlpha, int eightbytes, int is_be)
>  {
>      const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
>      int i;
> @@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const
> int32_t *buf0,
>  #undef r_b
>  #undef b_r
>
> -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
> +#define IS_BE_LE 0
> +#define IS_BE_BE 1
> +#define IS_BE(BE_LE) IS_BE_ ## BE_LE
> +
> +#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha,
> eightbytes) \
>  static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
>                          const int16_t **_lumSrc, int lumFilterSize, \
>                          const int16_t *chrFilter, const int16_t **_chrUSrc,
> \
> @@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const
> int16_t *lumFilter, \
>      uint16_t *dest = (uint16_t *) _dest; \
>      name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
>                            chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
> -                          alpSrc, dest, dstW, y, fmt, hasAlpha,
> eightbytes); \
> +                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes,
> is_be); \
>  } \
>   \
>  static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
> @@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const
> int16_t *_buf[2], \
>                    **abuf = (const int32_t **) _abuf; \
>      uint16_t *dest = (uint16_t *) _dest; \
>      name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
> -                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha,
> eightbytes); \
> +                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha,
> eightbytes, is_be); \
>  } \
>   \
>  static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
> @@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const
> int16_t *_buf0, \
>                    *abuf0 = (const int32_t *)  _abuf0; \
>      uint16_t *dest = (uint16_t *) _dest; \
>      name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
> -                                  dstW, uvalpha, y, fmt, hasAlpha,
> eightbytes); \
> -}
> -
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
> -YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
> -
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0,
> 0)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE,
> 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE,
> 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE,
> 1, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE,
> 0, 1)
> -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE,
> 0, 1)
> +                                  dstW, uvalpha, y, fmt, hasAlpha,
> eightbytes, is_be); \
> +}
> +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha,
> eightbytes) \
> +    YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness,
> IS_BE(endianness), hasAlpha, eightbytes)
> +
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0)
> +YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0)
> +
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE,
> 0, 0)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64,
> BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64,
> LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64,
> BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64,
> LE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64,
> BE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64,
> LE, 1, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64,
> BE, 0, 1)
> +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64,
> LE, 0, 1)
>
>  /*
>   * Write out 2 RGB pixels in the target pixel format. This function takes
> a
> --
> 2.34.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; '
  2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
  2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
  2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
@ 2022-09-08  3:44 ` Philip Langdale
  2 siblings, 0 replies; 15+ messages in thread
From: Philip Langdale @ 2022-09-08  3:44 UTC (permalink / raw)
  To: ffmpeg-devel

On Thu,  8 Sep 2022 04:31:55 +0200
Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:

> These macros are definitions, not only declarations and therefore
> should not contain a semicolon. Such a semicolon is actually
> spec-incompliant, but compilers happen to accept them.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
>  libswscale/input.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/libswscale/input.c b/libswscale/input.c
> index be8f3940e1..88e318e664 100644
> --- a/libswscale/input.c
> +++ b/libswscale/input.c
> @@ -583,8 +583,8 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t
> *dstV, const uint8_t *unused0, con AV_WN16(dst + i * 2, AV_RL16(src +
> i * 4) >> shift);         \ }
>  
> -y21xle_wrapper(10, 6);
> -y21xle_wrapper(12, 4);
> +y21xle_wrapper(10, 6)
> +y21xle_wrapper(12, 4)
>  
>  static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const
> uint8_t *unused1, const uint8_t *unused2, int width, uint32_t
> *unused, void *opq) @@ -828,9 +828,9 @@ static void
> nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, }
>                                         \ p01x_uv_wrapper(bits, shift)
>  
> -p01x_wrapper(10, 6);
> -p01x_wrapper(12, 4);
> -p01x_uv_wrapper(16, 0);
> +p01x_wrapper(10, 6)
> +p01x_wrapper(12, 4)
> +p01x_uv_wrapper(16, 0)
>  
>  #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
>  

Thanks for catching this. LGTM.

--phil
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2022-09-16  8:39 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-08  2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt
2022-09-08  2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt
2022-09-08 17:39   ` Michael Niedermayer
2022-09-08 19:38     ` Andreas Rheinhardt
2022-09-08 20:25       ` Michael Niedermayer
2022-09-08 21:44         ` Andreas Rheinhardt
2022-09-09 14:55           ` Michael Niedermayer
2022-09-09 18:15             ` Andreas Rheinhardt
2022-09-10 15:29               ` Michael Niedermayer
2022-09-10 16:34                 ` Andreas Rheinhardt
2022-09-10 17:06                   ` Michael Niedermayer
2022-09-09 15:52           ` Michael Niedermayer
2022-09-08  3:31 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Andreas Rheinhardt
2022-09-16  8:39   ` Paul B Mahol
2022-09-08  3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git