Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8
@ 2024-06-16 22:28 Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
  To: ffmpeg-devel

The C code for some pixel formats (rgb555, rgb565, rgb444, and monob)
did not convert the trailing pixels when the width is not a multiple of 8.

NOTE: for odd widths, the C code for yuv2rgb still does not convert the
      last pixel for any pixel format except monob.
---
 libswscale/yuv2rgb.c | 101 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 93 insertions(+), 8 deletions(-)

diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index c1d6236f37..e641c765c7 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -172,10 +172,6 @@ const int *sws_getCoefficients(int colorspace)
         return srcSliceH;                           \
     }
 
-#define CLOSEYUV2RGBFUNC(dst_delta)                 \
-    ENDYUV2RGBLINE(dst_delta, 0)                    \
-    ENDYUV2RGBFUNC()
-
 YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
     LOADCHROMA(0);
     PUTRGB48(dst_1, py_1, 0);
@@ -432,7 +428,27 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
     LOADCHROMA(3);
     PUTRGB16(dst_2, py_2, 3, 6 + 8);
     PUTRGB16(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
+ENDYUV2RGBLINE(8, 0)
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
+
+    LOADCHROMA(0);
+    PUTRGB16(dst_1, py_1, 0, 0);
+    PUTRGB16(dst_2, py_2, 0, 0 + 8);
+
+    LOADCHROMA(1);
+    PUTRGB16(dst_2, py_2, 1, 2 + 8);
+    PUTRGB16(dst_1, py_1, 1, 2);
+ENDYUV2RGBLINE(8, 1)
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
+
+    LOADCHROMA(0);
+    PUTRGB16(dst_1, py_1, 0, 0);
+    PUTRGB16(dst_2, py_2, 0, 0 + 8);
+ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
     const uint8_t *d16 = ff_dither_2x2_8[y & 1];
@@ -462,7 +478,25 @@ YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
     LOADCHROMA(3);
     PUTRGB15(dst_2, py_2, 3, 6 + 8);
     PUTRGB15(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
+ENDYUV2RGBLINE(8, 0)
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
+
+    LOADCHROMA(0);
+    PUTRGB15(dst_1, py_1, 0, 0);
+    PUTRGB15(dst_2, py_2, 0, 0 + 8);
+
+    LOADCHROMA(1);
+    PUTRGB15(dst_2, py_2, 1, 2 + 8);
+    PUTRGB15(dst_1, py_1, 1, 2);
+ENDYUV2RGBLINE(8, 1)
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
+
+    LOADCHROMA(0);
+    PUTRGB15(dst_1, py_1, 0, 0);
+    PUTRGB15(dst_2, py_2, 0, 0 + 8);
+ENDYUV2RGBFUNC()
 
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
@@ -493,7 +527,23 @@ YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
     LOADCHROMA(3);
     PUTRGB12(dst_2, py_2, 3, 6 + 8);
     PUTRGB12(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
+ENDYUV2RGBLINE(8, 0)
+    const uint8_t *d16 = ff_dither_4x4_16[y & 3];
+
+    LOADCHROMA(0);
+    PUTRGB12(dst_1, py_1, 0, 0);
+    PUTRGB12(dst_2, py_2, 0, 0 + 8);
+
+    LOADCHROMA(1);
+    PUTRGB12(dst_2, py_2, 1, 2 + 8);
+    PUTRGB12(dst_1, py_1, 1, 2);
+ENDYUV2RGBLINE(8, 1)
+    const uint8_t *d16 = ff_dither_4x4_16[y & 3];
+
+    LOADCHROMA(0);
+    PUTRGB12(dst_1, py_1, 0, 0);
+    PUTRGB12(dst_2, py_2, 0, 0 + 8);
+ENDYUV2RGBFUNC()
 
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
@@ -672,7 +722,42 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
 
     dst_1[0] = out_1;
     dst_2[0] = out_2;
-CLOSEYUV2RGBFUNC(1)
+
+    py_1  += 8;
+    py_2  += 8;
+    dst_1 += 1;
+    dst_2 += 1;
+    }
+    if (c->dstW & 7) {
+        int av_unused Y, U, V;
+        int pixels_left = c->dstW & 7;
+    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
+    char out_1 = 0, out_2 = 0;
+    g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
+
+#define PUTRGB1_OR00(out, src, i, o)                \
+    if (pixels_left) {                              \
+        PUTRGB1(out, src, i, o)                     \
+        pixels_left--;                              \
+    } else {                                        \
+        out <<= 2;                                  \
+    }
+
+    PUTRGB1_OR00(out_1, py_1, 0, 0);
+    PUTRGB1_OR00(out_2, py_2, 0, 0 + 8);
+
+    PUTRGB1_OR00(out_2, py_2, 1, 2 + 8);
+    PUTRGB1_OR00(out_1, py_1, 1, 2);
+
+    PUTRGB1_OR00(out_1, py_1, 2, 4);
+    PUTRGB1_OR00(out_2, py_2, 2, 4 + 8);
+
+    PUTRGB1_OR00(out_2, py_2, 3, 6 + 8);
+    PUTRGB1_OR00(out_1, py_1, 3, 6);
+
+    dst_1[0] = out_1;
+    dst_2[0] = out_2;
+ENDYUV2RGBFUNC()
 
 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation
  2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libswscale/yuv2rgb.c | 574 +++++++++----------------------------------
 1 file changed, 113 insertions(+), 461 deletions(-)

diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index e641c765c7..7386d3a2a2 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -72,13 +72,13 @@ const int *sws_getCoefficients(int colorspace)
     g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]);  \
     b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
 
-#define PUTRGB(dst, src, i)                         \
+#define PUTRGB(dst, src, asrc, i, abase)            \
     Y              = src[2 * i];                    \
     dst[2 * i]     = r[Y] + g[Y] + b[Y];            \
     Y              = src[2 * i + 1];                \
     dst[2 * i + 1] = r[Y] + g[Y] + b[Y];
 
-#define PUTRGB24(dst, src, i)                       \
+#define PUTRGB24(dst, src, asrc, i, abase)          \
     Y              = src[2 * i];                    \
     dst[6 * i + 0] = r[Y];                          \
     dst[6 * i + 1] = g[Y];                          \
@@ -88,7 +88,7 @@ const int *sws_getCoefficients(int colorspace)
     dst[6 * i + 4] = g[Y];                          \
     dst[6 * i + 5] = b[Y];
 
-#define PUTBGR24(dst, src, i)                       \
+#define PUTBGR24(dst, src, asrc, i, abase)          \
     Y              = src[2 * i];                    \
     dst[6 * i + 0] = b[Y];                          \
     dst[6 * i + 1] = g[Y];                          \
@@ -98,13 +98,13 @@ const int *sws_getCoefficients(int colorspace)
     dst[6 * i + 4] = g[Y];                          \
     dst[6 * i + 5] = r[Y];
 
-#define PUTRGBA(dst, ysrc, asrc, i, s)                                  \
+#define PUTRGBA(dst, ysrc, asrc, i, abase)                              \
     Y              = ysrc[2 * i];                                       \
-    dst[2 * i]     = r[Y] + g[Y] + b[Y] + (asrc[2 * i]     << s);       \
+    dst[2 * i]     = r[Y] + g[Y] + b[Y] + (asrc[2 * i]     << abase);   \
     Y              = ysrc[2 * i + 1];                                   \
-    dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s);
+    dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << abase);
 
-#define PUTRGB48(dst, src, i)                       \
+#define PUTRGB48(dst, src, asrc, i, abase)          \
     Y                = src[ 2 * i];                 \
     dst[12 * i +  0] = dst[12 * i +  1] = r[Y];     \
     dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
@@ -114,7 +114,7 @@ const int *sws_getCoefficients(int colorspace)
     dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
     dst[12 * i + 10] = dst[12 * i + 11] = b[Y];
 
-#define PUTBGR48(dst, src, i)                       \
+#define PUTBGR48(dst, src, asrc, i, abase)          \
     Y                = src[2 * i];                  \
     dst[12 * i +  0] = dst[12 * i +  1] = b[Y];     \
     dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
@@ -155,11 +155,15 @@ const int *sws_getCoefficients(int colorspace)
             while (h_size--) {                                              \
                 int av_unused U, V, Y;                                      \
 
-#define ENDYUV2RGBLINE(dst_delta, ss)               \
+#define ENDYUV2RGBLINE(dst_delta, ss, alpha)        \
     pu    += 4 >> ss;                               \
     pv    += 4 >> ss;                               \
     py_1  += 8 >> ss;                               \
     py_2  += 8 >> ss;                               \
+    if (alpha) {                                    \
+        pa_1 += 8 >> ss;                            \
+        pa_2 += 8 >> ss;                            \
+    }                                               \
     dst_1 += dst_delta >> ss;                       \
     dst_2 += dst_delta >> ss;                       \
     }                                               \
@@ -172,236 +176,77 @@ const int *sws_getCoefficients(int colorspace)
         return srcSliceH;                           \
     }
 
-YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB48(dst_2, py_2, 1);
-    PUTRGB48(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB48(dst_1, py_1, 2);
-    PUTRGB48(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB48(dst_2, py_2, 3);
-    PUTRGB48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB48(dst_2, py_2, 1);
-    PUTRGB48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR48(dst_2, py_2, 1);
-    PUTBGR48(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTBGR48(dst_1, py_1, 2);
-    PUTBGR48(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTBGR48(dst_2, py_2, 3);
-    PUTBGR48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR48(dst_2, py_2, 1);
-    PUTBGR48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2, py_2, 1);
-    PUTRGB(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1, py_1, 2);
-    PUTRGB(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2, py_2, 3);
-    PUTRGB(dst_1, py_1, 3);
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2, py_2, 1);
-    PUTRGB(dst_1, py_1, 1);
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-#if HAVE_BIGENDIAN
-YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
-#else
-YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
-#endif
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-
-    LOADCHROMA(2);
-    PUTRGBA(dst_1, py_1, pa_1, 2, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 2, 24);
-
-    LOADCHROMA(3);
-    PUTRGBA(dst_2, py_2, pa_2, 3, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 3, 24);
-    pa_1 += 8;
-    pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-    pa_1 += 4;
-    pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-ENDYUV2RGBFUNC()
-
-#if HAVE_BIGENDIAN
-YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
-#else
-YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
-#endif
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-
-    LOADCHROMA(2);
-    PUTRGBA(dst_1, py_1, pa_1, 2, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 2, 0);
-
-    LOADCHROMA(3);
-    PUTRGBA(dst_2, py_2, pa_2, 3, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 3, 0);
-    pa_1 += 8;
-    pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-    pa_1 += 4;
-    pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2, py_2, 1);
-    PUTRGB24(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB24(dst_1, py_1, 2);
-    PUTRGB24(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB24(dst_2, py_2, 3);
-    PUTRGB24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2, py_2, 1);
-    PUTRGB24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-// only trivial mods from yuv2rgb_c_24_rgb
-YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2, py_2, 1);
-    PUTBGR24(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTBGR24(dst_1, py_1, 2);
-    PUTBGR24(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTBGR24(dst_2, py_2, 3);
-    PUTBGR24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2, py_2, 1);
-    PUTBGR24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+#define YUV420FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \
+    YUV2RGBFUNC(func_name, dst_type, alpha)                             \
+        LOADCHROMA(0);                                                  \
+        PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
+        PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(1);                                                  \
+        PUTFUNC(dst_2, py_2, pa_2, 1, abase);                           \
+        PUTFUNC(dst_1, py_1, pa_1, 1, abase);                           \
+                                                                        \
+        LOADCHROMA(2);                                                  \
+        PUTFUNC(dst_1, py_1, pa_1, 2, abase);                           \
+        PUTFUNC(dst_2, py_2, pa_2, 2, abase);                           \
+                                                                        \
+        LOADCHROMA(3);                                                  \
+        PUTFUNC(dst_2, py_2, pa_2, 3, abase);                           \
+        PUTFUNC(dst_1, py_1, pa_1, 3, abase);                           \
+    ENDYUV2RGBLINE(dst_delta, 0, alpha)                                 \
+        LOADCHROMA(0);                                                  \
+        PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
+        PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(1);                                                  \
+        PUTFUNC(dst_2, py_2, pa_2, 1, abase);                           \
+        PUTFUNC(dst_1, py_1, pa_1, 1, abase);                           \
+    ENDYUV2RGBLINE(dst_delta, 1, alpha)                                 \
+        LOADCHROMA(0);                                                  \
+        PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
+        PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
+    ENDYUV2RGBFUNC()
+
+#define YUV420FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \
+    YUV2RGBFUNC(func_name, dst_type, 0)                                 \
+        LOADDITHER                                                      \
+                                                                        \
+        LOADCHROMA(0);                                                  \
+        PUTFUNC(dst_1, py_1, 0, 0);                                     \
+        PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
+                                                                        \
+        LOADCHROMA(1);                                                  \
+        PUTFUNC(dst_2, py_2, 1, 2 + 8);                                 \
+        PUTFUNC(dst_1, py_1, 1, 2);                                     \
+                                                                        \
+        LOADCHROMA(2);                                                  \
+        PUTFUNC(dst_1, py_1, 2, 4);                                     \
+        PUTFUNC(dst_2, py_2, 2, 4 + 8);                                 \
+                                                                        \
+        LOADCHROMA(3);                                                  \
+        PUTFUNC(dst_2, py_2, 3, 6 + 8);                                 \
+        PUTFUNC(dst_1, py_1, 3, 6);                                     \
+    ENDYUV2RGBLINE(dst_delta, 0, 0)                                     \
+        LOADDITHER                                                      \
+                                                                        \
+        LOADCHROMA(0);                                                  \
+        PUTFUNC(dst_1, py_1, 0, 0);                                     \
+        PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
+                                                                        \
+        LOADCHROMA(1);                                                  \
+        PUTFUNC(dst_2, py_2, 1, 2 + 8);                                 \
+        PUTFUNC(dst_1, py_1, 1, 2);                                     \
+    ENDYUV2RGBLINE(dst_delta, 1, 0)                                     \
+        LOADDITHER                                                      \
+                                                                        \
+        LOADCHROMA(0);                                                  \
+        PUTFUNC(dst_1, py_1, 0, 0);                                     \
+        PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
+    ENDYUV2RGBFUNC()
+
+#define LOADDITHER16                                    \
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];        \
+    const uint8_t *e16 = ff_dither_2x2_4[y & 1];        \
     const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
 
 #define PUTRGB16(dst, src, i, o)                    \
@@ -413,45 +258,9 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
     dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
                      g[Y + e16[1 + o]] +            \
                      b[Y + f16[1 + o]];
-    LOADCHROMA(0);
-    PUTRGB16(dst_1, py_1, 0, 0);
-    PUTRGB16(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB16(dst_2, py_2, 1, 2 + 8);
-    PUTRGB16(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB16(dst_1, py_1, 2, 4);
-    PUTRGB16(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB16(dst_2, py_2, 3, 6 + 8);
-    PUTRGB16(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
-    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
-
-    LOADCHROMA(0);
-    PUTRGB16(dst_1, py_1, 0, 0);
-    PUTRGB16(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB16(dst_2, py_2, 1, 2 + 8);
-    PUTRGB16(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
-    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
 
-    LOADCHROMA(0);
-    PUTRGB16(dst_1, py_1, 0, 0);
-    PUTRGB16(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+#define LOADDITHER15                                    \
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];        \
     const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
 
 #define PUTRGB15(dst, src, i, o)                    \
@@ -463,43 +272,8 @@ YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
     dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
                      g[Y + d16[0 + o]] +            \
                      b[Y + e16[1 + o]];
-    LOADCHROMA(0);
-    PUTRGB15(dst_1, py_1, 0, 0);
-    PUTRGB15(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB15(dst_2, py_2, 1, 2 + 8);
-    PUTRGB15(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB15(dst_1, py_1, 2, 4);
-    PUTRGB15(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB15(dst_2, py_2, 3, 6 + 8);
-    PUTRGB15(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
-
-    LOADCHROMA(0);
-    PUTRGB15(dst_1, py_1, 0, 0);
-    PUTRGB15(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB15(dst_2, py_2, 1, 2 + 8);
-    PUTRGB15(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
-
-    LOADCHROMA(0);
-    PUTRGB15(dst_1, py_1, 0, 0);
-    PUTRGB15(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
 
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
+#define LOADDITHER12                                    \
     const uint8_t *d16 = ff_dither_4x4_16[y & 3];
 
 #define PUTRGB12(dst, src, i, o)                    \
@@ -512,42 +286,8 @@ YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
                      g[Y + d16[1 + o]] +            \
                      b[Y + d16[1 + o]];
 
-    LOADCHROMA(0);
-    PUTRGB12(dst_1, py_1, 0, 0);
-    PUTRGB12(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB12(dst_2, py_2, 1, 2 + 8);
-    PUTRGB12(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB12(dst_1, py_1, 2, 4);
-    PUTRGB12(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB12(dst_2, py_2, 3, 6 + 8);
-    PUTRGB12(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
-    const uint8_t *d16 = ff_dither_4x4_16[y & 3];
-
-    LOADCHROMA(0);
-    PUTRGB12(dst_1, py_1, 0, 0);
-    PUTRGB12(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB12(dst_2, py_2, 1, 2 + 8);
-    PUTRGB12(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
-    const uint8_t *d16 = ff_dither_4x4_16[y & 3];
-
-    LOADCHROMA(0);
-    PUTRGB12(dst_1, py_1, 0, 0);
-    PUTRGB12(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
-    const uint8_t *d32 = ff_dither_8x8_32[yd & 7];
+#define LOADDITHER8                                     \
+    const uint8_t *d32 = ff_dither_8x8_32[yd & 7];      \
     const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
 
 #define PUTRGB8(dst, src, i, o)                     \
@@ -560,46 +300,9 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
                      g[Y + d32[1 + o]] +            \
                      b[Y + d64[1 + o]];
 
-    LOADCHROMA(0);
-    PUTRGB8(dst_1, py_1, 0, 0);
-    PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB8(dst_2, py_2, 1, 2 + 8);
-    PUTRGB8(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB8(dst_1, py_1, 2, 4);
-    PUTRGB8(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB8(dst_2, py_2, 3, 6 + 8);
-    PUTRGB8(dst_1, py_1, 3, 6);
-
-ENDYUV2RGBLINE(8, 0)
-    const uint8_t *d32 = ff_dither_8x8_32[yd & 7];
-    const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
-    LOADCHROMA(0);
-    PUTRGB8(dst_1, py_1, 0, 0);
-    PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB8(dst_2, py_2, 1, 2 + 8);
-    PUTRGB8(dst_1, py_1, 1, 2);
-
-ENDYUV2RGBLINE(8, 1)
-    const uint8_t *d32 = ff_dither_8x8_32[yd & 7];
-    const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
-    LOADCHROMA(0);
-    PUTRGB8(dst_1, py_1, 0, 0);
-    PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
-ENDYUV2RGBFUNC()
-
-
-YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
-    const uint8_t * d64 = ff_dither_8x8_73[yd & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
+#define LOADDITHER4D                                    \
+    const uint8_t * d64 = ff_dither_8x8_73[yd & 7];     \
+    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];    \
     int acc;
 
 #define PUTRGB4D(dst, src, i, o)                    \
@@ -613,45 +316,8 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
               b[Y + d128[1 + o]]) << 4;             \
     dst[i] = acc;
 
-    LOADCHROMA(0);
-    PUTRGB4D(dst_1, py_1, 0, 0);
-    PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4D(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4D(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB4D(dst_1, py_1, 2, 4);
-    PUTRGB4D(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB4D(dst_2, py_2, 3, 6 + 8);
-    PUTRGB4D(dst_1, py_1, 3, 6);
-
-ENDYUV2RGBLINE(4, 0)
-    const uint8_t * d64 = ff_dither_8x8_73[yd & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
-    int acc;
-    LOADCHROMA(0);
-    PUTRGB4D(dst_1, py_1, 0, 0);
-    PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4D(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4D(dst_1, py_1, 1, 2);
-
-ENDYUV2RGBLINE(4, 1)
-    const uint8_t * d64 = ff_dither_8x8_73[yd & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
-    int acc;
-    LOADCHROMA(0);
-    PUTRGB4D(dst_1, py_1, 0, 0);
-    PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
-    const uint8_t *d64  = ff_dither_8x8_73[yd & 7];
+#define LOADDITHER4DB                                   \
+    const uint8_t *d64  = ff_dither_8x8_73[yd & 7];     \
     const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
 
 #define PUTRGB4DB(dst, src, i, o)                   \
@@ -664,39 +330,6 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
                      g[Y +  d64[1 + o]] +           \
                      b[Y + d128[1 + o]];
 
-    LOADCHROMA(0);
-    PUTRGB4DB(dst_1, py_1, 0, 0);
-    PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4DB(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB4DB(dst_1, py_1, 2, 4);
-    PUTRGB4DB(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB4DB(dst_2, py_2, 3, 6 + 8);
-    PUTRGB4DB(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
-    const uint8_t *d64  = ff_dither_8x8_73[yd & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
-    LOADCHROMA(0);
-    PUTRGB4DB(dst_1, py_1, 0, 0);
-    PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4DB(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
-    const uint8_t *d64  = ff_dither_8x8_73[yd & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
-    LOADCHROMA(0);
-    PUTRGB4DB(dst_1, py_1, 0, 0);
-    PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
 YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
     const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
     char out_1 = 0, out_2 = 0;
@@ -759,6 +392,25 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
     dst_2[0] = out_2;
 ENDYUV2RGBFUNC()
 
+YUV420FUNC(yuv2rgb_c_48,     uint8_t,  0,  0, PUTRGB48, 48)
+YUV420FUNC(yuv2rgb_c_bgr48,  uint8_t,  0,  0, PUTBGR48, 48)
+YUV420FUNC(yuv2rgb_c_32,     uint32_t, 0,  0, PUTRGB,    8)
+#if HAVE_BIGENDIAN
+YUV420FUNC(yuva2argb_c,      uint32_t, 1, 24, PUTRGBA,   8)
+YUV420FUNC(yuva2rgba_c,      uint32_t, 1,  0, PUTRGBA,   8)
+#else
+YUV420FUNC(yuva2rgba_c,      uint32_t, 1, 24, PUTRGBA,   8)
+YUV420FUNC(yuva2argb_c,      uint32_t, 1,  0, PUTRGBA,   8)
+#endif
+YUV420FUNC(yuv2rgb_c_24_rgb, uint8_t,  0,  0, PUTRGB24, 24)
+YUV420FUNC(yuv2rgb_c_24_bgr, uint8_t,  0,  0, PUTBGR24, 24)
+YUV420FUNC_DITHER(yuv2rgb_c_16_ordered_dither, uint16_t, LOADDITHER16,  PUTRGB16,  8)
+YUV420FUNC_DITHER(yuv2rgb_c_15_ordered_dither, uint16_t, LOADDITHER15,  PUTRGB15,  8)
+YUV420FUNC_DITHER(yuv2rgb_c_12_ordered_dither, uint16_t, LOADDITHER12,  PUTRGB12,  8)
+YUV420FUNC_DITHER(yuv2rgb_c_8_ordered_dither,  uint8_t,  LOADDITHER8,   PUTRGB8,   8)
+YUV420FUNC_DITHER(yuv2rgb_c_4_ordered_dither,  uint8_t,  LOADDITHER4D,  PUTRGB4D,  4)
+YUV420FUNC_DITHER(yuv2rgb_c_4b_ordered_dither, uint8_t,  LOADDITHER4DB, PUTRGB4DB, 8)
+
 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
     SwsFunc t = NULL;
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code
  2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
  2024-06-19 10:13   ` Michael Niedermayer
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit Ramiro Polla
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
  To: ffmpeg-devel

The C code was silently ignoring the second chroma line on yuv422p
input.
---
 libswscale/yuv2rgb.c | 228 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 196 insertions(+), 32 deletions(-)

diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 7386d3a2a2..1ea87ac17a 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -65,7 +65,7 @@ const int *sws_getCoefficients(int colorspace)
     return ff_yuv2rgb_coeffs[colorspace];
 }
 
-#define LOADCHROMA(i)                               \
+#define LOADCHROMA(pu, pv, i)                       \
     U = pu[i];                                      \
     V = pv[i];                                      \
     r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM];                     \
@@ -124,17 +124,13 @@ const int *sws_getCoefficients(int colorspace)
     dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
     dst[12 * i + 10] = dst[12 * i + 11] = r[Y];
 
-#define YUV2RGBFUNC(func_name, dst_type, alpha)                             \
+#define YUV2RGBFUNC(func_name, dst_type, alpha, yuv422)                     \
     static int func_name(SwsContext *c, const uint8_t *src[],               \
                          int srcStride[], int srcSliceY, int srcSliceH,     \
                          uint8_t *dst[], int dstStride[])                   \
     {                                                                       \
         int y;                                                              \
                                                                             \
-        if (!alpha && c->srcFormat == AV_PIX_FMT_YUV422P) {                    \
-            srcStride[1] *= 2;                                              \
-            srcStride[2] *= 2;                                              \
-        }                                                                   \
         for (y = 0; y < srcSliceH; y += 2) {                                \
             int yd = y + srcSliceY;                                         \
             dst_type *dst_1 =                                               \
@@ -144,10 +140,15 @@ const int *sws_getCoefficients(int colorspace)
             dst_type av_unused *r, *g, *b;                                  \
             const uint8_t *py_1 = src[0] +  y       * srcStride[0];         \
             const uint8_t *py_2 = py_1   +            srcStride[0];         \
-            const uint8_t av_unused *pu = src[1] + (y >> 1) * srcStride[1]; \
-            const uint8_t av_unused *pv = src[2] + (y >> 1) * srcStride[2]; \
+            const uint8_t av_unused *pu_1 = src[1] + (y >> !yuv422) * srcStride[1]; \
+            const uint8_t av_unused *pv_1 = src[2] + (y >> !yuv422) * srcStride[2]; \
+            const uint8_t av_unused *pu_2, *pv_2;                           \
             const uint8_t av_unused *pa_1, *pa_2;                           \
             unsigned int h_size = c->dstW >> 3;                             \
+            if (yuv422) {                                                   \
+                pu_2 = pu_1 + srcStride[1];                                 \
+                pv_2 = pv_1 + srcStride[2];                                 \
+            }                                                               \
             if (alpha) {                                                    \
                 pa_1 = src[3] + y * srcStride[3];                           \
                 pa_2 = pa_1   +     srcStride[3];                           \
@@ -155,9 +156,13 @@ const int *sws_getCoefficients(int colorspace)
             while (h_size--) {                                              \
                 int av_unused U, V, Y;                                      \
 
-#define ENDYUV2RGBLINE(dst_delta, ss, alpha)        \
-    pu    += 4 >> ss;                               \
-    pv    += 4 >> ss;                               \
+#define ENDYUV2RGBLINE(dst_delta, ss, alpha, yuv422) \
+    pu_1  += 4 >> ss;                               \
+    pv_1  += 4 >> ss;                               \
+    if (yuv422) {                                   \
+        pu_2 += 4 >> ss;                            \
+        pv_2 += 4 >> ss;                            \
+    }                                               \
     py_1  += 8 >> ss;                               \
     py_2  += 8 >> ss;                               \
     if (alpha) {                                    \
@@ -177,73 +182,169 @@ const int *sws_getCoefficients(int colorspace)
     }
 
 #define YUV420FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \
-    YUV2RGBFUNC(func_name, dst_type, alpha)                             \
-        LOADCHROMA(0);                                                  \
+    YUV2RGBFUNC(func_name, dst_type, alpha, 0)                          \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
         PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
         PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
                                                                         \
-        LOADCHROMA(1);                                                  \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
         PUTFUNC(dst_2, py_2, pa_2, 1, abase);                           \
         PUTFUNC(dst_1, py_1, pa_1, 1, abase);                           \
                                                                         \
-        LOADCHROMA(2);                                                  \
+        LOADCHROMA(pu_1, pv_1, 2);                                      \
         PUTFUNC(dst_1, py_1, pa_1, 2, abase);                           \
         PUTFUNC(dst_2, py_2, pa_2, 2, abase);                           \
                                                                         \
-        LOADCHROMA(3);                                                  \
+        LOADCHROMA(pu_1, pv_1, 3);                                      \
         PUTFUNC(dst_2, py_2, pa_2, 3, abase);                           \
         PUTFUNC(dst_1, py_1, pa_1, 3, abase);                           \
-    ENDYUV2RGBLINE(dst_delta, 0, alpha)                                 \
-        LOADCHROMA(0);                                                  \
+    ENDYUV2RGBLINE(dst_delta, 0, alpha, 0)                              \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
         PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
         PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
                                                                         \
-        LOADCHROMA(1);                                                  \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
         PUTFUNC(dst_2, py_2, pa_2, 1, abase);                           \
         PUTFUNC(dst_1, py_1, pa_1, 1, abase);                           \
-    ENDYUV2RGBLINE(dst_delta, 1, alpha)                                 \
-        LOADCHROMA(0);                                                  \
+    ENDYUV2RGBLINE(dst_delta, 1, alpha, 0)                              \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
         PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
         PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
     ENDYUV2RGBFUNC()
 
+#define YUV422FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \
+    YUV2RGBFUNC(func_name, dst_type, alpha, 1)                          \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 0);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 1);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 1, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 1, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 2);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 2, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 2);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 2, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 3);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 3, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 3);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 3, abase);                           \
+    ENDYUV2RGBLINE(dst_delta, 0, alpha, 1)                              \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 0);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 1);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 1, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 1, abase);                           \
+    ENDYUV2RGBLINE(dst_delta, 1, alpha, 1)                              \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
+        PUTFUNC(dst_1, py_1, pa_1, 0, abase);                           \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 0);                                      \
+        PUTFUNC(dst_2, py_2, pa_2, 0, abase);                           \
+    ENDYUV2RGBFUNC()
+
 #define YUV420FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \
-    YUV2RGBFUNC(func_name, dst_type, 0)                                 \
+    YUV2RGBFUNC(func_name, dst_type, 0, 0)                              \
         LOADDITHER                                                      \
                                                                         \
-        LOADCHROMA(0);                                                  \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
         PUTFUNC(dst_1, py_1, 0, 0);                                     \
         PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
                                                                         \
-        LOADCHROMA(1);                                                  \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
         PUTFUNC(dst_2, py_2, 1, 2 + 8);                                 \
         PUTFUNC(dst_1, py_1, 1, 2);                                     \
                                                                         \
-        LOADCHROMA(2);                                                  \
+        LOADCHROMA(pu_1, pv_1, 2);                                      \
         PUTFUNC(dst_1, py_1, 2, 4);                                     \
         PUTFUNC(dst_2, py_2, 2, 4 + 8);                                 \
                                                                         \
-        LOADCHROMA(3);                                                  \
+        LOADCHROMA(pu_1, pv_1, 3);                                      \
         PUTFUNC(dst_2, py_2, 3, 6 + 8);                                 \
         PUTFUNC(dst_1, py_1, 3, 6);                                     \
-    ENDYUV2RGBLINE(dst_delta, 0, 0)                                     \
+    ENDYUV2RGBLINE(dst_delta, 0, 0, 0)                                  \
         LOADDITHER                                                      \
                                                                         \
-        LOADCHROMA(0);                                                  \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
         PUTFUNC(dst_1, py_1, 0, 0);                                     \
         PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
                                                                         \
-        LOADCHROMA(1);                                                  \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
         PUTFUNC(dst_2, py_2, 1, 2 + 8);                                 \
         PUTFUNC(dst_1, py_1, 1, 2);                                     \
-    ENDYUV2RGBLINE(dst_delta, 1, 0)                                     \
+    ENDYUV2RGBLINE(dst_delta, 1, 0, 0)                                  \
         LOADDITHER                                                      \
                                                                         \
-        LOADCHROMA(0);                                                  \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
         PUTFUNC(dst_1, py_1, 0, 0);                                     \
         PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
     ENDYUV2RGBFUNC()
 
+#define YUV422FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \
+    YUV2RGBFUNC(func_name, dst_type, 0, 1)                              \
+        LOADDITHER                                                      \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
+        PUTFUNC(dst_1, py_1, 0, 0);                                     \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 0);                                      \
+        PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 1);                                      \
+        PUTFUNC(dst_2, py_2, 1, 2 + 8);                                 \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
+        PUTFUNC(dst_1, py_1, 1, 2);                                     \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 2);                                      \
+        PUTFUNC(dst_1, py_1, 2, 4);                                     \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 2);                                      \
+        PUTFUNC(dst_2, py_2, 2, 4 + 8);                                 \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 3);                                      \
+        PUTFUNC(dst_2, py_2, 3, 6 + 8);                                 \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 3);                                      \
+        PUTFUNC(dst_1, py_1, 3, 6);                                     \
+    ENDYUV2RGBLINE(dst_delta, 0, 0, 1)                                  \
+        LOADDITHER                                                      \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
+        PUTFUNC(dst_1, py_1, 0, 0);                                     \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 0);                                      \
+        PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 1);                                      \
+        PUTFUNC(dst_2, py_2, 1, 2 + 8);                                 \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 1);                                      \
+        PUTFUNC(dst_1, py_1, 1, 2);                                     \
+    ENDYUV2RGBLINE(dst_delta, 1, 0, 1)                                  \
+        LOADDITHER                                                      \
+                                                                        \
+        LOADCHROMA(pu_1, pv_1, 0);                                      \
+        PUTFUNC(dst_1, py_1, 0, 0);                                     \
+                                                                        \
+        LOADCHROMA(pu_2, pv_2, 0);                                      \
+        PUTFUNC(dst_2, py_2, 0, 0 + 8);                                 \
+    ENDYUV2RGBFUNC()
+
 #define LOADDITHER16                                    \
     const uint8_t *d16 = ff_dither_2x2_8[y & 1];        \
     const uint8_t *e16 = ff_dither_2x2_4[y & 1];        \
@@ -330,7 +431,7 @@ const int *sws_getCoefficients(int colorspace)
                      g[Y +  d64[1 + o]] +           \
                      b[Y + d128[1 + o]];
 
-YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
+YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0, 0)
     const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
     char out_1 = 0, out_2 = 0;
     g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
@@ -392,6 +493,7 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
     dst_2[0] = out_2;
 ENDYUV2RGBFUNC()
 
+// YUV420
 YUV420FUNC(yuv2rgb_c_48,     uint8_t,  0,  0, PUTRGB48, 48)
 YUV420FUNC(yuv2rgb_c_bgr48,  uint8_t,  0,  0, PUTBGR48, 48)
 YUV420FUNC(yuv2rgb_c_32,     uint32_t, 0,  0, PUTRGB,    8)
@@ -411,6 +513,26 @@ YUV420FUNC_DITHER(yuv2rgb_c_8_ordered_dither,  uint8_t,  LOADDITHER8,   PUTRGB8,
 YUV420FUNC_DITHER(yuv2rgb_c_4_ordered_dither,  uint8_t,  LOADDITHER4D,  PUTRGB4D,  4)
 YUV420FUNC_DITHER(yuv2rgb_c_4b_ordered_dither, uint8_t,  LOADDITHER4DB, PUTRGB4DB, 8)
 
+// YUV422
+YUV422FUNC(yuv422p_rgb48_c,  uint8_t,  0,  0, PUTRGB48, 48)
+YUV422FUNC(yuv422p_bgr48_c,  uint8_t,  0,  0, PUTBGR48, 48)
+YUV422FUNC(yuv422p_rgb32_c,  uint32_t, 0,  0, PUTRGB,    8)
+#if HAVE_BIGENDIAN
+YUV422FUNC(yuva422p_argb_c,  uint32_t, 1, 24, PUTRGBA,   8)
+YUV422FUNC(yuva422p_rgba_c,  uint32_t, 1,  0, PUTRGBA,   8)
+#else
+YUV422FUNC(yuva422p_rgba_c,  uint32_t, 1, 24, PUTRGBA,   8)
+YUV422FUNC(yuva422p_argb_c,  uint32_t, 1,  0, PUTRGBA,   8)
+#endif
+YUV422FUNC(yuv422p_rgb24_c,  uint8_t,  0,  0, PUTRGB24, 24)
+YUV422FUNC(yuv422p_bgr24_c,  uint8_t,  0,  0, PUTBGR24, 24)
+YUV422FUNC_DITHER(yuv422p_bgr16,     uint16_t, LOADDITHER16,  PUTRGB16,  8)
+YUV422FUNC_DITHER(yuv422p_bgr15,     uint16_t, LOADDITHER15,  PUTRGB15,  8)
+YUV422FUNC_DITHER(yuv422p_bgr12,     uint16_t, LOADDITHER12,  PUTRGB12,  8)
+YUV422FUNC_DITHER(yuv422p_bgr8,      uint8_t,  LOADDITHER8,   PUTRGB8,   8)
+YUV422FUNC_DITHER(yuv422p_bgr4,      uint8_t,  LOADDITHER4D,  PUTRGB4D,  4)
+YUV422FUNC_DITHER(yuv422p_bgr4_byte, uint8_t,  LOADDITHER4DB, PUTRGB4DB, 8)
+
 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
     SwsFunc t = NULL;
@@ -430,6 +552,47 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
            "No accelerated colorspace conversion found from %s to %s.\n",
            av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
 
+    if (c->srcFormat == AV_PIX_FMT_YUV422P) {
+        switch (c->dstFormat) {
+        case AV_PIX_FMT_BGR48BE:
+        case AV_PIX_FMT_BGR48LE:
+            return yuv422p_bgr48_c;
+        case AV_PIX_FMT_RGB48BE:
+        case AV_PIX_FMT_RGB48LE:
+            return yuv422p_rgb48_c;
+        case AV_PIX_FMT_ARGB:
+        case AV_PIX_FMT_ABGR:
+            if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
+                return yuva422p_argb_c;
+        case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_BGRA:
+            return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva422p_rgba_c : yuv422p_rgb32_c;
+        case AV_PIX_FMT_RGB24:
+            return yuv422p_rgb24_c;
+        case AV_PIX_FMT_BGR24:
+            return yuv422p_bgr24_c;
+        case AV_PIX_FMT_RGB565:
+        case AV_PIX_FMT_BGR565:
+            return yuv422p_bgr16;
+        case AV_PIX_FMT_RGB555:
+        case AV_PIX_FMT_BGR555:
+            return yuv422p_bgr15;
+        case AV_PIX_FMT_RGB444:
+        case AV_PIX_FMT_BGR444:
+            return yuv422p_bgr12;
+        case AV_PIX_FMT_RGB8:
+        case AV_PIX_FMT_BGR8:
+            return yuv422p_bgr8;
+        case AV_PIX_FMT_RGB4:
+        case AV_PIX_FMT_BGR4:
+            return yuv422p_bgr4;
+        case AV_PIX_FMT_RGB4_BYTE:
+        case AV_PIX_FMT_BGR4_BYTE:
+            return yuv422p_bgr4_byte;
+        case AV_PIX_FMT_MONOBLACK:
+            return yuv2rgb_c_1_ordered_dither;
+        }
+    } else {
     switch (c->dstFormat) {
     case AV_PIX_FMT_BGR48BE:
     case AV_PIX_FMT_BGR48LE:
@@ -469,6 +632,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
     case AV_PIX_FMT_MONOBLACK:
         return yuv2rgb_c_1_ordered_dither;
     }
+    }
     return NULL;
 }
 
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit
  2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
  4 siblings, 0 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libswscale/yuv2rgb.c | 78 ++++++++++++++++++++++----------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 1ea87ac17a..977eb3a7dd 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -593,45 +593,45 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
             return yuv2rgb_c_1_ordered_dither;
         }
     } else {
-    switch (c->dstFormat) {
-    case AV_PIX_FMT_BGR48BE:
-    case AV_PIX_FMT_BGR48LE:
-        return yuv2rgb_c_bgr48;
-    case AV_PIX_FMT_RGB48BE:
-    case AV_PIX_FMT_RGB48LE:
-        return yuv2rgb_c_48;
-    case AV_PIX_FMT_ARGB:
-    case AV_PIX_FMT_ABGR:
-        if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
-            return yuva2argb_c;
-    case AV_PIX_FMT_RGBA:
-    case AV_PIX_FMT_BGRA:
-        return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
-    case AV_PIX_FMT_RGB24:
-        return yuv2rgb_c_24_rgb;
-    case AV_PIX_FMT_BGR24:
-        return yuv2rgb_c_24_bgr;
-    case AV_PIX_FMT_RGB565:
-    case AV_PIX_FMT_BGR565:
-        return yuv2rgb_c_16_ordered_dither;
-    case AV_PIX_FMT_RGB555:
-    case AV_PIX_FMT_BGR555:
-        return yuv2rgb_c_15_ordered_dither;
-    case AV_PIX_FMT_RGB444:
-    case AV_PIX_FMT_BGR444:
-        return yuv2rgb_c_12_ordered_dither;
-    case AV_PIX_FMT_RGB8:
-    case AV_PIX_FMT_BGR8:
-        return yuv2rgb_c_8_ordered_dither;
-    case AV_PIX_FMT_RGB4:
-    case AV_PIX_FMT_BGR4:
-        return yuv2rgb_c_4_ordered_dither;
-    case AV_PIX_FMT_RGB4_BYTE:
-    case AV_PIX_FMT_BGR4_BYTE:
-        return yuv2rgb_c_4b_ordered_dither;
-    case AV_PIX_FMT_MONOBLACK:
-        return yuv2rgb_c_1_ordered_dither;
-    }
+        switch (c->dstFormat) {
+        case AV_PIX_FMT_BGR48BE:
+        case AV_PIX_FMT_BGR48LE:
+            return yuv2rgb_c_bgr48;
+        case AV_PIX_FMT_RGB48BE:
+        case AV_PIX_FMT_RGB48LE:
+            return yuv2rgb_c_48;
+        case AV_PIX_FMT_ARGB:
+        case AV_PIX_FMT_ABGR:
+            if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
+                return yuva2argb_c;
+        case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_BGRA:
+            return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
+        case AV_PIX_FMT_RGB24:
+            return yuv2rgb_c_24_rgb;
+        case AV_PIX_FMT_BGR24:
+            return yuv2rgb_c_24_bgr;
+        case AV_PIX_FMT_RGB565:
+        case AV_PIX_FMT_BGR565:
+            return yuv2rgb_c_16_ordered_dither;
+        case AV_PIX_FMT_RGB555:
+        case AV_PIX_FMT_BGR555:
+            return yuv2rgb_c_15_ordered_dither;
+        case AV_PIX_FMT_RGB444:
+        case AV_PIX_FMT_BGR444:
+            return yuv2rgb_c_12_ordered_dither;
+        case AV_PIX_FMT_RGB8:
+        case AV_PIX_FMT_BGR8:
+            return yuv2rgb_c_8_ordered_dither;
+        case AV_PIX_FMT_RGB4:
+        case AV_PIX_FMT_BGR4:
+            return yuv2rgb_c_4_ordered_dither;
+        case AV_PIX_FMT_RGB4_BYTE:
+        case AV_PIX_FMT_BGR4_BYTE:
+            return yuv2rgb_c_4b_ordered_dither;
+        case AV_PIX_FMT_MONOBLACK:
+            return yuv2rgb_c_1_ordered_dither;
+        }
     }
     return NULL;
 }
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
  2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
                   ` (2 preceding siblings ...)
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
  2024-06-20 14:59   ` Ramiro Polla
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
  4 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/Makefile     |   2 +-
 tests/checkasm/checkasm.c   |   1 +
 tests/checkasm/checkasm.h   |   1 +
 tests/checkasm/sw_yuv2rgb.c | 205 ++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak     |   1 +
 5 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/sw_yuv2rgb.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index f20732b37a..3a7670e24b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
 CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
 
 # swscale tests
-SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
+SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
 
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 56232ab1e0..d9ac772a08 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,7 @@ static const struct {
     { "sw_range_convert", checkasm_check_sw_range_convert },
     { "sw_rgb", checkasm_check_sw_rgb },
     { "sw_scale", checkasm_check_sw_scale },
+    { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
 #endif
 #if CONFIG_AVUTIL
         { "fixed_dsp", checkasm_check_fixed_dsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index e544007b67..4d5f3e387e 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
 void checkasm_check_sw_range_convert(void);
 void checkasm_check_sw_rgb(void);
 void checkasm_check_sw_scale(void);
+void checkasm_check_sw_yuv2rgb(void);
 void checkasm_check_takdsp(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
new file mode 100644
index 0000000000..fbe01a7788
--- /dev/null
+++ b/tests/checkasm/sw_yuv2rgb.c
@@ -0,0 +1,205 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        for (int j = 0; j < size; j += 4) \
+            AV_WN32(buf + j, rnd());      \
+    } while (0)
+
+static const int dst_fmts[] = {
+//     AV_PIX_FMT_BGR48BE,
+//     AV_PIX_FMT_BGR48LE,
+//     AV_PIX_FMT_RGB48BE,
+//     AV_PIX_FMT_RGB48LE,
+    AV_PIX_FMT_ARGB,
+    AV_PIX_FMT_ABGR,
+    AV_PIX_FMT_RGBA,
+    AV_PIX_FMT_BGRA,
+    AV_PIX_FMT_RGB24,
+    AV_PIX_FMT_BGR24,
+    AV_PIX_FMT_RGB565,
+    AV_PIX_FMT_BGR565,
+    AV_PIX_FMT_RGB555,
+    AV_PIX_FMT_BGR555,
+//     AV_PIX_FMT_RGB444,
+//     AV_PIX_FMT_BGR444,
+//     AV_PIX_FMT_RGB8,
+//     AV_PIX_FMT_BGR8,
+//     AV_PIX_FMT_RGB4,
+//     AV_PIX_FMT_BGR4,
+//     AV_PIX_FMT_RGB4_BYTE,
+//     AV_PIX_FMT_BGR4_BYTE,
+//     AV_PIX_FMT_MONOBLACK,
+};
+
+static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    for (size_t i = 0; i < n; i++) {
+        if (abs(ref[i] - test[i]) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    const uint16_t *ref16  = (const uint16_t *) ref;
+    const uint16_t *test16 = (const uint16_t *) test;
+    for (size_t i = 0; i < n; i++) {
+        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >>  5) & 0x1f) - ((test16[i] >>  5) & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    const uint16_t *ref16  = (const uint16_t *) ref;
+    const uint16_t *test16 = (const uint16_t *) test;
+    for (size_t i = 0; i < n; i++) {
+        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >>  5) & 0x3f) - ((test16[i] >>  5) & 0x3f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+static void check_yuv2rgb(int src_pix_fmt)
+{
+    const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
+#define MAX_LINE_SIZE 1920
+    static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
+
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
+                      int, SwsContext *c, const uint8_t *src[],
+                           int srcStride[], int srcSliceY, int srcSliceH,
+                           uint8_t *dst[], int dstStride[]);
+
+    LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
+    LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
+    const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
+
+    LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
+    uint8_t *dst0[4] = { dst0_ };
+    uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
+
+    LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
+    uint8_t *dst1[4] = { dst1_ };
+    uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
+
+    randomize_buffers(src_y, MAX_LINE_SIZE * 2);
+    randomize_buffers(src_u, MAX_LINE_SIZE);
+    randomize_buffers(src_v, MAX_LINE_SIZE);
+    randomize_buffers(src_a, MAX_LINE_SIZE * 2);
+
+    for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
+        int dst_pix_fmt = dst_fmts[dfi];
+        const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
+        int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
+        for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
+            struct SwsContext *ctx;
+            int width = input_sizes[isi];
+            int srcSliceY = 0;
+            int srcSliceH = 2;
+            int srcStride[4] = {
+                width,
+                width >> src_desc->log2_chroma_w,
+                width >> src_desc->log2_chroma_w,
+                width,
+            };
+            int dstStride[4] = { MAX_LINE_SIZE * 6 };
+
+            ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
+                                 width, srcSliceH, dst_pix_fmt,
+                                 0, NULL, NULL, NULL);
+            if (!ctx)
+                fail();
+
+            if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
+                memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+                memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+
+                call_ref(ctx, src, srcStride, srcSliceY,
+                         srcSliceH, dst0, dstStride);
+                call_new(ctx, src, srcStride, srcSliceY,
+                         srcSliceH, dst1, dstStride);
+
+                if (dst_pix_fmt == AV_PIX_FMT_ARGB  ||
+                    dst_pix_fmt == AV_PIX_FMT_ABGR  ||
+                    dst_pix_fmt == AV_PIX_FMT_RGBA  ||
+                    dst_pix_fmt == AV_PIX_FMT_BGRA  ||
+                    dst_pix_fmt == AV_PIX_FMT_RGB24 ||
+                    dst_pix_fmt == AV_PIX_FMT_BGR24) {
+                    if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
+                        cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
+                        fail();
+                } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
+                           dst_pix_fmt == AV_PIX_FMT_BGR565) {
+                    if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
+                        cmp_565_by_n(lines0[1], lines1[1], width, 2))
+                        fail();
+                } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
+                           dst_pix_fmt == AV_PIX_FMT_BGR555) {
+                    if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
+                        cmp_555_by_n(lines0[1], lines1[1], width, 2))
+                        fail();
+                } else {
+                    fail();
+                }
+
+                bench_new(ctx, src, srcStride, srcSliceY,
+                          srcSliceH, dst0, dstStride);
+            }
+            sws_freeContext(ctx);
+        }
+    }
+}
+
+#undef MAX_LINE_SIZE
+
+void checkasm_check_sw_yuv2rgb(void)
+{
+    check_yuv2rgb(AV_PIX_FMT_YUV420P);
+    report("yuv420p");
+    check_yuv2rgb(AV_PIX_FMT_YUV422P);
+    report("yuv422p");
+    check_yuv2rgb(AV_PIX_FMT_YUVA420P);
+    report("yuva420p");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ed2ea5be6..49832b09bf 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
                 fate-checkasm-sw_range_convert                          \
                 fate-checkasm-sw_rgb                                    \
                 fate-checkasm-sw_scale                                  \
+                fate-checkasm-sw_yuv2rgb                                \
                 fate-checkasm-takdsp                                    \
                 fate-checkasm-utvideodsp                                \
                 fate-checkasm-v210dec                                   \
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
  2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
                   ` (3 preceding siblings ...)
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
  2024-06-16 23:15   ` James Almer
  4 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
  To: ffmpeg-devel

These functions are either slower or barely faster than the C LUT
yuv2rgb code.
---
 libswscale/x86/yuv2rgb.c          | 51 -----------------
 libswscale/x86/yuv2rgb_template.c |  4 --
 libswscale/x86/yuv_2_rgb.asm      | 93 +------------------------------
 3 files changed, 3 insertions(+), 145 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 6754062245..41dfa80f33 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -41,25 +41,8 @@
 
 #define DITHER1XBPP // only for MMX
 
-//MMX versions
-#if HAVE_MMX
-#undef RENAME
-#define COMPILE_TEMPLATE_MMX
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#undef COMPILE_TEMPLATE_MMX
-#endif /* HAVE_MMX */
-
-// MMXEXT versions
-#undef RENAME
-#define COMPILE_TEMPLATE_MMXEXT
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#undef COMPILE_TEMPLATE_MMXEXT
-
 //SSSE3 versions
 #undef RENAME
-#define COMPILE_TEMPLATE_SSSE3
 #define RENAME(a) a ## _ssse3
 #include "yuv2rgb_template.c"
 
@@ -99,40 +82,6 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
         }
     }
 
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
-        switch (c->dstFormat) {
-        case AV_PIX_FMT_RGB24:
-            return yuv420_rgb24_mmxext;
-        case AV_PIX_FMT_BGR24:
-            return yuv420_bgr24_mmxext;
-        }
-    }
-
-    if (EXTERNAL_MMX(cpu_flags)) {
-        switch (c->dstFormat) {
-            case AV_PIX_FMT_RGB32:
-                if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if CONFIG_SWSCALE_ALPHA
-                    return yuva420_rgb32_mmx;
-#endif
-                    break;
-                } else
-                    return yuv420_rgb32_mmx;
-            case AV_PIX_FMT_BGR32:
-                if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if CONFIG_SWSCALE_ALPHA
-                    return yuva420_bgr32_mmx;
-#endif
-                    break;
-                } else
-                    return yuv420_bgr32_mmx;
-            case AV_PIX_FMT_RGB565:
-                return yuv420_rgb16_mmx;
-            case AV_PIX_FMT_RGB555:
-                return yuv420_rgb15_mmx;
-        }
-    }
-
 #endif /* HAVE_X86ASM */
     return NULL;
 }
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index 596943bb73..a4741e6873 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -47,7 +47,6 @@ extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t *image, const uint8_
                                      const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                      const uint8_t *py_2index);
 
-#ifndef COMPILE_TEMPLATE_MMXEXT
 extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
                                      const uint8_t *pv_index, const uint64_t *pointer_c_dither,
                                      const uint8_t *py_2index);
@@ -163,9 +162,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
     }
     return srcSliceH;
 }
-#endif
 
-#if !defined(COMPILE_TEMPLATE_MMX)
 static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -193,4 +190,3 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
     }
     return srcSliceH;
 }
-#endif
diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index a1f9134e08..b67ab162d2 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -38,12 +38,6 @@ pb_e0:   times 16 db 224
 pb_03:   times 16 db 3
 pb_07:   times 16 db 7
 
-mask_1101: dw -1, -1,  0, -1
-mask_0010: dw  0,  0, -1,  0
-mask_0110: dw  0, -1, -1,  0
-mask_1001: dw -1,  0,  0, -1
-mask_0100: dw  0, -1,  0,  0
-
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -55,14 +49,6 @@ SECTION .text
 ;
 ;-----------------------------------------------------------------------------
 
-%macro MOV_H2L 1
-%if mmsize == 8
-    psrlq %1, 32
-%else ; mmsize == 16
-    psrldq %1, 8
-%endif
-%endmacro
-
 %macro yuv2rgb_fn 3
 
 %if %3 == 32
@@ -91,18 +77,6 @@ SECTION .text
 %define m_blue m1
 %endif
 
-%if mmsize == 8
-%define time_num 1
-%define reg_num 8
-%define y_offset [pointer_c_ditherq + 8  * 8]
-%define u_offset [pointer_c_ditherq + 9  * 8]
-%define v_offset [pointer_c_ditherq + 10 * 8]
-%define ug_coff  [pointer_c_ditherq + 7  * 8]
-%define vg_coff  [pointer_c_ditherq + 6  * 8]
-%define y_coff   [pointer_c_ditherq + 3  * 8]
-%define ub_coff  [pointer_c_ditherq + 5  * 8]
-%define vr_coff  [pointer_c_ditherq + 4  * 8]
-%elif mmsize == 16
 %define time_num 2
 %if ARCH_X86_32
 %define reg_num 8
@@ -125,13 +99,11 @@ SECTION .text
 %define ub_coff  m14
 %define vr_coff  m15
 %endif ; ARCH_X86_32/64
-%endif ; coeff define mmsize == 8/16
 
 cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
 
 %if ARCH_X86_64
     movsxd indexq, indexd
-%if mmsize == 16
     VBROADCASTSD y_offset, [pointer_c_ditherq + 8  * 8]
     VBROADCASTSD u_offset, [pointer_c_ditherq + 9  * 8]
     VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8]
@@ -141,7 +113,6 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
     VBROADCASTSD ub_coff,  [pointer_c_ditherq + 5  * 8]
     VBROADCASTSD vr_coff,  [pointer_c_ditherq + 4  * 8]
 %endif
-%endif
 .loop0:
     movu m_y, [py_2indexq + 2 * indexq]
     movh m_u, [pu_indexq  +     indexq]
@@ -157,7 +128,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
     psllw m1, 3
     psllw m6, 3
     psllw m7, 3
-%if (ARCH_X86_32 && mmsize == 16)
+%if ARCH_X86_32
     VBROADCASTSD m2, mu_offset
     VBROADCASTSD m3, mv_offset
     VBROADCASTSD m4, my_offset
@@ -176,7 +147,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
     pmulhw m0, m5
     VBROADCASTSD m4, mvr_coff
     pmulhw m1, m4
-%else ; ARCH_X86_64 || mmsize == 8
+%else ; ARCH_X86_64
     psubsw m0, u_offset ; U = U - 128
     psubsw m1, v_offset ; V = V - 128
     psubw  m6, y_offset
@@ -207,49 +178,10 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
     packuswb m2, m7 ; G0 G2 G4 G6 ... G1 G3 G5 G7 ...
     mova m3, m_red
     mova m6, m_blue
-    MOV_H2L m_red
+    psrldq m_red, 8
     punpcklbw m3, m2     ; R0 G0 R2 G2 R4 G4 R6 G6 R8 G8 ...
     punpcklbw m6, m_red  ; B0 R1 B2 R3 B4 R5 B6 R7 B8 R9 ...
-    mova m5, m3
     punpckhbw m2, m_blue ; G1 B1 G3 B3 G5 B5 G7 B7 G9 B9 ...
-%if  mmsize == 8
-    punpcklwd m3 ,m6     ; R0 G0 B0 R1 R2 G2 B2 R3
-    punpckhwd m5, m6     ; R4 G4 B4 R5 R6 G6 B6 R7
-%if cpuflag(mmxext)
-    pshufw m1, m2, 0xc6
-    pshufw m6, m3, 0x84
-    pshufw m7, m5, 0x38
-    pand m6, [mask_1101] ; R0 G0 B0 R1 -- -- R2 G2
-    movq m0, m1
-    pand m7, [mask_0110] ; -- -- R6 G6 B6 R7 -- --
-    movq m2, m1
-    pand m1, [mask_0100] ; -- -- G3 B3 -- -- -- --
-    psrlq m3, 48         ; B2 R3 -- -- -- -- -- --
-    pand m0, [mask_0010] ; -- -- -- -- G1 B1 -- --
-    psllq m5, 32         ; -- -- -- -- R4 G4 B4 R5
-    pand m2, [mask_1001] ; G5 B5 -- -- -- -- G7 B7
-    por m1, m3
-    por m0, m6
-    por m1, m5
-    por m2, m7
-    movntq [imageq], m0
-    movntq [imageq + 8], m1
-    movntq [imageq + 16], m2
-%else ; cpuflag(mmx)
-    movd [imageq], m3      ; R0 G0 R2 G2
-    movd [imageq + 4], m2  ; G1 B1
-    psrlq m3, 32
-    psrlq m2, 16
-    movd [imageq + 6], m3  ; R2 G2 B2 R3
-    movd [imageq + 10], m2 ; G3 B3
-    psrlq m2, 16
-    movd [imageq + 12], m5 ; R4 G4 B4 R5
-    movd [imageq + 16], m2 ; G5 B5
-    psrlq m5, 32
-    movd [imageq + 20], m2 ; -- -- G7 B7
-    movd [imageq + 18], m5 ; R6 G6 B6 R7
-%endif ; cpuflag
-%else ; mmsize == 16
     pshufb m3, [rgb24_shuf1] ; r0  g0  r6  g6  r12 g12 r2  g2  r8  g8  r14 g14 r4  g4  r10 g10
     pshufb m6, [rgb24_shuf2] ; b10 r11 b0  r1  b6  r7  b12 r13 b2  r3  b8  r9  b14 r15 b4  r5
     pshufb m2, [rgb24_shuf3] ; g5  b5  g11 b11 g1  b1  g7  b7  g13 b13 g3  b3  g9  b9  g15 b15
@@ -274,7 +206,6 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
     movu [imageq], m0
     movu [imageq + 16], m1
     movu [imageq + 32], m2
-%endif ; mmsize = 16
 %else ; PACK RGB15/16/32
     packuswb m0, m1
     packuswb m3, m5
@@ -309,18 +240,12 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
     movu [imageq + 24 * time_num], m_alpha
 %else ; PACK RGB15/16
 %define depth 2
-%if cpuflag(ssse3)
     %define red_dither m3
     %define green_dither m4
     %define blue_dither m5
     VBROADCASTSD red_dither,   [pointer_c_ditherq + 0 * 8]
     VBROADCASTSD green_dither, [pointer_c_ditherq + 1 * 8]
     VBROADCASTSD blue_dither,  [pointer_c_ditherq + 2 * 8]
-%else ; cpuflag(mmx/mmxext)
-%define blue_dither  [pointer_c_ditherq + 2  * 8]
-%define green_dither [pointer_c_ditherq + 1  * 8]
-%define red_dither   [pointer_c_ditherq + 0  * 8]
-%endif
 %if %3 == 15
 %define gmask pb_03
 %define isRGB15 1
@@ -358,18 +283,6 @@ RET
 
 %endmacro
 
-INIT_MMX mmx
-yuv2rgb_fn yuv,  rgb, 32
-yuv2rgb_fn yuv,  bgr, 32
-yuv2rgb_fn yuva, rgb, 32
-yuv2rgb_fn yuva, bgr, 32
-yuv2rgb_fn yuv,  rgb, 15
-yuv2rgb_fn yuv,  rgb, 16
-
-INIT_MMX mmxext
-yuv2rgb_fn yuv, rgb, 24
-yuv2rgb_fn yuv, bgr, 24
-
 INIT_XMM ssse3
 yuv2rgb_fn yuv,  rgb, 24
 yuv2rgb_fn yuv,  bgr, 24
-- 
2.30.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
@ 2024-06-16 23:15   ` James Almer
  2024-06-17 12:46     ` Ramiro Polla
  0 siblings, 1 reply; 14+ messages in thread
From: James Almer @ 2024-06-16 23:15 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/16/2024 7:28 PM, Ramiro Polla wrote:
> These functions are either slower or barely faster than the C LUT
> yuv2rgb code.
> ---
>   libswscale/x86/yuv2rgb.c          | 51 -----------------
>   libswscale/x86/yuv2rgb_template.c |  4 --
>   libswscale/x86/yuv_2_rgb.asm      | 93 +------------------------------
>   3 files changed, 3 insertions(+), 145 deletions(-)
> 
> diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> index 6754062245..41dfa80f33 100644
> --- a/libswscale/x86/yuv2rgb.c
> +++ b/libswscale/x86/yuv2rgb.c
> @@ -41,25 +41,8 @@
>   
>   #define DITHER1XBPP // only for MMX

Shouldn't this be removed too?

>   
> -//MMX versions
> -#if HAVE_MMX
> -#undef RENAME
> -#define COMPILE_TEMPLATE_MMX
> -#define RENAME(a) a ## _mmx
> -#include "yuv2rgb_template.c"
> -#undef COMPILE_TEMPLATE_MMX
> -#endif /* HAVE_MMX */
> -
> -// MMXEXT versions
> -#undef RENAME
> -#define COMPILE_TEMPLATE_MMXEXT
> -#define RENAME(a) a ## _mmxext
> -#include "yuv2rgb_template.c"
> -#undef COMPILE_TEMPLATE_MMXEXT
> -
>   //SSSE3 versions
>   #undef RENAME
> -#define COMPILE_TEMPLATE_SSSE3
>   #define RENAME(a) a ## _ssse3
>   #include "yuv2rgb_template.c"

You could write a seventh patch that moves the template stuff back to 
this file, now that SSSE3 is the only version. See commit 8b62fb231a78.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
  2024-06-16 23:15   ` James Almer
@ 2024-06-17 12:46     ` Ramiro Polla
  2024-06-20 14:50       ` Ramiro Polla
  0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-17 12:46 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Mon, Jun 17, 2024 at 1:16 AM James Almer <jamrial@gmail.com> wrote:
> On 6/16/2024 7:28 PM, Ramiro Polla wrote:
> > These functions are either slower or barely faster than the C LUT
> > yuv2rgb code.
> > ---
> >   libswscale/x86/yuv2rgb.c          | 51 -----------------
> >   libswscale/x86/yuv2rgb_template.c |  4 --
> >   libswscale/x86/yuv_2_rgb.asm      | 93 +------------------------------
> >   3 files changed, 3 insertions(+), 145 deletions(-)
> >
> > diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> > index 6754062245..41dfa80f33 100644
> > --- a/libswscale/x86/yuv2rgb.c
> > +++ b/libswscale/x86/yuv2rgb.c
> > @@ -41,25 +41,8 @@
> >
> >   #define DITHER1XBPP // only for MMX
>
> Shouldn't this be removed too?

I think this #define can already be removed from everywhere. It seems
to be unconditionally set in swscale_internal.h (I haven't tracked
down since when this is the case).

> > -//MMX versions
> > -#if HAVE_MMX
> > -#undef RENAME
> > -#define COMPILE_TEMPLATE_MMX
> > -#define RENAME(a) a ## _mmx
> > -#include "yuv2rgb_template.c"
> > -#undef COMPILE_TEMPLATE_MMX
> > -#endif /* HAVE_MMX */
> > -
> > -// MMXEXT versions
> > -#undef RENAME
> > -#define COMPILE_TEMPLATE_MMXEXT
> > -#define RENAME(a) a ## _mmxext
> > -#include "yuv2rgb_template.c"
> > -#undef COMPILE_TEMPLATE_MMXEXT
> > -
> >   //SSSE3 versions
> >   #undef RENAME
> > -#define COMPILE_TEMPLATE_SSSE3
> >   #define RENAME(a) a ## _ssse3
> >   #include "yuv2rgb_template.c"
>
> You could write a seventh patch that moves the template stuff back to
> this file, now that SSSE3 is the only version. See commit 8b62fb231a78.

Will do in the next version of this patchset.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
@ 2024-06-19 10:13   ` Michael Niedermayer
  0 siblings, 0 replies; 14+ messages in thread
From: Michael Niedermayer @ 2024-06-19 10:13 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 624 bytes --]

On Mon, Jun 17, 2024 at 12:28:46AM +0200, Ramiro Polla wrote:
> The C code was silently ignoring the second chroma line on yuv422p
> input.
> ---
>  libswscale/yuv2rgb.c | 228 +++++++++++++++++++++++++++++++++++++------
>  1 file changed, 196 insertions(+), 32 deletions(-)

Nice that you found this. That's an ugly bug.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you fake or manipulate statistics in a paper in physics you will never
get a job again.
If you fake or manipulate statistics in a paper in medicine you will get
a job for life in the pharma industry.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
  2024-06-17 12:46     ` Ramiro Polla
@ 2024-06-20 14:50       ` Ramiro Polla
  0 siblings, 0 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-20 14:50 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Mon, Jun 17, 2024 at 2:46 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Mon, Jun 17, 2024 at 1:16 AM James Almer <jamrial@gmail.com> wrote:
> > On 6/16/2024 7:28 PM, Ramiro Polla wrote:
> > > These functions are either slower or barely faster than the C LUT
> > > yuv2rgb code.
> > > ---
> > >   libswscale/x86/yuv2rgb.c          | 51 -----------------
> > >   libswscale/x86/yuv2rgb_template.c |  4 --
> > >   libswscale/x86/yuv_2_rgb.asm      | 93 +------------------------------
> > >   3 files changed, 3 insertions(+), 145 deletions(-)
> > >
> > > diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> > > index 6754062245..41dfa80f33 100644
> > > --- a/libswscale/x86/yuv2rgb.c
> > > +++ b/libswscale/x86/yuv2rgb.c
> > > @@ -41,25 +41,8 @@
> > >
> > >   #define DITHER1XBPP // only for MMX
> >
> > Shouldn't this be removed too?
>
> I think this #define can already be removed from everywhere. It seems
> to be unconditionally set in swscale_internal.h (I haven't tracked
> down since when this is the case).
>
> > > -//MMX versions
> > > -#if HAVE_MMX
> > > -#undef RENAME
> > > -#define COMPILE_TEMPLATE_MMX
> > > -#define RENAME(a) a ## _mmx
> > > -#include "yuv2rgb_template.c"
> > > -#undef COMPILE_TEMPLATE_MMX
> > > -#endif /* HAVE_MMX */
> > > -
> > > -// MMXEXT versions
> > > -#undef RENAME
> > > -#define COMPILE_TEMPLATE_MMXEXT
> > > -#define RENAME(a) a ## _mmxext
> > > -#include "yuv2rgb_template.c"
> > > -#undef COMPILE_TEMPLATE_MMXEXT
> > > -
> > >   //SSSE3 versions
> > >   #undef RENAME
> > > -#define COMPILE_TEMPLATE_SSSE3
> > >   #define RENAME(a) a ## _ssse3
> > >   #include "yuv2rgb_template.c"
> >
> > You could write a seventh patch that moves the template stuff back to
> > this file, now that SSSE3 is the only version. See commit 8b62fb231a78.
>
> Will do in the next version of this patchset.

I'll apply this patchset if there are no more comments, before
submitting more patches to deal with DITHER1XBPP and detemplatizing.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
  2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
@ 2024-06-20 14:59   ` Ramiro Polla
  2024-06-25 21:19     ` Ramiro Polla
  0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-20 14:59 UTC (permalink / raw)
  To: ffmpeg-devel

On Mon, Jun 17, 2024 at 12:28 AM Ramiro Polla <ramiro.polla@gmail.com> wrote:
>
> ---
>  tests/checkasm/Makefile     |   2 +-
>  tests/checkasm/checkasm.c   |   1 +
>  tests/checkasm/checkasm.h   |   1 +
>  tests/checkasm/sw_yuv2rgb.c | 205 ++++++++++++++++++++++++++++++++++++
>  tests/fate/checkasm.mak     |   1 +
>  5 files changed, 209 insertions(+), 1 deletion(-)
>  create mode 100644 tests/checkasm/sw_yuv2rgb.c
>
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index f20732b37a..3a7670e24b 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
>  CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
>
>  # swscale tests
> -SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
> +SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
>
>  CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
>
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 56232ab1e0..d9ac772a08 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -254,6 +254,7 @@ static const struct {
>      { "sw_range_convert", checkasm_check_sw_range_convert },
>      { "sw_rgb", checkasm_check_sw_rgb },
>      { "sw_scale", checkasm_check_sw_scale },
> +    { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
>  #endif
>  #if CONFIG_AVUTIL
>          { "fixed_dsp", checkasm_check_fixed_dsp },
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index e544007b67..4d5f3e387e 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
>  void checkasm_check_sw_range_convert(void);
>  void checkasm_check_sw_rgb(void);
>  void checkasm_check_sw_scale(void);
> +void checkasm_check_sw_yuv2rgb(void);
>  void checkasm_check_takdsp(void);
>  void checkasm_check_utvideodsp(void);
>  void checkasm_check_v210dec(void);
> diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
> new file mode 100644
> index 0000000000..fbe01a7788
> --- /dev/null
> +++ b/tests/checkasm/sw_yuv2rgb.c
> @@ -0,0 +1,205 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <string.h>
> +
> +#include "libavutil/common.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/mem_internal.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "libswscale/swscale.h"
> +#include "libswscale/swscale_internal.h"
> +
> +#include "checkasm.h"
> +
> +#define randomize_buffers(buf, size)      \
> +    do {                                  \
> +        for (int j = 0; j < size; j += 4) \
> +            AV_WN32(buf + j, rnd());      \
> +    } while (0)
> +
> +static const int dst_fmts[] = {
> +//     AV_PIX_FMT_BGR48BE,
> +//     AV_PIX_FMT_BGR48LE,
> +//     AV_PIX_FMT_RGB48BE,
> +//     AV_PIX_FMT_RGB48LE,
> +    AV_PIX_FMT_ARGB,
> +    AV_PIX_FMT_ABGR,
> +    AV_PIX_FMT_RGBA,
> +    AV_PIX_FMT_BGRA,
> +    AV_PIX_FMT_RGB24,
> +    AV_PIX_FMT_BGR24,
> +    AV_PIX_FMT_RGB565,
> +    AV_PIX_FMT_BGR565,
> +    AV_PIX_FMT_RGB555,
> +    AV_PIX_FMT_BGR555,
> +//     AV_PIX_FMT_RGB444,
> +//     AV_PIX_FMT_BGR444,
> +//     AV_PIX_FMT_RGB8,
> +//     AV_PIX_FMT_BGR8,
> +//     AV_PIX_FMT_RGB4,
> +//     AV_PIX_FMT_BGR4,
> +//     AV_PIX_FMT_RGB4_BYTE,
> +//     AV_PIX_FMT_BGR4_BYTE,
> +//     AV_PIX_FMT_MONOBLACK,
> +};
> +
> +static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> +    for (size_t i = 0; i < n; i++) {
> +        if (abs(ref[i] - test[i]) > accuracy)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
> +static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> +    const uint16_t *ref16  = (const uint16_t *) ref;
> +    const uint16_t *test16 = (const uint16_t *) test;
> +    for (size_t i = 0; i < n; i++) {
> +        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >>  5) & 0x1f) - ((test16[i] >>  5) & 0x1f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
> +static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> +    const uint16_t *ref16  = (const uint16_t *) ref;
> +    const uint16_t *test16 = (const uint16_t *) test;
> +    for (size_t i = 0; i < n; i++) {
> +        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >>  5) & 0x3f) - ((test16[i] >>  5) & 0x3f)) > accuracy)
> +            return 1;
> +        if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
> +            return 1;
> +    }
> +    return 0;
> +}
> +
> +static void check_yuv2rgb(int src_pix_fmt)
> +{
> +    const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
> +#define MAX_LINE_SIZE 1920
> +    static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
> +
> +    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
> +                      int, SwsContext *c, const uint8_t *src[],
> +                           int srcStride[], int srcSliceY, int srcSliceH,
> +                           uint8_t *dst[], int dstStride[]);
> +
> +    LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
> +    LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
> +    LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
> +    LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
> +    const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
> +
> +    LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
> +    uint8_t *dst0[4] = { dst0_ };
> +    uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
> +
> +    LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
> +    uint8_t *dst1[4] = { dst1_ };
> +    uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
> +
> +    randomize_buffers(src_y, MAX_LINE_SIZE * 2);
> +    randomize_buffers(src_u, MAX_LINE_SIZE);
> +    randomize_buffers(src_v, MAX_LINE_SIZE);
> +    randomize_buffers(src_a, MAX_LINE_SIZE * 2);
> +
> +    for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
> +        int dst_pix_fmt = dst_fmts[dfi];
> +        const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
> +        int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
> +        for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
> +            struct SwsContext *ctx;
> +            int width = input_sizes[isi];
> +            int srcSliceY = 0;
> +            int srcSliceH = 2;
> +            int srcStride[4] = {
> +                width,
> +                width >> src_desc->log2_chroma_w,
> +                width >> src_desc->log2_chroma_w,
> +                width,
> +            };
> +            int dstStride[4] = { MAX_LINE_SIZE * 6 };
> +
> +            ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
> +                                 width, srcSliceH, dst_pix_fmt,
> +                                 0, NULL, NULL, NULL);

checkasm gets a little spammy with this commit by printing "No
accelerated colorspace conversion found from %s to %s.\n" from
libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
the call to sws_getContext()?

> +            if (!ctx)
> +                fail();
> +
> +            if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
> +                memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
> +                memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
> +
> +                call_ref(ctx, src, srcStride, srcSliceY,
> +                         srcSliceH, dst0, dstStride);
> +                call_new(ctx, src, srcStride, srcSliceY,
> +                         srcSliceH, dst1, dstStride);
> +
> +                if (dst_pix_fmt == AV_PIX_FMT_ARGB  ||
> +                    dst_pix_fmt == AV_PIX_FMT_ABGR  ||
> +                    dst_pix_fmt == AV_PIX_FMT_RGBA  ||
> +                    dst_pix_fmt == AV_PIX_FMT_BGRA  ||
> +                    dst_pix_fmt == AV_PIX_FMT_RGB24 ||
> +                    dst_pix_fmt == AV_PIX_FMT_BGR24) {
> +                    if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
> +                        cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
> +                        fail();
> +                } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
> +                           dst_pix_fmt == AV_PIX_FMT_BGR565) {
> +                    if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
> +                        cmp_565_by_n(lines0[1], lines1[1], width, 2))
> +                        fail();
> +                } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
> +                           dst_pix_fmt == AV_PIX_FMT_BGR555) {
> +                    if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
> +                        cmp_555_by_n(lines0[1], lines1[1], width, 2))
> +                        fail();
> +                } else {
> +                    fail();
> +                }
> +
> +                bench_new(ctx, src, srcStride, srcSliceY,
> +                          srcSliceH, dst0, dstStride);
> +            }
> +            sws_freeContext(ctx);
> +        }
> +    }
> +}
> +
> +#undef MAX_LINE_SIZE
> +
> +void checkasm_check_sw_yuv2rgb(void)
> +{
> +    check_yuv2rgb(AV_PIX_FMT_YUV420P);
> +    report("yuv420p");
> +    check_yuv2rgb(AV_PIX_FMT_YUV422P);
> +    report("yuv422p");
> +    check_yuv2rgb(AV_PIX_FMT_YUVA420P);
> +    report("yuva420p");
> +}
> diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
> index 0ed2ea5be6..49832b09bf 100644
> --- a/tests/fate/checkasm.mak
> +++ b/tests/fate/checkasm.mak
> @@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
>                  fate-checkasm-sw_range_convert                          \
>                  fate-checkasm-sw_rgb                                    \
>                  fate-checkasm-sw_scale                                  \
> +                fate-checkasm-sw_yuv2rgb                                \
>                  fate-checkasm-takdsp                                    \
>                  fate-checkasm-utvideodsp                                \
>                  fate-checkasm-v210dec                                   \
> --
> 2.30.2
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
  2024-06-20 14:59   ` Ramiro Polla
@ 2024-06-25 21:19     ` Ramiro Polla
  2024-06-27 14:02       ` Ramiro Polla
  0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-25 21:19 UTC (permalink / raw)
  To: ffmpeg-devel

[-- Attachment #1: Type: text/plain, Size: 456 bytes --]

On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> checkasm gets a little spammy with this commit by printing "No
> accelerated colorspace conversion found from %s to %s.\n" from
> libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> the call to sws_getContext()?

New patch attached that overrides the log level to prevent spamming.

[-- Attachment #2: 0001-checkasm-add-tests-for-yuv2rgb.patch --]
[-- Type: text/x-patch, Size: 11009 bytes --]

From c0359653f11a5e6ae39445134c2e73cb211c4a65 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sun, 16 Jun 2024 22:40:13 +0200
Subject: [PATCH] checkasm: add tests for yuv2rgb

---
 tests/checkasm/Makefile     |   2 +-
 tests/checkasm/checkasm.c   |   1 +
 tests/checkasm/checkasm.h   |   1 +
 tests/checkasm/sw_yuv2rgb.c | 211 ++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak     |   1 +
 5 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/sw_yuv2rgb.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index f20732b37a..3a7670e24b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o
 CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
 
 # swscale tests
-SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
+SWSCALEOBJS                             += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
 
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index b88b128e4c..de0024099a 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,7 @@ static const struct {
     { "sw_range_convert", checkasm_check_sw_range_convert },
     { "sw_rgb", checkasm_check_sw_rgb },
     { "sw_scale", checkasm_check_sw_scale },
+    { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
 #endif
 #if CONFIG_AVUTIL
         { "fixed_dsp", checkasm_check_fixed_dsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index e544007b67..4d5f3e387e 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
 void checkasm_check_sw_range_convert(void);
 void checkasm_check_sw_rgb(void);
 void checkasm_check_sw_scale(void);
+void checkasm_check_sw_yuv2rgb(void);
 void checkasm_check_takdsp(void);
 void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
new file mode 100644
index 0000000000..02ed9a74d5
--- /dev/null
+++ b/tests/checkasm/sw_yuv2rgb.c
@@ -0,0 +1,211 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size)      \
+    do {                                  \
+        for (int j = 0; j < size; j += 4) \
+            AV_WN32(buf + j, rnd());      \
+    } while (0)
+
+static const int dst_fmts[] = {
+//     AV_PIX_FMT_BGR48BE,
+//     AV_PIX_FMT_BGR48LE,
+//     AV_PIX_FMT_RGB48BE,
+//     AV_PIX_FMT_RGB48LE,
+    AV_PIX_FMT_ARGB,
+    AV_PIX_FMT_ABGR,
+    AV_PIX_FMT_RGBA,
+    AV_PIX_FMT_BGRA,
+    AV_PIX_FMT_RGB24,
+    AV_PIX_FMT_BGR24,
+    AV_PIX_FMT_RGB565,
+    AV_PIX_FMT_BGR565,
+    AV_PIX_FMT_RGB555,
+    AV_PIX_FMT_BGR555,
+//     AV_PIX_FMT_RGB444,
+//     AV_PIX_FMT_BGR444,
+//     AV_PIX_FMT_RGB8,
+//     AV_PIX_FMT_BGR8,
+//     AV_PIX_FMT_RGB4,
+//     AV_PIX_FMT_BGR4,
+//     AV_PIX_FMT_RGB4_BYTE,
+//     AV_PIX_FMT_BGR4_BYTE,
+//     AV_PIX_FMT_MONOBLACK,
+};
+
+static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    for (size_t i = 0; i < n; i++) {
+        if (abs(ref[i] - test[i]) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    const uint16_t *ref16  = (const uint16_t *) ref;
+    const uint16_t *test16 = (const uint16_t *) test;
+    for (size_t i = 0; i < n; i++) {
+        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >>  5) & 0x1f) - ((test16[i] >>  5) & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+    const uint16_t *ref16  = (const uint16_t *) ref;
+    const uint16_t *test16 = (const uint16_t *) test;
+    for (size_t i = 0; i < n; i++) {
+        if (abs(( ref16[i]        & 0x1f) - ( test16[i]        & 0x1f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >>  5) & 0x3f) - ((test16[i] >>  5) & 0x3f)) > accuracy)
+            return 1;
+        if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
+            return 1;
+    }
+    return 0;
+}
+
+static void check_yuv2rgb(int src_pix_fmt)
+{
+    const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
+#define MAX_LINE_SIZE 1920
+    static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
+
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
+                      int, SwsContext *c, const uint8_t *src[],
+                           int srcStride[], int srcSliceY, int srcSliceH,
+                           uint8_t *dst[], int dstStride[]);
+
+    LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
+    LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
+    const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
+
+    LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
+    uint8_t *dst0[4] = { dst0_ };
+    uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
+
+    LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
+    uint8_t *dst1[4] = { dst1_ };
+    uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
+
+    randomize_buffers(src_y, MAX_LINE_SIZE * 2);
+    randomize_buffers(src_u, MAX_LINE_SIZE);
+    randomize_buffers(src_v, MAX_LINE_SIZE);
+    randomize_buffers(src_a, MAX_LINE_SIZE * 2);
+
+    for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
+        int dst_pix_fmt = dst_fmts[dfi];
+        const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
+        int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
+        for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
+            struct SwsContext *ctx;
+            int log_level;
+            int width = input_sizes[isi];
+            int srcSliceY = 0;
+            int srcSliceH = 2;
+            int srcStride[4] = {
+                width,
+                width >> src_desc->log2_chroma_w,
+                width >> src_desc->log2_chroma_w,
+                width,
+            };
+            int dstStride[4] = { MAX_LINE_SIZE * 6 };
+
+            // override log level to prevent spamming of the message
+            // "No accelerated colorspace conversion found from %s to %s"
+            log_level = av_log_get_level();
+            av_log_set_level(AV_LOG_ERROR);
+            ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
+                                 width, srcSliceH, dst_pix_fmt,
+                                 0, NULL, NULL, NULL);
+            av_log_set_level(log_level);
+            if (!ctx)
+                fail();
+
+            if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
+                memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+                memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+
+                call_ref(ctx, src, srcStride, srcSliceY,
+                         srcSliceH, dst0, dstStride);
+                call_new(ctx, src, srcStride, srcSliceY,
+                         srcSliceH, dst1, dstStride);
+
+                if (dst_pix_fmt == AV_PIX_FMT_ARGB  ||
+                    dst_pix_fmt == AV_PIX_FMT_ABGR  ||
+                    dst_pix_fmt == AV_PIX_FMT_RGBA  ||
+                    dst_pix_fmt == AV_PIX_FMT_BGRA  ||
+                    dst_pix_fmt == AV_PIX_FMT_RGB24 ||
+                    dst_pix_fmt == AV_PIX_FMT_BGR24) {
+                    if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
+                        cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
+                        fail();
+                } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
+                           dst_pix_fmt == AV_PIX_FMT_BGR565) {
+                    if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
+                        cmp_565_by_n(lines0[1], lines1[1], width, 2))
+                        fail();
+                } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
+                           dst_pix_fmt == AV_PIX_FMT_BGR555) {
+                    if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
+                        cmp_555_by_n(lines0[1], lines1[1], width, 2))
+                        fail();
+                } else {
+                    fail();
+                }
+
+                bench_new(ctx, src, srcStride, srcSliceY,
+                          srcSliceH, dst0, dstStride);
+            }
+            sws_freeContext(ctx);
+        }
+    }
+}
+
+#undef MAX_LINE_SIZE
+
+void checkasm_check_sw_yuv2rgb(void)
+{
+    check_yuv2rgb(AV_PIX_FMT_YUV420P);
+    report("yuv420p");
+    check_yuv2rgb(AV_PIX_FMT_YUV422P);
+    report("yuv422p");
+    check_yuv2rgb(AV_PIX_FMT_YUVA420P);
+    report("yuva420p");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ed2ea5be6..49832b09bf 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
                 fate-checkasm-sw_range_convert                          \
                 fate-checkasm-sw_rgb                                    \
                 fate-checkasm-sw_scale                                  \
+                fate-checkasm-sw_yuv2rgb                                \
                 fate-checkasm-takdsp                                    \
                 fate-checkasm-utvideodsp                                \
                 fate-checkasm-v210dec                                   \
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
  2024-06-25 21:19     ` Ramiro Polla
@ 2024-06-27 14:02       ` Ramiro Polla
  2024-06-30  0:33         ` Sean McGovern
  0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-27 14:02 UTC (permalink / raw)
  To: ffmpeg-devel

On Tue, Jun 25, 2024 at 11:19 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> > checkasm gets a little spammy with this commit by printing "No
> > accelerated colorspace conversion found from %s to %s.\n" from
> > libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> > downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> > the call to sws_getContext()?
>
> New patch attached that overrides the log level to prevent spamming.

I'll apply this tomorrow if there are no comments.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
  2024-06-27 14:02       ` Ramiro Polla
@ 2024-06-30  0:33         ` Sean McGovern
  0 siblings, 0 replies; 14+ messages in thread
From: Sean McGovern @ 2024-06-30  0:33 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi Ramiro,


On Thu, Jun 27, 2024, 10:37 Ramiro Polla <ramiro.polla@gmail.com> wrote:

> On Tue, Jun 25, 2024 at 11:19 PM Ramiro Polla <ramiro.polla@gmail.com>
> wrote:
> > On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com>
> wrote:
> > > checkasm gets a little spammy with this commit by printing "No
> > > accelerated colorspace conversion found from %s to %s.\n" from
> > > libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> > > downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> > > the call to sws_getContext()?
> >
> > New patch attached that overrides the log level to prevent spamming.
>
> I'll apply this tomorrow if there are no comments.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

It looks like this exposed an issue in the AltiVec acceleration in
libswscale.

libswscale/ppc/yuv2rgb.c looks a bit too complex for me to investigate. I'd
be curious to see if this fails on a 64-bit POWER machine. I'll try to test
it later.

-- Sean McGovern

>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-06-30  0:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
2024-06-19 10:13   ` Michael Niedermayer
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
2024-06-20 14:59   ` Ramiro Polla
2024-06-25 21:19     ` Ramiro Polla
2024-06-27 14:02       ` Ramiro Polla
2024-06-30  0:33         ` Sean McGovern
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
2024-06-16 23:15   ` James Almer
2024-06-17 12:46     ` Ramiro Polla
2024-06-20 14:50       ` Ramiro Polla

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git