* [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8
@ 2024-06-16 22:28 Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
` (4 more replies)
0 siblings, 5 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
To: ffmpeg-devel
The C code for some pixel formats (rgb555, rgb565, rgb444, and monob)
did not convert the trailing pixels when the width was not a multiple of 8.
NOTE: for odd widths, the last pixel is still not converted by the C code
for yuv2rgb for any pixel format except monob.
---
libswscale/yuv2rgb.c | 101 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 93 insertions(+), 8 deletions(-)
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index c1d6236f37..e641c765c7 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -172,10 +172,6 @@ const int *sws_getCoefficients(int colorspace)
return srcSliceH; \
}
-#define CLOSEYUV2RGBFUNC(dst_delta) \
- ENDYUV2RGBLINE(dst_delta, 0) \
- ENDYUV2RGBFUNC()
-
YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
LOADCHROMA(0);
PUTRGB48(dst_1, py_1, 0);
@@ -432,7 +428,27 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
LOADCHROMA(3);
PUTRGB16(dst_2, py_2, 3, 6 + 8);
PUTRGB16(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
+ENDYUV2RGBLINE(8, 0)
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+ const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+ const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
+
+ LOADCHROMA(0);
+ PUTRGB16(dst_1, py_1, 0, 0);
+ PUTRGB16(dst_2, py_2, 0, 0 + 8);
+
+ LOADCHROMA(1);
+ PUTRGB16(dst_2, py_2, 1, 2 + 8);
+ PUTRGB16(dst_1, py_1, 1, 2);
+ENDYUV2RGBLINE(8, 1)
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+ const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+ const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
+
+ LOADCHROMA(0);
+ PUTRGB16(dst_1, py_1, 0, 0);
+ PUTRGB16(dst_2, py_2, 0, 0 + 8);
+ENDYUV2RGBFUNC()
YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
const uint8_t *d16 = ff_dither_2x2_8[y & 1];
@@ -462,7 +478,25 @@ YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
LOADCHROMA(3);
PUTRGB15(dst_2, py_2, 3, 6 + 8);
PUTRGB15(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
+ENDYUV2RGBLINE(8, 0)
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+ const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
+
+ LOADCHROMA(0);
+ PUTRGB15(dst_1, py_1, 0, 0);
+ PUTRGB15(dst_2, py_2, 0, 0 + 8);
+
+ LOADCHROMA(1);
+ PUTRGB15(dst_2, py_2, 1, 2 + 8);
+ PUTRGB15(dst_1, py_1, 1, 2);
+ENDYUV2RGBLINE(8, 1)
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+ const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
+
+ LOADCHROMA(0);
+ PUTRGB15(dst_1, py_1, 0, 0);
+ PUTRGB15(dst_2, py_2, 0, 0 + 8);
+ENDYUV2RGBFUNC()
// r, g, b, dst_1, dst_2
YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
@@ -493,7 +527,23 @@ YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
LOADCHROMA(3);
PUTRGB12(dst_2, py_2, 3, 6 + 8);
PUTRGB12(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
+ENDYUV2RGBLINE(8, 0)
+ const uint8_t *d16 = ff_dither_4x4_16[y & 3];
+
+ LOADCHROMA(0);
+ PUTRGB12(dst_1, py_1, 0, 0);
+ PUTRGB12(dst_2, py_2, 0, 0 + 8);
+
+ LOADCHROMA(1);
+ PUTRGB12(dst_2, py_2, 1, 2 + 8);
+ PUTRGB12(dst_1, py_1, 1, 2);
+ENDYUV2RGBLINE(8, 1)
+ const uint8_t *d16 = ff_dither_4x4_16[y & 3];
+
+ LOADCHROMA(0);
+ PUTRGB12(dst_1, py_1, 0, 0);
+ PUTRGB12(dst_2, py_2, 0, 0 + 8);
+ENDYUV2RGBFUNC()
// r, g, b, dst_1, dst_2
YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
@@ -672,7 +722,42 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
dst_1[0] = out_1;
dst_2[0] = out_2;
-CLOSEYUV2RGBFUNC(1)
+
+ py_1 += 8;
+ py_2 += 8;
+ dst_1 += 1;
+ dst_2 += 1;
+ }
+ if (c->dstW & 7) {
+ int av_unused Y, U, V;
+ int pixels_left = c->dstW & 7;
+ const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
+ char out_1 = 0, out_2 = 0;
+ g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
+
+#define PUTRGB1_OR00(out, src, i, o) \
+ if (pixels_left) { \
+ PUTRGB1(out, src, i, o) \
+ pixels_left--; \
+ } else { \
+ out <<= 2; \
+ }
+
+ PUTRGB1_OR00(out_1, py_1, 0, 0);
+ PUTRGB1_OR00(out_2, py_2, 0, 0 + 8);
+
+ PUTRGB1_OR00(out_2, py_2, 1, 2 + 8);
+ PUTRGB1_OR00(out_1, py_1, 1, 2);
+
+ PUTRGB1_OR00(out_1, py_1, 2, 4);
+ PUTRGB1_OR00(out_2, py_2, 2, 4 + 8);
+
+ PUTRGB1_OR00(out_2, py_2, 3, 6 + 8);
+ PUTRGB1_OR00(out_1, py_1, 3, 6);
+
+ dst_1[0] = out_1;
+ dst_2[0] = out_2;
+ENDYUV2RGBFUNC()
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
{
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
` (3 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
To: ffmpeg-devel
---
libswscale/yuv2rgb.c | 574 +++++++++----------------------------------
1 file changed, 113 insertions(+), 461 deletions(-)
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index e641c765c7..7386d3a2a2 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -72,13 +72,13 @@ const int *sws_getCoefficients(int colorspace)
g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
-#define PUTRGB(dst, src, i) \
+#define PUTRGB(dst, src, asrc, i, abase) \
Y = src[2 * i]; \
dst[2 * i] = r[Y] + g[Y] + b[Y]; \
Y = src[2 * i + 1]; \
dst[2 * i + 1] = r[Y] + g[Y] + b[Y];
-#define PUTRGB24(dst, src, i) \
+#define PUTRGB24(dst, src, asrc, i, abase) \
Y = src[2 * i]; \
dst[6 * i + 0] = r[Y]; \
dst[6 * i + 1] = g[Y]; \
@@ -88,7 +88,7 @@ const int *sws_getCoefficients(int colorspace)
dst[6 * i + 4] = g[Y]; \
dst[6 * i + 5] = b[Y];
-#define PUTBGR24(dst, src, i) \
+#define PUTBGR24(dst, src, asrc, i, abase) \
Y = src[2 * i]; \
dst[6 * i + 0] = b[Y]; \
dst[6 * i + 1] = g[Y]; \
@@ -98,13 +98,13 @@ const int *sws_getCoefficients(int colorspace)
dst[6 * i + 4] = g[Y]; \
dst[6 * i + 5] = r[Y];
-#define PUTRGBA(dst, ysrc, asrc, i, s) \
+#define PUTRGBA(dst, ysrc, asrc, i, abase) \
Y = ysrc[2 * i]; \
- dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << s); \
+ dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << abase); \
Y = ysrc[2 * i + 1]; \
- dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s);
+ dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << abase);
-#define PUTRGB48(dst, src, i) \
+#define PUTRGB48(dst, src, asrc, i, abase) \
Y = src[ 2 * i]; \
dst[12 * i + 0] = dst[12 * i + 1] = r[Y]; \
dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \
@@ -114,7 +114,7 @@ const int *sws_getCoefficients(int colorspace)
dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \
dst[12 * i + 10] = dst[12 * i + 11] = b[Y];
-#define PUTBGR48(dst, src, i) \
+#define PUTBGR48(dst, src, asrc, i, abase) \
Y = src[2 * i]; \
dst[12 * i + 0] = dst[12 * i + 1] = b[Y]; \
dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \
@@ -155,11 +155,15 @@ const int *sws_getCoefficients(int colorspace)
while (h_size--) { \
int av_unused U, V, Y; \
-#define ENDYUV2RGBLINE(dst_delta, ss) \
+#define ENDYUV2RGBLINE(dst_delta, ss, alpha) \
pu += 4 >> ss; \
pv += 4 >> ss; \
py_1 += 8 >> ss; \
py_2 += 8 >> ss; \
+ if (alpha) { \
+ pa_1 += 8 >> ss; \
+ pa_2 += 8 >> ss; \
+ } \
dst_1 += dst_delta >> ss; \
dst_2 += dst_delta >> ss; \
} \
@@ -172,236 +176,77 @@ const int *sws_getCoefficients(int colorspace)
return srcSliceH; \
}
-YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
- LOADCHROMA(0);
- PUTRGB48(dst_1, py_1, 0);
- PUTRGB48(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTRGB48(dst_2, py_2, 1);
- PUTRGB48(dst_1, py_1, 1);
-
- LOADCHROMA(2);
- PUTRGB48(dst_1, py_1, 2);
- PUTRGB48(dst_2, py_2, 2);
-
- LOADCHROMA(3);
- PUTRGB48(dst_2, py_2, 3);
- PUTRGB48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
- LOADCHROMA(0);
- PUTRGB48(dst_1, py_1, 0);
- PUTRGB48(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTRGB48(dst_2, py_2, 1);
- PUTRGB48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
- LOADCHROMA(0);
- PUTRGB48(dst_1, py_1, 0);
- PUTRGB48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
- LOADCHROMA(0);
- PUTBGR48(dst_1, py_1, 0);
- PUTBGR48(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTBGR48(dst_2, py_2, 1);
- PUTBGR48(dst_1, py_1, 1);
-
- LOADCHROMA(2);
- PUTBGR48(dst_1, py_1, 2);
- PUTBGR48(dst_2, py_2, 2);
-
- LOADCHROMA(3);
- PUTBGR48(dst_2, py_2, 3);
- PUTBGR48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
- LOADCHROMA(0);
- PUTBGR48(dst_1, py_1, 0);
- PUTBGR48(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTBGR48(dst_2, py_2, 1);
- PUTBGR48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
- LOADCHROMA(0);
- PUTBGR48(dst_1, py_1, 0);
- PUTBGR48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
- LOADCHROMA(0);
- PUTRGB(dst_1, py_1, 0);
- PUTRGB(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTRGB(dst_2, py_2, 1);
- PUTRGB(dst_1, py_1, 1);
-
- LOADCHROMA(2);
- PUTRGB(dst_1, py_1, 2);
- PUTRGB(dst_2, py_2, 2);
-
- LOADCHROMA(3);
- PUTRGB(dst_2, py_2, 3);
- PUTRGB(dst_1, py_1, 3);
-ENDYUV2RGBLINE(8, 0)
- LOADCHROMA(0);
- PUTRGB(dst_1, py_1, 0);
- PUTRGB(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTRGB(dst_2, py_2, 1);
- PUTRGB(dst_1, py_1, 1);
-ENDYUV2RGBLINE(8, 1)
- LOADCHROMA(0);
- PUTRGB(dst_1, py_1, 0);
- PUTRGB(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-#if HAVE_BIGENDIAN
-YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
-#else
-YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
-#endif
- LOADCHROMA(0);
- PUTRGBA(dst_1, py_1, pa_1, 0, 24);
- PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
- LOADCHROMA(1);
- PUTRGBA(dst_2, py_2, pa_2, 1, 24);
- PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-
- LOADCHROMA(2);
- PUTRGBA(dst_1, py_1, pa_1, 2, 24);
- PUTRGBA(dst_2, py_2, pa_2, 2, 24);
-
- LOADCHROMA(3);
- PUTRGBA(dst_2, py_2, pa_2, 3, 24);
- PUTRGBA(dst_1, py_1, pa_1, 3, 24);
- pa_1 += 8;
- pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
- LOADCHROMA(0);
- PUTRGBA(dst_1, py_1, pa_1, 0, 24);
- PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
- LOADCHROMA(1);
- PUTRGBA(dst_2, py_2, pa_2, 1, 24);
- PUTRGBA(dst_1, py_1, pa_1, 1, 24);
- pa_1 += 4;
- pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
- LOADCHROMA(0);
- PUTRGBA(dst_1, py_1, pa_1, 0, 24);
- PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-ENDYUV2RGBFUNC()
-
-#if HAVE_BIGENDIAN
-YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
-#else
-YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
-#endif
- LOADCHROMA(0);
- PUTRGBA(dst_1, py_1, pa_1, 0, 0);
- PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
- LOADCHROMA(1);
- PUTRGBA(dst_2, py_2, pa_2, 1, 0);
- PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-
- LOADCHROMA(2);
- PUTRGBA(dst_1, py_1, pa_1, 2, 0);
- PUTRGBA(dst_2, py_2, pa_2, 2, 0);
-
- LOADCHROMA(3);
- PUTRGBA(dst_2, py_2, pa_2, 3, 0);
- PUTRGBA(dst_1, py_1, pa_1, 3, 0);
- pa_1 += 8;
- pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
- LOADCHROMA(0);
- PUTRGBA(dst_1, py_1, pa_1, 0, 0);
- PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
- LOADCHROMA(1);
- PUTRGBA(dst_2, py_2, pa_2, 1, 0);
- PUTRGBA(dst_1, py_1, pa_1, 1, 0);
- pa_1 += 4;
- pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
- LOADCHROMA(0);
- PUTRGBA(dst_1, py_1, pa_1, 0, 0);
- PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
- LOADCHROMA(0);
- PUTRGB24(dst_1, py_1, 0);
- PUTRGB24(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTRGB24(dst_2, py_2, 1);
- PUTRGB24(dst_1, py_1, 1);
-
- LOADCHROMA(2);
- PUTRGB24(dst_1, py_1, 2);
- PUTRGB24(dst_2, py_2, 2);
-
- LOADCHROMA(3);
- PUTRGB24(dst_2, py_2, 3);
- PUTRGB24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
- LOADCHROMA(0);
- PUTRGB24(dst_1, py_1, 0);
- PUTRGB24(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTRGB24(dst_2, py_2, 1);
- PUTRGB24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
- LOADCHROMA(0);
- PUTRGB24(dst_1, py_1, 0);
- PUTRGB24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-// only trivial mods from yuv2rgb_c_24_rgb
-YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
- LOADCHROMA(0);
- PUTBGR24(dst_1, py_1, 0);
- PUTBGR24(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTBGR24(dst_2, py_2, 1);
- PUTBGR24(dst_1, py_1, 1);
-
- LOADCHROMA(2);
- PUTBGR24(dst_1, py_1, 2);
- PUTBGR24(dst_2, py_2, 2);
-
- LOADCHROMA(3);
- PUTBGR24(dst_2, py_2, 3);
- PUTBGR24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
- LOADCHROMA(0);
- PUTBGR24(dst_1, py_1, 0);
- PUTBGR24(dst_2, py_2, 0);
-
- LOADCHROMA(1);
- PUTBGR24(dst_2, py_2, 1);
- PUTBGR24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
- LOADCHROMA(0);
- PUTBGR24(dst_1, py_1, 0);
- PUTBGR24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
- const uint8_t *d16 = ff_dither_2x2_8[y & 1];
- const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+#define YUV420FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \
+ YUV2RGBFUNC(func_name, dst_type, alpha) \
+ LOADCHROMA(0); \
+ PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
+ PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
+ \
+ LOADCHROMA(1); \
+ PUTFUNC(dst_2, py_2, pa_2, 1, abase); \
+ PUTFUNC(dst_1, py_1, pa_1, 1, abase); \
+ \
+ LOADCHROMA(2); \
+ PUTFUNC(dst_1, py_1, pa_1, 2, abase); \
+ PUTFUNC(dst_2, py_2, pa_2, 2, abase); \
+ \
+ LOADCHROMA(3); \
+ PUTFUNC(dst_2, py_2, pa_2, 3, abase); \
+ PUTFUNC(dst_1, py_1, pa_1, 3, abase); \
+ ENDYUV2RGBLINE(dst_delta, 0, alpha) \
+ LOADCHROMA(0); \
+ PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
+ PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
+ \
+ LOADCHROMA(1); \
+ PUTFUNC(dst_2, py_2, pa_2, 1, abase); \
+ PUTFUNC(dst_1, py_1, pa_1, 1, abase); \
+ ENDYUV2RGBLINE(dst_delta, 1, alpha) \
+ LOADCHROMA(0); \
+ PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
+ PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
+ ENDYUV2RGBFUNC()
+
+#define YUV420FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \
+ YUV2RGBFUNC(func_name, dst_type, 0) \
+ LOADDITHER \
+ \
+ LOADCHROMA(0); \
+ PUTFUNC(dst_1, py_1, 0, 0); \
+ PUTFUNC(dst_2, py_2, 0, 0 + 8); \
+ \
+ LOADCHROMA(1); \
+ PUTFUNC(dst_2, py_2, 1, 2 + 8); \
+ PUTFUNC(dst_1, py_1, 1, 2); \
+ \
+ LOADCHROMA(2); \
+ PUTFUNC(dst_1, py_1, 2, 4); \
+ PUTFUNC(dst_2, py_2, 2, 4 + 8); \
+ \
+ LOADCHROMA(3); \
+ PUTFUNC(dst_2, py_2, 3, 6 + 8); \
+ PUTFUNC(dst_1, py_1, 3, 6); \
+ ENDYUV2RGBLINE(dst_delta, 0, 0) \
+ LOADDITHER \
+ \
+ LOADCHROMA(0); \
+ PUTFUNC(dst_1, py_1, 0, 0); \
+ PUTFUNC(dst_2, py_2, 0, 0 + 8); \
+ \
+ LOADCHROMA(1); \
+ PUTFUNC(dst_2, py_2, 1, 2 + 8); \
+ PUTFUNC(dst_1, py_1, 1, 2); \
+ ENDYUV2RGBLINE(dst_delta, 1, 0) \
+ LOADDITHER \
+ \
+ LOADCHROMA(0); \
+ PUTFUNC(dst_1, py_1, 0, 0); \
+ PUTFUNC(dst_2, py_2, 0, 0 + 8); \
+ ENDYUV2RGBFUNC()
+
+#define LOADDITHER16 \
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1]; \
+ const uint8_t *e16 = ff_dither_2x2_4[y & 1]; \
const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
#define PUTRGB16(dst, src, i, o) \
@@ -413,45 +258,9 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
dst[2 * i + 1] = r[Y + d16[1 + o]] + \
g[Y + e16[1 + o]] + \
b[Y + f16[1 + o]];
- LOADCHROMA(0);
- PUTRGB16(dst_1, py_1, 0, 0);
- PUTRGB16(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB16(dst_2, py_2, 1, 2 + 8);
- PUTRGB16(dst_1, py_1, 1, 2);
-
- LOADCHROMA(2);
- PUTRGB16(dst_1, py_1, 2, 4);
- PUTRGB16(dst_2, py_2, 2, 4 + 8);
-
- LOADCHROMA(3);
- PUTRGB16(dst_2, py_2, 3, 6 + 8);
- PUTRGB16(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
- const uint8_t *d16 = ff_dither_2x2_8[y & 1];
- const uint8_t *e16 = ff_dither_2x2_4[y & 1];
- const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
-
- LOADCHROMA(0);
- PUTRGB16(dst_1, py_1, 0, 0);
- PUTRGB16(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB16(dst_2, py_2, 1, 2 + 8);
- PUTRGB16(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
- const uint8_t *d16 = ff_dither_2x2_8[y & 1];
- const uint8_t *e16 = ff_dither_2x2_4[y & 1];
- const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
- LOADCHROMA(0);
- PUTRGB16(dst_1, py_1, 0, 0);
- PUTRGB16(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
- const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+#define LOADDITHER15 \
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1]; \
const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
#define PUTRGB15(dst, src, i, o) \
@@ -463,43 +272,8 @@ YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
dst[2 * i + 1] = r[Y + d16[1 + o]] + \
g[Y + d16[0 + o]] + \
b[Y + e16[1 + o]];
- LOADCHROMA(0);
- PUTRGB15(dst_1, py_1, 0, 0);
- PUTRGB15(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB15(dst_2, py_2, 1, 2 + 8);
- PUTRGB15(dst_1, py_1, 1, 2);
-
- LOADCHROMA(2);
- PUTRGB15(dst_1, py_1, 2, 4);
- PUTRGB15(dst_2, py_2, 2, 4 + 8);
-
- LOADCHROMA(3);
- PUTRGB15(dst_2, py_2, 3, 6 + 8);
- PUTRGB15(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
- const uint8_t *d16 = ff_dither_2x2_8[y & 1];
- const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
-
- LOADCHROMA(0);
- PUTRGB15(dst_1, py_1, 0, 0);
- PUTRGB15(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB15(dst_2, py_2, 1, 2 + 8);
- PUTRGB15(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
- const uint8_t *d16 = ff_dither_2x2_8[y & 1];
- const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
-
- LOADCHROMA(0);
- PUTRGB15(dst_1, py_1, 0, 0);
- PUTRGB15(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
+#define LOADDITHER12 \
const uint8_t *d16 = ff_dither_4x4_16[y & 3];
#define PUTRGB12(dst, src, i, o) \
@@ -512,42 +286,8 @@ YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
g[Y + d16[1 + o]] + \
b[Y + d16[1 + o]];
- LOADCHROMA(0);
- PUTRGB12(dst_1, py_1, 0, 0);
- PUTRGB12(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB12(dst_2, py_2, 1, 2 + 8);
- PUTRGB12(dst_1, py_1, 1, 2);
-
- LOADCHROMA(2);
- PUTRGB12(dst_1, py_1, 2, 4);
- PUTRGB12(dst_2, py_2, 2, 4 + 8);
-
- LOADCHROMA(3);
- PUTRGB12(dst_2, py_2, 3, 6 + 8);
- PUTRGB12(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
- const uint8_t *d16 = ff_dither_4x4_16[y & 3];
-
- LOADCHROMA(0);
- PUTRGB12(dst_1, py_1, 0, 0);
- PUTRGB12(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB12(dst_2, py_2, 1, 2 + 8);
- PUTRGB12(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
- const uint8_t *d16 = ff_dither_4x4_16[y & 3];
-
- LOADCHROMA(0);
- PUTRGB12(dst_1, py_1, 0, 0);
- PUTRGB12(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
- const uint8_t *d32 = ff_dither_8x8_32[yd & 7];
+#define LOADDITHER8 \
+ const uint8_t *d32 = ff_dither_8x8_32[yd & 7]; \
const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
#define PUTRGB8(dst, src, i, o) \
@@ -560,46 +300,9 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
g[Y + d32[1 + o]] + \
b[Y + d64[1 + o]];
- LOADCHROMA(0);
- PUTRGB8(dst_1, py_1, 0, 0);
- PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB8(dst_2, py_2, 1, 2 + 8);
- PUTRGB8(dst_1, py_1, 1, 2);
-
- LOADCHROMA(2);
- PUTRGB8(dst_1, py_1, 2, 4);
- PUTRGB8(dst_2, py_2, 2, 4 + 8);
-
- LOADCHROMA(3);
- PUTRGB8(dst_2, py_2, 3, 6 + 8);
- PUTRGB8(dst_1, py_1, 3, 6);
-
-ENDYUV2RGBLINE(8, 0)
- const uint8_t *d32 = ff_dither_8x8_32[yd & 7];
- const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
- LOADCHROMA(0);
- PUTRGB8(dst_1, py_1, 0, 0);
- PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB8(dst_2, py_2, 1, 2 + 8);
- PUTRGB8(dst_1, py_1, 1, 2);
-
-ENDYUV2RGBLINE(8, 1)
- const uint8_t *d32 = ff_dither_8x8_32[yd & 7];
- const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
- LOADCHROMA(0);
- PUTRGB8(dst_1, py_1, 0, 0);
- PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
-ENDYUV2RGBFUNC()
-
-
-YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
- const uint8_t * d64 = ff_dither_8x8_73[yd & 7];
- const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
+#define LOADDITHER4D \
+ const uint8_t * d64 = ff_dither_8x8_73[yd & 7]; \
+ const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; \
int acc;
#define PUTRGB4D(dst, src, i, o) \
@@ -613,45 +316,8 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
b[Y + d128[1 + o]]) << 4; \
dst[i] = acc;
- LOADCHROMA(0);
- PUTRGB4D(dst_1, py_1, 0, 0);
- PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB4D(dst_2, py_2, 1, 2 + 8);
- PUTRGB4D(dst_1, py_1, 1, 2);
-
- LOADCHROMA(2);
- PUTRGB4D(dst_1, py_1, 2, 4);
- PUTRGB4D(dst_2, py_2, 2, 4 + 8);
-
- LOADCHROMA(3);
- PUTRGB4D(dst_2, py_2, 3, 6 + 8);
- PUTRGB4D(dst_1, py_1, 3, 6);
-
-ENDYUV2RGBLINE(4, 0)
- const uint8_t * d64 = ff_dither_8x8_73[yd & 7];
- const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
- int acc;
- LOADCHROMA(0);
- PUTRGB4D(dst_1, py_1, 0, 0);
- PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB4D(dst_2, py_2, 1, 2 + 8);
- PUTRGB4D(dst_1, py_1, 1, 2);
-
-ENDYUV2RGBLINE(4, 1)
- const uint8_t * d64 = ff_dither_8x8_73[yd & 7];
- const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
- int acc;
- LOADCHROMA(0);
- PUTRGB4D(dst_1, py_1, 0, 0);
- PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
- const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
+#define LOADDITHER4DB \
+ const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; \
const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
#define PUTRGB4DB(dst, src, i, o) \
@@ -664,39 +330,6 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
g[Y + d64[1 + o]] + \
b[Y + d128[1 + o]];
- LOADCHROMA(0);
- PUTRGB4DB(dst_1, py_1, 0, 0);
- PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
- PUTRGB4DB(dst_1, py_1, 1, 2);
-
- LOADCHROMA(2);
- PUTRGB4DB(dst_1, py_1, 2, 4);
- PUTRGB4DB(dst_2, py_2, 2, 4 + 8);
-
- LOADCHROMA(3);
- PUTRGB4DB(dst_2, py_2, 3, 6 + 8);
- PUTRGB4DB(dst_1, py_1, 3, 6);
-ENDYUV2RGBLINE(8, 0)
- const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
- const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
- LOADCHROMA(0);
- PUTRGB4DB(dst_1, py_1, 0, 0);
- PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-
- LOADCHROMA(1);
- PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
- PUTRGB4DB(dst_1, py_1, 1, 2);
-ENDYUV2RGBLINE(8, 1)
- const uint8_t *d64 = ff_dither_8x8_73[yd & 7];
- const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
- LOADCHROMA(0);
- PUTRGB4DB(dst_1, py_1, 0, 0);
- PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-ENDYUV2RGBFUNC()
-
YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
char out_1 = 0, out_2 = 0;
@@ -759,6 +392,25 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
dst_2[0] = out_2;
ENDYUV2RGBFUNC()
+YUV420FUNC(yuv2rgb_c_48, uint8_t, 0, 0, PUTRGB48, 48)
+YUV420FUNC(yuv2rgb_c_bgr48, uint8_t, 0, 0, PUTBGR48, 48)
+YUV420FUNC(yuv2rgb_c_32, uint32_t, 0, 0, PUTRGB, 8)
+#if HAVE_BIGENDIAN
+YUV420FUNC(yuva2argb_c, uint32_t, 1, 24, PUTRGBA, 8)
+YUV420FUNC(yuva2rgba_c, uint32_t, 1, 0, PUTRGBA, 8)
+#else
+YUV420FUNC(yuva2rgba_c, uint32_t, 1, 24, PUTRGBA, 8)
+YUV420FUNC(yuva2argb_c, uint32_t, 1, 0, PUTRGBA, 8)
+#endif
+YUV420FUNC(yuv2rgb_c_24_rgb, uint8_t, 0, 0, PUTRGB24, 24)
+YUV420FUNC(yuv2rgb_c_24_bgr, uint8_t, 0, 0, PUTBGR24, 24)
+YUV420FUNC_DITHER(yuv2rgb_c_16_ordered_dither, uint16_t, LOADDITHER16, PUTRGB16, 8)
+YUV420FUNC_DITHER(yuv2rgb_c_15_ordered_dither, uint16_t, LOADDITHER15, PUTRGB15, 8)
+YUV420FUNC_DITHER(yuv2rgb_c_12_ordered_dither, uint16_t, LOADDITHER12, PUTRGB12, 8)
+YUV420FUNC_DITHER(yuv2rgb_c_8_ordered_dither, uint8_t, LOADDITHER8, PUTRGB8, 8)
+YUV420FUNC_DITHER(yuv2rgb_c_4_ordered_dither, uint8_t, LOADDITHER4D, PUTRGB4D, 4)
+YUV420FUNC_DITHER(yuv2rgb_c_4b_ordered_dither, uint8_t, LOADDITHER4DB, PUTRGB4DB, 8)
+
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
{
SwsFunc t = NULL;
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
2024-06-19 10:13 ` Michael Niedermayer
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit Ramiro Polla
` (2 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
To: ffmpeg-devel
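For yuv422p input, the C code was silently ignoring the second chroma line
of each pair of rows: the chroma strides were doubled, so both luma lines
of a pair were converted using the chroma of the first line only.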
---
libswscale/yuv2rgb.c | 228 +++++++++++++++++++++++++++++++++++++------
1 file changed, 196 insertions(+), 32 deletions(-)
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 7386d3a2a2..1ea87ac17a 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -65,7 +65,7 @@ const int *sws_getCoefficients(int colorspace)
return ff_yuv2rgb_coeffs[colorspace];
}
-#define LOADCHROMA(i) \
+#define LOADCHROMA(pu, pv, i) \
U = pu[i]; \
V = pv[i]; \
r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
@@ -124,17 +124,13 @@ const int *sws_getCoefficients(int colorspace)
dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \
dst[12 * i + 10] = dst[12 * i + 11] = r[Y];
-#define YUV2RGBFUNC(func_name, dst_type, alpha) \
+#define YUV2RGBFUNC(func_name, dst_type, alpha, yuv422) \
static int func_name(SwsContext *c, const uint8_t *src[], \
int srcStride[], int srcSliceY, int srcSliceH, \
uint8_t *dst[], int dstStride[]) \
{ \
int y; \
\
- if (!alpha && c->srcFormat == AV_PIX_FMT_YUV422P) { \
- srcStride[1] *= 2; \
- srcStride[2] *= 2; \
- } \
for (y = 0; y < srcSliceH; y += 2) { \
int yd = y + srcSliceY; \
dst_type *dst_1 = \
@@ -144,10 +140,15 @@ const int *sws_getCoefficients(int colorspace)
dst_type av_unused *r, *g, *b; \
const uint8_t *py_1 = src[0] + y * srcStride[0]; \
const uint8_t *py_2 = py_1 + srcStride[0]; \
- const uint8_t av_unused *pu = src[1] + (y >> 1) * srcStride[1]; \
- const uint8_t av_unused *pv = src[2] + (y >> 1) * srcStride[2]; \
+ const uint8_t av_unused *pu_1 = src[1] + (y >> !yuv422) * srcStride[1]; \
+ const uint8_t av_unused *pv_1 = src[2] + (y >> !yuv422) * srcStride[2]; \
+ const uint8_t av_unused *pu_2, *pv_2; \
const uint8_t av_unused *pa_1, *pa_2; \
unsigned int h_size = c->dstW >> 3; \
+ if (yuv422) { \
+ pu_2 = pu_1 + srcStride[1]; \
+ pv_2 = pv_1 + srcStride[2]; \
+ } \
if (alpha) { \
pa_1 = src[3] + y * srcStride[3]; \
pa_2 = pa_1 + srcStride[3]; \
@@ -155,9 +156,13 @@ const int *sws_getCoefficients(int colorspace)
while (h_size--) { \
int av_unused U, V, Y; \
-#define ENDYUV2RGBLINE(dst_delta, ss, alpha) \
- pu += 4 >> ss; \
- pv += 4 >> ss; \
+#define ENDYUV2RGBLINE(dst_delta, ss, alpha, yuv422) \
+ pu_1 += 4 >> ss; \
+ pv_1 += 4 >> ss; \
+ if (yuv422) { \
+ pu_2 += 4 >> ss; \
+ pv_2 += 4 >> ss; \
+ } \
py_1 += 8 >> ss; \
py_2 += 8 >> ss; \
if (alpha) { \
@@ -177,73 +182,169 @@ const int *sws_getCoefficients(int colorspace)
}
#define YUV420FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \
- YUV2RGBFUNC(func_name, dst_type, alpha) \
- LOADCHROMA(0); \
+ YUV2RGBFUNC(func_name, dst_type, alpha, 0) \
+ LOADCHROMA(pu_1, pv_1, 0); \
PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
\
- LOADCHROMA(1); \
+ LOADCHROMA(pu_1, pv_1, 1); \
PUTFUNC(dst_2, py_2, pa_2, 1, abase); \
PUTFUNC(dst_1, py_1, pa_1, 1, abase); \
\
- LOADCHROMA(2); \
+ LOADCHROMA(pu_1, pv_1, 2); \
PUTFUNC(dst_1, py_1, pa_1, 2, abase); \
PUTFUNC(dst_2, py_2, pa_2, 2, abase); \
\
- LOADCHROMA(3); \
+ LOADCHROMA(pu_1, pv_1, 3); \
PUTFUNC(dst_2, py_2, pa_2, 3, abase); \
PUTFUNC(dst_1, py_1, pa_1, 3, abase); \
- ENDYUV2RGBLINE(dst_delta, 0, alpha) \
- LOADCHROMA(0); \
+ ENDYUV2RGBLINE(dst_delta, 0, alpha, 0) \
+ LOADCHROMA(pu_1, pv_1, 0); \
PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
\
- LOADCHROMA(1); \
+ LOADCHROMA(pu_1, pv_1, 1); \
PUTFUNC(dst_2, py_2, pa_2, 1, abase); \
PUTFUNC(dst_1, py_1, pa_1, 1, abase); \
- ENDYUV2RGBLINE(dst_delta, 1, alpha) \
- LOADCHROMA(0); \
+ ENDYUV2RGBLINE(dst_delta, 1, alpha, 0) \
+ LOADCHROMA(pu_1, pv_1, 0); \
PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
ENDYUV2RGBFUNC()
+#define YUV422FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \
+ YUV2RGBFUNC(func_name, dst_type, alpha, 1) \
+ LOADCHROMA(pu_1, pv_1, 0); \
+ PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 0); \
+ PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 1); \
+ PUTFUNC(dst_2, py_2, pa_2, 1, abase); \
+ \
+ LOADCHROMA(pu_1, pv_1, 1); \
+ PUTFUNC(dst_1, py_1, pa_1, 1, abase); \
+ \
+ LOADCHROMA(pu_1, pv_1, 2); \
+ PUTFUNC(dst_1, py_1, pa_1, 2, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 2); \
+ PUTFUNC(dst_2, py_2, pa_2, 2, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 3); \
+ PUTFUNC(dst_2, py_2, pa_2, 3, abase); \
+ \
+ LOADCHROMA(pu_1, pv_1, 3); \
+ PUTFUNC(dst_1, py_1, pa_1, 3, abase); \
+ ENDYUV2RGBLINE(dst_delta, 0, alpha, 1) \
+ LOADCHROMA(pu_1, pv_1, 0); \
+ PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 0); \
+ PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 1); \
+ PUTFUNC(dst_2, py_2, pa_2, 1, abase); \
+ \
+ LOADCHROMA(pu_1, pv_1, 1); \
+ PUTFUNC(dst_1, py_1, pa_1, 1, abase); \
+ ENDYUV2RGBLINE(dst_delta, 1, alpha, 1) \
+ LOADCHROMA(pu_1, pv_1, 0); \
+ PUTFUNC(dst_1, py_1, pa_1, 0, abase); \
+ \
+ LOADCHROMA(pu_2, pv_2, 0); \
+ PUTFUNC(dst_2, py_2, pa_2, 0, abase); \
+ ENDYUV2RGBFUNC()
+
#define YUV420FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \
- YUV2RGBFUNC(func_name, dst_type, 0) \
+ YUV2RGBFUNC(func_name, dst_type, 0, 0) \
LOADDITHER \
\
- LOADCHROMA(0); \
+ LOADCHROMA(pu_1, pv_1, 0); \
PUTFUNC(dst_1, py_1, 0, 0); \
PUTFUNC(dst_2, py_2, 0, 0 + 8); \
\
- LOADCHROMA(1); \
+ LOADCHROMA(pu_1, pv_1, 1); \
PUTFUNC(dst_2, py_2, 1, 2 + 8); \
PUTFUNC(dst_1, py_1, 1, 2); \
\
- LOADCHROMA(2); \
+ LOADCHROMA(pu_1, pv_1, 2); \
PUTFUNC(dst_1, py_1, 2, 4); \
PUTFUNC(dst_2, py_2, 2, 4 + 8); \
\
- LOADCHROMA(3); \
+ LOADCHROMA(pu_1, pv_1, 3); \
PUTFUNC(dst_2, py_2, 3, 6 + 8); \
PUTFUNC(dst_1, py_1, 3, 6); \
- ENDYUV2RGBLINE(dst_delta, 0, 0) \
+ ENDYUV2RGBLINE(dst_delta, 0, 0, 0) \
LOADDITHER \
\
- LOADCHROMA(0); \
+ LOADCHROMA(pu_1, pv_1, 0); \
PUTFUNC(dst_1, py_1, 0, 0); \
PUTFUNC(dst_2, py_2, 0, 0 + 8); \
\
- LOADCHROMA(1); \
+ LOADCHROMA(pu_1, pv_1, 1); \
PUTFUNC(dst_2, py_2, 1, 2 + 8); \
PUTFUNC(dst_1, py_1, 1, 2); \
- ENDYUV2RGBLINE(dst_delta, 1, 0) \
+ ENDYUV2RGBLINE(dst_delta, 1, 0, 0) \
LOADDITHER \
\
- LOADCHROMA(0); \
+ LOADCHROMA(pu_1, pv_1, 0); \
PUTFUNC(dst_1, py_1, 0, 0); \
PUTFUNC(dst_2, py_2, 0, 0 + 8); \
ENDYUV2RGBFUNC()
+#define YUV422FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \
+ YUV2RGBFUNC(func_name, dst_type, 0, 1) \
+ LOADDITHER \
+ \
+ LOADCHROMA(pu_1, pv_1, 0); \
+ PUTFUNC(dst_1, py_1, 0, 0); \
+ \
+ LOADCHROMA(pu_2, pv_2, 0); \
+ PUTFUNC(dst_2, py_2, 0, 0 + 8); \
+ \
+ LOADCHROMA(pu_2, pv_2, 1); \
+ PUTFUNC(dst_2, py_2, 1, 2 + 8); \
+ \
+ LOADCHROMA(pu_1, pv_1, 1); \
+ PUTFUNC(dst_1, py_1, 1, 2); \
+ \
+ LOADCHROMA(pu_1, pv_1, 2); \
+ PUTFUNC(dst_1, py_1, 2, 4); \
+ \
+ LOADCHROMA(pu_2, pv_2, 2); \
+ PUTFUNC(dst_2, py_2, 2, 4 + 8); \
+ \
+ LOADCHROMA(pu_2, pv_2, 3); \
+ PUTFUNC(dst_2, py_2, 3, 6 + 8); \
+ \
+ LOADCHROMA(pu_1, pv_1, 3); \
+ PUTFUNC(dst_1, py_1, 3, 6); \
+ ENDYUV2RGBLINE(dst_delta, 0, 0, 1) \
+ LOADDITHER \
+ \
+ LOADCHROMA(pu_1, pv_1, 0); \
+ PUTFUNC(dst_1, py_1, 0, 0); \
+ \
+ LOADCHROMA(pu_2, pv_2, 0); \
+ PUTFUNC(dst_2, py_2, 0, 0 + 8); \
+ \
+ LOADCHROMA(pu_2, pv_2, 1); \
+ PUTFUNC(dst_2, py_2, 1, 2 + 8); \
+ \
+ LOADCHROMA(pu_1, pv_1, 1); \
+ PUTFUNC(dst_1, py_1, 1, 2); \
+ ENDYUV2RGBLINE(dst_delta, 1, 0, 1) \
+ LOADDITHER \
+ \
+ LOADCHROMA(pu_1, pv_1, 0); \
+ PUTFUNC(dst_1, py_1, 0, 0); \
+ \
+ LOADCHROMA(pu_2, pv_2, 0); \
+ PUTFUNC(dst_2, py_2, 0, 0 + 8); \
+ ENDYUV2RGBFUNC()
+
#define LOADDITHER16 \
const uint8_t *d16 = ff_dither_2x2_8[y & 1]; \
const uint8_t *e16 = ff_dither_2x2_4[y & 1]; \
@@ -330,7 +431,7 @@ const int *sws_getCoefficients(int colorspace)
g[Y + d64[1 + o]] + \
b[Y + d128[1 + o]];
-YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
+YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0, 0)
const uint8_t *d128 = ff_dither_8x8_220[yd & 7];
char out_1 = 0, out_2 = 0;
g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
@@ -392,6 +493,7 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
dst_2[0] = out_2;
ENDYUV2RGBFUNC()
+// YUV420
YUV420FUNC(yuv2rgb_c_48, uint8_t, 0, 0, PUTRGB48, 48)
YUV420FUNC(yuv2rgb_c_bgr48, uint8_t, 0, 0, PUTBGR48, 48)
YUV420FUNC(yuv2rgb_c_32, uint32_t, 0, 0, PUTRGB, 8)
@@ -411,6 +513,26 @@ YUV420FUNC_DITHER(yuv2rgb_c_8_ordered_dither, uint8_t, LOADDITHER8, PUTRGB8,
YUV420FUNC_DITHER(yuv2rgb_c_4_ordered_dither, uint8_t, LOADDITHER4D, PUTRGB4D, 4)
YUV420FUNC_DITHER(yuv2rgb_c_4b_ordered_dither, uint8_t, LOADDITHER4DB, PUTRGB4DB, 8)
+// YUV422
+YUV422FUNC(yuv422p_rgb48_c, uint8_t, 0, 0, PUTRGB48, 48)
+YUV422FUNC(yuv422p_bgr48_c, uint8_t, 0, 0, PUTBGR48, 48)
+YUV422FUNC(yuv422p_rgb32_c, uint32_t, 0, 0, PUTRGB, 8)
+#if HAVE_BIGENDIAN
+YUV422FUNC(yuva422p_argb_c, uint32_t, 1, 24, PUTRGBA, 8)
+YUV422FUNC(yuva422p_rgba_c, uint32_t, 1, 0, PUTRGBA, 8)
+#else
+YUV422FUNC(yuva422p_rgba_c, uint32_t, 1, 24, PUTRGBA, 8)
+YUV422FUNC(yuva422p_argb_c, uint32_t, 1, 0, PUTRGBA, 8)
+#endif
+YUV422FUNC(yuv422p_rgb24_c, uint8_t, 0, 0, PUTRGB24, 24)
+YUV422FUNC(yuv422p_bgr24_c, uint8_t, 0, 0, PUTBGR24, 24)
+YUV422FUNC_DITHER(yuv422p_bgr16, uint16_t, LOADDITHER16, PUTRGB16, 8)
+YUV422FUNC_DITHER(yuv422p_bgr15, uint16_t, LOADDITHER15, PUTRGB15, 8)
+YUV422FUNC_DITHER(yuv422p_bgr12, uint16_t, LOADDITHER12, PUTRGB12, 8)
+YUV422FUNC_DITHER(yuv422p_bgr8, uint8_t, LOADDITHER8, PUTRGB8, 8)
+YUV422FUNC_DITHER(yuv422p_bgr4, uint8_t, LOADDITHER4D, PUTRGB4D, 4)
+YUV422FUNC_DITHER(yuv422p_bgr4_byte, uint8_t, LOADDITHER4DB, PUTRGB4DB, 8)
+
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
{
SwsFunc t = NULL;
@@ -430,6 +552,47 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
"No accelerated colorspace conversion found from %s to %s.\n",
av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
+ if (c->srcFormat == AV_PIX_FMT_YUV422P) {
+ switch (c->dstFormat) {
+ case AV_PIX_FMT_BGR48BE:
+ case AV_PIX_FMT_BGR48LE:
+ return yuv422p_bgr48_c;
+ case AV_PIX_FMT_RGB48BE:
+ case AV_PIX_FMT_RGB48LE:
+ return yuv422p_rgb48_c;
+ case AV_PIX_FMT_ARGB:
+ case AV_PIX_FMT_ABGR:
+ if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
+ return yuva422p_argb_c;
+ case AV_PIX_FMT_RGBA:
+ case AV_PIX_FMT_BGRA:
+ return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva422p_rgba_c : yuv422p_rgb32_c;
+ case AV_PIX_FMT_RGB24:
+ return yuv422p_rgb24_c;
+ case AV_PIX_FMT_BGR24:
+ return yuv422p_bgr24_c;
+ case AV_PIX_FMT_RGB565:
+ case AV_PIX_FMT_BGR565:
+ return yuv422p_bgr16;
+ case AV_PIX_FMT_RGB555:
+ case AV_PIX_FMT_BGR555:
+ return yuv422p_bgr15;
+ case AV_PIX_FMT_RGB444:
+ case AV_PIX_FMT_BGR444:
+ return yuv422p_bgr12;
+ case AV_PIX_FMT_RGB8:
+ case AV_PIX_FMT_BGR8:
+ return yuv422p_bgr8;
+ case AV_PIX_FMT_RGB4:
+ case AV_PIX_FMT_BGR4:
+ return yuv422p_bgr4;
+ case AV_PIX_FMT_RGB4_BYTE:
+ case AV_PIX_FMT_BGR4_BYTE:
+ return yuv422p_bgr4_byte;
+ case AV_PIX_FMT_MONOBLACK:
+ return yuv2rgb_c_1_ordered_dither;
+ }
+ } else {
switch (c->dstFormat) {
case AV_PIX_FMT_BGR48BE:
case AV_PIX_FMT_BGR48LE:
@@ -469,6 +632,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
case AV_PIX_FMT_MONOBLACK:
return yuv2rgb_c_1_ordered_dither;
}
+ }
return NULL;
}
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
4 siblings, 0 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
To: ffmpeg-devel
---
libswscale/yuv2rgb.c | 78 ++++++++++++++++++++++----------------------
1 file changed, 39 insertions(+), 39 deletions(-)
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 1ea87ac17a..977eb3a7dd 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -593,45 +593,45 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
return yuv2rgb_c_1_ordered_dither;
}
} else {
- switch (c->dstFormat) {
- case AV_PIX_FMT_BGR48BE:
- case AV_PIX_FMT_BGR48LE:
- return yuv2rgb_c_bgr48;
- case AV_PIX_FMT_RGB48BE:
- case AV_PIX_FMT_RGB48LE:
- return yuv2rgb_c_48;
- case AV_PIX_FMT_ARGB:
- case AV_PIX_FMT_ABGR:
- if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
- return yuva2argb_c;
- case AV_PIX_FMT_RGBA:
- case AV_PIX_FMT_BGRA:
- return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
- case AV_PIX_FMT_RGB24:
- return yuv2rgb_c_24_rgb;
- case AV_PIX_FMT_BGR24:
- return yuv2rgb_c_24_bgr;
- case AV_PIX_FMT_RGB565:
- case AV_PIX_FMT_BGR565:
- return yuv2rgb_c_16_ordered_dither;
- case AV_PIX_FMT_RGB555:
- case AV_PIX_FMT_BGR555:
- return yuv2rgb_c_15_ordered_dither;
- case AV_PIX_FMT_RGB444:
- case AV_PIX_FMT_BGR444:
- return yuv2rgb_c_12_ordered_dither;
- case AV_PIX_FMT_RGB8:
- case AV_PIX_FMT_BGR8:
- return yuv2rgb_c_8_ordered_dither;
- case AV_PIX_FMT_RGB4:
- case AV_PIX_FMT_BGR4:
- return yuv2rgb_c_4_ordered_dither;
- case AV_PIX_FMT_RGB4_BYTE:
- case AV_PIX_FMT_BGR4_BYTE:
- return yuv2rgb_c_4b_ordered_dither;
- case AV_PIX_FMT_MONOBLACK:
- return yuv2rgb_c_1_ordered_dither;
- }
+ switch (c->dstFormat) {
+ case AV_PIX_FMT_BGR48BE:
+ case AV_PIX_FMT_BGR48LE:
+ return yuv2rgb_c_bgr48;
+ case AV_PIX_FMT_RGB48BE:
+ case AV_PIX_FMT_RGB48LE:
+ return yuv2rgb_c_48;
+ case AV_PIX_FMT_ARGB:
+ case AV_PIX_FMT_ABGR:
+ if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
+ return yuva2argb_c;
+ case AV_PIX_FMT_RGBA:
+ case AV_PIX_FMT_BGRA:
+ return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
+ case AV_PIX_FMT_RGB24:
+ return yuv2rgb_c_24_rgb;
+ case AV_PIX_FMT_BGR24:
+ return yuv2rgb_c_24_bgr;
+ case AV_PIX_FMT_RGB565:
+ case AV_PIX_FMT_BGR565:
+ return yuv2rgb_c_16_ordered_dither;
+ case AV_PIX_FMT_RGB555:
+ case AV_PIX_FMT_BGR555:
+ return yuv2rgb_c_15_ordered_dither;
+ case AV_PIX_FMT_RGB444:
+ case AV_PIX_FMT_BGR444:
+ return yuv2rgb_c_12_ordered_dither;
+ case AV_PIX_FMT_RGB8:
+ case AV_PIX_FMT_BGR8:
+ return yuv2rgb_c_8_ordered_dither;
+ case AV_PIX_FMT_RGB4:
+ case AV_PIX_FMT_BGR4:
+ return yuv2rgb_c_4_ordered_dither;
+ case AV_PIX_FMT_RGB4_BYTE:
+ case AV_PIX_FMT_BGR4_BYTE:
+ return yuv2rgb_c_4b_ordered_dither;
+ case AV_PIX_FMT_MONOBLACK:
+ return yuv2rgb_c_1_ordered_dither;
+ }
}
return NULL;
}
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
` (2 preceding siblings ...)
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
2024-06-20 14:59 ` Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
4 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
To: ffmpeg-devel
---
tests/checkasm/Makefile | 2 +-
tests/checkasm/checkasm.c | 1 +
tests/checkasm/checkasm.h | 1 +
tests/checkasm/sw_yuv2rgb.c | 205 ++++++++++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
5 files changed, 209 insertions(+), 1 deletion(-)
create mode 100644 tests/checkasm/sw_yuv2rgb.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index f20732b37a..3a7670e24b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o
CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
# swscale tests
-SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
+SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS)
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 56232ab1e0..d9ac772a08 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,7 @@ static const struct {
{ "sw_range_convert", checkasm_check_sw_range_convert },
{ "sw_rgb", checkasm_check_sw_rgb },
{ "sw_scale", checkasm_check_sw_scale },
+ { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
#endif
#if CONFIG_AVUTIL
{ "fixed_dsp", checkasm_check_fixed_dsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index e544007b67..4d5f3e387e 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
void checkasm_check_sw_range_convert(void);
void checkasm_check_sw_rgb(void);
void checkasm_check_sw_scale(void);
+void checkasm_check_sw_yuv2rgb(void);
void checkasm_check_takdsp(void);
void checkasm_check_utvideodsp(void);
void checkasm_check_v210dec(void);
diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
new file mode 100644
index 0000000000..fbe01a7788
--- /dev/null
+++ b/tests/checkasm/sw_yuv2rgb.c
@@ -0,0 +1,205 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size) \
+ do { \
+ for (int j = 0; j < size; j += 4) \
+ AV_WN32(buf + j, rnd()); \
+ } while (0)
+
+static const int dst_fmts[] = {
+// AV_PIX_FMT_BGR48BE,
+// AV_PIX_FMT_BGR48LE,
+// AV_PIX_FMT_RGB48BE,
+// AV_PIX_FMT_RGB48LE,
+ AV_PIX_FMT_ARGB,
+ AV_PIX_FMT_ABGR,
+ AV_PIX_FMT_RGBA,
+ AV_PIX_FMT_BGRA,
+ AV_PIX_FMT_RGB24,
+ AV_PIX_FMT_BGR24,
+ AV_PIX_FMT_RGB565,
+ AV_PIX_FMT_BGR565,
+ AV_PIX_FMT_RGB555,
+ AV_PIX_FMT_BGR555,
+// AV_PIX_FMT_RGB444,
+// AV_PIX_FMT_BGR444,
+// AV_PIX_FMT_RGB8,
+// AV_PIX_FMT_BGR8,
+// AV_PIX_FMT_RGB4,
+// AV_PIX_FMT_BGR4,
+// AV_PIX_FMT_RGB4_BYTE,
+// AV_PIX_FMT_BGR4_BYTE,
+// AV_PIX_FMT_MONOBLACK,
+};
+
+static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+ for (size_t i = 0; i < n; i++) {
+ if (abs(ref[i] - test[i]) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+ const uint16_t *ref16 = (const uint16_t *) ref;
+ const uint16_t *test16 = (const uint16_t *) test;
+ for (size_t i = 0; i < n; i++) {
+ if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 5) & 0x1f) - ((test16[i] >> 5) & 0x1f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+ const uint16_t *ref16 = (const uint16_t *) ref;
+ const uint16_t *test16 = (const uint16_t *) test;
+ for (size_t i = 0; i < n; i++) {
+ if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 5) & 0x3f) - ((test16[i] >> 5) & 0x3f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static void check_yuv2rgb(int src_pix_fmt)
+{
+ const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
+#define MAX_LINE_SIZE 1920
+ static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
+
+ declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
+ int, SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[]);
+
+ LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
+ LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
+ LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
+ LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
+ const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
+
+ LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
+ uint8_t *dst0[4] = { dst0_ };
+ uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
+
+ LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
+ uint8_t *dst1[4] = { dst1_ };
+ uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
+
+ randomize_buffers(src_y, MAX_LINE_SIZE * 2);
+ randomize_buffers(src_u, MAX_LINE_SIZE);
+ randomize_buffers(src_v, MAX_LINE_SIZE);
+ randomize_buffers(src_a, MAX_LINE_SIZE * 2);
+
+ for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
+ int dst_pix_fmt = dst_fmts[dfi];
+ const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
+ int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
+ for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
+ struct SwsContext *ctx;
+ int width = input_sizes[isi];
+ int srcSliceY = 0;
+ int srcSliceH = 2;
+ int srcStride[4] = {
+ width,
+ width >> src_desc->log2_chroma_w,
+ width >> src_desc->log2_chroma_w,
+ width,
+ };
+ int dstStride[4] = { MAX_LINE_SIZE * 6 };
+
+ ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
+ width, srcSliceH, dst_pix_fmt,
+ 0, NULL, NULL, NULL);
+ if (!ctx)
+ fail();
+
+ if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
+ memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+ memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+
+ call_ref(ctx, src, srcStride, srcSliceY,
+ srcSliceH, dst0, dstStride);
+ call_new(ctx, src, srcStride, srcSliceY,
+ srcSliceH, dst1, dstStride);
+
+ if (dst_pix_fmt == AV_PIX_FMT_ARGB ||
+ dst_pix_fmt == AV_PIX_FMT_ABGR ||
+ dst_pix_fmt == AV_PIX_FMT_RGBA ||
+ dst_pix_fmt == AV_PIX_FMT_BGRA ||
+ dst_pix_fmt == AV_PIX_FMT_RGB24 ||
+ dst_pix_fmt == AV_PIX_FMT_BGR24) {
+ if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
+ cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
+ fail();
+ } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
+ dst_pix_fmt == AV_PIX_FMT_BGR565) {
+ if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
+ cmp_565_by_n(lines0[1], lines1[1], width, 2))
+ fail();
+ } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
+ dst_pix_fmt == AV_PIX_FMT_BGR555) {
+ if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
+ cmp_555_by_n(lines0[1], lines1[1], width, 2))
+ fail();
+ } else {
+ fail();
+ }
+
+ bench_new(ctx, src, srcStride, srcSliceY,
+ srcSliceH, dst0, dstStride);
+ }
+ sws_freeContext(ctx);
+ }
+ }
+}
+
+#undef MAX_LINE_SIZE
+
+void checkasm_check_sw_yuv2rgb(void)
+{
+ check_yuv2rgb(AV_PIX_FMT_YUV420P);
+ report("yuv420p");
+ check_yuv2rgb(AV_PIX_FMT_YUV422P);
+ report("yuv422p");
+ check_yuv2rgb(AV_PIX_FMT_YUVA420P);
+ report("yuva420p");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ed2ea5be6..49832b09bf 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-sw_range_convert \
fate-checkasm-sw_rgb \
fate-checkasm-sw_scale \
+ fate-checkasm-sw_yuv2rgb \
fate-checkasm-takdsp \
fate-checkasm-utvideodsp \
fate-checkasm-v210dec \
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
` (3 preceding siblings ...)
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
@ 2024-06-16 22:28 ` Ramiro Polla
2024-06-16 23:15 ` James Almer
4 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-16 22:28 UTC (permalink / raw)
To: ffmpeg-devel
These functions are either slower or barely faster than the C LUT
yuv2rgb code.
---
libswscale/x86/yuv2rgb.c | 51 -----------------
libswscale/x86/yuv2rgb_template.c | 4 --
libswscale/x86/yuv_2_rgb.asm | 93 +------------------------------
3 files changed, 3 insertions(+), 145 deletions(-)
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 6754062245..41dfa80f33 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -41,25 +41,8 @@
#define DITHER1XBPP // only for MMX
-//MMX versions
-#if HAVE_MMX
-#undef RENAME
-#define COMPILE_TEMPLATE_MMX
-#define RENAME(a) a ## _mmx
-#include "yuv2rgb_template.c"
-#undef COMPILE_TEMPLATE_MMX
-#endif /* HAVE_MMX */
-
-// MMXEXT versions
-#undef RENAME
-#define COMPILE_TEMPLATE_MMXEXT
-#define RENAME(a) a ## _mmxext
-#include "yuv2rgb_template.c"
-#undef COMPILE_TEMPLATE_MMXEXT
-
//SSSE3 versions
#undef RENAME
-#define COMPILE_TEMPLATE_SSSE3
#define RENAME(a) a ## _ssse3
#include "yuv2rgb_template.c"
@@ -99,40 +82,6 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
}
}
- if (EXTERNAL_MMXEXT(cpu_flags)) {
- switch (c->dstFormat) {
- case AV_PIX_FMT_RGB24:
- return yuv420_rgb24_mmxext;
- case AV_PIX_FMT_BGR24:
- return yuv420_bgr24_mmxext;
- }
- }
-
- if (EXTERNAL_MMX(cpu_flags)) {
- switch (c->dstFormat) {
- case AV_PIX_FMT_RGB32:
- if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if CONFIG_SWSCALE_ALPHA
- return yuva420_rgb32_mmx;
-#endif
- break;
- } else
- return yuv420_rgb32_mmx;
- case AV_PIX_FMT_BGR32:
- if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
-#if CONFIG_SWSCALE_ALPHA
- return yuva420_bgr32_mmx;
-#endif
- break;
- } else
- return yuv420_bgr32_mmx;
- case AV_PIX_FMT_RGB565:
- return yuv420_rgb16_mmx;
- case AV_PIX_FMT_RGB555:
- return yuv420_rgb15_mmx;
- }
- }
-
#endif /* HAVE_X86ASM */
return NULL;
}
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index 596943bb73..a4741e6873 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -47,7 +47,6 @@ extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t *image, const uint8_
const uint8_t *pv_index, const uint64_t *pointer_c_dither,
const uint8_t *py_2index);
-#ifndef COMPILE_TEMPLATE_MMXEXT
extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const uint8_t *pu_index,
const uint8_t *pv_index, const uint64_t *pointer_c_dither,
const uint8_t *py_2index);
@@ -163,9 +162,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
}
return srcSliceH;
}
-#endif
-#if !defined(COMPILE_TEMPLATE_MMX)
static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
int srcStride[],
int srcSliceY, int srcSliceH,
@@ -193,4 +190,3 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
}
return srcSliceH;
}
-#endif
diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index a1f9134e08..b67ab162d2 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -38,12 +38,6 @@ pb_e0: times 16 db 224
pb_03: times 16 db 3
pb_07: times 16 db 7
-mask_1101: dw -1, -1, 0, -1
-mask_0010: dw 0, 0, -1, 0
-mask_0110: dw 0, -1, -1, 0
-mask_1001: dw -1, 0, 0, -1
-mask_0100: dw 0, -1, 0, 0
-
SECTION .text
;-----------------------------------------------------------------------------
@@ -55,14 +49,6 @@ SECTION .text
;
;-----------------------------------------------------------------------------
-%macro MOV_H2L 1
-%if mmsize == 8
- psrlq %1, 32
-%else ; mmsize == 16
- psrldq %1, 8
-%endif
-%endmacro
-
%macro yuv2rgb_fn 3
%if %3 == 32
@@ -91,18 +77,6 @@ SECTION .text
%define m_blue m1
%endif
-%if mmsize == 8
-%define time_num 1
-%define reg_num 8
-%define y_offset [pointer_c_ditherq + 8 * 8]
-%define u_offset [pointer_c_ditherq + 9 * 8]
-%define v_offset [pointer_c_ditherq + 10 * 8]
-%define ug_coff [pointer_c_ditherq + 7 * 8]
-%define vg_coff [pointer_c_ditherq + 6 * 8]
-%define y_coff [pointer_c_ditherq + 3 * 8]
-%define ub_coff [pointer_c_ditherq + 5 * 8]
-%define vr_coff [pointer_c_ditherq + 4 * 8]
-%elif mmsize == 16
%define time_num 2
%if ARCH_X86_32
%define reg_num 8
@@ -125,13 +99,11 @@ SECTION .text
%define ub_coff m14
%define vr_coff m15
%endif ; ARCH_X86_32/64
-%endif ; coeff define mmsize == 8/16
cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
%if ARCH_X86_64
movsxd indexq, indexd
-%if mmsize == 16
VBROADCASTSD y_offset, [pointer_c_ditherq + 8 * 8]
VBROADCASTSD u_offset, [pointer_c_ditherq + 9 * 8]
VBROADCASTSD v_offset, [pointer_c_ditherq + 10 * 8]
@@ -141,7 +113,6 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
VBROADCASTSD ub_coff, [pointer_c_ditherq + 5 * 8]
VBROADCASTSD vr_coff, [pointer_c_ditherq + 4 * 8]
%endif
-%endif
.loop0:
movu m_y, [py_2indexq + 2 * indexq]
movh m_u, [pu_indexq + indexq]
@@ -157,7 +128,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
psllw m1, 3
psllw m6, 3
psllw m7, 3
-%if (ARCH_X86_32 && mmsize == 16)
+%if ARCH_X86_32
VBROADCASTSD m2, mu_offset
VBROADCASTSD m3, mv_offset
VBROADCASTSD m4, my_offset
@@ -176,7 +147,7 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
pmulhw m0, m5
VBROADCASTSD m4, mvr_coff
pmulhw m1, m4
-%else ; ARCH_X86_64 || mmsize == 8
+%else ; ARCH_X86_64
psubsw m0, u_offset ; U = U - 128
psubsw m1, v_offset ; V = V - 128
psubw m6, y_offset
@@ -207,49 +178,10 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
packuswb m2, m7 ; G0 G2 G4 G6 ... G1 G3 G5 G7 ...
mova m3, m_red
mova m6, m_blue
- MOV_H2L m_red
+ psrldq m_red, 8
punpcklbw m3, m2 ; R0 G0 R2 G2 R4 G4 R6 G6 R8 G8 ...
punpcklbw m6, m_red ; B0 R1 B2 R3 B4 R5 B6 R7 B8 R9 ...
- mova m5, m3
punpckhbw m2, m_blue ; G1 B1 G3 B3 G5 B5 G7 B7 G9 B9 ...
-%if mmsize == 8
- punpcklwd m3 ,m6 ; R0 G0 B0 R1 R2 G2 B2 R3
- punpckhwd m5, m6 ; R4 G4 B4 R5 R6 G6 B6 R7
-%if cpuflag(mmxext)
- pshufw m1, m2, 0xc6
- pshufw m6, m3, 0x84
- pshufw m7, m5, 0x38
- pand m6, [mask_1101] ; R0 G0 B0 R1 -- -- R2 G2
- movq m0, m1
- pand m7, [mask_0110] ; -- -- R6 G6 B6 R7 -- --
- movq m2, m1
- pand m1, [mask_0100] ; -- -- G3 B3 -- -- -- --
- psrlq m3, 48 ; B2 R3 -- -- -- -- -- --
- pand m0, [mask_0010] ; -- -- -- -- G1 B1 -- --
- psllq m5, 32 ; -- -- -- -- R4 G4 B4 R5
- pand m2, [mask_1001] ; G5 B5 -- -- -- -- G7 B7
- por m1, m3
- por m0, m6
- por m1, m5
- por m2, m7
- movntq [imageq], m0
- movntq [imageq + 8], m1
- movntq [imageq + 16], m2
-%else ; cpuflag(mmx)
- movd [imageq], m3 ; R0 G0 R2 G2
- movd [imageq + 4], m2 ; G1 B1
- psrlq m3, 32
- psrlq m2, 16
- movd [imageq + 6], m3 ; R2 G2 B2 R3
- movd [imageq + 10], m2 ; G3 B3
- psrlq m2, 16
- movd [imageq + 12], m5 ; R4 G4 B4 R5
- movd [imageq + 16], m2 ; G5 B5
- psrlq m5, 32
- movd [imageq + 20], m2 ; -- -- G7 B7
- movd [imageq + 18], m5 ; R6 G6 B6 R7
-%endif ; cpuflag
-%else ; mmsize == 16
pshufb m3, [rgb24_shuf1] ; r0 g0 r6 g6 r12 g12 r2 g2 r8 g8 r14 g14 r4 g4 r10 g10
pshufb m6, [rgb24_shuf2] ; b10 r11 b0 r1 b6 r7 b12 r13 b2 r3 b8 r9 b14 r15 b4 r5
pshufb m2, [rgb24_shuf3] ; g5 b5 g11 b11 g1 b1 g7 b7 g13 b13 g3 b3 g9 b9 g15 b15
@@ -274,7 +206,6 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
movu [imageq], m0
movu [imageq + 16], m1
movu [imageq + 32], m2
-%endif ; mmsize = 16
%else ; PACK RGB15/16/32
packuswb m0, m1
packuswb m3, m5
@@ -309,18 +240,12 @@ cglobal %1_420_%2%3, GPR_num, GPR_num, reg_num, parameters
movu [imageq + 24 * time_num], m_alpha
%else ; PACK RGB15/16
%define depth 2
-%if cpuflag(ssse3)
%define red_dither m3
%define green_dither m4
%define blue_dither m5
VBROADCASTSD red_dither, [pointer_c_ditherq + 0 * 8]
VBROADCASTSD green_dither, [pointer_c_ditherq + 1 * 8]
VBROADCASTSD blue_dither, [pointer_c_ditherq + 2 * 8]
-%else ; cpuflag(mmx/mmxext)
-%define blue_dither [pointer_c_ditherq + 2 * 8]
-%define green_dither [pointer_c_ditherq + 1 * 8]
-%define red_dither [pointer_c_ditherq + 0 * 8]
-%endif
%if %3 == 15
%define gmask pb_03
%define isRGB15 1
@@ -358,18 +283,6 @@ RET
%endmacro
-INIT_MMX mmx
-yuv2rgb_fn yuv, rgb, 32
-yuv2rgb_fn yuv, bgr, 32
-yuv2rgb_fn yuva, rgb, 32
-yuv2rgb_fn yuva, bgr, 32
-yuv2rgb_fn yuv, rgb, 15
-yuv2rgb_fn yuv, rgb, 16
-
-INIT_MMX mmxext
-yuv2rgb_fn yuv, rgb, 24
-yuv2rgb_fn yuv, bgr, 24
-
INIT_XMM ssse3
yuv2rgb_fn yuv, rgb, 24
yuv2rgb_fn yuv, bgr, 24
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
@ 2024-06-16 23:15 ` James Almer
2024-06-17 12:46 ` Ramiro Polla
0 siblings, 1 reply; 14+ messages in thread
From: James Almer @ 2024-06-16 23:15 UTC (permalink / raw)
To: ffmpeg-devel
On 6/16/2024 7:28 PM, Ramiro Polla wrote:
> These functions are either slower or barely faster than the C LUT
> yuv2rgb code.
> ---
> libswscale/x86/yuv2rgb.c | 51 -----------------
> libswscale/x86/yuv2rgb_template.c | 4 --
> libswscale/x86/yuv_2_rgb.asm | 93 +------------------------------
> 3 files changed, 3 insertions(+), 145 deletions(-)
>
> diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> index 6754062245..41dfa80f33 100644
> --- a/libswscale/x86/yuv2rgb.c
> +++ b/libswscale/x86/yuv2rgb.c
> @@ -41,25 +41,8 @@
>
> #define DITHER1XBPP // only for MMX
Shouldn't this be removed too?
>
> -//MMX versions
> -#if HAVE_MMX
> -#undef RENAME
> -#define COMPILE_TEMPLATE_MMX
> -#define RENAME(a) a ## _mmx
> -#include "yuv2rgb_template.c"
> -#undef COMPILE_TEMPLATE_MMX
> -#endif /* HAVE_MMX */
> -
> -// MMXEXT versions
> -#undef RENAME
> -#define COMPILE_TEMPLATE_MMXEXT
> -#define RENAME(a) a ## _mmxext
> -#include "yuv2rgb_template.c"
> -#undef COMPILE_TEMPLATE_MMXEXT
> -
> //SSSE3 versions
> #undef RENAME
> -#define COMPILE_TEMPLATE_SSSE3
> #define RENAME(a) a ## _ssse3
> #include "yuv2rgb_template.c"
You could write a seventh patch that moves the template stuff back to
this file, now that SSSE3 is the only version. See commit 8b62fb231a78.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
2024-06-16 23:15 ` James Almer
@ 2024-06-17 12:46 ` Ramiro Polla
2024-06-20 14:50 ` Ramiro Polla
0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-17 12:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Mon, Jun 17, 2024 at 1:16 AM James Almer <jamrial@gmail.com> wrote:
> On 6/16/2024 7:28 PM, Ramiro Polla wrote:
> > These functions are either slower or barely faster than the C LUT
> > yuv2rgb code.
> > ---
> > libswscale/x86/yuv2rgb.c | 51 -----------------
> > libswscale/x86/yuv2rgb_template.c | 4 --
> > libswscale/x86/yuv_2_rgb.asm | 93 +------------------------------
> > 3 files changed, 3 insertions(+), 145 deletions(-)
> >
> > diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> > index 6754062245..41dfa80f33 100644
> > --- a/libswscale/x86/yuv2rgb.c
> > +++ b/libswscale/x86/yuv2rgb.c
> > @@ -41,25 +41,8 @@
> >
> > #define DITHER1XBPP // only for MMX
>
> Shouldn't this be removed too?
I think this #define can already be removed from everywhere. It seems
to be unconditionally set in swscale_internal.h (I haven't tracked
down since when this is the case).
> > -//MMX versions
> > -#if HAVE_MMX
> > -#undef RENAME
> > -#define COMPILE_TEMPLATE_MMX
> > -#define RENAME(a) a ## _mmx
> > -#include "yuv2rgb_template.c"
> > -#undef COMPILE_TEMPLATE_MMX
> > -#endif /* HAVE_MMX */
> > -
> > -// MMXEXT versions
> > -#undef RENAME
> > -#define COMPILE_TEMPLATE_MMXEXT
> > -#define RENAME(a) a ## _mmxext
> > -#include "yuv2rgb_template.c"
> > -#undef COMPILE_TEMPLATE_MMXEXT
> > -
> > //SSSE3 versions
> > #undef RENAME
> > -#define COMPILE_TEMPLATE_SSSE3
> > #define RENAME(a) a ## _ssse3
> > #include "yuv2rgb_template.c"
>
> You could write a seventh patch that moves the template stuff back to
> this file, now that SSSE3 is the only version. See commit 8b62fb231a78.
Will do in the next version of this patchset.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
@ 2024-06-19 10:13 ` Michael Niedermayer
0 siblings, 0 replies; 14+ messages in thread
From: Michael Niedermayer @ 2024-06-19 10:13 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 624 bytes --]
On Mon, Jun 17, 2024 at 12:28:46AM +0200, Ramiro Polla wrote:
> The C code was silently ignoring the second chroma line on yuv422p
> input.
> ---
> libswscale/yuv2rgb.c | 228 +++++++++++++++++++++++++++++++++++++------
> 1 file changed, 196 insertions(+), 32 deletions(-)
Nice that you found this. That's an ugly bug.
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
If you fake or manipulate statistics in a paper in physics you will never
get a job again.
If you fake or manipulate statistics in a paper in medicine you will get
a job for life in the pharma industry.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions
2024-06-17 12:46 ` Ramiro Polla
@ 2024-06-20 14:50 ` Ramiro Polla
0 siblings, 0 replies; 14+ messages in thread
From: Ramiro Polla @ 2024-06-20 14:50 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Mon, Jun 17, 2024 at 2:46 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Mon, Jun 17, 2024 at 1:16 AM James Almer <jamrial@gmail.com> wrote:
> > On 6/16/2024 7:28 PM, Ramiro Polla wrote:
> > > These functions are either slower or barely faster than the C LUT
> > > yuv2rgb code.
> > > ---
> > > libswscale/x86/yuv2rgb.c | 51 -----------------
> > > libswscale/x86/yuv2rgb_template.c | 4 --
> > > libswscale/x86/yuv_2_rgb.asm | 93 +------------------------------
> > > 3 files changed, 3 insertions(+), 145 deletions(-)
> > >
> > > diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> > > index 6754062245..41dfa80f33 100644
> > > --- a/libswscale/x86/yuv2rgb.c
> > > +++ b/libswscale/x86/yuv2rgb.c
> > > @@ -41,25 +41,8 @@
> > >
> > > #define DITHER1XBPP // only for MMX
> >
> > Shouldn't this be removed too?
>
> I think this #define can already be removed from everywhere. It seems
> to be unconditionally set in swscale_internal.h (I haven't tracked
> down since when this is the case).
>
> > > -//MMX versions
> > > -#if HAVE_MMX
> > > -#undef RENAME
> > > -#define COMPILE_TEMPLATE_MMX
> > > -#define RENAME(a) a ## _mmx
> > > -#include "yuv2rgb_template.c"
> > > -#undef COMPILE_TEMPLATE_MMX
> > > -#endif /* HAVE_MMX */
> > > -
> > > -// MMXEXT versions
> > > -#undef RENAME
> > > -#define COMPILE_TEMPLATE_MMXEXT
> > > -#define RENAME(a) a ## _mmxext
> > > -#include "yuv2rgb_template.c"
> > > -#undef COMPILE_TEMPLATE_MMXEXT
> > > -
> > > //SSSE3 versions
> > > #undef RENAME
> > > -#define COMPILE_TEMPLATE_SSSE3
> > > #define RENAME(a) a ## _ssse3
> > > #include "yuv2rgb_template.c"
> >
> > You could write a seventh patch that moves the template stuff back to
> > this file, now that SSSE3 is the only version. See commit 8b62fb231a78.
>
> Will do in the next version of this patchset.
I'll apply this patchset if there are no more comments, before
submitting more patches to deal with DITHER1XBPP and detemplatizing.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
@ 2024-06-20 14:59 ` Ramiro Polla
2024-06-25 21:19 ` Ramiro Polla
0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-20 14:59 UTC (permalink / raw)
To: ffmpeg-devel
On Mon, Jun 17, 2024 at 12:28 AM Ramiro Polla <ramiro.polla@gmail.com> wrote:
>
> ---
> tests/checkasm/Makefile | 2 +-
> tests/checkasm/checkasm.c | 1 +
> tests/checkasm/checkasm.h | 1 +
> tests/checkasm/sw_yuv2rgb.c | 205 ++++++++++++++++++++++++++++++++++++
> tests/fate/checkasm.mak | 1 +
> 5 files changed, 209 insertions(+), 1 deletion(-)
> create mode 100644 tests/checkasm/sw_yuv2rgb.c
>
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index f20732b37a..3a7670e24b 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o
> CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
>
> # swscale tests
> -SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
> +SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
>
> CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS)
>
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 56232ab1e0..d9ac772a08 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -254,6 +254,7 @@ static const struct {
> { "sw_range_convert", checkasm_check_sw_range_convert },
> { "sw_rgb", checkasm_check_sw_rgb },
> { "sw_scale", checkasm_check_sw_scale },
> + { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
> #endif
> #if CONFIG_AVUTIL
> { "fixed_dsp", checkasm_check_fixed_dsp },
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index e544007b67..4d5f3e387e 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
> void checkasm_check_sw_range_convert(void);
> void checkasm_check_sw_rgb(void);
> void checkasm_check_sw_scale(void);
> +void checkasm_check_sw_yuv2rgb(void);
> void checkasm_check_takdsp(void);
> void checkasm_check_utvideodsp(void);
> void checkasm_check_v210dec(void);
> diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
> new file mode 100644
> index 0000000000..fbe01a7788
> --- /dev/null
> +++ b/tests/checkasm/sw_yuv2rgb.c
> @@ -0,0 +1,205 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <string.h>
> +
> +#include "libavutil/common.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavutil/mem_internal.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "libswscale/swscale.h"
> +#include "libswscale/swscale_internal.h"
> +
> +#include "checkasm.h"
> +
> +#define randomize_buffers(buf, size) \
> + do { \
> + for (int j = 0; j < size; j += 4) \
> + AV_WN32(buf + j, rnd()); \
> + } while (0)
> +
> +static const int dst_fmts[] = {
> +// AV_PIX_FMT_BGR48BE,
> +// AV_PIX_FMT_BGR48LE,
> +// AV_PIX_FMT_RGB48BE,
> +// AV_PIX_FMT_RGB48LE,
> + AV_PIX_FMT_ARGB,
> + AV_PIX_FMT_ABGR,
> + AV_PIX_FMT_RGBA,
> + AV_PIX_FMT_BGRA,
> + AV_PIX_FMT_RGB24,
> + AV_PIX_FMT_BGR24,
> + AV_PIX_FMT_RGB565,
> + AV_PIX_FMT_BGR565,
> + AV_PIX_FMT_RGB555,
> + AV_PIX_FMT_BGR555,
> +// AV_PIX_FMT_RGB444,
> +// AV_PIX_FMT_BGR444,
> +// AV_PIX_FMT_RGB8,
> +// AV_PIX_FMT_BGR8,
> +// AV_PIX_FMT_RGB4,
> +// AV_PIX_FMT_BGR4,
> +// AV_PIX_FMT_RGB4_BYTE,
> +// AV_PIX_FMT_BGR4_BYTE,
> +// AV_PIX_FMT_MONOBLACK,
> +};
> +
> +static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> + for (size_t i = 0; i < n; i++) {
> + if (abs(ref[i] - test[i]) > accuracy)
> + return 1;
> + }
> + return 0;
> +}
> +
> +static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> + const uint16_t *ref16 = (const uint16_t *) ref;
> + const uint16_t *test16 = (const uint16_t *) test;
> + for (size_t i = 0; i < n; i++) {
> + if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy)
> + return 1;
> + if (abs(((ref16[i] >> 5) & 0x1f) - ((test16[i] >> 5) & 0x1f)) > accuracy)
> + return 1;
> + if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
> + return 1;
> + }
> + return 0;
> +}
> +
> +static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
> +{
> + const uint16_t *ref16 = (const uint16_t *) ref;
> + const uint16_t *test16 = (const uint16_t *) test;
> + for (size_t i = 0; i < n; i++) {
> + if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy)
> + return 1;
> + if (abs(((ref16[i] >> 5) & 0x3f) - ((test16[i] >> 5) & 0x3f)) > accuracy)
> + return 1;
> + if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
> + return 1;
> + }
> + return 0;
> +}
> +
> +static void check_yuv2rgb(int src_pix_fmt)
> +{
> + const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
> +#define MAX_LINE_SIZE 1920
> + static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
> +
> + declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
> + int, SwsContext *c, const uint8_t *src[],
> + int srcStride[], int srcSliceY, int srcSliceH,
> + uint8_t *dst[], int dstStride[]);
> +
> + LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
> + LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
> + LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
> + LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
> + const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
> +
> + LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
> + uint8_t *dst0[4] = { dst0_ };
> + uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
> +
> + LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
> + uint8_t *dst1[4] = { dst1_ };
> + uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
> +
> + randomize_buffers(src_y, MAX_LINE_SIZE * 2);
> + randomize_buffers(src_u, MAX_LINE_SIZE);
> + randomize_buffers(src_v, MAX_LINE_SIZE);
> + randomize_buffers(src_a, MAX_LINE_SIZE * 2);
> +
> + for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
> + int dst_pix_fmt = dst_fmts[dfi];
> + const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
> + int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
> + for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
> + struct SwsContext *ctx;
> + int width = input_sizes[isi];
> + int srcSliceY = 0;
> + int srcSliceH = 2;
> + int srcStride[4] = {
> + width,
> + width >> src_desc->log2_chroma_w,
> + width >> src_desc->log2_chroma_w,
> + width,
> + };
> + int dstStride[4] = { MAX_LINE_SIZE * 6 };
> +
> + ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
> + width, srcSliceH, dst_pix_fmt,
> + 0, NULL, NULL, NULL);
checkasm gets a little spammy with this commit by printing "No
accelerated colorspace conversion found from %s to %s.\n" from
libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
the call to sws_getContext()?
> + if (!ctx)
> + fail();
> +
> + if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
> + memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
> + memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
> +
> + call_ref(ctx, src, srcStride, srcSliceY,
> + srcSliceH, dst0, dstStride);
> + call_new(ctx, src, srcStride, srcSliceY,
> + srcSliceH, dst1, dstStride);
> +
> + if (dst_pix_fmt == AV_PIX_FMT_ARGB ||
> + dst_pix_fmt == AV_PIX_FMT_ABGR ||
> + dst_pix_fmt == AV_PIX_FMT_RGBA ||
> + dst_pix_fmt == AV_PIX_FMT_BGRA ||
> + dst_pix_fmt == AV_PIX_FMT_RGB24 ||
> + dst_pix_fmt == AV_PIX_FMT_BGR24) {
> + if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
> + cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
> + fail();
> + } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
> + dst_pix_fmt == AV_PIX_FMT_BGR565) {
> + if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
> + cmp_565_by_n(lines0[1], lines1[1], width, 2))
> + fail();
> + } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
> + dst_pix_fmt == AV_PIX_FMT_BGR555) {
> + if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
> + cmp_555_by_n(lines0[1], lines1[1], width, 2))
> + fail();
> + } else {
> + fail();
> + }
> +
> + bench_new(ctx, src, srcStride, srcSliceY,
> + srcSliceH, dst0, dstStride);
> + }
> + sws_freeContext(ctx);
> + }
> + }
> +}
> +
> +#undef MAX_LINE_SIZE
> +
> +void checkasm_check_sw_yuv2rgb(void)
> +{
> + check_yuv2rgb(AV_PIX_FMT_YUV420P);
> + report("yuv420p");
> + check_yuv2rgb(AV_PIX_FMT_YUV422P);
> + report("yuv422p");
> + check_yuv2rgb(AV_PIX_FMT_YUVA420P);
> + report("yuva420p");
> +}
> diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
> index 0ed2ea5be6..49832b09bf 100644
> --- a/tests/fate/checkasm.mak
> +++ b/tests/fate/checkasm.mak
> @@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
> fate-checkasm-sw_range_convert \
> fate-checkasm-sw_rgb \
> fate-checkasm-sw_scale \
> + fate-checkasm-sw_yuv2rgb \
> fate-checkasm-takdsp \
> fate-checkasm-utvideodsp \
> fate-checkasm-v210dec \
> --
> 2.30.2
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
2024-06-20 14:59 ` Ramiro Polla
@ 2024-06-25 21:19 ` Ramiro Polla
2024-06-27 14:02 ` Ramiro Polla
0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-25 21:19 UTC (permalink / raw)
To: ffmpeg-devel
[-- Attachment #1: Type: text/plain, Size: 456 bytes --]
On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> checkasm gets a little spammy with this commit by printing "No
> accelerated colorspace conversion found from %s to %s.\n" from
> libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> the call to sws_getContext()?
New patch attached that overrides the log level to prevent spamming.
[-- Attachment #2: 0001-checkasm-add-tests-for-yuv2rgb.patch --]
[-- Type: text/x-patch, Size: 11009 bytes --]
From c0359653f11a5e6ae39445134c2e73cb211c4a65 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <ramiro.polla@gmail.com>
Date: Sun, 16 Jun 2024 22:40:13 +0200
Subject: [PATCH] checkasm: add tests for yuv2rgb
---
tests/checkasm/Makefile | 2 +-
tests/checkasm/checkasm.c | 1 +
tests/checkasm/checkasm.h | 1 +
tests/checkasm/sw_yuv2rgb.c | 211 ++++++++++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
5 files changed, 215 insertions(+), 1 deletion(-)
create mode 100644 tests/checkasm/sw_yuv2rgb.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index f20732b37a..3a7670e24b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -63,7 +63,7 @@ AVFILTEROBJS-$(CONFIG_SOBEL_FILTER) += vf_convolution.o
CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes)
# swscale tests
-SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o
+SWSCALEOBJS += sw_gbrp.o sw_range_convert.o sw_rgb.o sw_scale.o sw_yuv2rgb.o
CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS)
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index b88b128e4c..de0024099a 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,7 @@ static const struct {
{ "sw_range_convert", checkasm_check_sw_range_convert },
{ "sw_rgb", checkasm_check_sw_rgb },
{ "sw_scale", checkasm_check_sw_scale },
+ { "sw_yuv2rgb", checkasm_check_sw_yuv2rgb },
#endif
#if CONFIG_AVUTIL
{ "fixed_dsp", checkasm_check_fixed_dsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index e544007b67..4d5f3e387e 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -122,6 +122,7 @@ void checkasm_check_sw_gbrp(void);
void checkasm_check_sw_range_convert(void);
void checkasm_check_sw_rgb(void);
void checkasm_check_sw_scale(void);
+void checkasm_check_sw_yuv2rgb(void);
void checkasm_check_takdsp(void);
void checkasm_check_utvideodsp(void);
void checkasm_check_v210dec(void);
diff --git a/tests/checkasm/sw_yuv2rgb.c b/tests/checkasm/sw_yuv2rgb.c
new file mode 100644
index 0000000000..02ed9a74d5
--- /dev/null
+++ b/tests/checkasm/sw_yuv2rgb.c
@@ -0,0 +1,211 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+
+#include "checkasm.h"
+
+#define randomize_buffers(buf, size) \
+ do { \
+ for (int j = 0; j < size; j += 4) \
+ AV_WN32(buf + j, rnd()); \
+ } while (0)
+
+static const int dst_fmts[] = {
+// AV_PIX_FMT_BGR48BE,
+// AV_PIX_FMT_BGR48LE,
+// AV_PIX_FMT_RGB48BE,
+// AV_PIX_FMT_RGB48LE,
+ AV_PIX_FMT_ARGB,
+ AV_PIX_FMT_ABGR,
+ AV_PIX_FMT_RGBA,
+ AV_PIX_FMT_BGRA,
+ AV_PIX_FMT_RGB24,
+ AV_PIX_FMT_BGR24,
+ AV_PIX_FMT_RGB565,
+ AV_PIX_FMT_BGR565,
+ AV_PIX_FMT_RGB555,
+ AV_PIX_FMT_BGR555,
+// AV_PIX_FMT_RGB444,
+// AV_PIX_FMT_BGR444,
+// AV_PIX_FMT_RGB8,
+// AV_PIX_FMT_BGR8,
+// AV_PIX_FMT_RGB4,
+// AV_PIX_FMT_BGR4,
+// AV_PIX_FMT_RGB4_BYTE,
+// AV_PIX_FMT_BGR4_BYTE,
+// AV_PIX_FMT_MONOBLACK,
+};
+
+static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+ for (size_t i = 0; i < n; i++) {
+ if (abs(ref[i] - test[i]) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static int cmp_555_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+ const uint16_t *ref16 = (const uint16_t *) ref;
+ const uint16_t *test16 = (const uint16_t *) test;
+ for (size_t i = 0; i < n; i++) {
+ if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 5) & 0x1f) - ((test16[i] >> 5) & 0x1f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 10) & 0x1f) - ((test16[i] >> 10) & 0x1f)) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static int cmp_565_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
+{
+ const uint16_t *ref16 = (const uint16_t *) ref;
+ const uint16_t *test16 = (const uint16_t *) test;
+ for (size_t i = 0; i < n; i++) {
+ if (abs(( ref16[i] & 0x1f) - ( test16[i] & 0x1f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 5) & 0x3f) - ((test16[i] >> 5) & 0x3f)) > accuracy)
+ return 1;
+ if (abs(((ref16[i] >> 11) & 0x1f) - ((test16[i] >> 11) & 0x1f)) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static void check_yuv2rgb(int src_pix_fmt)
+{
+ const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
+#define MAX_LINE_SIZE 1920
+ static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
+
+ declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
+ int, SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[]);
+
+ LOCAL_ALIGNED_8(uint8_t, src_y, [MAX_LINE_SIZE * 2]);
+ LOCAL_ALIGNED_8(uint8_t, src_u, [MAX_LINE_SIZE]);
+ LOCAL_ALIGNED_8(uint8_t, src_v, [MAX_LINE_SIZE]);
+ LOCAL_ALIGNED_8(uint8_t, src_a, [MAX_LINE_SIZE * 2]);
+ const uint8_t *src[4] = { src_y, src_u, src_v, src_a };
+
+ LOCAL_ALIGNED_8(uint8_t, dst0_, [2 * MAX_LINE_SIZE * 6]);
+ uint8_t *dst0[4] = { dst0_ };
+ uint8_t *lines0[2] = { dst0_, dst0_ + MAX_LINE_SIZE * 6 };
+
+ LOCAL_ALIGNED_8(uint8_t, dst1_, [2 * MAX_LINE_SIZE * 6]);
+ uint8_t *dst1[4] = { dst1_ };
+ uint8_t *lines1[2] = { dst1_, dst1_ + MAX_LINE_SIZE * 6 };
+
+ randomize_buffers(src_y, MAX_LINE_SIZE * 2);
+ randomize_buffers(src_u, MAX_LINE_SIZE);
+ randomize_buffers(src_v, MAX_LINE_SIZE);
+ randomize_buffers(src_a, MAX_LINE_SIZE * 2);
+
+ for (int dfi = 0; dfi < FF_ARRAY_ELEMS(dst_fmts); dfi++) {
+ int dst_pix_fmt = dst_fmts[dfi];
+ const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
+ int sample_size = av_get_padded_bits_per_pixel(dst_desc) >> 3;
+ for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
+ struct SwsContext *ctx;
+ int log_level;
+ int width = input_sizes[isi];
+ int srcSliceY = 0;
+ int srcSliceH = 2;
+ int srcStride[4] = {
+ width,
+ width >> src_desc->log2_chroma_w,
+ width >> src_desc->log2_chroma_w,
+ width,
+ };
+ int dstStride[4] = { MAX_LINE_SIZE * 6 };
+
+ // override log level to prevent spamming of the message
+ // "No accelerated colorspace conversion found from %s to %s"
+ log_level = av_log_get_level();
+ av_log_set_level(AV_LOG_ERROR);
+ ctx = sws_getContext(width, srcSliceH, src_pix_fmt,
+ width, srcSliceH, dst_pix_fmt,
+ 0, NULL, NULL, NULL);
+ av_log_set_level(log_level);
+ if (!ctx)
+ fail();
+
+ if (check_func(ctx->convert_unscaled, "%s_%s_%d", src_desc->name, dst_desc->name, width)) {
+ memset(dst0_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+ memset(dst1_, 0xFF, 2 * MAX_LINE_SIZE * 6);
+
+ call_ref(ctx, src, srcStride, srcSliceY,
+ srcSliceH, dst0, dstStride);
+ call_new(ctx, src, srcStride, srcSliceY,
+ srcSliceH, dst1, dstStride);
+
+ if (dst_pix_fmt == AV_PIX_FMT_ARGB ||
+ dst_pix_fmt == AV_PIX_FMT_ABGR ||
+ dst_pix_fmt == AV_PIX_FMT_RGBA ||
+ dst_pix_fmt == AV_PIX_FMT_BGRA ||
+ dst_pix_fmt == AV_PIX_FMT_RGB24 ||
+ dst_pix_fmt == AV_PIX_FMT_BGR24) {
+ if (cmp_off_by_n(lines0[0], lines1[0], width * sample_size, 3) ||
+ cmp_off_by_n(lines0[1], lines1[1], width * sample_size, 3))
+ fail();
+ } else if (dst_pix_fmt == AV_PIX_FMT_RGB565 ||
+ dst_pix_fmt == AV_PIX_FMT_BGR565) {
+ if (cmp_565_by_n(lines0[0], lines1[0], width, 2) ||
+ cmp_565_by_n(lines0[1], lines1[1], width, 2))
+ fail();
+ } else if (dst_pix_fmt == AV_PIX_FMT_RGB555 ||
+ dst_pix_fmt == AV_PIX_FMT_BGR555) {
+ if (cmp_555_by_n(lines0[0], lines1[0], width, 2) ||
+ cmp_555_by_n(lines0[1], lines1[1], width, 2))
+ fail();
+ } else {
+ fail();
+ }
+
+ bench_new(ctx, src, srcStride, srcSliceY,
+ srcSliceH, dst0, dstStride);
+ }
+ sws_freeContext(ctx);
+ }
+ }
+}
+
+#undef MAX_LINE_SIZE
+
+void checkasm_check_sw_yuv2rgb(void)
+{
+ check_yuv2rgb(AV_PIX_FMT_YUV420P);
+ report("yuv420p");
+ check_yuv2rgb(AV_PIX_FMT_YUV422P);
+ report("yuv422p");
+ check_yuv2rgb(AV_PIX_FMT_YUVA420P);
+ report("yuva420p");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 0ed2ea5be6..49832b09bf 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-sw_range_convert \
fate-checkasm-sw_rgb \
fate-checkasm-sw_scale \
+ fate-checkasm-sw_yuv2rgb \
fate-checkasm-takdsp \
fate-checkasm-utvideodsp \
fate-checkasm-v210dec \
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
2024-06-25 21:19 ` Ramiro Polla
@ 2024-06-27 14:02 ` Ramiro Polla
2024-06-30 0:33 ` Sean McGovern
0 siblings, 1 reply; 14+ messages in thread
From: Ramiro Polla @ 2024-06-27 14:02 UTC (permalink / raw)
To: ffmpeg-devel
On Tue, Jun 25, 2024 at 11:19 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com> wrote:
> > checkasm gets a little spammy with this commit by printing "No
> > accelerated colorspace conversion found from %s to %s.\n" from
> > libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> > downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> > the call to sws_getContext()?
>
> New patch attached that overrides the log level to prevent spamming.
I'll apply this tomorrow if there are no comments.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb
2024-06-27 14:02 ` Ramiro Polla
@ 2024-06-30 0:33 ` Sean McGovern
0 siblings, 0 replies; 14+ messages in thread
From: Sean McGovern @ 2024-06-30 0:33 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Hi Ramiro,
On Thu, Jun 27, 2024, 10:37 Ramiro Polla <ramiro.polla@gmail.com> wrote:
> On Tue, Jun 25, 2024 at 11:19 PM Ramiro Polla <ramiro.polla@gmail.com>
> wrote:
> > On Thu, Jun 20, 2024 at 4:59 PM Ramiro Polla <ramiro.polla@gmail.com>
> wrote:
> > > checkasm gets a little spammy with this commit by printing "No
> > > accelerated colorspace conversion found from %s to %s.\n" from
> > > libswscale/yuv2rgb.c multiple times. Any suggestions? Perhaps
> > > downgrading that message to AV_LOG_VERBOSE, or raising loglevel around
> > > the call to sws_getContext()?
> >
> > New patch attached that overrides the log level to prevent spamming.
>
> I'll apply this tomorrow if there are no comments.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
It looks like this exposed an issue in the AltiVec acceleration in
libswscale.
libswscale/ppc/yuv2rgb.c looks a bit too complex for me to investigate. I'd
be curious to see if this fails on a 64-bit POWER machine. I'll try to test
it later.
-- Sean McGovern
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2024-06-30 0:33 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-16 22:28 [FFmpeg-devel] [PATCH 1/6] swscale/yuv2rgb: fix conversion for widths not aligned to 8 Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 2/6] swscale/yuv2rgb: add macros to simplify code generation Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 3/6] swscale/yuv2rgb: fix yuv422p input in C code Ramiro Polla
2024-06-19 10:13 ` Michael Niedermayer
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 4/6] swscale/yuv2rgb: reindent after previous commit Ramiro Polla
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 5/6] checkasm: add tests for yuv2rgb Ramiro Polla
2024-06-20 14:59 ` Ramiro Polla
2024-06-25 21:19 ` Ramiro Polla
2024-06-27 14:02 ` Ramiro Polla
2024-06-30 0:33 ` Sean McGovern
2024-06-16 22:28 ` [FFmpeg-devel] [PATCH 6/6] swscale/yuv2rgb/x86: remove mmx/mmxext yuv2rgb functions Ramiro Polla
2024-06-16 23:15 ` James Almer
2024-06-17 12:46 ` Ramiro Polla
2024-06-20 14:50 ` Ramiro Polla
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git