Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow
@ 2022-12-12 20:08 Drew Dunne
  2022-12-12 20:09 ` Drew Dunne
  2022-12-13 20:32 ` Michael Niedermayer
  0 siblings, 2 replies; 3+ messages in thread
From: Drew Dunne @ 2022-12-12 20:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Drew Dunne

Previously I sent this patch to solve an overflow in these templates. That
patch wasn't used in favor of one that biased the output calculations to
avoid the double clipping. Now I've found an underflow case, which I've put
the command below, and I'll attach an input image in a reply.

./ffmpeg \
  -f rawvideo -video_size 64x64 -pixel_format yuva420p10le \
  -i yuv2rgb_underflow_w64h64.yuva \
  -filter_complex \
  "scale=flags=bicubic+full_chroma_int+full_chroma_inp+bitexact+accurate_rnd:in_color_matrix=bt2020:out_color_matrix=bt2020:in_range=mpeg:out_range=mpeg,format=rgba64[out]" \
  -f rawvideo -codec:v:0 rawvideo -pixel_format rgba64 -map '[out]' \
  -y underflow_w64h64.rgba64

The previous overflow case was in this thread:
https://ffmpeg.org/pipermail/ffmpeg-devel/2022-November/303532.html

---
 libswscale/output.c | 96 ++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..8abf043d73 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1109,20 +1109,20 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
         B =                            U * c->yuv2rgb_u2b_coeff;
 
         // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
             dest += 8;
         } else {
-            output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             dest += 6;
         }
     }
@@ -1175,20 +1175,20 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
             A2 += 1 << 13;
         }
 
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
             dest += 8;
         } else {
-            output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             dest += 6;
         }
     }
@@ -1232,20 +1232,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-                output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
                 dest += 8;
             } else {
-                output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 dest += 6;
             }
         }
@@ -1278,20 +1278,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-                output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
                 dest += 8;
             } else {
-                output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 dest += 6;
             }
         }
@@ -1351,9 +1351,9 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
         B =                            U * c->yuv2rgb_u2b_coeff;
 
         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y)>>14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y)>>14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y)>>14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
             dest += 4;
@@ -1404,9 +1404,9 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
             A += 1 << 13;
         }
 
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
             dest += 4;
@@ -1448,9 +1448,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                 dest += 4;
@@ -1481,9 +1481,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                 dest += 4;
-- 
2.39.0.rc1.256.g54fd8350bd-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-12-13 20:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-12 20:08 [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow Drew Dunne
2022-12-12 20:09 ` Drew Dunne
2022-12-13 20:32 ` Michael Niedermayer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git