Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow
@ 2022-12-12 20:08 Drew Dunne
  2022-12-12 20:09 ` Drew Dunne
  2022-12-13 20:32 ` Michael Niedermayer
  0 siblings, 2 replies; 3+ messages in thread
From: Drew Dunne @ 2022-12-12 20:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Drew Dunne

Previously I sent this patch to solve an overflow in these templates. That
patch wasn't used in favor of one that biased the output calculations to
avoid the double clipping. Now I've found an underflow case, which I've put
the command below, and I'll attach an input image in a reply.

./ffmpeg \
  -f rawvideo -video_size 64x64 -pixel_format yuva420p10le \
  -i yuv2rgb_underflow_w64h64.yuva \
  -filter_complex \
  "scale=flags=bicubic+full_chroma_int+full_chroma_inp+bitexact+accurate_rnd:in_color_matrix=bt2020:out_color_matrix=bt2020:in_range=mpeg:out_range=mpeg,format=rgba64[out]" \
  -f rawvideo -codec:v:0 rawvideo -pixel_format rgba64 -map '[out]' \
  -y underflow_w64h64.rgba64

The previous overflow case was in this thread:
https://ffmpeg.org/pipermail/ffmpeg-devel/2022-November/303532.html

---
 libswscale/output.c | 96 ++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..8abf043d73 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1109,20 +1109,20 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
         B =                            U * c->yuv2rgb_u2b_coeff;
 
         // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
             dest += 8;
         } else {
-            output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             dest += 6;
         }
     }
@@ -1175,20 +1175,20 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
             A2 += 1 << 13;
         }
 
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
             dest += 8;
         } else {
-            output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-            output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+            output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
             dest += 6;
         }
     }
@@ -1232,20 +1232,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-                output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
                 dest += 8;
             } else {
-                output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 dest += 6;
             }
         }
@@ -1278,20 +1278,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >> 14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >> 14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-                output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
                 dest += 8;
             } else {
-                output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
-                output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+                output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G, Y2)) >> 14) + (1<<15), 16));
+                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R, Y2)) >> 14) + (1<<15), 16));
                 dest += 6;
             }
         }
@@ -1351,9 +1351,9 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
         B =                            U * c->yuv2rgb_u2b_coeff;
 
         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y)>>14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y)>>14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y)>>14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
             dest += 4;
@@ -1404,9 +1404,9 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
             A += 1 << 13;
         }
 
-        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
-        output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
-        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
         if (eightbytes) {
             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
             dest += 4;
@@ -1448,9 +1448,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                 dest += 4;
@@ -1481,9 +1481,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
-            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y))>>14) + (1<<15), 16));
+            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y))>>14) + (1<<15), 16));
             if (eightbytes) {
                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
                 dest += 4;
-- 
2.39.0.rc1.256.g54fd8350bd-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow
  2022-12-12 20:08 [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow Drew Dunne
@ 2022-12-12 20:09 ` Drew Dunne
  2022-12-13 20:32 ` Michael Niedermayer
  1 sibling, 0 replies; 3+ messages in thread
From: Drew Dunne @ 2022-12-12 20:09 UTC (permalink / raw)
  To: ffmpeg-devel

[-- Attachment #1: Type: text/plain, Size: 13984 bytes --]

Here is the problematic input video attached.

On Mon, Dec 12, 2022 at 3:08 PM Drew Dunne <asdunne@google.com> wrote:

> Previously I sent this patch to solve an overflow in these templates. That
> patch wasn't used in favor of one that biased the output calculations to
> avoid the double clipping. Now I've found an underflow case, which I've put
> the command below, and I'll attach an input image in a reply.
>
> ./ffmpeg \
>   -f rawvideo -video_size 64x64 -pixel_format yuva420p10le \
>   -i yuv2rgb_underflow_w64h64.yuva \
>   -filter_complex \
>
> "scale=flags=bicubic+full_chroma_int+full_chroma_inp+bitexact+accurate_rnd:in_color_matrix=bt2020:out_color_matrix=bt2020:in_range=mpeg:out_range=mpeg,format=rgba64[out]"
> \
>   -f rawvideo -codec:v:0 rawvideo -pixel_format rgba64 -map '[out]' \
>   -y underflow_w64h64.rgba64
>
> The previous overflow case was in this thread:
> https://ffmpeg.org/pipermail/ffmpeg-devel/2022-November/303532.html
>
> ---
>  libswscale/output.c | 96 ++++++++++++++++++++++-----------------------
>  1 file changed, 48 insertions(+), 48 deletions(-)
>
> diff --git a/libswscale/output.c b/libswscale/output.c
> index 5c85bff971..8abf043d73 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -1109,20 +1109,20 @@ yuv2rgba64_X_c_template(SwsContext *c, const
> int16_t *lumFilter,
>          B =                            U * c->yuv2rgb_u2b_coeff;
>
>          // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
> -        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) +
> (1<<15), 16));
> -        output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) +
> (1<<15), 16));
> -        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) +
> (1<<15), 16));
> +        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >>
> 14) + (1<<15), 16));
> +        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >>
> 14) + (1<<15), 16));
> +        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >>
> 14) + (1<<15), 16));
>          if (eightbytes) {
>              output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
> -            output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>              output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
>              dest += 8;
>          } else {
> -            output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>              dest += 6;
>          }
>      }
> @@ -1175,20 +1175,20 @@ yuv2rgba64_2_c_template(SwsContext *c, const
> int32_t *buf[2],
>              A2 += 1 << 13;
>          }
>
> -        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) +
> (1<<15), 16));
> -        output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) +
> (1<<15), 16));
> -        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) +
> (1<<15), 16));
> +        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B, Y1)) >>
> 14) + (1<<15), 16));
> +        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G, Y1)) >>
> 14) + (1<<15), 16));
> +        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R, Y1)) >>
> 14) + (1<<15), 16));
>          if (eightbytes) {
>              output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
> -            output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>              output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
>              dest += 8;
>          } else {
> -            output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>              dest += 6;
>          }
>      }
> @@ -1232,20 +1232,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const
> int32_t *buf0,
>              G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
>              B =                            U * c->yuv2rgb_u2b_coeff;
>
> -            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B,
> Y1)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G,
> Y1)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R,
> Y1)) >> 14) + (1<<15), 16));
>              if (eightbytes) {
>                  output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >>
> 14);
> -                output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14)
> + (1<<15), 16));
> +                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>                  output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >>
> 14);
>                  dest += 8;
>              } else {
> -                output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14)
> + (1<<15), 16));
> +                output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>                  dest += 6;
>              }
>          }
> @@ -1278,20 +1278,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const
> int32_t *buf0,
>              G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
>              B =                            U * c->yuv2rgb_u2b_coeff;
>
> -            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B,
> Y1)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G,
> Y1)) >> 14) + (1<<15), 16));
> +            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R,
> Y1)) >> 14) + (1<<15), 16));
>              if (eightbytes) {
>                  output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >>
> 14);
> -                output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14)
> + (1<<15), 16));
> +                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[6], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>                  output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >>
> 14);
>                  dest += 8;
>              } else {
> -                output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14)
> + (1<<15), 16));
> -                output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14)
> + (1<<15), 16));
> +                output_pixel(&dest[3], av_clip_uintp2(((av_sat_add32(R_B,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[4], av_clip_uintp2(((av_sat_add32(  G,
> Y2)) >> 14) + (1<<15), 16));
> +                output_pixel(&dest[5], av_clip_uintp2(((av_sat_add32(B_R,
> Y2)) >> 14) + (1<<15), 16));
>                  dest += 6;
>              }
>          }
> @@ -1351,9 +1351,9 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const
> int16_t *lumFilter,
>          B =                            U * c->yuv2rgb_u2b_coeff;
>
>          // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
> -        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y)>>14) + (1<<15),
> 16));
> -        output_pixel(&dest[1], av_clip_uintp2(((  G + Y)>>14) + (1<<15),
> 16));
> -        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y)>>14) + (1<<15),
> 16));
> +        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B,
> Y))>>14) + (1<<15), 16));
> +        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G,
> Y))>>14) + (1<<15), 16));
> +        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R,
> Y))>>14) + (1<<15), 16));
>          if (eightbytes) {
>              output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
>              dest += 4;
> @@ -1404,9 +1404,9 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const
> int32_t *buf[2],
>              A += 1 << 13;
>          }
>
> -        output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) +
> (1<<15), 16));
> -        output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) +
> (1<<15), 16));
> -        output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) +
> (1<<15), 16));
> +        output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B,
> Y))>>14) + (1<<15), 16));
> +        output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G,
> Y))>>14) + (1<<15), 16));
> +        output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R,
> Y))>>14) + (1<<15), 16));
>          if (eightbytes) {
>              output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
>              dest += 4;
> @@ -1448,9 +1448,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const
> int32_t *buf0,
>              G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
>              B =                            U * c->yuv2rgb_u2b_coeff;
>
> -            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B,
> Y))>>14) + (1<<15), 16));
> +            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G,
> Y))>>14) + (1<<15), 16));
> +            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R,
> Y))>>14) + (1<<15), 16));
>              if (eightbytes) {
>                  output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
>                  dest += 4;
> @@ -1481,9 +1481,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const
> int32_t *buf0,
>              G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
>              B =                            U * c->yuv2rgb_u2b_coeff;
>
> -            output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) +
> (1<<15), 16));
> -            output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) +
> (1<<15), 16));
> +            output_pixel(&dest[0], av_clip_uintp2(((av_sat_add32(R_B,
> Y))>>14) + (1<<15), 16));
> +            output_pixel(&dest[1], av_clip_uintp2(((av_sat_add32(  G,
> Y))>>14) + (1<<15), 16));
> +            output_pixel(&dest[2], av_clip_uintp2(((av_sat_add32(B_R,
> Y))>>14) + (1<<15), 16));
>              if (eightbytes) {
>                  output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
>                  dest += 4;
> --
> 2.39.0.rc1.256.g54fd8350bd-goog
>
>

-- 
Drew Dunne
asdunne@google.com

[-- Attachment #2: yuv2rgb_underflow_w64h64.yuva --]
[-- Type: application/octet-stream, Size: 20480 bytes --]

[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow
  2022-12-12 20:08 [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow Drew Dunne
  2022-12-12 20:09 ` Drew Dunne
@ 2022-12-13 20:32 ` Michael Niedermayer
  1 sibling, 0 replies; 3+ messages in thread
From: Michael Niedermayer @ 2022-12-13 20:32 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2566 bytes --]

Hi

On Mon, Dec 12, 2022 at 08:08:28PM +0000, Drew Dunne wrote:
> Previously I sent this patch to solve an overflow in these templates. That
> patch wasn't used in favor of one that biased the output calculations to
> avoid the double clipping. Now I've found an underflow case, which I've put
> the command below, and I'll attach an input image in a reply.
> 
> ./ffmpeg \
>   -f rawvideo -video_size 64x64 -pixel_format yuva420p10le \
>   -i yuv2rgb_underflow_w64h64.yuva \
>   -filter_complex \
>   "scale=flags=bicubic+full_chroma_int+full_chroma_inp+bitexact+accurate_rnd:in_color_matrix=bt2020:out_color_matrix=bt2020:in_range=mpeg:out_range=mpeg,format=rgba64[out]" \
>   -f rawvideo -codec:v:0 rawvideo -pixel_format rgba64 -map '[out]' \
>   -y underflow_w64h64.rgba64

can you trigger a under/overflow with real input ?
I mean something where each pixel has a RGB value that can be represented
or is close to such value

Iam asking because if this never happens with real input then a solution
would be to use unsigend ints

If this happens with real input values then input values from
0.0 to 1.0 would have to generate a -1.5 or +2.5 on output
thats quite a overshoot
your example if iam reading it correctly uses white and black pixls
with which have chroma values maxed out. but white and black in reality
would have all R=G=B and thus no color,

the extra cliping takes time and it does not feel like the correct fix for
this
If someone gives me a pixel that has luma only achievable with R=G=B=0 and
then at  the same time chroma only achievable with G=R=MAX B=0 or something
similar. and then the surrounding pixels also add more overshooting during
scaling then iam not sure in which situation the actually color matters
because this is not a "real" color

That said, if you can make a change that handles such odd fuzzed cases
with cliping i dont mind but i think we should not slow down real input
for cases that are not as long as we correct all undefined behavior
and of course ensure this cannot actually occur with real input

Now as you send a 2nd such patch i thought i explain my view a bit better

also does this atually trigger under/overflows in all teh chnaged components
or just some of teh components ?

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Let us carefully observe those good qualities wherein our enemies excel us
and endeavor to excel them, by avoiding what is faulty, and imitating what
is excellent in them. -- Plutarch

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-12-13 20:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-12 20:08 [FFmpeg-devel] [PATCH] swscale/output: Use av_sat_add32 in yuv2rgba64 templates to avoid underflow Drew Dunne
2022-12-12 20:09 ` Drew Dunne
2022-12-13 20:32 ` Michael Niedermayer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git