From 9de09c4db6c914eeec505d4365850fda3bf86b8d Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 4 Jun 2025 03:20:02 +0200
Subject: [PATCH 1/6] avfilter/vf_overlay: Don't perform UB pointer arithmetic

This happens when the pixel format of the output does not
have an alpha channel. It leads to FATE failures with
the ffmpeg-filter_colorkey, filter-overlay-dvdsub-2397,
filter-overlay,
filter-overlay_{gbrp_gbrap,nv12,nv21,yuv420,yuv420_yuva420,
yuv420p10,yuv422_yuva422,yuv422p10,yuv444_yuva444,yuv444p10}
and sub2video tests when using Clang UBSan.

Fix this by only performing the pointer arithmetic when
it is going to be used. This can be checked via variables
that are compile-time constants due to inlining, so that the
checks are free. Given that the pointer is potentially
used as a function argument, the compiler could not elide
the calculation, but now it can. The size of .text decreased
by 1632B with GCC 14 and by 1392B with Clang 19 (both -O3).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavfilter/vf_overlay.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 8560ed7c17..0a9ff60ebb 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -467,7 +467,7 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
     int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);                                                              \
     int yp = y>>vsub;                                                                                      \
     int xp = x>>hsub;                                                                                      \
-    uint##depth##_t *s, *sp, *d, *dp, *dap, *a, *da, *ap;                                                  \
+    uint##depth##_t *s, *sp, *d, *dp, *dap, *a, *ap;                                                       \
     int jmax, j, k, kmax;                                                                                  \
     int slice_start, slice_end;                                                                            \
     const uint##depth##_t max = (1 << nbits) - 1;                                                          \
@@ -486,14 +486,15 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
                       + (yp + slice_start) * dst->linesize[dst_plane]                                      \
                       + dst_offset);                                                                       \
     ap = (uint##depth##_t *)(src->data[3] + (slice_start << vsub) * src->linesize[3]);                     \
-    dap = (uint##depth##_t *)(dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3]);             \
+    if (main_has_alpha)                                                                                    \
+        dap = (uint##depth##_t *)(dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3]);         \
                                                                                                            \
     for (j = slice_start; j < slice_end; j++) {                                                            \
         k = FFMAX(-xp, 0);                                                                                 \
         d = dp + (xp+k) * dst_step;                                                                        \
         s = sp + k;                                                                                        \
         a = ap + (k<<hsub);                                                                                \
-        da = dap + ((xp+k) << hsub);                                                                       \
+        uint##depth##_t *da = main_has_alpha ? dap + ((xp+k) << hsub) : NULL;                              \
         kmax = FFMIN(-xp + dst_wp, src_wp);                                                                \
                                                                                                            \
         if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) {                       \
@@ -502,7 +503,8 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
                                                                                                            \
             s += c;                                                                                        \
             d += dst_step * c;                                                                             \
-            da += (1 << hsub) * c;                                                                         \
+            if (main_has_alpha)                                                                            \
+                da += (1 << hsub) * c;                                                                     \
             a += (1 << hsub) * c;                                                                          \
             k += c;                                                                                        \
         }                                                                                                  \
@@ -560,13 +562,15 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
             }                                                                                              \
             s++;                                                                                           \
             d += dst_step;                                                                                 \
-            da += 1 << hsub;                                                                               \
+            if (main_has_alpha)                                                                            \
+                da += 1 << hsub;                                                                           \
             a += 1 << hsub;                                                                                \
         }                                                                                                  \
         dp += dst->linesize[dst_plane] / bytes;                                                            \
         sp += src->linesize[i] / bytes;                                                                    \
         ap += (1 << vsub) * src->linesize[3] / bytes;                                                      \
-        dap += (1 << vsub) * dst->linesize[3] / bytes;                                                     \
+        if (main_has_alpha)                                                                                \
+            dap += (1 << vsub) * dst->linesize[3] / bytes;                                                 \
     }                                                                                                      \
 }
 DEFINE_BLEND_PLANE(8, 8)
-- 
2.45.2