[FFmpeg-devel] [PR] swscale/swscale_internal: Move altivec parts to ppc/ (PR #22278)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] swscale/swscale_internal: Move altivec parts to ppc/ (PR #22278)
Date: Tue, 24 Feb 2026 16:59:48 -0000
Message-ID: <177195238877.25.2545377984576741198@29965ddac10e> (raw)

PR #22278 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22278
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22278.patch

Alternative to part of #22274.


>From 19f937a30263acb4ed7b1d37d7417f428f43076d Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Tue, 24 Feb 2026 15:16:35 +0100
Subject: [PATCH 1/2] swscale/utils: Move altivec init/free code to
 yuv2rgb_altivec.c

This is in preparation for removing the util_altivec.h inclusion
in swscale_internal.h, which causes problems (on PPC) because
it redefines bool to something different from stdbool.h.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswscale/ppc/yuv2rgb_altivec.c | 31 +++++++++++++++++++++++++++++++
 libswscale/swscale_internal.h    |  3 +++
 libswscale/utils.c               | 24 ++++--------------------
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index a6b6650e02..3efb33d22e 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -96,6 +96,8 @@
 #include "libswscale/swscale_internal.h"
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
+#include "libavutil/error.h"
+#include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/pixdesc.h"
 #include "yuv2rgb_altivec.h"
@@ -871,4 +873,33 @@ YUV2PACKEDX_WRAPPER(rgba,  AV_PIX_FMT_RGBA);
 YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24);
 YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24);
 
+av_cold int ff_sws_init_altivec_bufs(SwsInternal *c)
+{
+    const SwsContext *const sws = &c->opts;
+
+    c->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
+    c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
+    if (!c->vYCoeffsBank || !c->vCCoeffsBank)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < c->vLumFilterSize * sws->dst_h; ++i) {
+        short *p = (short *)&c->vYCoeffsBank[i];
+        for (int j = 0; j < 8; ++j)
+            p[j] = c->vLumFilter[i];
+    }
+
+    for (int i = 0; i < c->vChrFilterSize * c->chrDstH; ++i) {
+        short *p = (short *)&c->vCCoeffsBank[i];
+        for (int j = 0; j < 8; ++j)
+            p[j] = c->vChrFilter[i];
+    }
+
+    return 0;
+}
+
+av_cold void ff_sws_free_altivec_bufs(SwsInternal *c)
+{
+    av_freep(&c->vYCoeffsBank);
+    av_freep(&c->vCCoeffsBank);
+}
 #endif /* HAVE_ALTIVEC */
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 5c58272664..036ccfe938 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1036,6 +1036,9 @@ void ff_sws_init_swscale_arm(SwsInternal *c);
 void ff_sws_init_swscale_loongarch(SwsInternal *c);
 void ff_sws_init_swscale_riscv(SwsInternal *c);
 
+int ff_sws_init_altivec_bufs(SwsInternal *c);
+void ff_sws_free_altivec_bufs(SwsInternal *c);
+
 void ff_hyscale_fast_c(SwsInternal *c, int16_t *dst, int dstWidth,
                        const uint8_t *src, int srcW, int xInc);
 void ff_hcscale_fast_c(SwsInternal *c, int16_t *dst1, int16_t *dst2,
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 52095ab2c7..a81c216b6c 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1725,24 +1725,9 @@ av_cold int ff_sws_init_single_context(SwsContext *sws, SwsFilter *srcFilter,
             goto fail;
 
 #if HAVE_ALTIVEC
-        c->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
-        c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
-        if (c->vYCoeffsBank == NULL || c->vCCoeffsBank == NULL)
-            goto nomem;
-
-        for (i = 0; i < c->vLumFilterSize * sws->dst_h; i++) {
-            int j;
-            short *p = (short *)&c->vYCoeffsBank[i];
-            for (j = 0; j < 8; j++)
-                p[j] = c->vLumFilter[i];
-        }
-
-        for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
-            int j;
-            short *p = (short *)&c->vCCoeffsBank[i];
-            for (j = 0; j < 8; j++)
-                p[j] = c->vChrFilter[i];
-        }
+        ret = ff_sws_init_altivec_bufs(c);
+        if (ret < 0)
+            goto fail;
 #endif
     }
 
@@ -2282,8 +2267,7 @@ void sws_freeContext(SwsContext *sws)
     av_freep(&c->hLumFilter);
     av_freep(&c->hChrFilter);
 #if HAVE_ALTIVEC
-    av_freep(&c->vYCoeffsBank);
-    av_freep(&c->vCCoeffsBank);
+    ff_sws_free_altivec_bufs(c);
 #endif
 
     av_freep(&c->vLumFilterPos);
-- 
2.52.0


>From ee99b9c035c8c52c8aecca0bf14d71723e4ef044 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Tue, 24 Feb 2026 16:41:07 +0100
Subject: [PATCH 2/2] swscale/swscale_internal: Move altivec parts to ppc/

Up until now, several altivec-specific fields have been put
directly into SwsInternal if HAVE_ALTIVEC is true. These fields
are of altivec-specific vector types and therefore
require altivec-specific headers to be included.

Unfortunately, said altivec-specific headers redefine
bool in a manner that is incompatible with stdbool.h.
swscale/ops.h uses bool and this led graph.c and ops.c
to disagree about the layout of structures from ops.h,
leading to heap corruption [1], [2] in the sws-unscaled
FATE test.

Fix this by moving the altivec-specific parts out of SwsInternal
and into a structure that extends SwsInternal and is allocated
jointly with it. Said structure is local to yuv2rgb_altivec.c,
because this is the only file accessing said fields. Should
more files need them, an altivec-specific swscale header would
need to be added.

Thanks to jfiusdq <jfiusdq@proton.me> for analyzing the issue.

[1]: https://fate.ffmpeg.org/report.cgi?slot=ppc64-linux-gcc-14.3-asan&time=20260224065643
[2]: https://fate.ffmpeg.org/report.cgi?slot=ppc64-linux-gcc-14.3&time=20260224051615

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libswscale/ppc/yuv2rgb_altivec.c | 100 +++++++++++++++++++++----------
 libswscale/ppc/yuv2yuv_altivec.c |   1 +
 libswscale/swscale_internal.h    |  21 +++----
 libswscale/utils.c               |   2 +-
 4 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 3efb33d22e..cad3f8d7c0 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -103,6 +103,34 @@
 #include "yuv2rgb_altivec.h"
 
 #if HAVE_ALTIVEC
+// util_altivec.h includes the system header altivec.h which redefines bool,
+// making it incompatible with stdbool.h. It therefore must not be included
+// before another one of our headers that may use bool.
+#include "libavutil/ppc/util_altivec.h"
+#include <assert.h>
+
+typedef struct SwsInternalAltivec {
+    SwsInternal c;
+
+    vector signed short   CY;
+    vector signed short   CRV;
+    vector signed short   CBU;
+    vector signed short   CGU;
+    vector signed short   CGV;
+    vector signed short   OY;
+    vector unsigned short CSHIFT;
+    vector signed short  *vYCoeffsBank, *vCCoeffsBank;
+} SwsInternalAltivec;
+// Check that the sizes of the types match.
+// Note that given that SwsInternal is always allocated via av_malloc,
+// every pointer to an SwsInternal is always sufficiently aligned.
+static_assert(sizeof(SwsInternalAltivec) <= sizeof(SwsInternal) + SWSINTERNAL_ADDITIONAL_ASM_SIZE,
+              "SWSINTERNAL_ADDITIONAL_ASM_SIZE needs to be increased");
+
+static inline SwsInternalAltivec *sws_internal_altivec(SwsInternal *c)
+{
+    return (SwsInternalAltivec*)c;
+}
 
 #undef PROFILE_THE_BEAST
 #undef INC_SCALING
@@ -259,25 +287,26 @@ static inline void cvtyuvtoRGB(SwsInternal *c, vector signed short Y,
                                vector signed short *R, vector signed short *G,
                                vector signed short *B)
 {
+    SwsInternalAltivec *const a = sws_internal_altivec(c);
     vector signed short vx, ux, uvx;
 
-    Y = vec_mradds(Y, c->CY, c->OY);
+    Y = vec_mradds(Y, a->CY, a->OY);
     U = vec_sub(U, (vector signed short)
                        vec_splat((vector signed short) { 128 }, 0));
     V = vec_sub(V, (vector signed short)
                        vec_splat((vector signed short) { 128 }, 0));
 
-    // ux  = (CBU * (u << c->CSHIFT) + 0x4000) >> 15;
-    ux = vec_sl(U, c->CSHIFT);
-    *B = vec_mradds(ux, c->CBU, Y);
+    // ux  = (CBU * (u << a->CSHIFT) + 0x4000) >> 15;
+    ux = vec_sl(U, a->CSHIFT);
+    *B = vec_mradds(ux, a->CBU, Y);
 
-    // vx  = (CRV * (v << c->CSHIFT) + 0x4000) >> 15;
-    vx = vec_sl(V, c->CSHIFT);
-    *R = vec_mradds(vx, c->CRV, Y);
+    // vx  = (CRV * (v << a->CSHIFT) + 0x4000) >> 15;
+    vx = vec_sl(V, a->CSHIFT);
+    *R = vec_mradds(vx, a->CRV, Y);
 
     // uvx = ((CGU * u) + (CGV * v)) >> 15;
-    uvx = vec_mradds(U, c->CGU, Y);
-    *G  = vec_mradds(V, c->CGV, uvx);
+    uvx = vec_mradds(U, a->CGU, Y);
+    *G  = vec_mradds(V, a->CGV, uvx);
 }
 
 /*
@@ -301,6 +330,7 @@ static int altivec_ ## name(SwsInternal *c, const unsigned char *const *in,   \
                             const int *instrides, int srcSliceY, int srcSliceH,   \
                             unsigned char *const *oplanes, const int *outstrides) \
 {                                                                             \
+    SwsInternalAltivec *const a = sws_internal_altivec(c);                    \
     int w = c->opts.src_w;                                                    \
     int h = srcSliceH;                                                        \
     int i, j;                                                                 \
@@ -318,13 +348,13 @@ static int altivec_ ## name(SwsInternal *c, const unsigned char *const *in,   \
     vector signed short R1, G1, B1;                                           \
     vector unsigned char R, G, B;                                             \
                                                                               \
-    vector signed short lCY       = c->CY;                                    \
-    vector signed short lOY       = c->OY;                                    \
-    vector signed short lCRV      = c->CRV;                                   \
-    vector signed short lCBU      = c->CBU;                                   \
-    vector signed short lCGU      = c->CGU;                                   \
-    vector signed short lCGV      = c->CGV;                                   \
-    vector unsigned short lCSHIFT = c->CSHIFT;                                \
+    vector signed short lCY       = a->CY;                                    \
+    vector signed short lOY       = a->OY;                                    \
+    vector signed short lCRV      = a->CRV;                                   \
+    vector signed short lCBU      = a->CBU;                                   \
+    vector signed short lCGU      = a->CGU;                                   \
+    vector signed short lCGV      = a->CGV;                                   \
+    vector unsigned short lCSHIFT = a->CSHIFT;                                \
                                                                               \
     const ubyte *y1i = in[0];                                                 \
     const ubyte *y2i = in[0] + instrides[0];                                  \
@@ -612,6 +642,8 @@ av_cold void ff_yuv2rgb_init_tables_ppc(SwsInternal *c,
                                         int saturation)
 {
 #if HAVE_ALTIVEC
+    SwsInternalAltivec *const a = sws_internal_altivec(c);
+
     union {
         DECLARE_ALIGNED(16, signed short, tmp)[8];
         vector signed short vec;
@@ -627,13 +659,13 @@ av_cold void ff_yuv2rgb_init_tables_ppc(SwsInternal *c,
     buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16));  // cgu
     buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16));  // cgv
 
-    c->CSHIFT = (vector unsigned short) vec_splat_u16(2);
-    c->CY     = vec_splat((vector signed short) buf.vec, 0);
-    c->OY     = vec_splat((vector signed short) buf.vec, 1);
-    c->CRV    = vec_splat((vector signed short) buf.vec, 2);
-    c->CBU    = vec_splat((vector signed short) buf.vec, 3);
-    c->CGU    = vec_splat((vector signed short) buf.vec, 4);
-    c->CGV    = vec_splat((vector signed short) buf.vec, 5);
+    a->CSHIFT = (vector unsigned short) vec_splat_u16(2);
+    a->CY     = vec_splat((vector signed short) buf.vec, 0);
+    a->OY     = vec_splat((vector signed short) buf.vec, 1);
+    a->CRV    = vec_splat((vector signed short) buf.vec, 2);
+    a->CBU    = vec_splat((vector signed short) buf.vec, 3);
+    a->CGU    = vec_splat((vector signed short) buf.vec, 4);
+    a->CGV    = vec_splat((vector signed short) buf.vec, 5);
     return;
 #endif /* HAVE_ALTIVEC */
 }
@@ -653,6 +685,8 @@ static av_always_inline void yuv2packedX_altivec(SwsInternal *c,
                                                  int dstW, int dstY,
                                                  enum AVPixelFormat target)
 {
+    SwsInternalAltivec *const a = sws_internal_altivec(c);
+
     int i, j;
     vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V;
     vector signed short R0, G0, B0, R1, G1, B1;
@@ -666,8 +700,8 @@ static av_always_inline void yuv2packedX_altivec(SwsInternal *c,
 
     vector signed short *YCoeffs, *CCoeffs;
 
-    YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize;
-    CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize;
+    YCoeffs = a->vYCoeffsBank + dstY * lumFilterSize;
+    CCoeffs = a->vCCoeffsBank + dstY * chrFilterSize;
 
     out = (vector unsigned char *) dest;
 
@@ -876,20 +910,21 @@ YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24);
 av_cold int ff_sws_init_altivec_bufs(SwsInternal *c)
 {
     const SwsContext *const sws = &c->opts;
+    SwsInternalAltivec *const a = sws_internal_altivec(c);
 
-    c->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
-    c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
-    if (!c->vYCoeffsBank || !c->vCCoeffsBank)
+    a->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*a->vYCoeffsBank));
+    a->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*a->vCCoeffsBank));
+    if (!a->vYCoeffsBank || !a->vCCoeffsBank)
         return AVERROR(ENOMEM);
 
     for (int i = 0; i < c->vLumFilterSize * sws->dst_h; ++i) {
-        short *p = (short *)&c->vYCoeffsBank[i];
+        short *p = (short *)&a->vYCoeffsBank[i];
         for (int j = 0; j < 8; ++j)
             p[j] = c->vLumFilter[i];
     }
 
     for (int i = 0; i < c->vChrFilterSize * c->chrDstH; ++i) {
-        short *p = (short *)&c->vCCoeffsBank[i];
+        short *p = (short *)&a->vCCoeffsBank[i];
         for (int j = 0; j < 8; ++j)
             p[j] = c->vChrFilter[i];
     }
@@ -899,7 +934,8 @@ av_cold int ff_sws_init_altivec_bufs(SwsInternal *c)
 
 av_cold void ff_sws_free_altivec_bufs(SwsInternal *c)
 {
-    av_freep(&c->vYCoeffsBank);
-    av_freep(&c->vCCoeffsBank);
+    SwsInternalAltivec *const a = sws_internal_altivec(c);
+    av_freep(&a->vYCoeffsBank);
+    av_freep(&a->vCCoeffsBank);
 }
 #endif /* HAVE_ALTIVEC */
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
index 7884c38d69..3e3bbea23a 100644
--- a/libswscale/ppc/yuv2yuv_altivec.c
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -30,6 +30,7 @@
 #include "libswscale/swscale_internal.h"
 
 #if HAVE_ALTIVEC
+#include "libavutil/ppc/util_altivec.h"
 
 static int yv12toyuy2_unscaled_altivec(SwsInternal *c, const uint8_t *const src[],
                                        const int srcStride[], int srcSliceY,
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 036ccfe938..b9e93642b1 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -37,11 +37,15 @@
 #include "libavutil/pixfmt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/slicethread.h"
-#if HAVE_ALTIVEC
-#include "libavutil/ppc/util_altivec.h"
-#endif
 #include "libavutil/half2float.h"
 
+#if HAVE_ALTIVEC
+#define SWSINTERNAL_ADDITIONAL_ASM_SIZE (7*16 + 2*8 + /* alignment */ 16)
+#endif
+#ifndef SWSINTERNAL_ADDITIONAL_ASM_SIZE
+#define SWSINTERNAL_ADDITIONAL_ASM_SIZE 0
+#endif
+
 #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
 
 #define YUVRGB_TABLE_HEADROOM 512
@@ -544,17 +548,6 @@ struct SwsInternal {
 
     const uint8_t *chrDither8, *lumDither8;
 
-#if HAVE_ALTIVEC
-    vector signed short   CY;
-    vector signed short   CRV;
-    vector signed short   CBU;
-    vector signed short   CGU;
-    vector signed short   CGV;
-    vector signed short   OY;
-    vector unsigned short CSHIFT;
-    vector signed short  *vYCoeffsBank, *vCCoeffsBank;
-#endif
-
     int use_mmx_vfilter;
 
 /* pre defined color-spaces gamma */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index a81c216b6c..0416497aa9 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1027,7 +1027,7 @@ int sws_getColorspaceDetails(SwsContext *sws, int **inv_table,
 
 SwsContext *sws_alloc_context(void)
 {
-    SwsInternal *c = (SwsInternal *) av_mallocz(sizeof(SwsInternal));
+    SwsInternal *c = av_mallocz(sizeof(*c) + SWSINTERNAL_ADDITIONAL_ASM_SIZE);
     if (!c)
         return NULL;
 
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2026-02-24 21:47 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=177195238877.25.2545377984576741198@29965ddac10e \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git