From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] swscale/swscale_internal: Move altivec parts to ppc/ (PR #22278)
Date: Tue, 24 Feb 2026 16:59:48 -0000
Message-ID: <177195238877.25.2545377984576741198@29965ddac10e> (raw)
PR #22278 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22278
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22278.patch
Alternative to part of #22274.
>From 19f937a30263acb4ed7b1d37d7417f428f43076d Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Tue, 24 Feb 2026 15:16:35 +0100
Subject: [PATCH 1/2] swscale/utils: Move altivec init/free code to
yuv2rgb_altivec.c
This is in preparation for removing the util_altivec.h inclusion
in swscale_internal.h, which causes problems (on PPC) because
it redefines bool to something different from stdbool.h.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswscale/ppc/yuv2rgb_altivec.c | 31 +++++++++++++++++++++++++++++++
libswscale/swscale_internal.h | 3 +++
libswscale/utils.c | 24 ++++--------------------
3 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index a6b6650e02..3efb33d22e 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -96,6 +96,8 @@
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
+#include "libavutil/error.h"
+#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"
#include "yuv2rgb_altivec.h"
@@ -871,4 +873,33 @@ YUV2PACKEDX_WRAPPER(rgba, AV_PIX_FMT_RGBA);
YUV2PACKEDX_WRAPPER(rgb24, AV_PIX_FMT_RGB24);
YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24);
+av_cold int ff_sws_init_altivec_bufs(SwsInternal *c)
+{
+ const SwsContext *const sws = &c->opts;
+
+ c->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
+ c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
+ if (!c->vYCoeffsBank || !c->vCCoeffsBank)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < c->vLumFilterSize * sws->dst_h; ++i) {
+ short *p = (short *)&c->vYCoeffsBank[i];
+ for (int j = 0; j < 8; ++j)
+ p[j] = c->vLumFilter[i];
+ }
+
+ for (int i = 0; i < c->vChrFilterSize * c->chrDstH; ++i) {
+ short *p = (short *)&c->vCCoeffsBank[i];
+ for (int j = 0; j < 8; ++j)
+ p[j] = c->vChrFilter[i];
+ }
+
+ return 0;
+}
+
+av_cold void ff_sws_free_altivec_bufs(SwsInternal *c)
+{
+ av_freep(&c->vYCoeffsBank);
+ av_freep(&c->vCCoeffsBank);
+}
#endif /* HAVE_ALTIVEC */
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 5c58272664..036ccfe938 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1036,6 +1036,9 @@ void ff_sws_init_swscale_arm(SwsInternal *c);
void ff_sws_init_swscale_loongarch(SwsInternal *c);
void ff_sws_init_swscale_riscv(SwsInternal *c);
+int ff_sws_init_altivec_bufs(SwsInternal *c);
+void ff_sws_free_altivec_bufs(SwsInternal *c);
+
void ff_hyscale_fast_c(SwsInternal *c, int16_t *dst, int dstWidth,
const uint8_t *src, int srcW, int xInc);
void ff_hcscale_fast_c(SwsInternal *c, int16_t *dst1, int16_t *dst2,
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 52095ab2c7..a81c216b6c 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1725,24 +1725,9 @@ av_cold int ff_sws_init_single_context(SwsContext *sws, SwsFilter *srcFilter,
goto fail;
#if HAVE_ALTIVEC
- c->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
- c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
- if (c->vYCoeffsBank == NULL || c->vCCoeffsBank == NULL)
- goto nomem;
-
- for (i = 0; i < c->vLumFilterSize * sws->dst_h; i++) {
- int j;
- short *p = (short *)&c->vYCoeffsBank[i];
- for (j = 0; j < 8; j++)
- p[j] = c->vLumFilter[i];
- }
-
- for (i = 0; i < c->vChrFilterSize * c->chrDstH; i++) {
- int j;
- short *p = (short *)&c->vCCoeffsBank[i];
- for (j = 0; j < 8; j++)
- p[j] = c->vChrFilter[i];
- }
+ ret = ff_sws_init_altivec_bufs(c);
+ if (ret < 0)
+ goto fail;
#endif
}
@@ -2282,8 +2267,7 @@ void sws_freeContext(SwsContext *sws)
av_freep(&c->hLumFilter);
av_freep(&c->hChrFilter);
#if HAVE_ALTIVEC
- av_freep(&c->vYCoeffsBank);
- av_freep(&c->vCCoeffsBank);
+ ff_sws_free_altivec_bufs(c);
#endif
av_freep(&c->vLumFilterPos);
--
2.52.0
>From ee99b9c035c8c52c8aecca0bf14d71723e4ef044 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Tue, 24 Feb 2026 16:41:07 +0100
Subject: [PATCH 2/2] swscale/swscale_internal: Move altivec parts to ppc/
Up until now, several altivec-specific fields have been put
directly into SwsInternal whenever HAVE_ALTIVEC is true. These fields
are of altivec-specific vector types and therefore
require altivec-specific headers to be included.
Unfortunately, said altivec-specific headers redefine
bool in a manner that is incompatible with stdbool.h.
swscale/ops.h uses bool and this led graph.c and ops.c
to disagree about the layout of structures from ops.h,
leading to heap corruption [1], [2] in the sws-unscaled
FATE test.
Fix this by moving the altivec-specific parts out of SwsInternal
and into a structure that extends SwsInternal and is allocated
jointly with it. Said structure is local to yuv2rgb_altivec.c,
because this is the only file accessing said fields. Should
more files need them, an altivec-specific swscale header would
need to be added.
Thanks to jfiusdq <jfiusdq@proton.me> for analyzing the issue.
[1]: https://fate.ffmpeg.org/report.cgi?slot=ppc64-linux-gcc-14.3-asan&time=20260224065643
[2]: https://fate.ffmpeg.org/report.cgi?slot=ppc64-linux-gcc-14.3&time=20260224051615
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libswscale/ppc/yuv2rgb_altivec.c | 100 +++++++++++++++++++++----------
libswscale/ppc/yuv2yuv_altivec.c | 1 +
libswscale/swscale_internal.h | 21 +++----
libswscale/utils.c | 2 +-
4 files changed, 77 insertions(+), 47 deletions(-)
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 3efb33d22e..cad3f8d7c0 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -103,6 +103,34 @@
#include "yuv2rgb_altivec.h"
#if HAVE_ALTIVEC
+// util_altivec.h includes the system header altivec.h which redefines bool,
+// making it incompatible with stdbool.h. It therefore must not be included
+// before another one of our headers that may use bool.
+#include "libavutil/ppc/util_altivec.h"
+#include <assert.h>
+
+typedef struct SwsInternalAltivec {
+ SwsInternal c;
+
+ vector signed short CY;
+ vector signed short CRV;
+ vector signed short CBU;
+ vector signed short CGU;
+ vector signed short CGV;
+ vector signed short OY;
+ vector unsigned short CSHIFT;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
+} SwsInternalAltivec;
+// Check that SwsInternalAltivec fits into the allocation made by sws_alloc_context().
+// Note that given that SwsInternal is always allocated via av_malloc,
+// every pointer to an SwsInternal is always sufficiently aligned.
+static_assert(sizeof(SwsInternalAltivec) <= sizeof(SwsInternal) + SWSINTERNAL_ADDITIONAL_ASM_SIZE,
+ "SWSINTERNAL_ADDITIONAL_ASM_SIZE needs to be increased");
+
+static inline SwsInternalAltivec *sws_internal_altivec(SwsInternal *c)
+{
+ return (SwsInternalAltivec*)c;
+}
#undef PROFILE_THE_BEAST
#undef INC_SCALING
@@ -259,25 +287,26 @@ static inline void cvtyuvtoRGB(SwsInternal *c, vector signed short Y,
vector signed short *R, vector signed short *G,
vector signed short *B)
{
+ SwsInternalAltivec *const a = sws_internal_altivec(c);
vector signed short vx, ux, uvx;
- Y = vec_mradds(Y, c->CY, c->OY);
+ Y = vec_mradds(Y, a->CY, a->OY);
U = vec_sub(U, (vector signed short)
vec_splat((vector signed short) { 128 }, 0));
V = vec_sub(V, (vector signed short)
vec_splat((vector signed short) { 128 }, 0));
- // ux = (CBU * (u << c->CSHIFT) + 0x4000) >> 15;
- ux = vec_sl(U, c->CSHIFT);
- *B = vec_mradds(ux, c->CBU, Y);
+ // ux = (CBU * (u << a->CSHIFT) + 0x4000) >> 15;
+ ux = vec_sl(U, a->CSHIFT);
+ *B = vec_mradds(ux, a->CBU, Y);
- // vx = (CRV * (v << c->CSHIFT) + 0x4000) >> 15;
- vx = vec_sl(V, c->CSHIFT);
- *R = vec_mradds(vx, c->CRV, Y);
+ // vx = (CRV * (v << a->CSHIFT) + 0x4000) >> 15;
+ vx = vec_sl(V, a->CSHIFT);
+ *R = vec_mradds(vx, a->CRV, Y);
// uvx = ((CGU * u) + (CGV * v)) >> 15;
- uvx = vec_mradds(U, c->CGU, Y);
- *G = vec_mradds(V, c->CGV, uvx);
+ uvx = vec_mradds(U, a->CGU, Y);
+ *G = vec_mradds(V, a->CGV, uvx);
}
/*
@@ -301,6 +330,7 @@ static int altivec_ ## name(SwsInternal *c, const unsigned char *const *in, \
const int *instrides, int srcSliceY, int srcSliceH, \
unsigned char *const *oplanes, const int *outstrides) \
{ \
+ SwsInternalAltivec *const a = sws_internal_altivec(c); \
int w = c->opts.src_w; \
int h = srcSliceH; \
int i, j; \
@@ -318,13 +348,13 @@ static int altivec_ ## name(SwsInternal *c, const unsigned char *const *in, \
vector signed short R1, G1, B1; \
vector unsigned char R, G, B; \
\
- vector signed short lCY = c->CY; \
- vector signed short lOY = c->OY; \
- vector signed short lCRV = c->CRV; \
- vector signed short lCBU = c->CBU; \
- vector signed short lCGU = c->CGU; \
- vector signed short lCGV = c->CGV; \
- vector unsigned short lCSHIFT = c->CSHIFT; \
+ vector signed short lCY = a->CY; \
+ vector signed short lOY = a->OY; \
+ vector signed short lCRV = a->CRV; \
+ vector signed short lCBU = a->CBU; \
+ vector signed short lCGU = a->CGU; \
+ vector signed short lCGV = a->CGV; \
+ vector unsigned short lCSHIFT = a->CSHIFT; \
\
const ubyte *y1i = in[0]; \
const ubyte *y2i = in[0] + instrides[0]; \
@@ -612,6 +642,8 @@ av_cold void ff_yuv2rgb_init_tables_ppc(SwsInternal *c,
int saturation)
{
#if HAVE_ALTIVEC
+ SwsInternalAltivec *const a = sws_internal_altivec(c);
+
union {
DECLARE_ALIGNED(16, signed short, tmp)[8];
vector signed short vec;
@@ -627,13 +659,13 @@ av_cold void ff_yuv2rgb_init_tables_ppc(SwsInternal *c,
buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgu
buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16)); // cgv
- c->CSHIFT = (vector unsigned short) vec_splat_u16(2);
- c->CY = vec_splat((vector signed short) buf.vec, 0);
- c->OY = vec_splat((vector signed short) buf.vec, 1);
- c->CRV = vec_splat((vector signed short) buf.vec, 2);
- c->CBU = vec_splat((vector signed short) buf.vec, 3);
- c->CGU = vec_splat((vector signed short) buf.vec, 4);
- c->CGV = vec_splat((vector signed short) buf.vec, 5);
+ a->CSHIFT = (vector unsigned short) vec_splat_u16(2);
+ a->CY = vec_splat((vector signed short) buf.vec, 0);
+ a->OY = vec_splat((vector signed short) buf.vec, 1);
+ a->CRV = vec_splat((vector signed short) buf.vec, 2);
+ a->CBU = vec_splat((vector signed short) buf.vec, 3);
+ a->CGU = vec_splat((vector signed short) buf.vec, 4);
+ a->CGV = vec_splat((vector signed short) buf.vec, 5);
return;
#endif /* HAVE_ALTIVEC */
}
@@ -653,6 +685,8 @@ static av_always_inline void yuv2packedX_altivec(SwsInternal *c,
int dstW, int dstY,
enum AVPixelFormat target)
{
+ SwsInternalAltivec *const a = sws_internal_altivec(c);
+
int i, j;
vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V;
vector signed short R0, G0, B0, R1, G1, B1;
@@ -666,8 +700,8 @@ static av_always_inline void yuv2packedX_altivec(SwsInternal *c,
vector signed short *YCoeffs, *CCoeffs;
- YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize;
- CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize;
+ YCoeffs = a->vYCoeffsBank + dstY * lumFilterSize;
+ CCoeffs = a->vCCoeffsBank + dstY * chrFilterSize;
out = (vector unsigned char *) dest;
@@ -876,20 +910,21 @@ YUV2PACKEDX_WRAPPER(bgr24, AV_PIX_FMT_BGR24);
av_cold int ff_sws_init_altivec_bufs(SwsInternal *c)
{
const SwsContext *const sws = &c->opts;
+ SwsInternalAltivec *const a = sws_internal_altivec(c);
- c->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*c->vYCoeffsBank));
- c->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*c->vCCoeffsBank));
- if (!c->vYCoeffsBank || !c->vCCoeffsBank)
+ a->vYCoeffsBank = av_malloc_array(sws->dst_h, c->vLumFilterSize * sizeof(*a->vYCoeffsBank));
+ a->vCCoeffsBank = av_malloc_array(c->chrDstH, c->vChrFilterSize * sizeof(*a->vCCoeffsBank));
+ if (!a->vYCoeffsBank || !a->vCCoeffsBank)
return AVERROR(ENOMEM);
for (int i = 0; i < c->vLumFilterSize * sws->dst_h; ++i) {
- short *p = (short *)&c->vYCoeffsBank[i];
+ short *p = (short *)&a->vYCoeffsBank[i];
for (int j = 0; j < 8; ++j)
p[j] = c->vLumFilter[i];
}
for (int i = 0; i < c->vChrFilterSize * c->chrDstH; ++i) {
- short *p = (short *)&c->vCCoeffsBank[i];
+ short *p = (short *)&a->vCCoeffsBank[i];
for (int j = 0; j < 8; ++j)
p[j] = c->vChrFilter[i];
}
@@ -899,7 +934,8 @@ av_cold int ff_sws_init_altivec_bufs(SwsInternal *c)
av_cold void ff_sws_free_altivec_bufs(SwsInternal *c)
{
- av_freep(&c->vYCoeffsBank);
- av_freep(&c->vCCoeffsBank);
+ SwsInternalAltivec *const a = sws_internal_altivec(c);
+ av_freep(&a->vYCoeffsBank);
+ av_freep(&a->vCCoeffsBank);
}
#endif /* HAVE_ALTIVEC */
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
index 7884c38d69..3e3bbea23a 100644
--- a/libswscale/ppc/yuv2yuv_altivec.c
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -30,6 +30,7 @@
#include "libswscale/swscale_internal.h"
#if HAVE_ALTIVEC
+#include "libavutil/ppc/util_altivec.h"
static int yv12toyuy2_unscaled_altivec(SwsInternal *c, const uint8_t *const src[],
const int srcStride[], int srcSliceY,
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 036ccfe938..b9e93642b1 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -37,11 +37,15 @@
#include "libavutil/pixfmt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/slicethread.h"
-#if HAVE_ALTIVEC
-#include "libavutil/ppc/util_altivec.h"
-#endif
#include "libavutil/half2float.h"
+#if HAVE_ALTIVEC
+#define SWSINTERNAL_ADDITIONAL_ASM_SIZE (7*16 + 2*8 + /* alignment */ 16)
+#endif
+#ifndef SWSINTERNAL_ADDITIONAL_ASM_SIZE
+#define SWSINTERNAL_ADDITIONAL_ASM_SIZE 0
+#endif
+
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
#define YUVRGB_TABLE_HEADROOM 512
@@ -544,17 +548,6 @@ struct SwsInternal {
const uint8_t *chrDither8, *lumDither8;
-#if HAVE_ALTIVEC
- vector signed short CY;
- vector signed short CRV;
- vector signed short CBU;
- vector signed short CGU;
- vector signed short CGV;
- vector signed short OY;
- vector unsigned short CSHIFT;
- vector signed short *vYCoeffsBank, *vCCoeffsBank;
-#endif
-
int use_mmx_vfilter;
/* pre defined color-spaces gamma */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index a81c216b6c..0416497aa9 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1027,7 +1027,7 @@ int sws_getColorspaceDetails(SwsContext *sws, int **inv_table,
SwsContext *sws_alloc_context(void)
{
- SwsInternal *c = (SwsInternal *) av_mallocz(sizeof(SwsInternal));
+ SwsInternal *c = av_mallocz(sizeof(*c) + SWSINTERNAL_ADDITIONAL_ASM_SIZE);
if (!c)
return NULL;
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2026-02-24 21:47 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=177195238877.25.2545377984576741198@29965ddac10e \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git