From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] avcodec/ppc/vc1dsp_altivec: Don't read too much data (PR #21127)
Date: Mon, 08 Dec 2025 05:41:12 -0000
Message-ID: <176517247359.39.11626402283044147287@2cb04c0e5124> (raw)
PR #21127 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21127
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21127.patch
From 79925ef71461facbfdbaf0444b980b668f500aa3 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 8 Dec 2025 06:14:24 +0100
Subject: [PATCH 1/2] avcodec/ppc/vc1dsp_altivec: Don't read too much data
vc1_inv_trans_8x4_altivec() is supposed to process a block
of 8x4 words, yet it read and processed eight lines. This led
to ASAN failures (see [1]) that this commit intends to fix.
It should also lead to performance improvements, but I don't have
real hardware to bench it.
[1]: https://fate.ffmpeg.org/report.cgi?time=20251207214004&slot=ppc64-linux-gcc-14.3-asan
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/ppc/vc1dsp_altivec.c | 66 +++++++++++++++++----------------
1 file changed, 34 insertions(+), 32 deletions(-)
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index bbadb2aaee..dd0473664e 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -235,7 +235,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
{
vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
- vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+ vector signed int s8, s9, sA, sB;
vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4));
const vector unsigned int vec_7 = vec_splat_u32(7);
@@ -253,40 +253,42 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
src1 = vec_ld( 16, block);
src2 = vec_ld( 32, block);
src3 = vec_ld( 48, block);
- src4 = vec_ld( 64, block);
- src5 = vec_ld( 80, block);
- src6 = vec_ld( 96, block);
- src7 = vec_ld(112, block);
- TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
- s0 = vec_unpackl(src0);
- s1 = vec_unpackl(src1);
- s2 = vec_unpackl(src2);
- s3 = vec_unpackl(src3);
- s4 = vec_unpackl(src4);
- s5 = vec_unpackl(src5);
- s6 = vec_unpackl(src6);
- s7 = vec_unpackl(src7);
- s8 = vec_unpackh(src0);
- s9 = vec_unpackh(src1);
- sA = vec_unpackh(src2);
- sB = vec_unpackh(src3);
- sC = vec_unpackh(src4);
- sD = vec_unpackh(src5);
- sE = vec_unpackh(src6);
- sF = vec_unpackh(src7);
+// Transpose 8x4 matrix of 16-bit elements (in-place)
+ vec_s16 A1, B1, C1, D1;
+ vec_s16 A2, B2, C2, D2;
+
+ A1 = vec_mergeh(src0, src2);
+ B1 = vec_mergel(src0, src2);
+ C1 = vec_mergeh(src1, src3);
+ D1 = vec_mergel(src1, src3);
+
+ A2 = vec_mergeh(A1, C1);
+ B2 = vec_mergel(A1, C1);
+ C2 = vec_mergeh(B1, D1);
+ D2 = vec_mergel(B1, D1);
+
+ s0 = vec_unpackh(A2);
+ s1 = vec_unpackl(A2);
+ s2 = vec_unpackh(B2);
+ s3 = vec_unpackl(B2);
+ s4 = vec_unpackh(C2);
+ s5 = vec_unpackl(C2);
+ s6 = vec_unpackh(D2);
+ s7 = vec_unpackl(D2);
+
STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
- STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
- SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
- src0 = vec_pack(s8, s0);
- src1 = vec_pack(s9, s1);
- src2 = vec_pack(sA, s2);
- src3 = vec_pack(sB, s3);
- src4 = vec_pack(sC, s4);
- src5 = vec_pack(sD, s5);
- src6 = vec_pack(sE, s6);
- src7 = vec_pack(sF, s7);
+
+ src0 = vec_pack(s0, s0);
+ src1 = vec_pack(s1, s1);
+ src2 = vec_pack(s2, s2);
+ src3 = vec_pack(s3, s3);
+ src4 = vec_pack(s4, s4);
+ src5 = vec_pack(s5, s5);
+ src6 = vec_pack(s6, s6);
+ src7 = vec_pack(s7, s7);
+
TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
s0 = vec_unpackh(src0);
--
2.49.1
From 0d1fe859603bce2c837b863d12d3a120e906098c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 8 Dec 2025 06:36:28 +0100
Subject: [PATCH 2/2] avcodec/ppc/vc1dsp_altivec, h264chroma_template: Disable
unused variables
Move the variables only used by big-endian code inside the #if
HAVE_BIGENDIAN blocks.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/ppc/h264chroma_template.c | 10 ++++------
libavcodec/ppc/vc1dsp_altivec.c | 2 +-
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/libavcodec/ppc/h264chroma_template.c b/libavcodec/ppc/h264chroma_template.c
index c64856bb14..9455a55dd1 100644
--- a/libavcodec/ppc/h264chroma_template.c
+++ b/libavcodec/ppc/h264chroma_template.c
@@ -129,7 +129,6 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, const uint8_t * src,
const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
const vec_u16 v6us = vec_splat_u16(6);
- vec_u8 vsrcperm0, vsrcperm1;
vec_u8 vsrc0uc, vsrc1uc;
vec_s16 vsrc0ssH, vsrc1ssH;
vec_u8 vsrc2uc, vsrc3uc;
@@ -138,8 +137,8 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, const uint8_t * src,
#if HAVE_BIGENDIAN
register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
- vsrcperm0 = vec_lvsl(0, src);
- vsrcperm1 = vec_lvsl(1, src);
+ vec_u8 vsrcperm0 = vec_lvsl(0, src);
+ vec_u8 vsrcperm1 = vec_lvsl(1, src);
#endif
if (((unsigned long)dst) % 16 == 0) {
@@ -204,7 +203,6 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, const uint8_t *sr
const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
const vec_u16 v6us = vec_splat_u16(6);
- vec_u8 vsrcperm0, vsrcperm1;
vec_u8 vsrc0uc, vsrc1uc;
vec_s16 vsrc0ssH, vsrc1ssH;
vec_u8 vsrc2uc, vsrc3uc;
@@ -213,8 +211,8 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, const uint8_t *sr
#if HAVE_BIGENDIAN
register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
- vsrcperm0 = vec_lvsl(0, src);
- vsrcperm1 = vec_lvsl(1, src);
+ vec_u8 vsrcperm0 = vec_lvsl(0, src);
+ vec_u8 vsrcperm1 = vec_lvsl(1, src);
#endif
if (((unsigned long)dst) % 16 == 0) {
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index dd0473664e..31e9b0010d 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -247,7 +247,6 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
const vector unsigned int vec_1 = vec_splat_u32(1);
vector unsigned char tmp;
vector signed short tmp2, tmp3;
- vector unsigned char perm0, perm1, p0, p1, p;
src0 = vec_ld( 0, block);
src1 = vec_ld( 16, block);
@@ -309,6 +308,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
src3 = vec_pack(s3, sB);
#if HAVE_BIGENDIAN
+ vector unsigned char perm0, perm1, p0, p1, p;
p0 = vec_lvsl (0, dest);
p1 = vec_lvsl (stride, dest);
p = vec_splat_u8 (-1);
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-12-08 5:42 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=176517247359.39.11626402283044147287@2cb04c0e5124 \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git