* [FFmpeg-devel] [PR] avcodec/x86/bswapdsp: Minor improvements (PR #22307)
@ 2026-02-27 13:03 mkver via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2026-02-27 13:03 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: mkver
PR #22307 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22307
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22307.patch
>From 451d53eb3db21189d9ca66a3a3b6684eb8e34efb Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 27 Feb 2026 13:19:47 +0100
Subject: [PATCH 1/3] avcodec/x86/bswapdsp: Avoid register copies
No change in benchmarks here.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/bswapdsp.asm | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 31c6c48a21..12fd494ffe 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -33,10 +33,10 @@ SECTION .text
; %1 = aligned/unaligned
%macro BSWAP_LOOPS 1
mov r3d, r2d
- sar r2d, 3
+ sar r3d, 3
jz .left4_%1
%if cpuflag(avx2)
- sar r2d, 1
+ sar r3d, 1
jz .left8_%1
%endif
.loop8_%1:
@@ -65,12 +65,11 @@ SECTION .text
%endif
add r0, mmsize*2
add r1, mmsize*2
- dec r2d
+ dec r3d
jnz .loop8_%1
%if cpuflag(avx2)
.left8_%1:
- mov r2d, r3d
- test r3d, 8
+ test r2d, 8
jz .left4_%1
mov%1 m0, [r1]
pshufb m0, m2
@@ -79,8 +78,7 @@ SECTION .text
add r0, mmsize
%endif
.left4_%1:
- mov r2d, r3d
- test r3d, 4
+ test r2d, 4
jz .left
mov%1 xm0, [r1]
%if cpuflag(ssse3)
--
2.52.0
>From 3db6adc772ebfadf0537390740883ab6feed2841 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 27 Feb 2026 13:24:04 +0100
Subject: [PATCH 2/3] avcodec/x86/bswapdsp: combine shifting, avoid check for
AVX2
This avoids a check and a shift if >=8 elements are processed;
it adds a check if < 8 elements are processed (which should
be rare).
No change in benchmarks here.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/bswapdsp.asm | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 12fd494ffe..f89ca76cf1 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -33,11 +33,12 @@ SECTION .text
; %1 = aligned/unaligned
%macro BSWAP_LOOPS 1
mov r3d, r2d
+%if cpuflag(avx2)
+ sar r3d, 4
+ jz .left8_%1
+%else
sar r3d, 3
jz .left4_%1
-%if cpuflag(avx2)
- sar r3d, 1
- jz .left8_%1
%endif
.loop8_%1:
mov%1 m0, [r1 + 0]
--
2.52.0
>From 311a587c7f2b90f54a04bb19505736cf9f304a48 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 27 Feb 2026 13:54:21 +0100
Subject: [PATCH 3/3] avcodec/x86/bswapdsp: Avoid aligned vs unaligned
codepaths for AVX2
For modern CPUs (like those supporting AVX2) loads and stores
using the unaligned versions of instructions are as fast
as aligned ones if the address is aligned, so remove
the aligned AVX2 version (and the alignment check) and just
use the unaligned one.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/bswapdsp.asm | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index f89ca76cf1..2b80d8a75e 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -100,10 +100,15 @@ SECTION .text
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
%macro BSWAP32_BUF 0
-%if cpuflag(ssse3)||cpuflag(avx2)
+%if cpuflag(avx2)
+cglobal bswap32_buf, 3,4,3
+ vbroadcasti128 m2, [pb_bswap32]
+ BSWAP_LOOPS u
+%else
+%if cpuflag(ssse3)
cglobal bswap32_buf, 3,4,3
mov r3, r1
- VBROADCASTI128 m2, [pb_bswap32]
+ mova m2, [pb_bswap32]
%else
cglobal bswap32_buf, 3,4,5
mov r3, r1
@@ -115,6 +120,7 @@ cglobal bswap32_buf, 3,4,5
jmp .left
.start_align:
BSWAP_LOOPS a
+%endif
.left:
%if cpuflag(ssse3)
test r2d, 2
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2026-02-27 13:04 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-27 13:03 [FFmpeg-devel] [PR] avcodec/x86/bswapdsp: Minor improvements (PR #22307) mkver via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git