Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] avcodec/x86/bswapdsp: Minor improvements (PR #22307)
Date: Fri, 27 Feb 2026 13:03:51 -0000
Message-ID: <177219743245.25.4048680044388236040@29965ddac10e> (raw)

PR #22307 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22307
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22307.patch


>From 451d53eb3db21189d9ca66a3a3b6684eb8e34efb Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 27 Feb 2026 13:19:47 +0100
Subject: [PATCH 1/3] avcodec/x86/bswapdsp: Avoid register copies

No change in benchmarks here.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/bswapdsp.asm | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 31c6c48a21..12fd494ffe 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -33,10 +33,10 @@ SECTION .text
 ; %1 = aligned/unaligned
 %macro BSWAP_LOOPS  1
     mov      r3d, r2d
-    sar      r2d, 3
+    sar      r3d, 3
     jz       .left4_%1
 %if cpuflag(avx2)
-    sar      r2d, 1
+    sar      r3d, 1
     jz       .left8_%1
 %endif
 .loop8_%1:
@@ -65,12 +65,11 @@ SECTION .text
 %endif
     add      r0, mmsize*2
     add      r1, mmsize*2
-    dec      r2d
+    dec      r3d
     jnz      .loop8_%1
 %if cpuflag(avx2)
 .left8_%1:
-    mov      r2d, r3d
-    test     r3d, 8
+    test     r2d, 8
     jz       .left4_%1
     mov%1    m0, [r1]
     pshufb   m0, m2
@@ -79,8 +78,7 @@ SECTION .text
     add r0, mmsize
 %endif
 .left4_%1:
-    mov      r2d, r3d
-    test     r3d, 4
+    test     r2d, 4
     jz       .left
     mov%1    xm0, [r1]
 %if cpuflag(ssse3)
-- 
2.52.0


>From 3db6adc772ebfadf0537390740883ab6feed2841 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 27 Feb 2026 13:24:04 +0100
Subject: [PATCH 2/3] avcodec/x86/bswapdsp: combine shifting, avoid check for
 AVX2

This avoids a check and a shift if >=8 elements are processed;
it adds a check if < 8 elements are processed (which should
be rare).
No change in benchmarks here.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/bswapdsp.asm | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 12fd494ffe..f89ca76cf1 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -33,11 +33,12 @@ SECTION .text
 ; %1 = aligned/unaligned
 %macro BSWAP_LOOPS  1
     mov      r3d, r2d
+%if cpuflag(avx2)
+    sar      r3d, 4
+    jz       .left8_%1
+%else
     sar      r3d, 3
     jz       .left4_%1
-%if cpuflag(avx2)
-    sar      r3d, 1
-    jz       .left8_%1
 %endif
 .loop8_%1:
     mov%1    m0, [r1 +  0]
-- 
2.52.0


>From 311a587c7f2b90f54a04bb19505736cf9f304a48 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 27 Feb 2026 13:54:21 +0100
Subject: [PATCH 3/3] avcodec/x86/bswapdsp: Avoid aligned vs unaligned
 codepaths for AVX2

For modern CPUs (like those supporting AVX2) loads and stores
using the unaligned versions of instructions are as fast
as aligned ones if the address is aligned, so remove
the aligned AVX2 version (and the alignment check) and just
use the unaligned one.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/bswapdsp.asm | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index f89ca76cf1..2b80d8a75e 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -100,10 +100,15 @@ SECTION .text
 
 ; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
 %macro BSWAP32_BUF 0
-%if cpuflag(ssse3)||cpuflag(avx2)
+%if cpuflag(avx2)
+cglobal bswap32_buf, 3,4,3
+    vbroadcasti128  m2, [pb_bswap32]
+    BSWAP_LOOPS  u
+%else
+%if cpuflag(ssse3)
 cglobal bswap32_buf, 3,4,3
     mov      r3, r1
-    VBROADCASTI128  m2, [pb_bswap32]
+    mova     m2, [pb_bswap32]
 %else
 cglobal bswap32_buf, 3,4,5
     mov      r3, r1
@@ -115,6 +120,7 @@ cglobal bswap32_buf, 3,4,5
     jmp      .left
 .start_align:
     BSWAP_LOOPS  a
+%endif
 .left:
 %if cpuflag(ssse3)
     test     r2d, 2
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2026-02-27 13:04 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=177219743245.25.4048680044388236040@29965ddac10e \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git