Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: James Almer <jamrial@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH] swscale/x86/rgb2rgb: replace shuffle_bytes_2103_mmxext with an SSE2 version
Date: Wed,  5 Jun 2024 17:51:16 -0300
Message-ID: <20240605205116.3258-1-jamrial@gmail.com> (raw)

Benchmark timings for each implementation (lower is faster):

shuffle_bytes_2103_c: 46.5
shuffle_bytes_2103_mmxext: 29.3
shuffle_bytes_2103_sse2: 12.5

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libswscale/x86/rgb2rgb.c     |  6 ++----
 libswscale/x86/rgb_2_rgb.asm | 30 +++++++++++-------------------
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 21ccfafe51..912fe431b3 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -116,7 +116,7 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
 
 #endif /* HAVE_INLINE_ASM */
 
-void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_2103_sse2(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_2103_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
@@ -154,10 +154,8 @@ av_cold void rgb2rgb_init_x86(void)
         rgb2rgb_init_avx();
 #endif /* HAVE_INLINE_ASM */
 
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
-        shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext;
-    }
     if (EXTERNAL_SSE2(cpu_flags)) {
+        shuffle_bytes_2103 = ff_shuffle_bytes_2103_sse2;
 #if ARCH_X86_64
         uyvytoyuv422 = ff_uyvytoyuv422_sse2;
 #endif
diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
index 0bf1278718..2d2ac778b7 100644
--- a/libswscale/x86/rgb_2_rgb.asm
+++ b/libswscale/x86/rgb_2_rgb.asm
@@ -25,7 +25,7 @@
 
 SECTION_RODATA
 
-pb_mask_shuffle2103_mmx times 8 dw 255
+pb_mask_shuffle2103 times 8 dw 255
 pb_shuffle2103: db 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15
 pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13
 pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
@@ -50,11 +50,10 @@ SECTION .text
 ;------------------------------------------------------------------------------
 ; shuffle_bytes_2103_mmext (const uint8_t *src, uint8_t *dst, int src_size)
 ;------------------------------------------------------------------------------
-INIT_MMX mmxext
-cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
-    mova   m6, [pb_mask_shuffle2103_mmx]
-    mova   m7, m6
-    psllq  m7, 8
+INIT_XMM sse2
+cglobal shuffle_bytes_2103, 3, 5, 4, src, dst, w, tmp, x
+    mova   m2, [pb_mask_shuffle2103]
+    psllq  m3, m2, 8
 
     movsxdifnidn wq, wd
     mov xq, wq
@@ -86,28 +85,21 @@ jge .end
 
 .loop_simd:
     movu     m0, [srcq+wq]
-    movu     m1, [srcq+wq+8]
-
-    pshufw   m3, m0, 177
-    pshufw   m5, m1, 177
 
-    pand     m0, m7
-    pand     m3, m6
+    pshuflw   m1, m0, 0xb1
+    pshufhw   m1, m1, 0xb1
 
-    pand     m1, m7
-    pand     m5, m6
+    pand     m0, m3
+    pand     m1, m2
 
-    por      m0, m3
-    por      m1, m5
+    por      m0, m1
 
     movu      [dstq+wq], m0
-    movu  [dstq+wq + 8], m1
 
-    add              wq, mmsize*2
+    add              wq, mmsize
     jl .loop_simd
 
 .end:
-    emms
     RET
 
 ;------------------------------------------------------------------------------
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

             reply	other threads:[~2024-06-05 20:51 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-05 20:51 James Almer [this message]
2024-06-06 14:15 ` [FFmpeg-devel] [PATCH] swscale/x86/rgb2rgb: add SSE2 shuffle_bytes functions James Almer
2024-06-06 14:48   ` Andreas Rheinhardt
2024-06-06 15:45     ` James Almer
2024-06-08 15:55       ` Andreas Rheinhardt
2024-06-08 16:21         ` Rémi Denis-Courmont
2024-06-09 15:36         ` James Almer
2024-06-09 16:05           ` Rémi Denis-Courmont
2024-06-10 17:06           ` James Almer
2024-06-11  5:18             ` Andreas Rheinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240605205116.3258-1-jamrial@gmail.com \
    --to=jamrial@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git