Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: James Darnley <jdarnley@obe.tv>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH 3/3] avfilter/yadif: add avx2 filter_line function
Date: Fri, 10 Feb 2023 14:06:57 +0100
Message-ID: <20230210130657.455866-3-jdarnley@obe.tv> (raw)
In-Reply-To: <20230210130657.455866-1-jdarnley@obe.tv>

Zen 2 (Ryzen 7 3700X):
1.73x faster (3603±586.3 vs. 2082±317.1 decicycles) compared with ssse3

With an SD y4m file, throughput increases from ~3600 fps to ~4700 fps.
---
 libavfilter/x86/vf_yadif.asm    | 83 +++++++++++++++++++++++----------
 libavfilter/x86/vf_yadif_init.c |  4 ++
 2 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/libavfilter/x86/vf_yadif.asm b/libavfilter/x86/vf_yadif.asm
index 809cebdd3f..571febfca3 100644
--- a/libavfilter/x86/vf_yadif.asm
+++ b/libavfilter/x86/vf_yadif.asm
@@ -25,11 +25,30 @@
 
 SECTION_RODATA
 
-pb_1: times 16 db 1
-pw_1: times  8 dw 1
+pb_1: times 32 db 1
+pw_1: times 16 dw 1
 
 SECTION .text
 
+%unmacro RSHIFT 2
+
+%macro RSHIFT 2 ; %1 = register, shifted right in place; %2 = shift amount in bytes
+%if mmsize == 32
+    vextracti128 xm7, %1, 1 ; xm7 = upper 128 bits of %1; note this clobbers m7
+    palignr xmm %+ %1, xm7, xmm %+ %1, %2 ; low 128 bits = (xm7:low half) >> %2 bytes; honour %2, callers pass 1 and 2
+%else
+    psrldq %1, %2 ; byte-wise right shift of the whole register
+%endif
+%endmacro
+
+%macro UNPACK 1 ; widen the low bytes of %1 to words, in place
+%if mmsize == 32
+    pmovzxbw %1, xmm %+ %1 ; zero-extend bytes to words; source is the xmm-named form of %1 (presumably the x86inc cast to its low half)
+%else
+    punpcklbw %1, m7 ; interleave low bytes with m7; FILTER clears m7 with pxor when mmsize != 32, so this assumes m7 is still zero
+%endif
+%endmacro
+
 %macro CHECK 2
     movu      m2, [curq+t1+%1]
     movu      m3, [curq+t0+%2]
@@ -40,7 +59,7 @@ SECTION .text
     pand      m4, [pb_1]
     psubusb   m5, m4
     RSHIFT    m5, 1
-    punpcklbw m5, m7
+    UNPACK    m5
     mova      m4, m2
     psubusb   m2, m3
     psubusb   m3, m4
@@ -49,9 +68,9 @@ SECTION .text
     mova      m4, m2
     RSHIFT    m3, 1
     RSHIFT    m4, 2
-    punpcklbw m2, m7
-    punpcklbw m3, m7
-    punpcklbw m4, m7
+    UNPACK    m2
+    UNPACK    m3
+    UNPACK    m4
     paddw     m2, m3
     paddw     m2, m4
 %endmacro
@@ -81,13 +100,19 @@ SECTION .text
 %endmacro
 
 %macro LOAD 2
-    movh      %1, %2
-    punpcklbw %1, m7
+    %if mmsize == 32
+        pmovzxbw %1, %2 ; load bytes from %2 and zero-extend them to words in one instruction; m7 is not needed
+    %else
+        movh      %1, %2 ; movh is defined outside this patch; presumably a half-register load
+        punpcklbw %1, m7 ; interleave the loaded bytes with m7 (zeroed by FILTER when mmsize != 32) to form words
+    %endif
 %endmacro
 
 %macro FILTER 3
 .loop%1:
-    pxor         m7, m7
+    %if mmsize != 32
+        pxor         m7, m7
+    %endif
     LOAD         m0, [curq+t1]
     LOAD         m1, [curq+t0]
     LOAD         m2, [%2]
@@ -95,9 +120,9 @@ SECTION .text
     mova         m4, m3
     paddw        m3, m2
     psraw        m3, 1
-    mova   [rsp+ 0], m0
-    mova   [rsp+16], m3
-    mova   [rsp+32], m1
+    mova   [rsp+0*mmsize], m0
+    mova   [rsp+1*mmsize], m3
+    mova   [rsp+2*mmsize], m1
     psubw        m2, m4
     ABS1         m2, m4
     LOAD         m3, [prevq+t1]
@@ -119,7 +144,7 @@ SECTION .text
     paddw        m3, m4
     psrlw        m3, 1
     pmaxsw       m2, m3
-    mova   [rsp+48], m2
+    mova   [rsp+3*mmsize], m2
 
     paddw        m1, m0
     paddw        m0, m0
@@ -134,9 +159,9 @@ SECTION .text
     psubusb      m3, m4
     pmaxub       m2, m3
     mova         m3, m2
-    psrldq       m3, 2
-    punpcklbw    m2, m7
-    punpcklbw    m3, m7
+    RSHIFT       m3, 2
+    UNPACK       m2
+    UNPACK       m3
     paddw        m0, m2
     paddw        m0, m3
     psubw        m0, [pw_1]
@@ -150,7 +175,7 @@ SECTION .text
     CHECK 1, -3
     CHECK2
 
-    mova         m6, [rsp+48]
+    mova         m6, [rsp+3*mmsize]
     cmp   DWORD r8m, 2
     jge .end%1
     LOAD         m2, [%2+t1*2]
@@ -161,9 +186,9 @@ SECTION .text
     paddw        m3, m5
     psrlw        m2, 1
     psrlw        m3, 1
-    mova         m4, [rsp+ 0]
-    mova         m5, [rsp+16]
-    mova         m7, [rsp+32]
+    mova         m4, [rsp+0*mmsize]
+    mova         m5, [rsp+1*mmsize]
+    mova         m7, [rsp+2*mmsize]
     psubw        m2, m4
     psubw        m3, m7
     mova         m0, m5
@@ -182,15 +207,21 @@ SECTION .text
     pmaxsw       m6, m4
 
 .end%1:
-    mova         m2, [rsp+16]
+    mova         m2, [rsp+1*mmsize]
     mova         m3, m2
     psubw        m2, m6
     paddw        m3, m6
     pmaxsw       m1, m2
     pminsw       m1, m3
-    packuswb     m1, m1
 
-    movh     [dstq], m1
+    %if mmsize == 32
+        vextracti128 xm4, ym1, 1
+        packuswb xm1, xm4
+        movu [dstq], xm1
+    %else
+        packuswb     m1, m1
+        movh     [dstq], m1
+    %endif
     add        dstq, mmsize/2
     add       prevq, mmsize/2
     add        curq, mmsize/2
@@ -201,10 +232,10 @@ SECTION .text
 
 %macro YADIF 0
 %if ARCH_X86_32
-cglobal yadif_filter_line, 4, 6, 8, 80, dst, prev, cur, next, w, prefs, \
+cglobal yadif_filter_line, 4, 6, 8, 4*mmsize, dst, prev, cur, next, w, prefs, \
                                         mrefs, parity, mode
 %else
-cglobal yadif_filter_line, 4, 7, 8, 80, dst, prev, cur, next, w, prefs, \
+cglobal yadif_filter_line, 4, 7, 8, 4*mmsize, dst, prev, cur, next, w, prefs, \
                                         mrefs, parity, mode
 %endif
 %if ARCH_X86_32
@@ -233,3 +264,5 @@ INIT_XMM ssse3
 YADIF
 INIT_XMM sse2
 YADIF
+INIT_YMM avx2
+YADIF
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index d648f0f835..48858dc295 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -29,6 +29,8 @@ void ff_yadif_filter_line_sse2(void *dst, void *prev, void *cur,
 void ff_yadif_filter_line_ssse3(void *dst, void *prev, void *cur,
                                 void *next, int w, int prefs,
                                 int mrefs, int parity, int mode);
+void ff_yadif_filter_line_avx2(void *dst, void *prev, void *cur, void *next,
+        int w, int prefs, int mrefs, int parity, int mode);
 
 void ff_yadif_filter_line_16bit_sse2(void *dst, void *prev, void *cur,
                                      void *next, int w, int prefs,
@@ -68,5 +70,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif, int bit_depth)
             yadif->filter_line = ff_yadif_filter_line_sse2;
         if (EXTERNAL_SSSE3(cpu_flags))
             yadif->filter_line = ff_yadif_filter_line_ssse3;
+        if (EXTERNAL_AVX2(cpu_flags))
+            yadif->filter_line = ff_yadif_filter_line_avx2;
     }
 }
-- 
2.39.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2023-02-10 13:09 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-10 13:06 [FFmpeg-devel] [PATCH 1/3] avfilter: move yadif's filter_line init into a dedicated function James Darnley
2023-02-10 13:06 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for yadif James Darnley
2023-02-10 13:06 ` James Darnley [this message]
2023-02-20 12:55   ` [FFmpeg-devel] [PATCH 3/3] avfilter/yadif: add avx2 filter_line function James Darnley
2023-02-20 12:55     ` James Darnley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230210130657.455866-3-jdarnley@obe.tv \
    --to=jdarnley@obe.tv \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git