From: James Darnley <jdarnley@obe.tv> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH 3/3] avfilter/yadif: add avx2 filter_line function Date: Fri, 10 Feb 2023 14:06:57 +0100 Message-ID: <20230210130657.455866-3-jdarnley@obe.tv> (raw) In-Reply-To: <20230210130657.455866-1-jdarnley@obe.tv> Zen 2 (Ryzen 7 3700X): 1.73x faster (3603±586.3 vs. 2082±317.1 decicycles) compared with ssse3 Using an SD y4m file speed increases from ~ 3600 fps to ~4700. --- libavfilter/x86/vf_yadif.asm | 83 +++++++++++++++++++++++---------- libavfilter/x86/vf_yadif_init.c | 4 ++ 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/libavfilter/x86/vf_yadif.asm b/libavfilter/x86/vf_yadif.asm index 809cebdd3f..571febfca3 100644 --- a/libavfilter/x86/vf_yadif.asm +++ b/libavfilter/x86/vf_yadif.asm @@ -25,11 +25,30 @@ SECTION_RODATA -pb_1: times 16 db 1 -pw_1: times 8 dw 1 +pb_1: times 32 db 1 +pw_1: times 16 dw 1 SECTION .text +%unmacro RSHIFT 2 + +%macro RSHIFT 2 +%if mmsize == 32 + vextracti128 xm7, %1, 1 + palignr xmm %+ %1, xm7, xmm %+ %1, 2 +%else + psrldq %1, %2 +%endif +%endmacro + +%macro UNPACK 1 +%if mmsize == 32 + pmovzxbw %1, xmm %+ %1 +%else + punpcklbw %1, m7 +%endif +%endmacro + %macro CHECK 2 movu m2, [curq+t1+%1] movu m3, [curq+t0+%2] @@ -40,7 +59,7 @@ SECTION .text pand m4, [pb_1] psubusb m5, m4 RSHIFT m5, 1 - punpcklbw m5, m7 + UNPACK m5 mova m4, m2 psubusb m2, m3 psubusb m3, m4 @@ -49,9 +68,9 @@ SECTION .text mova m4, m2 RSHIFT m3, 1 RSHIFT m4, 2 - punpcklbw m2, m7 - punpcklbw m3, m7 - punpcklbw m4, m7 + UNPACK m2 + UNPACK m3 + UNPACK m4 paddw m2, m3 paddw m2, m4 %endmacro @@ -81,13 +100,19 @@ SECTION .text %endmacro %macro LOAD 2 - movh %1, %2 - punpcklbw %1, m7 + %if mmsize == 32 + pmovzxbw %1, %2 + %else + movh %1, %2 + punpcklbw %1, m7 + %endif %endmacro %macro FILTER 3 .loop%1: - pxor m7, m7 + %if mmsize != 32 + pxor m7, m7 + %endif LOAD m0, [curq+t1] LOAD m1, [curq+t0] LOAD m2, [%2] @@ -95,9 +120,9 @@ SECTION .text mova m4, m3 paddw m3, m2 psraw 
m3, 1 - mova [rsp+ 0], m0 - mova [rsp+16], m3 - mova [rsp+32], m1 + mova [rsp+0*mmsize], m0 + mova [rsp+1*mmsize], m3 + mova [rsp+2*mmsize], m1 psubw m2, m4 ABS1 m2, m4 LOAD m3, [prevq+t1] @@ -119,7 +144,7 @@ SECTION .text paddw m3, m4 psrlw m3, 1 pmaxsw m2, m3 - mova [rsp+48], m2 + mova [rsp+3*mmsize], m2 paddw m1, m0 paddw m0, m0 @@ -134,9 +159,9 @@ SECTION .text psubusb m3, m4 pmaxub m2, m3 mova m3, m2 - psrldq m3, 2 - punpcklbw m2, m7 - punpcklbw m3, m7 + RSHIFT m3, 2 + UNPACK m2 + UNPACK m3 paddw m0, m2 paddw m0, m3 psubw m0, [pw_1] @@ -150,7 +175,7 @@ SECTION .text CHECK 1, -3 CHECK2 - mova m6, [rsp+48] + mova m6, [rsp+3*mmsize] cmp DWORD r8m, 2 jge .end%1 LOAD m2, [%2+t1*2] @@ -161,9 +186,9 @@ SECTION .text paddw m3, m5 psrlw m2, 1 psrlw m3, 1 - mova m4, [rsp+ 0] - mova m5, [rsp+16] - mova m7, [rsp+32] + mova m4, [rsp+0*mmsize] + mova m5, [rsp+1*mmsize] + mova m7, [rsp+2*mmsize] psubw m2, m4 psubw m3, m7 mova m0, m5 @@ -182,15 +207,21 @@ SECTION .text pmaxsw m6, m4 .end%1: - mova m2, [rsp+16] + mova m2, [rsp+1*mmsize] mova m3, m2 psubw m2, m6 paddw m3, m6 pmaxsw m1, m2 pminsw m1, m3 - packuswb m1, m1 - movh [dstq], m1 + %if mmsize == 32 + vextracti128 xm4, ym1, 1 + packuswb xm1, xm4 + movu [dstq], xm1 + %else + packuswb m1, m1 + movh [dstq], m1 + %endif add dstq, mmsize/2 add prevq, mmsize/2 add curq, mmsize/2 @@ -201,10 +232,10 @@ SECTION .text %macro YADIF 0 %if ARCH_X86_32 -cglobal yadif_filter_line, 4, 6, 8, 80, dst, prev, cur, next, w, prefs, \ +cglobal yadif_filter_line, 4, 6, 8, 4*mmsize, dst, prev, cur, next, w, prefs, \ mrefs, parity, mode %else -cglobal yadif_filter_line, 4, 7, 8, 80, dst, prev, cur, next, w, prefs, \ +cglobal yadif_filter_line, 4, 7, 8, 4*mmsize, dst, prev, cur, next, w, prefs, \ mrefs, parity, mode %endif %if ARCH_X86_32 @@ -233,3 +264,5 @@ INIT_XMM ssse3 YADIF INIT_XMM sse2 YADIF +INIT_YMM avx2 +YADIF diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c index d648f0f835..48858dc295 100644 --- 
a/libavfilter/x86/vf_yadif_init.c +++ b/libavfilter/x86/vf_yadif_init.c @@ -29,6 +29,8 @@ void ff_yadif_filter_line_sse2(void *dst, void *prev, void *cur, void ff_yadif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int parity, int mode); +void ff_yadif_filter_line_avx2(void *dst, void *prev, void *cur, void *next, + int w, int prefs, int mrefs, int parity, int mode); void ff_yadif_filter_line_16bit_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, @@ -68,5 +70,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif, int bit_depth) yadif->filter_line = ff_yadif_filter_line_sse2; if (EXTERNAL_SSSE3(cpu_flags)) yadif->filter_line = ff_yadif_filter_line_ssse3; + if (EXTERNAL_AVX2(cpu_flags)) + yadif->filter_line = ff_yadif_filter_line_avx2; } } -- 2.39.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-02-10 13:09 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-02-10 13:06 [FFmpeg-devel] [PATCH 1/3] avfilter: move yadif's filter_line init into a dedicated function James Darnley 2023-02-10 13:06 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for yadif James Darnley 2023-02-10 13:06 ` James Darnley [this message] 2023-02-20 12:55 ` [FFmpeg-devel] [PATCH 3/3] avfilter/yadif: add avx2 filter_line function James Darnley 2023-02-20 12:55 ` James Darnley
Reply instructions:

You may reply publicly to this message via plain-text email using any one of the following methods:

* Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230210130657.455866-3-jdarnley@obe.tv \
    --to=jdarnley@obe.tv \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

  git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

  # If you have public-inbox 1.1+ installed, you may
  # initialize and index your mirror using the following commands:
  public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
    ffmpegdev@gitmailbox.com
  public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git