From: James Darnley <jdarnley@obe.tv> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function Date: Mon, 20 Mar 2023 17:49:25 +0100 Message-ID: <20230320164925.299207-5-jdarnley@obe.tv> (raw) In-Reply-To: <20230320164925.299207-1-jdarnley@obe.tv> 8-bit: 2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3 10-bit: 2.00x faster (1703±1.7 vs. 853±2.0 decicycles) compared with ssse3 --- Fixed the word broadcast libavfilter/x86/vf_bwdif.asm | 29 ++++++++++++++++++++++++----- libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm index 0b453da53b..c93b41ec48 100644 --- a/libavfilter/x86/vf_bwdif.asm +++ b/libavfilter/x86/vf_bwdif.asm @@ -26,18 +26,22 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA +SECTION_RODATA 32 -pw_coefhf: times 4 dw 1016, 5570 -pw_coefhf1: times 8 dw -3801 -pw_coefsp: times 4 dw 5077, -981 -pw_splfdif: times 4 dw -768, 768 +pw_coefhf: times 8 dw 1016, 5570 +pw_coefhf1: times 16 dw -3801 +pw_coefsp: times 8 dw 5077, -981 +pw_splfdif: times 8 dw -768, 768 SECTION .text %macro LOAD8 2 + %if mmsize == 32 + pmovzxbw %1, %2 + %else movh %1, %2 punpcklbw %1, m7 + %endif %endmacro %macro LOAD12 2 @@ -45,8 +49,14 @@ SECTION .text %endmacro %macro DISP8 0 + %if mmsize == 32 + vextracti128 xm1, m2, 1 + packuswb xm2, xm1 + movu [dstq], xm2 + %else packuswb m2, m2 movh [dstq], m2 + %endif %endmacro %macro DISP12 0 @@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \ prefs, mrefs, prefs2, mrefs2, \ prefs3, mrefs3, prefs4, \ mrefs4, parity, clip_max + %if mmsize == 32 + vpbroadcastw m12, WORD clip_maxm + %else movd m12, DWORD clip_maxm SPLATW m12, m12, 0 + %endif %else cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \ prefs, mrefs, prefs2, mrefs2, \ @@ -264,3 +278,8 @@ INIT_XMM ssse3 BWDIF INIT_XMM sse2 
BWDIF + +%if HAVE_AVX2_EXTERNAL && ARCH_X86_64 +INIT_YMM avx2 +BWDIF +%endif diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c index ba7bc40c3d..f833318c10 100644 --- a/libavfilter/x86/vf_bwdif_init.c +++ b/libavfilter/x86/vf_bwdif_init.c @@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max); +void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void *next, + int w, int prefs, int mrefs, int prefs2, + int mrefs2, int prefs3, int mrefs3, int prefs4, + int mrefs4, int parity, int clip_max); void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int prefs2, @@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max); +void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur, void *next, + int w, int prefs, int mrefs, int prefs2, + int mrefs2, int prefs3, int mrefs3, int prefs4, + int mrefs4, int parity, int clip_max); av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth) { @@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth) bwdif->filter_line = ff_bwdif_filter_line_sse2; if (EXTERNAL_SSSE3(cpu_flags)) bwdif->filter_line = ff_bwdif_filter_line_ssse3; + if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags)) + bwdif->filter_line = ff_bwdif_filter_line_avx2; } else if (bit_depth <= 12) { if (EXTERNAL_SSE2(cpu_flags)) bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2; if (EXTERNAL_SSSE3(cpu_flags)) bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3; + if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags)) + bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2; } } -- 2.39.2 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
prev parent reply other threads:[~2023-03-20 16:52 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
2023-03-21 16:32 ` Thomas Mundt
2023-03-23 16:54 ` James Darnley
2023-03-20 16:49 ` James Darnley [this message]
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230320164925.299207-5-jdarnley@obe.tv \
    --to=jdarnley@obe.tv \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

    git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

    # If you have public-inbox 1.1+ installed, you may
    # initialize and index your mirror using the following commands:
    public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
        ffmpegdev@gitmailbox.com
    public-inbox-index ffmpegdev

Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git