Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Thomas Mundt <tmundt75@gmail.com>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
Date: Sat, 11 Mar 2023 17:14:29 +0100
Message-ID: <CAC5+Sy6+A5b3RHZprUUh0Uw2WFQFCkz2riuC9cP7LvB5JMzZ8A@mail.gmail.com> (raw)
In-Reply-To: <20230220195703.1297421-3-jdarnley@obe.tv>

Hi James,

Am Mo., 20. Feb. 2023 um 20:59 Uhr schrieb James Darnley <jdarnley@obe.tv>:

> 2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
> ---
>  libavfilter/x86/vf_bwdif.asm    | 29 ++++++++++++++++++++++++-----
>  libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
>  2 files changed, 36 insertions(+), 5 deletions(-)
>
> diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
> index 0b453da53b..5cc61435fd 100644
> --- a/libavfilter/x86/vf_bwdif.asm
> +++ b/libavfilter/x86/vf_bwdif.asm
> @@ -26,18 +26,22 @@
>
>  %include "libavutil/x86/x86util.asm"
>
> -SECTION_RODATA
> +SECTION_RODATA 32
>
> -pw_coefhf:  times 4 dw  1016, 5570
> -pw_coefhf1: times 8 dw -3801
> -pw_coefsp:  times 4 dw  5077, -981
> -pw_splfdif: times 4 dw  -768,  768
> +pw_coefhf:  times 8 dw  1016, 5570
> +pw_coefhf1: times 16 dw -3801
> +pw_coefsp:  times 8 dw  5077, -981
> +pw_splfdif: times 8 dw  -768,  768
>
>  SECTION .text
>
>  %macro LOAD8 2
> +    %if mmsize == 32
> +        pmovzxbw %1, %2
> +    %else
>      movh         %1, %2
>      punpcklbw    %1, m7
> +    %endif
>  %endmacro
>
>  %macro LOAD12 2
> @@ -45,8 +49,14 @@ SECTION .text
>  %endmacro
>
>  %macro DISP8 0
> +    %if mmsize == 32
> +        vextracti128  xm1,    m2, 1
> +        packuswb      xm2,   xm1
> +        movu         [dstq], xm2
> +    %else
>      packuswb     m2, m2
>      movh     [dstq], m2
> +    %endif
>  %endmacro
>
>  %macro DISP12 0
> @@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst,
> prev, cur, next, w, \
>                                                prefs, mrefs, prefs2,
> mrefs2, \
>                                                prefs3, mrefs3, prefs4, \
>                                                mrefs4, parity, clip_max
> +    %if mmsize == 32
> +        vpbroadcastd m12, DWORD clip_maxm
>

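With this line I get a green pattern at bit depths > 8: vpbroadcastd
replicates the whole 32-bit clip_max, so every other 16-bit lane of m12
ends up zero instead of holding clip_max (the SSE path's SPLATW
replicates only the low word).
It looks good with:
vpbroadcastw m12, WORD clip_maxm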

> +    %else
>      movd        m12, DWORD clip_maxm
>      SPLATW      m12, m12, 0
> +    %endif
>  %else
>  cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
>                                                prefs, mrefs, prefs2,
> mrefs2, \
> @@ -264,3 +278,8 @@ INIT_XMM ssse3
>  BWDIF
>  INIT_XMM sse2
>  BWDIF
> +
> +%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
> +INIT_YMM avx2
> +BWDIF
> +%endif
> diff --git a/libavfilter/x86/vf_bwdif_init.c
> b/libavfilter/x86/vf_bwdif_init.c
> index ba7bc40c3d..f833318c10 100644
> --- a/libavfilter/x86/vf_bwdif_init.c
> +++ b/libavfilter/x86/vf_bwdif_init.c
> @@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev,
> void *cur, void *next,
>                                  int w, int prefs, int mrefs, int prefs2,
>                                  int mrefs2, int prefs3, int mrefs3, int
> prefs4,
>                                  int mrefs4, int parity, int clip_max);
> +void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void
> *next,
> +                               int w, int prefs, int mrefs, int prefs2,
> +                               int mrefs2, int prefs3, int mrefs3, int
> prefs4,
> +                               int mrefs4, int parity, int clip_max);
>
>  void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur,
> void *next,
>                                       int w, int prefs, int mrefs, int
> prefs2,
> @@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void
> *prev, void *cur, void *ne
>                                        int w, int prefs, int mrefs, int
> prefs2,
>                                        int mrefs2, int prefs3, int mrefs3,
> int prefs4,
>                                        int mrefs4, int parity, int
> clip_max);
> +void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur,
> void *next,
> +                                     int w, int prefs, int mrefs, int
> prefs2,
> +                                     int mrefs2, int prefs3, int mrefs3,
> int prefs4,
> +                                     int mrefs4, int parity, int
> clip_max);
>
>  av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
>  {
> @@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif,
> int bit_depth)
>              bwdif->filter_line = ff_bwdif_filter_line_sse2;
>          if (EXTERNAL_SSSE3(cpu_flags))
>              bwdif->filter_line = ff_bwdif_filter_line_ssse3;
> +        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
> +            bwdif->filter_line = ff_bwdif_filter_line_avx2;
>      } else if (bit_depth <= 12) {
>          if (EXTERNAL_SSE2(cpu_flags))
>              bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
>          if (EXTERNAL_SSSE3(cpu_flags))
>              bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
> +        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
> +            bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
>      }
>  }
> --
> 2.39.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2023-03-11 16:14 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
2023-03-11 16:18   ` Thomas Mundt
2023-03-13 11:04     ` James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
2023-03-11 16:14   ` Thomas Mundt [this message]
2023-03-13 11:08     ` James Darnley
2023-02-24  1:19 ` [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function Thomas Mundt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAC5+Sy6+A5b3RHZprUUh0Uw2WFQFCkz2riuC9cP7LvB5JMzZ8A@mail.gmail.com \
    --to=tmundt75@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git