Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation
@ 2022-05-26  4:42 FacelessLake
  2022-05-31 10:23 ` [FFmpeg-devel] (ping) " Sam Blackriver
  2022-05-31 12:16 ` [FFmpeg-devel] " Ronald S. Bultje
  0 siblings, 2 replies; 3+ messages in thread
From: FacelessLake @ 2022-05-26  4:42 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Semen Belozerov

From: Semen Belozerov <sinonim147@gmail.com>

---
 libavcodec/x86/vp9dsp_init_16bpp.c    |  2 +
 libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c
index b17826326f..e5afea1512 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -55,6 +55,7 @@ decl_ipred_fn(dl,       32,     16, avx2);
 decl_ipred_fn(dr,       16,     16, avx2);
 decl_ipred_fn(dr,       32,     16, avx2);
 decl_ipred_fn(vl,       16,     16, avx2);
+decl_ipred_fn(hd,       16,     16, avx2);
 
 #define decl_ipred_dir_funcs(type) \
 decl_ipred_fns(type, 16, sse2,  sse2); \
@@ -141,6 +142,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
         init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
         init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
         init_ipred_func(vl, VERT_LEFT, 16, 16, avx2);
+        init_ipred_func(hd, HOR_DOWN, 16, 16, avx2);
 #if ARCH_X86_64
         init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
 #endif
diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm b/libavcodec/x86/vp9intrapred_16bpp.asm
index 0dad91ac5c..808056a809 100644
--- a/libavcodec/x86/vp9intrapred_16bpp.asm
+++ b/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -1273,6 +1273,60 @@ cglobal vp9_ipred_vl_16x16_16, 4, 5, 7, dst, stride, l, a
     mova    [dst4q+stride3q*4], m1                     ; 15 IJKLMNOPPPPPPPPP
     RET
 
+cglobal vp9_ipred_hd_16x16_16, 4, 5, 7, dst, stride, l, a
+    movu                    m0, [aq-2]                 ; *abcdefghijklmno
+    mova                    m1, [lq]                   ; klmnopqrstuvwxyz
+    vperm2i128              m2, m1, m0, q0201          ; stuvwxyz*abcdefg
+    vpalignr                m3, m2, m1, 2              ; lmnopqrstuvwxyz*
+    vpalignr                m4, m2, m1, 4              ; mnopqrstuvwxyz*a
+    LOWPASS                  4,  3,  1                 ; LMNOPQRSTUVWXYZ#
+    pavgw                   m3, m1                     ; klmnopqrstuvwxyz
+    mova                    m1, [aq]                   ; abcdefghijklmnop
+    movu                    m2, [aq+2]                 ; bcdefghijklmnop.
+    LOWPASS                  2,  1,  0                 ; ABCDEFGHIJKLMNO.
+    vpunpcklwd              m0, m3, m4                 ; kLlMmNnOsTtUuVvW
+    vpunpckhwd              m1, m3, m4                 ; oPpQqRrSwXxYyZz#
+    vperm2i128              m3, m1, m0, q0002          ; kLlMmNnOoPpQqRrS
+    vperm2i128              m4, m0, m1, q0301          ; sTtUuVvWwXxYyZz#
+    vperm2i128              m0, m4, m2, q0201          ; wXxYyZz#ABCDEFGH
+    vperm2i128              m1, m3, m4, q0201          ; oPpQqRrSsTtUuVvW
+    DEFINE_ARGS dst, stride, stride3, stride5, dst5
+    lea               stride3q, [strideq*3]
+    lea               stride5q, [stride3q+strideq*2]
+    lea                  dst5q, [dstq+stride5q]
+
+    mova    [dst5q+stride5q*2], m3                     ; 15 kLlMmNnOoPpQqRrS
+    mova    [dst5q+stride3q*2], m1                     ; 11 oPpQqRrSsTtUuVvW
+    mova     [dst5q+strideq*2], m4                     ; 7  sTtUuVvWwXxYyZz#
+    mova     [dstq+stride3q*1], m0                     ; 3  wXxYyZz#ABCDEFGH
+    vpalignr                m5, m4, m1, 4
+    mova     [dstq+stride5q*2], m5                     ; 10 pQqRrSsTtUuVvWwX
+    vpalignr                m5, m0, m4, 4
+    vpalignr                m6, m2, m0, 4
+    mova     [dstq+stride3q*2], m5                     ; 6  tUuVvWwXxYyZz#AB
+    mova      [dstq+strideq*2], m6                     ; 2  xYyZz#ABCDEFGHIJ
+    vpalignr                m5, m4, m1, 8
+    mova     [dst5q+strideq*4], m5                     ; 9  qRrSsTtUuVvWwXxY
+    vpalignr                m5, m0, m4, 8
+    vpalignr                m6, m2, m0, 8
+    mova     [dstq+stride5q*1], m5                     ; 5  uVvWwXxYyZz#ABCD
+    mova      [dstq+strideq*1], m6                     ; 1  yZz#ABCDEFGHIJKL
+    vpalignr                m5, m1, m3, 12
+    vpalignr                m6, m4, m1, 12
+    mova     [dstq+stride3q*4], m5                     ; 12 nOoPpQqRrSsTtUuV
+    mova      [dst5q+stride3q], m6                     ; 8  rSsTtUuVvWwXxYyZ
+    vpalignr                m5, m0, m4, 12
+    vpalignr                m6, m2, m0, 12
+    mova      [dstq+strideq*4], m5                     ; 4  vWwXxYyZz#ABCDEF
+    mova      [dstq+strideq*0], m6                     ; 0  z#ABCDEFGHIJKLMN
+    sub                  dst5q, strideq
+    vpalignr                m5, m1, m3, 4
+    mova    [dst5q+stride5q*2], m5                     ; 14 lMmNnOoPpQqRrSsT
+    sub                  dst5q, strideq
+    vpalignr                m5, m1, m3, 8
+    mova    [dst5q+stride5q*2], m5                    ; 13 mNnOoPpQqRrSsTtU
+    RET
+
 %if ARCH_X86_64
 cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a
     mova                    m0, [lq+mmsize*0+0]        ; l[0-15]
-- 
2.36.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [FFmpeg-devel] (ping) Re: [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation
  2022-05-26  4:42 [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation FacelessLake
@ 2022-05-31 10:23 ` Sam Blackriver
  2022-05-31 12:16 ` [FFmpeg-devel] " Ronald S. Bultje
  1 sibling, 0 replies; 3+ messages in thread
From: Sam Blackriver @ 2022-05-31 10:23 UTC (permalink / raw)
  To: ffmpeg-devel

Чт, 26 мая 2022 г. в 11:43 AM, FacelessLake <blackriver741@gmail.com>:

> From: Semen Belozerov <sinonim147@gmail.com>
>
> ---
>  libavcodec/x86/vp9dsp_init_16bpp.c    |  2 +
>  libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
>
> diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c
> b/libavcodec/x86/vp9dsp_init_16bpp.c
> index b17826326f..e5afea1512 100644
> --- a/libavcodec/x86/vp9dsp_init_16bpp.c
> +++ b/libavcodec/x86/vp9dsp_init_16bpp.c
> @@ -55,6 +55,7 @@ decl_ipred_fn(dl,       32,     16, avx2);
>  decl_ipred_fn(dr,       16,     16, avx2);
>  decl_ipred_fn(dr,       32,     16, avx2);
>  decl_ipred_fn(vl,       16,     16, avx2);
> +decl_ipred_fn(hd,       16,     16, avx2);
>
>  #define decl_ipred_dir_funcs(type) \
>  decl_ipred_fns(type, 16, sse2,  sse2); \
> @@ -141,6 +142,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext
> *dsp)
>          init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
>          init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
>          init_ipred_func(vl, VERT_LEFT, 16, 16, avx2);
> +        init_ipred_func(hd, HOR_DOWN, 16, 16, avx2);
>  #if ARCH_X86_64
>          init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
>  #endif
> diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm
> b/libavcodec/x86/vp9intrapred_16bpp.asm
> index 0dad91ac5c..808056a809 100644
> --- a/libavcodec/x86/vp9intrapred_16bpp.asm
> +++ b/libavcodec/x86/vp9intrapred_16bpp.asm
> @@ -1273,6 +1273,60 @@ cglobal vp9_ipred_vl_16x16_16, 4, 5, 7, dst,
> stride, l, a
>      mova    [dst4q+stride3q*4], m1                     ; 15
> IJKLMNOPPPPPPPPP
>      RET
>
> +cglobal vp9_ipred_hd_16x16_16, 4, 5, 7, dst, stride, l, a
> +    movu                    m0, [aq-2]                 ; *abcdefghijklmno
> +    mova                    m1, [lq]                   ; klmnopqrstuvwxyz
> +    vperm2i128              m2, m1, m0, q0201          ; stuvwxyz*abcdefg
> +    vpalignr                m3, m2, m1, 2              ; lmnopqrstuvwxyz*
> +    vpalignr                m4, m2, m1, 4              ; mnopqrstuvwxyz*a
> +    LOWPASS                  4,  3,  1                 ; LMNOPQRSTUVWXYZ#
> +    pavgw                   m3, m1                     ; klmnopqrstuvwxyz
> +    mova                    m1, [aq]                   ; abcdefghijklmnop
> +    movu                    m2, [aq+2]                 ; bcdefghijklmnop.
> +    LOWPASS                  2,  1,  0                 ; ABCDEFGHIJKLMNO.
> +    vpunpcklwd              m0, m3, m4                 ; kLlMmNnOsTtUuVvW
> +    vpunpckhwd              m1, m3, m4                 ; oPpQqRrSwXxYyZz#
> +    vperm2i128              m3, m1, m0, q0002          ; kLlMmNnOoPpQqRrS
> +    vperm2i128              m4, m0, m1, q0301          ; sTtUuVvWwXxYyZz#
> +    vperm2i128              m0, m4, m2, q0201          ; wXxYyZz#ABCDEFGH
> +    vperm2i128              m1, m3, m4, q0201          ; oPpQqRrSsTtUuVvW
> +    DEFINE_ARGS dst, stride, stride3, stride5, dst5
> +    lea               stride3q, [strideq*3]
> +    lea               stride5q, [stride3q+strideq*2]
> +    lea                  dst5q, [dstq+stride5q]
> +
> +    mova    [dst5q+stride5q*2], m3                     ; 15
> kLlMmNnOoPpQqRrS
> +    mova    [dst5q+stride3q*2], m1                     ; 11
> oPpQqRrSsTtUuVvW
> +    mova     [dst5q+strideq*2], m4                     ; 7
> sTtUuVvWwXxYyZz#
> +    mova     [dstq+stride3q*1], m0                     ; 3
> wXxYyZz#ABCDEFGH
> +    vpalignr                m5, m4, m1, 4
> +    mova     [dstq+stride5q*2], m5                     ; 10
> pQqRrSsTtUuVvWwX
> +    vpalignr                m5, m0, m4, 4
> +    vpalignr                m6, m2, m0, 4
> +    mova     [dstq+stride3q*2], m5                     ; 6
> tUuVvWwXxYyZz#AB
> +    mova      [dstq+strideq*2], m6                     ; 2
> xYyZz#ABCDEFGHIJ
> +    vpalignr                m5, m4, m1, 8
> +    mova     [dst5q+strideq*4], m5                     ; 9
> qRrSsTtUuVvWwXxY
> +    vpalignr                m5, m0, m4, 8
> +    vpalignr                m6, m2, m0, 8
> +    mova     [dstq+stride5q*1], m5                     ; 5
> uVvWwXxYyZz#ABCD
> +    mova      [dstq+strideq*1], m6                     ; 1
> yZz#ABCDEFGHIJKL
> +    vpalignr                m5, m1, m3, 12
> +    vpalignr                m6, m4, m1, 12
> +    mova     [dstq+stride3q*4], m5                     ; 12
> nOoPpQqRrSsTtUuV
> +    mova      [dst5q+stride3q], m6                     ; 8
> rSsTtUuVvWwXxYyZ
> +    vpalignr                m5, m0, m4, 12
> +    vpalignr                m6, m2, m0, 12
> +    mova      [dstq+strideq*4], m5                     ; 4
> vWwXxYyZz#ABCDEF
> +    mova      [dstq+strideq*0], m6                     ; 0
> z#ABCDEFGHIJKLMN
> +    sub                  dst5q, strideq
> +    vpalignr                m5, m1, m3, 4
> +    mova    [dst5q+stride5q*2], m5                     ; 14
> lMmNnOoPpQqRrSsT
> +    sub                  dst5q, strideq
> +    vpalignr                m5, m1, m3, 8
> +    mova    [dst5q+stride5q*2], m5                    ; 13
> mNnOoPpQqRrSsTtU
> +    RET
> +
>  %if ARCH_X86_64
>  cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a
>      mova                    m0, [lq+mmsize*0+0]        ; l[0-15]
> --
> 2.36.1
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation
  2022-05-26  4:42 [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation FacelessLake
  2022-05-31 10:23 ` [FFmpeg-devel] (ping) " Sam Blackriver
@ 2022-05-31 12:16 ` Ronald S. Bultje
  1 sibling, 0 replies; 3+ messages in thread
From: Ronald S. Bultje @ 2022-05-31 12:16 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Semen Belozerov

Hi,

On Thu, May 26, 2022 at 12:43 AM FacelessLake <blackriver741@gmail.com>
wrote:

> From: Semen Belozerov <sinonim147@gmail.com>
>
> ---
>  libavcodec/x86/vp9dsp_init_16bpp.c    |  2 +
>  libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
>

Apologies for forgetting about this, this is now merged.

Thanks,
Ronald
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-05-31 12:16 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-26  4:42 [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation FacelessLake
2022-05-31 10:23 ` [FFmpeg-devel] (ping) " Sam Blackriver
2022-05-31 12:16 ` [FFmpeg-devel] " Ronald S. Bultje

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git