* [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation
@ 2022-05-26 4:42 FacelessLake
2022-05-31 10:23 ` [FFmpeg-devel] (ping) " Sam Blackriver
2022-05-31 12:16 ` [FFmpeg-devel] " Ronald S. Bultje
0 siblings, 2 replies; 3+ messages in thread
From: FacelessLake @ 2022-05-26 4:42 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Semen Belozerov
From: Semen Belozerov <sinonim147@gmail.com>
---
libavcodec/x86/vp9dsp_init_16bpp.c | 2 +
libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c
index b17826326f..e5afea1512 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -55,6 +55,7 @@ decl_ipred_fn(dl, 32, 16, avx2);
decl_ipred_fn(dr, 16, 16, avx2);
decl_ipred_fn(dr, 32, 16, avx2);
decl_ipred_fn(vl, 16, 16, avx2);
+decl_ipred_fn(hd, 16, 16, avx2);
#define decl_ipred_dir_funcs(type) \
decl_ipred_fns(type, 16, sse2, sse2); \
@@ -141,6 +142,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
init_ipred_func(vl, VERT_LEFT, 16, 16, avx2);
+ init_ipred_func(hd, HOR_DOWN, 16, 16, avx2);
#if ARCH_X86_64
init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
#endif
diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm b/libavcodec/x86/vp9intrapred_16bpp.asm
index 0dad91ac5c..808056a809 100644
--- a/libavcodec/x86/vp9intrapred_16bpp.asm
+++ b/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -1273,6 +1273,60 @@ cglobal vp9_ipred_vl_16x16_16, 4, 5, 7, dst, stride, l, a
mova [dst4q+stride3q*4], m1 ; 15 IJKLMNOPPPPPPPPP
RET
+cglobal vp9_ipred_hd_16x16_16, 4, 5, 7, dst, stride, l, a ; hd (HOR_DOWN) intra predictor, 16x16 block of word-sized pixels, AVX2
+ movu m0, [aq-2] ; *abcdefghijklmno
+ mova m1, [lq] ; klmnopqrstuvwxyz
+ vperm2i128 m2, m1, m0, q0201 ; stuvwxyz*abcdefg
+ vpalignr m3, m2, m1, 2 ; lmnopqrstuvwxyz*
+ vpalignr m4, m2, m1, 4 ; mnopqrstuvwxyz*a
+ LOWPASS 4, 3, 1 ; LMNOPQRSTUVWXYZ#
+ pavgw m3, m1 ; klmnopqrstuvwxyz (from here on lowercase k..z = pavgw of adjacent left pixels, not the raw pixels)
+ mova m1, [aq] ; abcdefghijklmnop
+ movu m2, [aq+2] ; bcdefghijklmnop.
+ LOWPASS 2, 1, 0 ; ABCDEFGHIJKLMNO.
+ vpunpcklwd m0, m3, m4 ; kLlMmNnOsTtUuVvW
+ vpunpckhwd m1, m3, m4 ; oPpQqRrSwXxYyZz#
+ vperm2i128 m3, m1, m0, q0002 ; kLlMmNnOoPpQqRrS
+ vperm2i128 m4, m0, m1, q0301 ; sTtUuVvWwXxYyZz#
+ vperm2i128 m0, m4, m2, q0201 ; wXxYyZz#ABCDEFGH
+ vperm2i128 m1, m3, m4, q0201 ; oPpQqRrSsTtUuVvW
+ DEFINE_ARGS dst, stride, stride3, stride5, dst5 ; l and a are not read below; rename the argument registers for addressing
+ lea stride3q, [strideq*3] ; stride3 = 3*stride
+ lea stride5q, [stride3q+strideq*2] ; stride5 = 5*stride
+ lea dst5q, [dstq+stride5q] ; dst5 = dst + 5*stride (row 5)
+
+ mova [dst5q+stride5q*2], m3 ; 15 kLlMmNnOoPpQqRrS
+ mova [dst5q+stride3q*2], m1 ; 11 oPpQqRrSsTtUuVvW
+ mova [dst5q+strideq*2], m4 ; 7 sTtUuVvWwXxYyZz#
+ mova [dstq+stride3q*1], m0 ; 3 wXxYyZz#ABCDEFGH
+ vpalignr m5, m4, m1, 4
+ mova [dstq+stride5q*2], m5 ; 10 pQqRrSsTtUuVvWwX
+ vpalignr m5, m0, m4, 4
+ vpalignr m6, m2, m0, 4
+ mova [dstq+stride3q*2], m5 ; 6 tUuVvWwXxYyZz#AB
+ mova [dstq+strideq*2], m6 ; 2 xYyZz#ABCDEFGHIJ
+ vpalignr m5, m4, m1, 8
+ mova [dst5q+strideq*4], m5 ; 9 qRrSsTtUuVvWwXxY
+ vpalignr m5, m0, m4, 8
+ vpalignr m6, m2, m0, 8
+ mova [dstq+stride5q*1], m5 ; 5 uVvWwXxYyZz#ABCD
+ mova [dstq+strideq*1], m6 ; 1 yZz#ABCDEFGHIJKL
+ vpalignr m5, m1, m3, 12
+ vpalignr m6, m4, m1, 12
+ mova [dstq+stride3q*4], m5 ; 12 nOoPpQqRrSsTtUuV
+ mova [dst5q+stride3q], m6 ; 8 rSsTtUuVvWwXxYyZ
+ vpalignr m5, m0, m4, 12
+ vpalignr m6, m2, m0, 12
+ mova [dstq+strideq*4], m5 ; 4 vWwXxYyZz#ABCDEF
+ mova [dstq+strideq*0], m6 ; 0 z#ABCDEFGHIJKLMN
+ sub dst5q, strideq ; dst5 = dst + 4*stride, so +stride5*2 below addresses row 14
+ vpalignr m5, m1, m3, 4
+ mova [dst5q+stride5q*2], m5 ; 14 lMmNnOoPpQqRrSsT
+ sub dst5q, strideq ; dst5 = dst + 3*stride, so +stride5*2 below addresses row 13
+ vpalignr m5, m1, m3, 8
+ mova [dst5q+stride5q*2], m5 ; 13 mNnOoPpQqRrSsTtU
+ RET
+
%if ARCH_X86_64
cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a
mova m0, [lq+mmsize*0+0] ; l[0-15]
--
2.36.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* [FFmpeg-devel] (ping) Re: [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation
2022-05-26 4:42 [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation FacelessLake
@ 2022-05-31 10:23 ` Sam Blackriver
2022-05-31 12:16 ` [FFmpeg-devel] " Ronald S. Bultje
1 sibling, 0 replies; 3+ messages in thread
From: Sam Blackriver @ 2022-05-31 10:23 UTC (permalink / raw)
To: ffmpeg-devel
Чт, 26 мая 2022 г. в 11:43 AM, FacelessLake <blackriver741@gmail.com>:
> From: Semen Belozerov <sinonim147@gmail.com>
>
> ---
> libavcodec/x86/vp9dsp_init_16bpp.c | 2 +
> libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+)
>
> diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c
> b/libavcodec/x86/vp9dsp_init_16bpp.c
> index b17826326f..e5afea1512 100644
> --- a/libavcodec/x86/vp9dsp_init_16bpp.c
> +++ b/libavcodec/x86/vp9dsp_init_16bpp.c
> @@ -55,6 +55,7 @@ decl_ipred_fn(dl, 32, 16, avx2);
> decl_ipred_fn(dr, 16, 16, avx2);
> decl_ipred_fn(dr, 32, 16, avx2);
> decl_ipred_fn(vl, 16, 16, avx2);
> +decl_ipred_fn(hd, 16, 16, avx2);
>
> #define decl_ipred_dir_funcs(type) \
> decl_ipred_fns(type, 16, sse2, sse2); \
> @@ -141,6 +142,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext
> *dsp)
> init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
> init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
> init_ipred_func(vl, VERT_LEFT, 16, 16, avx2);
> + init_ipred_func(hd, HOR_DOWN, 16, 16, avx2);
> #if ARCH_X86_64
> init_ipred_func(dr, DIAG_DOWN_RIGHT, 32, 16, avx2);
> #endif
> diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm
> b/libavcodec/x86/vp9intrapred_16bpp.asm
> index 0dad91ac5c..808056a809 100644
> --- a/libavcodec/x86/vp9intrapred_16bpp.asm
> +++ b/libavcodec/x86/vp9intrapred_16bpp.asm
> @@ -1273,6 +1273,60 @@ cglobal vp9_ipred_vl_16x16_16, 4, 5, 7, dst,
> stride, l, a
> mova [dst4q+stride3q*4], m1 ; 15
> IJKLMNOPPPPPPPPP
> RET
>
> +cglobal vp9_ipred_hd_16x16_16, 4, 5, 7, dst, stride, l, a
> + movu m0, [aq-2] ; *abcdefghijklmno
> + mova m1, [lq] ; klmnopqrstuvwxyz
> + vperm2i128 m2, m1, m0, q0201 ; stuvwxyz*abcdefg
> + vpalignr m3, m2, m1, 2 ; lmnopqrstuvwxyz*
> + vpalignr m4, m2, m1, 4 ; mnopqrstuvwxyz*a
> + LOWPASS 4, 3, 1 ; LMNOPQRSTUVWXYZ#
> + pavgw m3, m1 ; klmnopqrstuvwxyz
> + mova m1, [aq] ; abcdefghijklmnop
> + movu m2, [aq+2] ; bcdefghijklmnop.
> + LOWPASS 2, 1, 0 ; ABCDEFGHIJKLMNO.
> + vpunpcklwd m0, m3, m4 ; kLlMmNnOsTtUuVvW
> + vpunpckhwd m1, m3, m4 ; oPpQqRrSwXxYyZz#
> + vperm2i128 m3, m1, m0, q0002 ; kLlMmNnOoPpQqRrS
> + vperm2i128 m4, m0, m1, q0301 ; sTtUuVvWwXxYyZz#
> + vperm2i128 m0, m4, m2, q0201 ; wXxYyZz#ABCDEFGH
> + vperm2i128 m1, m3, m4, q0201 ; oPpQqRrSsTtUuVvW
> + DEFINE_ARGS dst, stride, stride3, stride5, dst5
> + lea stride3q, [strideq*3]
> + lea stride5q, [stride3q+strideq*2]
> + lea dst5q, [dstq+stride5q]
> +
> + mova [dst5q+stride5q*2], m3 ; 15
> kLlMmNnOoPpQqRrS
> + mova [dst5q+stride3q*2], m1 ; 11
> oPpQqRrSsTtUuVvW
> + mova [dst5q+strideq*2], m4 ; 7
> sTtUuVvWwXxYyZz#
> + mova [dstq+stride3q*1], m0 ; 3
> wXxYyZz#ABCDEFGH
> + vpalignr m5, m4, m1, 4
> + mova [dstq+stride5q*2], m5 ; 10
> pQqRrSsTtUuVvWwX
> + vpalignr m5, m0, m4, 4
> + vpalignr m6, m2, m0, 4
> + mova [dstq+stride3q*2], m5 ; 6
> tUuVvWwXxYyZz#AB
> + mova [dstq+strideq*2], m6 ; 2
> xYyZz#ABCDEFGHIJ
> + vpalignr m5, m4, m1, 8
> + mova [dst5q+strideq*4], m5 ; 9
> qRrSsTtUuVvWwXxY
> + vpalignr m5, m0, m4, 8
> + vpalignr m6, m2, m0, 8
> + mova [dstq+stride5q*1], m5 ; 5
> uVvWwXxYyZz#ABCD
> + mova [dstq+strideq*1], m6 ; 1
> yZz#ABCDEFGHIJKL
> + vpalignr m5, m1, m3, 12
> + vpalignr m6, m4, m1, 12
> + mova [dstq+stride3q*4], m5 ; 12
> nOoPpQqRrSsTtUuV
> + mova [dst5q+stride3q], m6 ; 8
> rSsTtUuVvWwXxYyZ
> + vpalignr m5, m0, m4, 12
> + vpalignr m6, m2, m0, 12
> + mova [dstq+strideq*4], m5 ; 4
> nOoPpQqRrSsTtUuV
> + mova [dstq+strideq*0], m6 ; 0
> z#ABCDEFGHIJKLMN
> + sub dst5q, strideq
> + vpalignr m5, m1, m3, 4
> + mova [dst5q+stride5q*2], m5 ; 14
> lMmNnOoPpQqRrSsT
> + sub dst5q, strideq
> + vpalignr m5, m1, m3, 8
> + mova [dst5q+stride5q*2], m5 ; 13
> mNnOoPpQqRrSsTtU
> + RET
> +
> %if ARCH_X86_64
> cglobal vp9_ipred_dr_32x32_16, 4, 7, 10, dst, stride, l, a
> mova m0, [lq+mmsize*0+0] ; l[0-15]
> --
> 2.36.1
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation
2022-05-26 4:42 [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation FacelessLake
2022-05-31 10:23 ` [FFmpeg-devel] (ping) " Sam Blackriver
@ 2022-05-31 12:16 ` Ronald S. Bultje
1 sibling, 0 replies; 3+ messages in thread
From: Ronald S. Bultje @ 2022-05-31 12:16 UTC (permalink / raw)
To: FFmpeg development discussions and patches; +Cc: Semen Belozerov
Hi,
On Thu, May 26, 2022 at 12:43 AM FacelessLake <blackriver741@gmail.com>
wrote:
> From: Semen Belozerov <sinonim147@gmail.com>
>
> ---
> libavcodec/x86/vp9dsp_init_16bpp.c | 2 +
> libavcodec/x86/vp9intrapred_16bpp.asm | 54 +++++++++++++++++++++++++++
> 2 files changed, 56 insertions(+)
>
Apologies for forgetting about this, this is now merged.
Thanks,
Ronald
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2022-05-31 12:16 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-26 4:42 [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_hd_16x16_16 avx2 implementation FacelessLake
2022-05-31 10:23 ` [FFmpeg-devel] (ping) " Sam Blackriver
2022-05-31 12:16 ` [FFmpeg-devel] " Ronald S. Bultje
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git