Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function
@ 2023-02-20 19:57 James Darnley
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: James Darnley @ 2023-02-20 19:57 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavfilter/bwdif.h             |  3 ++-
 libavfilter/vf_bwdif.c          | 13 +++++++++----
 libavfilter/x86/vf_bwdif_init.c |  4 +---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
index 889ff772ed..5749345f78 100644
--- a/libavfilter/bwdif.h
+++ b/libavfilter/bwdif.h
@@ -37,6 +37,7 @@ typedef struct BWDIFContext {
                         int parity, int clip_max, int spat);
 } BWDIFContext;
 
-void ff_bwdif_init_x86(BWDIFContext *bwdif);
+void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
+void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
 
 #endif /* AVFILTER_BWDIF_H */
diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
index 65c617ebb3..34e8c5e234 100644
--- a/libavfilter/vf_bwdif.c
+++ b/libavfilter/vf_bwdif.c
@@ -340,7 +340,14 @@ static int config_props(AVFilterLink *link)
 
     yadif->csp = av_pix_fmt_desc_get(link->format);
     yadif->filter = filter;
-    if (yadif->csp->comp[0].depth > 8) {
+    ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
+
+    return 0;
+}
+
+av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
+{
+    if (bit_depth > 8) {
         s->filter_intra = filter_intra_16bit;
         s->filter_line  = filter_line_c_16bit;
         s->filter_edge  = filter_edge_16bit;
@@ -351,10 +358,8 @@ static int config_props(AVFilterLink *link)
     }
 
 #if ARCH_X86
-    ff_bwdif_init_x86(s);
+    ff_bwdif_init_x86(s, bit_depth);
 #endif
-
-    return 0;
 }
 
 
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index e24e5cd9b1..ba7bc40c3d 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
                                       int mrefs4, int parity, int clip_max);
 
-av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
+av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
 {
-    YADIFContext *yadif = &bwdif->yadif;
     int cpu_flags = av_get_cpu_flags();
-    int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
 
     if (bit_depth <= 8) {
         if (EXTERNAL_SSE2(cpu_flags))
-- 
2.39.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif
  2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
@ 2023-02-20 19:57 ` James Darnley
  2023-03-11 16:18   ` Thomas Mundt
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
  2023-02-24  1:19 ` [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function Thomas Mundt
  2 siblings, 1 reply; 8+ messages in thread
From: James Darnley @ 2023-02-20 19:57 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_bwdif.c | 70 +++++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak   |  1 +
 5 files changed, 76 insertions(+)
 create mode 100644 tests/checkasm/vf_bwdif.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index a6f06c7007..b6a43f181f 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)
 # libavfilter tests
 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER)      += vf_bwdif.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
 AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e96d84a7da..5e729cf0e0 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -179,6 +179,9 @@ static const struct {
     #if CONFIG_BLEND_FILTER
         { "vf_blend", checkasm_check_blend },
     #endif
+    #if CONFIG_BWDIF_FILTER
+        { "vf_bwdif", checkasm_check_vf_bwdif },
+    #endif
     #if CONFIG_COLORSPACE_FILTER
         { "vf_colorspace", checkasm_check_colorspace },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 8744a81218..e9e73c6fa0 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -82,6 +82,7 @@ void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
 void checkasm_check_v210enc(void);
 void checkasm_check_vc1dsp(void);
+void checkasm_check_vf_bwdif(void);
 void checkasm_check_vf_eq(void);
 void checkasm_check_vf_gblur(void);
 void checkasm_check_vf_hflip(void);
diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
new file mode 100644
index 0000000000..e27f9b7494
--- /dev/null
+++ b/tests/checkasm/vf_bwdif.c
@@ -0,0 +1,70 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/internal.h"
+#include "libavfilter/bwdif.h"
+
+#define WIDTH 256
+
+#define randomize_buffers(buf0, buf1, mask, count) \
+    for (size_t i = 0; i < count; i++) /* index must start at 0 */ \
+        buf0[i] = buf1[i] = rnd() & mask
+
+void checkasm_check_vf_bwdif(void)
+{
+    BWDIFContext ctx_8, ctx_10, ctx_16;
+
+    ff_bwdif_init_filter_line(&ctx_8, 8);
+    ff_bwdif_init_filter_line(&ctx_10, 10);
+    ff_bwdif_init_filter_line(&ctx_16, 16);
+
+    if (check_func(ctx_8.filter_line, "bwdif8")) {
+        uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
+        uint8_t next0[9*WIDTH], next1[9*WIDTH];
+        uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
+        uint8_t dst0[WIDTH], dst1[WIDTH];
+
+        declare_func(void, void *dst, void *prev, void *cur, void *next,
+                        int w, int prefs, int mrefs, int prefs2, int mrefs2,
+                        int prefs3, int mrefs3, int prefs4, int mrefs4,
+                        int parity, int clip_max);
+
+        randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
+        randomize_buffers(next0, next1, 0xff, 9*WIDTH);
+        randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
+
+        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, WIDTH,
+                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+                0, 0xff);
+        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
+                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+                0, 0xff);
+
+        if (memcmp(dst0, dst1, WIDTH)
+                || memcmp(prev0, prev1, sizeof prev0)
+                || memcmp(next0, next1, sizeof next0)
+                || memcmp(cur0, cur1, sizeof cur0))
+            fail();
+        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
+                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+                0, 0xff);
+    }
+    report("bwdif8");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index a4e95541f5..6a7d4a1226 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -37,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
                 fate-checkasm-v210enc                                   \
                 fate-checkasm-vc1dsp                                    \
                 fate-checkasm-vf_blend                                  \
+                fate-checkasm-vf_bwdif                                  \
                 fate-checkasm-vf_colorspace                             \
                 fate-checkasm-vf_eq                                     \
                 fate-checkasm-vf_gblur                                  \
-- 
2.39.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
  2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
@ 2023-02-20 19:57 ` James Darnley
  2023-03-11 16:14   ` Thomas Mundt
  2023-02-24  1:19 ` [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function Thomas Mundt
  2 siblings, 1 reply; 8+ messages in thread
From: James Darnley @ 2023-02-20 19:57 UTC (permalink / raw)
  To: ffmpeg-devel

2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
---
 libavfilter/x86/vf_bwdif.asm    | 29 ++++++++++++++++++++++++-----
 libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
index 0b453da53b..5cc61435fd 100644
--- a/libavfilter/x86/vf_bwdif.asm
+++ b/libavfilter/x86/vf_bwdif.asm
@@ -26,18 +26,22 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
+SECTION_RODATA 32
 
-pw_coefhf:  times 4 dw  1016, 5570
-pw_coefhf1: times 8 dw -3801
-pw_coefsp:  times 4 dw  5077, -981
-pw_splfdif: times 4 dw  -768,  768
+pw_coefhf:  times 8 dw  1016, 5570
+pw_coefhf1: times 16 dw -3801
+pw_coefsp:  times 8 dw  5077, -981
+pw_splfdif: times 8 dw  -768,  768
 
 SECTION .text
 
 %macro LOAD8 2
+    %if mmsize == 32
+        pmovzxbw %1, %2
+    %else
     movh         %1, %2
     punpcklbw    %1, m7
+    %endif
 %endmacro
 
 %macro LOAD12 2
@@ -45,8 +49,14 @@ SECTION .text
 %endmacro
 
 %macro DISP8 0
+    %if mmsize == 32
+        vextracti128  xm1,    m2, 1
+        packuswb      xm2,   xm1
+        movu         [dstq], xm2
+    %else
     packuswb     m2, m2
     movh     [dstq], m2
+    %endif
 %endmacro
 
 %macro DISP12 0
@@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \
                                               prefs, mrefs, prefs2, mrefs2, \
                                               prefs3, mrefs3, prefs4, \
                                               mrefs4, parity, clip_max
+    %if mmsize == 32
+        vpbroadcastd m12, DWORD clip_maxm
+    %else
     movd        m12, DWORD clip_maxm
     SPLATW      m12, m12, 0
+    %endif
 %else
 cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
                                               prefs, mrefs, prefs2, mrefs2, \
@@ -264,3 +278,8 @@ INIT_XMM ssse3
 BWDIF
 INIT_XMM sse2
 BWDIF
+
+%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
+INIT_YMM avx2
+BWDIF
+%endif
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index ba7bc40c3d..f833318c10 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next,
                                 int w, int prefs, int mrefs, int prefs2,
                                 int mrefs2, int prefs3, int mrefs3, int prefs4,
                                 int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void *next,
+                               int w, int prefs, int mrefs, int prefs2,
+                               int mrefs2, int prefs3, int mrefs3, int prefs4,
+                               int mrefs4, int parity, int clip_max);
 
 void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next,
                                      int w, int prefs, int mrefs, int prefs2,
@@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
                                       int w, int prefs, int mrefs, int prefs2,
                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
                                       int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur, void *next,
+                                     int w, int prefs, int mrefs, int prefs2,
+                                     int mrefs2, int prefs3, int mrefs3, int prefs4,
+                                     int mrefs4, int parity, int clip_max);
 
 av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
 {
@@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
             bwdif->filter_line = ff_bwdif_filter_line_sse2;
         if (EXTERNAL_SSSE3(cpu_flags))
             bwdif->filter_line = ff_bwdif_filter_line_ssse3;
+        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+            bwdif->filter_line = ff_bwdif_filter_line_avx2;
     } else if (bit_depth <= 12) {
         if (EXTERNAL_SSE2(cpu_flags))
             bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
         if (EXTERNAL_SSSE3(cpu_flags))
             bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
+        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+            bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
     }
 }
-- 
2.39.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function
  2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
@ 2023-02-24  1:19 ` Thomas Mundt
  2 siblings, 0 replies; 8+ messages in thread
From: Thomas Mundt @ 2023-02-24  1:19 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi James,

James Darnley <jdarnley@obe.tv> schrieb am Mo., 20. Feb. 2023, 13:59:

> ---
>  libavfilter/bwdif.h             |  3 ++-
>  libavfilter/vf_bwdif.c          | 13 +++++++++----
>  libavfilter/x86/vf_bwdif_init.c |  4 +---
>  3 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
> index 889ff772ed..5749345f78 100644
> --- a/libavfilter/bwdif.h
> +++ b/libavfilter/bwdif.h
> @@ -37,6 +37,7 @@ typedef struct BWDIFContext {
>                          int parity, int clip_max, int spat);
>  } BWDIFContext;
>
> -void ff_bwdif_init_x86(BWDIFContext *bwdif);
> +void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
> +void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
>
>  #endif /* AVFILTER_BWDIF_H */
> diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
> index 65c617ebb3..34e8c5e234 100644
> --- a/libavfilter/vf_bwdif.c
> +++ b/libavfilter/vf_bwdif.c
> @@ -340,7 +340,14 @@ static int config_props(AVFilterLink *link)
>
>      yadif->csp = av_pix_fmt_desc_get(link->format);
>      yadif->filter = filter;
> -    if (yadif->csp->comp[0].depth > 8) {
> +    ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
> +
> +    return 0;
> +}
> +
> +av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
> +{
> +    if (bit_depth > 8) {
>          s->filter_intra = filter_intra_16bit;
>          s->filter_line  = filter_line_c_16bit;
>          s->filter_edge  = filter_edge_16bit;
> @@ -351,10 +358,8 @@ static int config_props(AVFilterLink *link)
>      }
>
>  #if ARCH_X86
> -    ff_bwdif_init_x86(s);
> +    ff_bwdif_init_x86(s, bit_depth);
>  #endif
> -
> -    return 0;
>  }
>
>
> diff --git a/libavfilter/x86/vf_bwdif_init.c
> b/libavfilter/x86/vf_bwdif_init.c
> index e24e5cd9b1..ba7bc40c3d 100644
> --- a/libavfilter/x86/vf_bwdif_init.c
> +++ b/libavfilter/x86/vf_bwdif_init.c
> @@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void
> *prev, void *cur, void *ne
>                                        int mrefs2, int prefs3, int mrefs3,
> int prefs4,
>                                        int mrefs4, int parity, int
> clip_max);
>
> -av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
> +av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
>  {
> -    YADIFContext *yadif = &bwdif->yadif;
>      int cpu_flags = av_get_cpu_flags();
> -    int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
>
>      if (bit_depth <= 8) {
>          if (EXTERNAL_SSE2(cpu_flags))
> --
> 2.39.1
>

I'm travelling at the moment and can only look at your patch set on my
phone. At first glance it looks good. If you are not in a hurry, I would
test it in two to three weeks when I am back home and have time.

Best Regards,
Thomas

>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
@ 2023-03-11 16:14   ` Thomas Mundt
  2023-03-13 11:08     ` James Darnley
  0 siblings, 1 reply; 8+ messages in thread
From: Thomas Mundt @ 2023-03-11 16:14 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi James,

Am Mo., 20. Feb. 2023 um 20:59 Uhr schrieb James Darnley <jdarnley@obe.tv>:

> 2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
> ---
>  libavfilter/x86/vf_bwdif.asm    | 29 ++++++++++++++++++++++++-----
>  libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
>  2 files changed, 36 insertions(+), 5 deletions(-)
>
> diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
> index 0b453da53b..5cc61435fd 100644
> --- a/libavfilter/x86/vf_bwdif.asm
> +++ b/libavfilter/x86/vf_bwdif.asm
> @@ -26,18 +26,22 @@
>
>  %include "libavutil/x86/x86util.asm"
>
> -SECTION_RODATA
> +SECTION_RODATA 32
>
> -pw_coefhf:  times 4 dw  1016, 5570
> -pw_coefhf1: times 8 dw -3801
> -pw_coefsp:  times 4 dw  5077, -981
> -pw_splfdif: times 4 dw  -768,  768
> +pw_coefhf:  times 8 dw  1016, 5570
> +pw_coefhf1: times 16 dw -3801
> +pw_coefsp:  times 8 dw  5077, -981
> +pw_splfdif: times 8 dw  -768,  768
>
>  SECTION .text
>
>  %macro LOAD8 2
> +    %if mmsize == 32
> +        pmovzxbw %1, %2
> +    %else
>      movh         %1, %2
>      punpcklbw    %1, m7
> +    %endif
>  %endmacro
>
>  %macro LOAD12 2
> @@ -45,8 +49,14 @@ SECTION .text
>  %endmacro
>
>  %macro DISP8 0
> +    %if mmsize == 32
> +        vextracti128  xm1,    m2, 1
> +        packuswb      xm2,   xm1
> +        movu         [dstq], xm2
> +    %else
>      packuswb     m2, m2
>      movh     [dstq], m2
> +    %endif
>  %endmacro
>
>  %macro DISP12 0
> @@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst,
> prev, cur, next, w, \
>                                                prefs, mrefs, prefs2,
> mrefs2, \
>                                                prefs3, mrefs3, prefs4, \
>                                                mrefs4, parity, clip_max
> +    %if mmsize == 32
> +        vpbroadcastd m12, DWORD clip_maxm
>

I get a green pattern at bit depths > 8.
Looks good with:
vpbroadcastw m12, WORD clip_maxm

> +    %else
>      movd        m12, DWORD clip_maxm
>      SPLATW      m12, m12, 0
> +    %endif
>  %else
>  cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
>                                                prefs, mrefs, prefs2,
> mrefs2, \
> @@ -264,3 +278,8 @@ INIT_XMM ssse3
>  BWDIF
>  INIT_XMM sse2
>  BWDIF
> +
> +%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
> +INIT_YMM avx2
> +BWDIF
> +%endif
> diff --git a/libavfilter/x86/vf_bwdif_init.c
> b/libavfilter/x86/vf_bwdif_init.c
> index ba7bc40c3d..f833318c10 100644
> --- a/libavfilter/x86/vf_bwdif_init.c
> +++ b/libavfilter/x86/vf_bwdif_init.c
> @@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev,
> void *cur, void *next,
>                                  int w, int prefs, int mrefs, int prefs2,
>                                  int mrefs2, int prefs3, int mrefs3, int
> prefs4,
>                                  int mrefs4, int parity, int clip_max);
> +void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void
> *next,
> +                               int w, int prefs, int mrefs, int prefs2,
> +                               int mrefs2, int prefs3, int mrefs3, int
> prefs4,
> +                               int mrefs4, int parity, int clip_max);
>
>  void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur,
> void *next,
>                                       int w, int prefs, int mrefs, int
> prefs2,
> @@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void
> *prev, void *cur, void *ne
>                                        int w, int prefs, int mrefs, int
> prefs2,
>                                        int mrefs2, int prefs3, int mrefs3,
> int prefs4,
>                                        int mrefs4, int parity, int
> clip_max);
> +void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur,
> void *next,
> +                                     int w, int prefs, int mrefs, int
> prefs2,
> +                                     int mrefs2, int prefs3, int mrefs3,
> int prefs4,
> +                                     int mrefs4, int parity, int
> clip_max);
>
>  av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
>  {
> @@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif,
> int bit_depth)
>              bwdif->filter_line = ff_bwdif_filter_line_sse2;
>          if (EXTERNAL_SSSE3(cpu_flags))
>              bwdif->filter_line = ff_bwdif_filter_line_ssse3;
> +        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
> +            bwdif->filter_line = ff_bwdif_filter_line_avx2;
>      } else if (bit_depth <= 12) {
>          if (EXTERNAL_SSE2(cpu_flags))
>              bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
>          if (EXTERNAL_SSSE3(cpu_flags))
>              bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
> +        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
> +            bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
>      }
>  }
> --
> 2.39.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif
  2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
@ 2023-03-11 16:18   ` Thomas Mundt
  2023-03-13 11:04     ` James Darnley
  0 siblings, 1 reply; 8+ messages in thread
From: Thomas Mundt @ 2023-03-11 16:18 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi James,

Am Mo., 20. Feb. 2023 um 20:59 Uhr schrieb James Darnley <jdarnley@obe.tv>:

> ---
>  tests/checkasm/Makefile   |  1 +
>  tests/checkasm/checkasm.c |  3 ++
>  tests/checkasm/checkasm.h |  1 +
>  tests/checkasm/vf_bwdif.c | 70 +++++++++++++++++++++++++++++++++++++++
>  tests/fate/checkasm.mak   |  1 +
>  5 files changed, 76 insertions(+)
>  create mode 100644 tests/checkasm/vf_bwdif.c
>
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index a6f06c7007..b6a43f181f 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)          +=
> $(AVCODECOBJS-yes)
>  # libavfilter tests
>  AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
>  AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
> +AVFILTEROBJS-$(CONFIG_BWDIF_FILTER)      += vf_bwdif.o
>  AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
>  AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
>  AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index e96d84a7da..5e729cf0e0 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -179,6 +179,9 @@ static const struct {
>      #if CONFIG_BLEND_FILTER
>          { "vf_blend", checkasm_check_blend },
>      #endif
> +    #if CONFIG_BWDIF_FILTER
> +        { "vf_bwdif", checkasm_check_vf_bwdif },
> +    #endif
>      #if CONFIG_COLORSPACE_FILTER
>          { "vf_colorspace", checkasm_check_colorspace },
>      #endif
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index 8744a81218..e9e73c6fa0 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -82,6 +82,7 @@ void checkasm_check_utvideodsp(void);
>  void checkasm_check_v210dec(void);
>  void checkasm_check_v210enc(void);
>  void checkasm_check_vc1dsp(void);
> +void checkasm_check_vf_bwdif(void);
>  void checkasm_check_vf_eq(void);
>  void checkasm_check_vf_gblur(void);
>  void checkasm_check_vf_hflip(void);
> diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
> new file mode 100644
> index 0000000000..e27f9b7494
> --- /dev/null
> +++ b/tests/checkasm/vf_bwdif.c
> @@ -0,0 +1,70 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <string.h>
> +#include "checkasm.h"
> +#include "libavcodec/internal.h"
> +#include "libavfilter/bwdif.h"
> +
> +#define WIDTH 256
> +
> +#define randomize_buffers(buf0, buf1, mask, count) \
> +    for (size_t i; i < count; i++) \
> +        buf0[i] = buf1[i] = rnd() & mask
> +
> +void checkasm_check_vf_bwdif(void)
> +{
> +    BWDIFContext ctx_8, ctx_10, ctx_16;
> +
> +    ff_bwdif_init_filter_line(&ctx_8, 8);
> +    ff_bwdif_init_filter_line(&ctx_10, 10);
> +    ff_bwdif_init_filter_line(&ctx_16, 16);
> +
> +    if (check_func(ctx_8.filter_line, "bwdif8")) {
> +        uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
> +        uint8_t next0[9*WIDTH], next1[9*WIDTH];
> +        uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
> +        uint8_t dst0[WIDTH], dst1[WIDTH];
> +
> +        declare_func(void, void *dst, void *prev, void *cur, void *next,
> +                        int w, int prefs, int mrefs, int prefs2, int
> mrefs2,
> +                        int prefs3, int mrefs3, int prefs4, int mrefs4,
> +                        int parity, int clip_max);
> +
> +        randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
> +        randomize_buffers(next0, next1, 0xff, 9*WIDTH);
> +        randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
> +
> +        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH,
> WIDTH,
> +                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH,
> 4*WIDTH, -4*WIDTH,
> +                0, 0xff);
> +        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,
> WIDTH,
> +                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH,
> 4*WIDTH, -4*WIDTH,
> +                0, 0xff);
> +
> +        if (memcmp(dst0, dst1, WIDTH)
> +                || memcmp(prev0, prev1, sizeof prev0)
> +                || memcmp(next0, next1, sizeof next0)
> +                || memcmp(cur0, cur1, sizeof cur0))
> +            fail();
> +        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,
> WIDTH,
> +                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH,
> 4*WIDTH, -4*WIDTH,
> +                0, 0xff);
> +    }
> +    report("bwdif8");
> +}
>

I'm not familiar with checkasm tests, but isn't this one limited to a bit
depth of 8?


> diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
> index a4e95541f5..6a7d4a1226 100644
> --- a/tests/fate/checkasm.mak
> +++ b/tests/fate/checkasm.mak
> @@ -37,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
>                 \
>                  fate-checkasm-v210enc                                   \
>                  fate-checkasm-vc1dsp                                    \
>                  fate-checkasm-vf_blend                                  \
> +                fate-checkasm-vf_bwdif                                  \
>                  fate-checkasm-vf_colorspace                             \
>                  fate-checkasm-vf_eq                                     \
>                  fate-checkasm-vf_gblur                                  \
> --
> 2.39.1
>

Best Regards,
Thomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif
  2023-03-11 16:18   ` Thomas Mundt
@ 2023-03-13 11:04     ` James Darnley
  0 siblings, 0 replies; 8+ messages in thread
From: James Darnley @ 2023-03-13 11:04 UTC (permalink / raw)
  To: ffmpeg-devel

On 3/11/23 17:18, Thomas Mundt wrote:

>> <snip>
>>
> 
> I'm not familiar with checkasm tests, but isn't this one limited to a bit
> depth of 8?

Yes, that was the idea because I was only intending to modify the 8-bit 
function, for now.  The function pointer is the same for all depths so 
you need to initialize it with a different depth.  Judging from your 
other email I might need to write them anyway.

[re-sending to list]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
  2023-03-11 16:14   ` Thomas Mundt
@ 2023-03-13 11:08     ` James Darnley
  0 siblings, 0 replies; 8+ messages in thread
From: James Darnley @ 2023-03-13 11:08 UTC (permalink / raw)
  To: ffmpeg-devel

On 3/11/23 17:14, Thomas Mundt wrote:

>> +    %if mmsize == 32
>> +        vpbroadcastd m12, DWORD clip_maxm
>>
> 
> I get a green pattern at bit depths > 8.
> Looks good with:
> vpbroadcastw m12, WORD clip_maxm
> 
> +    %else
>>       movd        m12, DWORD clip_maxm
>>       SPLATW      m12, m12, 0
>> +    %endif

Of course it should be a word broadcast!

But why doesn't my checkasm test catch it?

>>               bwdif->filter_line = ff_bwdif_filter_line_sse2;
>>           if (EXTERNAL_SSSE3(cpu_flags))
>>               bwdif->filter_line = ff_bwdif_filter_line_ssse3;
>> +        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
>> +            bwdif->filter_line = ff_bwdif_filter_line_avx2;
>>       } else if (bit_depth <= 12) {
>>           if (EXTERNAL_SSE2(cpu_flags))
>>               bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
>>           if (EXTERNAL_SSSE3(cpu_flags))
>>               bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
>> +        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
>> +            bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
>>       }
>>   }

I was intending to only modify/write the 8-bit function so this is a 
mistake.

Thanks.  I'll be back with a version 2.

[re-sending to list]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-03-13 11:08 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
2023-03-11 16:18   ` Thomas Mundt
2023-03-13 11:04     ` James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
2023-03-11 16:14   ` Thomas Mundt
2023-03-13 11:08     ` James Darnley
2023-02-24  1:19 ` [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function Thomas Mundt

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git