Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function
@ 2023-03-20 16:49 James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
  To: ffmpeg-devel

---
No changes

 libavfilter/bwdif.h             |  3 ++-
 libavfilter/vf_bwdif.c          | 13 +++++++++----
 libavfilter/x86/vf_bwdif_init.c |  4 +---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
index 889ff772ed..5749345f78 100644
--- a/libavfilter/bwdif.h
+++ b/libavfilter/bwdif.h
@@ -37,6 +37,7 @@ typedef struct BWDIFContext {
                         int parity, int clip_max, int spat);
 } BWDIFContext;
 
-void ff_bwdif_init_x86(BWDIFContext *bwdif);
+void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
+void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
 
 #endif /* AVFILTER_BWDIF_H */
diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
index 65c617ebb3..34e8c5e234 100644
--- a/libavfilter/vf_bwdif.c
+++ b/libavfilter/vf_bwdif.c
@@ -340,7 +340,14 @@ static int config_props(AVFilterLink *link)
 
     yadif->csp = av_pix_fmt_desc_get(link->format);
     yadif->filter = filter;
-    if (yadif->csp->comp[0].depth > 8) {
+    ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
+
+    return 0;
+}
+
+av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
+{
+    if (bit_depth > 8) {
         s->filter_intra = filter_intra_16bit;
         s->filter_line  = filter_line_c_16bit;
         s->filter_edge  = filter_edge_16bit;
@@ -351,10 +358,8 @@ static int config_props(AVFilterLink *link)
     }
 
 #if ARCH_X86
-    ff_bwdif_init_x86(s);
+    ff_bwdif_init_x86(s, bit_depth);
 #endif
-
-    return 0;
 }
 
 
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index e24e5cd9b1..ba7bc40c3d 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
                                       int mrefs4, int parity, int clip_max);
 
-av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
+av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
 {
-    YADIFContext *yadif = &bwdif->yadif;
     int cpu_flags = av_get_cpu_flags();
-    int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
 
     if (bit_depth <= 8) {
         if (EXTERNAL_SSE2(cpu_flags))
-- 
2.39.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif
  2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
@ 2023-03-20 16:49 ` James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests James Darnley
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
  To: ffmpeg-devel

---
Fixed a small bug

 tests/checkasm/Makefile   |  1 +
 tests/checkasm/checkasm.c |  3 ++
 tests/checkasm/checkasm.h |  1 +
 tests/checkasm/vf_bwdif.c | 69 +++++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak   |  1 +
 5 files changed, 75 insertions(+)
 create mode 100644 tests/checkasm/vf_bwdif.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index a6f06c7007..b6a43f181f 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)
 # libavfilter tests
 AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER)      += vf_bwdif.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
 AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
 AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e96d84a7da..5e729cf0e0 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -179,6 +179,9 @@ static const struct {
     #if CONFIG_BLEND_FILTER
         { "vf_blend", checkasm_check_blend },
     #endif
+    #if CONFIG_BWDIF_FILTER
+        { "vf_bwdif", checkasm_check_vf_bwdif },
+    #endif
     #if CONFIG_COLORSPACE_FILTER
         { "vf_colorspace", checkasm_check_colorspace },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 8744a81218..e9e73c6fa0 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -82,6 +82,7 @@ void checkasm_check_utvideodsp(void);
 void checkasm_check_v210dec(void);
 void checkasm_check_v210enc(void);
 void checkasm_check_vc1dsp(void);
+void checkasm_check_vf_bwdif(void);
 void checkasm_check_vf_eq(void);
 void checkasm_check_vf_gblur(void);
 void checkasm_check_vf_hflip(void);
diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
new file mode 100644
index 0000000000..5c2e16cffc
--- /dev/null
+++ b/tests/checkasm/vf_bwdif.c
@@ -0,0 +1,69 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/internal.h"
+#include "libavfilter/bwdif.h"
+
+#define WIDTH 256
+
+#define randomize_buffers(buf0, buf1, mask, count) \
+    for (size_t i = 0; i < count; i++) \
+        buf0[i] = buf1[i] = rnd() & mask
+
+void checkasm_check_vf_bwdif(void)
+{
+    BWDIFContext ctx_8, ctx_10;
+
+    ff_bwdif_init_filter_line(&ctx_8, 8);
+    ff_bwdif_init_filter_line(&ctx_10, 10);
+
+    if (check_func(ctx_8.filter_line, "bwdif8")) {
+        uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
+        uint8_t next0[9*WIDTH], next1[9*WIDTH];
+        uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
+        uint8_t dst0[WIDTH], dst1[WIDTH];
+
+        declare_func(void, void *dst, void *prev, void *cur, void *next,
+                        int w, int prefs, int mrefs, int prefs2, int mrefs2,
+                        int prefs3, int mrefs3, int prefs4, int mrefs4,
+                        int parity, int clip_max);
+
+        randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
+        randomize_buffers(next0, next1, 0xff, 9*WIDTH);
+        randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
+
+        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, WIDTH,
+                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+                0, 0xff);
+        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
+                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+                0, 0xff);
+
+        if (memcmp(dst0, dst1, WIDTH)
+                || memcmp(prev0, prev1, sizeof prev0)
+                || memcmp(next0, next1, sizeof next0)
+                || memcmp(cur0, cur1, sizeof cur0))
+            fail();
+        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
+                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+                0, 0xff);
+    }
+    report("bwdif8");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index a4e95541f5..6a7d4a1226 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -37,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
                 fate-checkasm-v210enc                                   \
                 fate-checkasm-vc1dsp                                    \
                 fate-checkasm-vf_blend                                  \
+                fate-checkasm-vf_bwdif                                  \
                 fate-checkasm-vf_colorspace                             \
                 fate-checkasm-vf_eq                                     \
                 fate-checkasm-vf_gblur                                  \
-- 
2.39.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests
  2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
@ 2023-03-20 16:49 ` James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function James Darnley
  3 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/fate/filter-video.mak | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 444adc6be6..c588286c38 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -11,6 +11,15 @@ fate-filter-owdenoise-sample: CMP = oneoff
 FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, PERMS DELOGO, RM, RV30) += fate-filter-delogo
 fate-filter-delogo: CMD = framecrc -i $(TARGET_SAMPLES)/real/rv30.rm -vf perms=random,delogo=show=0:x=290:y=25:w=26:h=16 -an
 
+FATE_BWDIF-$(call FILTERDEMDEC, BWDIF, MPEGTS, MPEG2VIDEO) += fate-filter-bwdif-mode0 fate-filter-bwdif-mode1
+fate-filter-bwdif-mode0: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 30 -vf bwdif=send_frame
+fate-filter-bwdif-mode1: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 59 -vf bwdif=send_field
+
+FATE_BWDIF-$(call FILTERDEMDEC, BWDIF SCALE, MPEGTS, MPEG2VIDEO) += fate-filter-bwdif10
+fate-filter-bwdif10: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -flags bitexact -pix_fmt yuv420p10le -frames:v 30 -vf scale,bwdif=0
+
+FATE_FILTER_SAMPLES-yes += $(FATE_BWDIF-yes)
+
 FATE_YADIF-$(call FILTERDEMDEC, YADIF, MPEGTS, MPEG2VIDEO) += fate-filter-yadif-mode0 fate-filter-yadif-mode1
 fate-filter-yadif-mode0: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 30 -vf yadif=0
 fate-filter-yadif-mode1: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 59 -vf yadif=1
-- 
2.39.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data
  2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests James Darnley
@ 2023-03-20 16:49 ` James Darnley
  2023-03-21 16:32   ` Thomas Mundt
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function James Darnley
  3 siblings, 1 reply; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
  To: ffmpeg-devel

Also deduplicate to share with the 8-bit test.
---
Should I squash this into the commit adding the checkasm test?

 tests/checkasm/vf_bwdif.c | 73 +++++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
index 5c2e16cffc..46224bb575 100644
--- a/tests/checkasm/vf_bwdif.c
+++ b/tests/checkasm/vf_bwdif.c
@@ -27,6 +27,44 @@
     for (size_t i = 0; i < count; i++) \
         buf0[i] = buf1[i] = rnd() & mask
 
+#define BODY(type, depth)                                                      \
+    do {                                                                       \
+        type prev0[9*WIDTH], prev1[9*WIDTH];                                   \
+        type next0[9*WIDTH], next1[9*WIDTH];                                   \
+        type cur0[9*WIDTH], cur1[9*WIDTH];                                     \
+        type dst0[WIDTH], dst1[WIDTH];                                         \
+        const int stride = WIDTH;                                              \
+        const int mask = (1<<depth)-1;                                         \
+                                                                               \
+        declare_func(void, void *dst, void *prev, void *cur, void *next,       \
+                        int w, int prefs, int mrefs, int prefs2, int mrefs2,   \
+                        int prefs3, int mrefs3, int prefs4, int mrefs4,        \
+                        int parity, int clip_max);                             \
+                                                                               \
+        randomize_buffers(prev0, prev1, mask, 9*WIDTH);                        \
+        randomize_buffers(next0, next1, mask, 9*WIDTH);                        \
+        randomize_buffers( cur0,  cur1, mask, 9*WIDTH);                        \
+                                                                               \
+        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH,       \
+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
+                3*stride, -3*stride, 4*stride, -4*stride,                      \
+                0, mask);                                                      \
+        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,       \
+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
+                3*stride, -3*stride, 4*stride, -4*stride,                      \
+                0, mask);                                                      \
+                                                                               \
+        if (memcmp(dst0, dst1, sizeof dst0)                                    \
+                || memcmp(prev0, prev1, sizeof prev0)                          \
+                || memcmp(next0, next1, sizeof next0)                          \
+                || memcmp( cur0,  cur1, sizeof cur0))                          \
+            fail();                                                            \
+        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,      \
+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
+                3*stride, -3*stride, 4*stride, -4*stride,                      \
+                0, mask);                                                      \
+    } while (0)
+
 void checkasm_check_vf_bwdif(void)
 {
     BWDIFContext ctx_8, ctx_10;
@@ -35,35 +73,12 @@ void checkasm_check_vf_bwdif(void)
     ff_bwdif_init_filter_line(&ctx_10, 10);
 
     if (check_func(ctx_8.filter_line, "bwdif8")) {
-        uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
-        uint8_t next0[9*WIDTH], next1[9*WIDTH];
-        uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
-        uint8_t dst0[WIDTH], dst1[WIDTH];
-
-        declare_func(void, void *dst, void *prev, void *cur, void *next,
-                        int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                        int prefs3, int mrefs3, int prefs4, int mrefs4,
-                        int parity, int clip_max);
-
-        randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
-        randomize_buffers(next0, next1, 0xff, 9*WIDTH);
-        randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
-
-        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, WIDTH,
-                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
-                0, 0xff);
-        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
-                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
-                0, 0xff);
+        BODY(uint8_t, 8);
+        report("bwdif8");
+    }
 
-        if (memcmp(dst0, dst1, WIDTH)
-                || memcmp(prev0, prev1, sizeof prev0)
-                || memcmp(next0, next1, sizeof next0)
-                || memcmp(cur0, cur1, sizeof cur0))
-            fail();
-        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
-                WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
-                0, 0xff);
+    if (check_func(ctx_10.filter_line, "bwdif10")) {
+        BODY(uint16_t, 10);
+        report("bwdif10");
     }
-    report("bwdif8");
 }
-- 
2.39.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function
  2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
                   ` (2 preceding siblings ...)
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
@ 2023-03-20 16:49 ` James Darnley
  3 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
  To: ffmpeg-devel

8-bit:
2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
10-bit:
2.00x faster (1703±1.7 vs. 853±2.0 decicycles) compared with ssse3
---
Fixed the word broadcast

 libavfilter/x86/vf_bwdif.asm    | 29 ++++++++++++++++++++++++-----
 libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
index 0b453da53b..c93b41ec48 100644
--- a/libavfilter/x86/vf_bwdif.asm
+++ b/libavfilter/x86/vf_bwdif.asm
@@ -26,18 +26,22 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
+SECTION_RODATA 32
 
-pw_coefhf:  times 4 dw  1016, 5570
-pw_coefhf1: times 8 dw -3801
-pw_coefsp:  times 4 dw  5077, -981
-pw_splfdif: times 4 dw  -768,  768
+pw_coefhf:  times 8 dw  1016, 5570
+pw_coefhf1: times 16 dw -3801
+pw_coefsp:  times 8 dw  5077, -981
+pw_splfdif: times 8 dw  -768,  768
 
 SECTION .text
 
 %macro LOAD8 2
+    %if mmsize == 32
+        pmovzxbw %1, %2
+    %else
     movh         %1, %2
     punpcklbw    %1, m7
+    %endif
 %endmacro
 
 %macro LOAD12 2
@@ -45,8 +49,14 @@ SECTION .text
 %endmacro
 
 %macro DISP8 0
+    %if mmsize == 32
+        vextracti128  xm1,    m2, 1
+        packuswb      xm2,   xm1
+        movu         [dstq], xm2
+    %else
     packuswb     m2, m2
     movh     [dstq], m2
+    %endif
 %endmacro
 
 %macro DISP12 0
@@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \
                                               prefs, mrefs, prefs2, mrefs2, \
                                               prefs3, mrefs3, prefs4, \
                                               mrefs4, parity, clip_max
+    %if mmsize == 32
+        vpbroadcastw m12, WORD clip_maxm
+    %else
     movd        m12, DWORD clip_maxm
     SPLATW      m12, m12, 0
+    %endif
 %else
 cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
                                               prefs, mrefs, prefs2, mrefs2, \
@@ -264,3 +278,8 @@ INIT_XMM ssse3
 BWDIF
 INIT_XMM sse2
 BWDIF
+
+%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
+INIT_YMM avx2
+BWDIF
+%endif
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index ba7bc40c3d..f833318c10 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next,
                                 int w, int prefs, int mrefs, int prefs2,
                                 int mrefs2, int prefs3, int mrefs3, int prefs4,
                                 int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void *next,
+                               int w, int prefs, int mrefs, int prefs2,
+                               int mrefs2, int prefs3, int mrefs3, int prefs4,
+                               int mrefs4, int parity, int clip_max);
 
 void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next,
                                      int w, int prefs, int mrefs, int prefs2,
@@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
                                       int w, int prefs, int mrefs, int prefs2,
                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
                                       int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur, void *next,
+                                     int w, int prefs, int mrefs, int prefs2,
+                                     int mrefs2, int prefs3, int mrefs3, int prefs4,
+                                     int mrefs4, int parity, int clip_max);
 
 av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
 {
@@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
             bwdif->filter_line = ff_bwdif_filter_line_sse2;
         if (EXTERNAL_SSSE3(cpu_flags))
             bwdif->filter_line = ff_bwdif_filter_line_ssse3;
+        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+            bwdif->filter_line = ff_bwdif_filter_line_avx2;
     } else if (bit_depth <= 12) {
         if (EXTERNAL_SSE2(cpu_flags))
             bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
         if (EXTERNAL_SSSE3(cpu_flags))
             bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
+        if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+            bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
     }
 }
-- 
2.39.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data
  2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
@ 2023-03-21 16:32   ` Thomas Mundt
  2023-03-23 16:54     ` James Darnley
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Mundt @ 2023-03-21 16:32 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Am Mo., 20. März 2023 um 17:52 Uhr schrieb James Darnley <jdarnley@obe.tv>:

> Also deduplicate to share with the 8-bit test.
> ---
> Should I squash this into the commit adding the checkasm test?
>

Yes, that might be clearer.
Please also add the fate reference files to the fate filter test.
Otherwise the patch set LGTM now.

Thanks,
Thomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data
  2023-03-21 16:32   ` Thomas Mundt
@ 2023-03-23 16:54     ` James Darnley
  0 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-23 16:54 UTC (permalink / raw)
  To: ffmpeg-devel

On 3/21/23 17:32, Thomas Mundt wrote:
> Am Mo., 20. März 2023 um 17:52 Uhr schrieb James Darnley <jdarnley@obe.tv>:
> 
>> Also deduplicate to share with the 8-bit test.
>> ---
>> Should I squash this into the commit adding the checkasm test?
>>
> 
> Yes, that might be clearer.

Will do.

> Please also add the fate reference files to the fate filter test.
> Otherwise the patch set LGTM now.

Thanks, I completely forgot.  You run it with GEN=1, then forget about it 
when it keeps working.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-03-23 16:57 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
2023-03-21 16:32   ` Thomas Mundt
2023-03-23 16:54     ` James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function James Darnley

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git