* [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif
2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
@ 2023-03-20 16:49 ` James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests James Darnley
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
To: ffmpeg-devel
---
Fixed a small bug
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 ++
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vf_bwdif.c | 69 +++++++++++++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
5 files changed, 75 insertions(+)
create mode 100644 tests/checkasm/vf_bwdif.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index a6f06c7007..b6a43f181f 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
# libavfilter tests
AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e96d84a7da..5e729cf0e0 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -179,6 +179,9 @@ static const struct {
#if CONFIG_BLEND_FILTER
{ "vf_blend", checkasm_check_blend },
#endif
+ #if CONFIG_BWDIF_FILTER
+ { "vf_bwdif", checkasm_check_vf_bwdif },
+ #endif
#if CONFIG_COLORSPACE_FILTER
{ "vf_colorspace", checkasm_check_colorspace },
#endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 8744a81218..e9e73c6fa0 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -82,6 +82,7 @@ void checkasm_check_utvideodsp(void);
void checkasm_check_v210dec(void);
void checkasm_check_v210enc(void);
void checkasm_check_vc1dsp(void);
+void checkasm_check_vf_bwdif(void);
void checkasm_check_vf_eq(void);
void checkasm_check_vf_gblur(void);
void checkasm_check_vf_hflip(void);
diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
new file mode 100644
index 0000000000..5c2e16cffc
--- /dev/null
+++ b/tests/checkasm/vf_bwdif.c
@@ -0,0 +1,69 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/internal.h"
+#include "libavfilter/bwdif.h"
+
+#define WIDTH 256
+
+#define randomize_buffers(buf0, buf1, mask, count) \
+ for (size_t i = 0; i < count; i++) \
+ buf0[i] = buf1[i] = rnd() & mask
+
+void checkasm_check_vf_bwdif(void)
+{
+ BWDIFContext ctx_8, ctx_10;
+
+ ff_bwdif_init_filter_line(&ctx_8, 8);
+ ff_bwdif_init_filter_line(&ctx_10, 10);
+
+ if (check_func(ctx_8.filter_line, "bwdif8")) {
+ uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
+ uint8_t next0[9*WIDTH], next1[9*WIDTH];
+ uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
+ uint8_t dst0[WIDTH], dst1[WIDTH];
+
+ declare_func(void, void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2, int mrefs2,
+ int prefs3, int mrefs3, int prefs4, int mrefs4,
+ int parity, int clip_max);
+
+ randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
+ randomize_buffers(next0, next1, 0xff, 9*WIDTH);
+ randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
+
+ call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, WIDTH,
+ WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+ 0, 0xff);
+ call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
+ WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+ 0, 0xff);
+
+ if (memcmp(dst0, dst1, WIDTH)
+ || memcmp(prev0, prev1, sizeof prev0)
+ || memcmp(next0, next1, sizeof next0)
+ || memcmp(cur0, cur1, sizeof cur0))
+ fail();
+ bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
+ WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+ 0, 0xff);
+ }
+ report("bwdif8");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index a4e95541f5..6a7d4a1226 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -37,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \
fate-checkasm-v210enc \
fate-checkasm-vc1dsp \
fate-checkasm-vf_blend \
+ fate-checkasm-vf_bwdif \
fate-checkasm-vf_colorspace \
fate-checkasm-vf_eq \
fate-checkasm-vf_gblur \
--
2.39.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests
2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
@ 2023-03-20 16:49 ` James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function James Darnley
3 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
To: ffmpeg-devel
---
tests/fate/filter-video.mak | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 444adc6be6..c588286c38 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -11,6 +11,15 @@ fate-filter-owdenoise-sample: CMP = oneoff
FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, PERMS DELOGO, RM, RV30) += fate-filter-delogo
fate-filter-delogo: CMD = framecrc -i $(TARGET_SAMPLES)/real/rv30.rm -vf perms=random,delogo=show=0:x=290:y=25:w=26:h=16 -an
+FATE_BWDIF-$(call FILTERDEMDEC, BWDIF, MPEGTS, MPEG2VIDEO) += fate-filter-bwdif-mode0 fate-filter-bwdif-mode1
+fate-filter-bwdif-mode0: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 30 -vf bwdif=send_frame
+fate-filter-bwdif-mode1: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 59 -vf bwdif=send_field
+
+FATE_BWDIF-$(call FILTERDEMDEC, BWDIF SCALE, MPEGTS, MPEG2VIDEO) += fate-filter-bwdif10
+fate-filter-bwdif10: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -flags bitexact -pix_fmt yuv420p10le -frames:v 30 -vf scale,bwdif=0
+
+FATE_FILTER_SAMPLES-yes += $(FATE_BWDIF-yes)
+
FATE_YADIF-$(call FILTERDEMDEC, YADIF, MPEGTS, MPEG2VIDEO) += fate-filter-yadif-mode0 fate-filter-yadif-mode1
fate-filter-yadif-mode0: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 30 -vf yadif=0
fate-filter-yadif-mode1: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -frames:v 59 -vf yadif=1
--
2.39.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data
2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 2/5] checkasm: add test for bwdif James Darnley
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 3/5] tests: add bwdif to fate filter tests James Darnley
@ 2023-03-20 16:49 ` James Darnley
2023-03-21 16:32 ` Thomas Mundt
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function James Darnley
3 siblings, 1 reply; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
To: ffmpeg-devel
Also deduplicate to share with the 8-bit test.
---
Should I squash this into the commit adding the checkasm test?
tests/checkasm/vf_bwdif.c | 73 +++++++++++++++++++++++----------------
1 file changed, 44 insertions(+), 29 deletions(-)
diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
index 5c2e16cffc..46224bb575 100644
--- a/tests/checkasm/vf_bwdif.c
+++ b/tests/checkasm/vf_bwdif.c
@@ -27,6 +27,44 @@
for (size_t i = 0; i < count; i++) \
buf0[i] = buf1[i] = rnd() & mask
+#define BODY(type, depth) \
+ do { \
+ type prev0[9*WIDTH], prev1[9*WIDTH]; \
+ type next0[9*WIDTH], next1[9*WIDTH]; \
+ type cur0[9*WIDTH], cur1[9*WIDTH]; \
+ type dst0[WIDTH], dst1[WIDTH]; \
+ const int stride = WIDTH; \
+ const int mask = (1<<depth)-1; \
+ \
+ declare_func(void, void *dst, void *prev, void *cur, void *next, \
+ int w, int prefs, int mrefs, int prefs2, int mrefs2, \
+ int prefs3, int mrefs3, int prefs4, int mrefs4, \
+ int parity, int clip_max); \
+ \
+ randomize_buffers(prev0, prev1, mask, 9*WIDTH); \
+ randomize_buffers(next0, next1, mask, 9*WIDTH); \
+ randomize_buffers( cur0, cur1, mask, 9*WIDTH); \
+ \
+ call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, \
+ WIDTH, stride, -stride, 2*stride, -2*stride, \
+ 3*stride, -3*stride, 4*stride, -4*stride, \
+ 0, mask); \
+ call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \
+ WIDTH, stride, -stride, 2*stride, -2*stride, \
+ 3*stride, -3*stride, 4*stride, -4*stride, \
+ 0, mask); \
+ \
+ if (memcmp(dst0, dst1, sizeof dst0) \
+ || memcmp(prev0, prev1, sizeof prev0) \
+ || memcmp(next0, next1, sizeof next0) \
+ || memcmp( cur0, cur1, sizeof cur0)) \
+ fail(); \
+ bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, \
+ WIDTH, stride, -stride, 2*stride, -2*stride, \
+ 3*stride, -3*stride, 4*stride, -4*stride, \
+ 0, mask); \
+ } while (0)
+
void checkasm_check_vf_bwdif(void)
{
BWDIFContext ctx_8, ctx_10;
@@ -35,35 +73,12 @@ void checkasm_check_vf_bwdif(void)
ff_bwdif_init_filter_line(&ctx_10, 10);
if (check_func(ctx_8.filter_line, "bwdif8")) {
- uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
- uint8_t next0[9*WIDTH], next1[9*WIDTH];
- uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
- uint8_t dst0[WIDTH], dst1[WIDTH];
-
- declare_func(void, void *dst, void *prev, void *cur, void *next,
- int w, int prefs, int mrefs, int prefs2, int mrefs2,
- int prefs3, int mrefs3, int prefs4, int mrefs4,
- int parity, int clip_max);
-
- randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
- randomize_buffers(next0, next1, 0xff, 9*WIDTH);
- randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
-
- call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, WIDTH,
- WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
- 0, 0xff);
- call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
- WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
- 0, 0xff);
+ BODY(uint8_t, 8);
+ report("bwdif8");
+ }
- if (memcmp(dst0, dst1, WIDTH)
- || memcmp(prev0, prev1, sizeof prev0)
- || memcmp(next0, next1, sizeof next0)
- || memcmp(cur0, cur1, sizeof cur0))
- fail();
- bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH,
- WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
- 0, 0xff);
+ if (check_func(ctx_10.filter_line, "bwdif10")) {
+ BODY(uint16_t, 10);
+ report("bwdif10");
}
- report("bwdif8");
}
--
2.39.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
@ 2023-03-21 16:32 ` Thomas Mundt
2023-03-23 16:54 ` James Darnley
0 siblings, 1 reply; 7+ messages in thread
From: Thomas Mundt @ 2023-03-21 16:32 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Am Mo., 20. März 2023 um 17:52 Uhr schrieb James Darnley <jdarnley@obe.tv>:
> Also deduplicate to share with the 8-bit test.
> ---
> Should I squash this into the commit adding the checkasm test?
>
Yes, that might be clearer.
Please also add the fate reference files to the fate filter test.
Otherwise the patch set LGTM now.
Thanks,
Thomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data
2023-03-21 16:32 ` Thomas Mundt
@ 2023-03-23 16:54 ` James Darnley
0 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-23 16:54 UTC (permalink / raw)
To: ffmpeg-devel
On 3/21/23 17:32, Thomas Mundt wrote:
> Am Mo., 20. März 2023 um 17:52 Uhr schrieb James Darnley <jdarnley@obe.tv>:
>
>> Also deduplicate to share with the 8-bit test.
>> ---
>> Should I squash this into the commit adding the checkasm test?
>>
>
> Yes, that might be clearer.
Will do.
> Please also add the fate reference files to the fate filter test.
> Otherwise the patch set LGTM now.
Thanks, I completely forgot. You run it with GEN=1 once, then forget about
it because it keeps working.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v2 5/5] avfilter/bwdif: add avx2 filter_line function
2023-03-20 16:49 [FFmpeg-devel] [PATCH v2 1/5] avfilter/bwdif: move filter_line init to a dedicated function James Darnley
` (2 preceding siblings ...)
2023-03-20 16:49 ` [FFmpeg-devel] [PATCH v2 4/5] checkasm: add a test for 10-bit data James Darnley
@ 2023-03-20 16:49 ` James Darnley
3 siblings, 0 replies; 7+ messages in thread
From: James Darnley @ 2023-03-20 16:49 UTC (permalink / raw)
To: ffmpeg-devel
8-bit:
2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
10-bit:
2.00x faster (1703±1.7 vs. 853±2.0 decicycles) compared with ssse3
---
Fixed the word broadcast
libavfilter/x86/vf_bwdif.asm | 29 ++++++++++++++++++++++++-----
libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
index 0b453da53b..c93b41ec48 100644
--- a/libavfilter/x86/vf_bwdif.asm
+++ b/libavfilter/x86/vf_bwdif.asm
@@ -26,18 +26,22 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
+SECTION_RODATA 32
-pw_coefhf: times 4 dw 1016, 5570
-pw_coefhf1: times 8 dw -3801
-pw_coefsp: times 4 dw 5077, -981
-pw_splfdif: times 4 dw -768, 768
+pw_coefhf: times 8 dw 1016, 5570
+pw_coefhf1: times 16 dw -3801
+pw_coefsp: times 8 dw 5077, -981
+pw_splfdif: times 8 dw -768, 768
SECTION .text
%macro LOAD8 2
+ %if mmsize == 32
+ pmovzxbw %1, %2
+ %else
movh %1, %2
punpcklbw %1, m7
+ %endif
%endmacro
%macro LOAD12 2
@@ -45,8 +49,14 @@ SECTION .text
%endmacro
%macro DISP8 0
+ %if mmsize == 32
+ vextracti128 xm1, m2, 1
+ packuswb xm2, xm1
+ movu [dstq], xm2
+ %else
packuswb m2, m2
movh [dstq], m2
+ %endif
%endmacro
%macro DISP12 0
@@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \
prefs, mrefs, prefs2, mrefs2, \
prefs3, mrefs3, prefs4, \
mrefs4, parity, clip_max
+ %if mmsize == 32
+ vpbroadcastw m12, WORD clip_maxm
+ %else
movd m12, DWORD clip_maxm
SPLATW m12, m12, 0
+ %endif
%else
cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
prefs, mrefs, prefs2, mrefs2, \
@@ -264,3 +278,8 @@ INIT_XMM ssse3
BWDIF
INIT_XMM sse2
BWDIF
+
+%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
+INIT_YMM avx2
+BWDIF
+%endif
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index ba7bc40c3d..f833318c10 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2,
int mrefs2, int prefs3, int mrefs3, int prefs4,
int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2,
+ int mrefs2, int prefs3, int mrefs3, int prefs4,
+ int mrefs4, int parity, int clip_max);
void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2,
@@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
int w, int prefs, int mrefs, int prefs2,
int mrefs2, int prefs3, int mrefs3, int prefs4,
int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2,
+ int mrefs2, int prefs3, int mrefs3, int prefs4,
+ int mrefs4, int parity, int clip_max);
av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
{
@@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
bwdif->filter_line = ff_bwdif_filter_line_sse2;
if (EXTERNAL_SSSE3(cpu_flags))
bwdif->filter_line = ff_bwdif_filter_line_ssse3;
+ if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+ bwdif->filter_line = ff_bwdif_filter_line_avx2;
} else if (bit_depth <= 12) {
if (EXTERNAL_SSE2(cpu_flags))
bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
if (EXTERNAL_SSSE3(cpu_flags))
bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
+ if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+ bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
}
}
--
2.39.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread