* [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif
2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
@ 2023-02-20 19:57 ` James Darnley
2023-03-11 16:18 ` Thomas Mundt
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
2023-02-24 1:19 ` [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function Thomas Mundt
2 siblings, 1 reply; 8+ messages in thread
From: James Darnley @ 2023-02-20 19:57 UTC (permalink / raw)
To: ffmpeg-devel
---
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 ++
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vf_bwdif.c | 70 +++++++++++++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
5 files changed, 76 insertions(+)
create mode 100644 tests/checkasm/vf_bwdif.c
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index a6f06c7007..b6a43f181f 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
# libavfilter tests
AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e96d84a7da..5e729cf0e0 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -179,6 +179,9 @@ static const struct {
#if CONFIG_BLEND_FILTER
{ "vf_blend", checkasm_check_blend },
#endif
+ #if CONFIG_BWDIF_FILTER
+ { "vf_bwdif", checkasm_check_vf_bwdif },
+ #endif
#if CONFIG_COLORSPACE_FILTER
{ "vf_colorspace", checkasm_check_colorspace },
#endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 8744a81218..e9e73c6fa0 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -82,6 +82,7 @@ void checkasm_check_utvideodsp(void);
void checkasm_check_v210dec(void);
void checkasm_check_v210enc(void);
void checkasm_check_vc1dsp(void);
+void checkasm_check_vf_bwdif(void);
void checkasm_check_vf_eq(void);
void checkasm_check_vf_gblur(void);
void checkasm_check_vf_hflip(void);
diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
new file mode 100644
index 0000000000..e27f9b7494
--- /dev/null
+++ b/tests/checkasm/vf_bwdif.c
@@ -0,0 +1,70 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/internal.h"
+#include "libavfilter/bwdif.h"
+
+#define WIDTH 256
+
+#define randomize_buffers(buf0, buf1, mask, count) /* write the same masked rnd() value to buf0[i] and buf1[i] */ \
+    for (size_t i = 0; i < (count); i++) /* start at 0 so every one of the count elements is written */ \
+        (buf0)[i] = (buf1)[i] = rnd() & (mask)
+
+void checkasm_check_vf_bwdif(void) /* checkasm entry point for the bwdif filter_line function; only the 8-bit variant is exercised */
+{
+ BWDIFContext ctx_8, ctx_10, ctx_16; /* ctx_10 and ctx_16 are initialized below but not used afterwards in this function */
+
+ ff_bwdif_init_filter_line(&ctx_8, 8); /* second argument is the bit depth */
+ ff_bwdif_init_filter_line(&ctx_10, 10);
+ ff_bwdif_init_filter_line(&ctx_16, 16);
+
+ if (check_func(ctx_8.filter_line, "bwdif8")) {
+ uint8_t prev0[9*WIDTH], prev1[9*WIDTH]; /* 9 rows of WIDTH bytes each; the calls below pass a pointer to row 4 */
+ uint8_t next0[9*WIDTH], next1[9*WIDTH]; /* the "0" buffers go to call_ref, the "1" buffers to call_new */
+ uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
+ uint8_t dst0[WIDTH], dst1[WIDTH]; /* one output row per implementation */
+
+ declare_func(void, void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2, int mrefs2,
+ int prefs3, int mrefs3, int prefs4, int mrefs4,
+ int parity, int clip_max);
+
+ randomize_buffers(prev0, prev1, 0xff, 9*WIDTH); /* each pair is meant to receive identical random bytes */
+ randomize_buffers(next0, next1, 0xff, 9*WIDTH);
+ randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
+
+ call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH, WIDTH, /* w = WIDTH */
+ WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH, /* prefs/mrefs .. prefs4/mrefs4: +-1..4 rows; presumably offsets from the row-4 pointers, which would stay inside the 9-row buffers */
+ 0, 0xff); /* parity = 0, clip_max = 0xff */
+ call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH, /* same arguments as call_ref, on the second set of buffers */
+ WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+ 0, 0xff);
+
+ if (memcmp(dst0, dst1, WIDTH) /* fail when the two output rows differ ... */
+ || memcmp(prev0, prev1, sizeof prev0) /* ... or when any input buffer pair no longer matches after the calls */
+ || memcmp(next0, next1, sizeof next0)
+ || memcmp(cur0, cur1, sizeof cur0))
+ fail();
+ bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH, WIDTH, /* presumably a timing run of the new implementation; same arguments as call_new */
+ WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH, 4*WIDTH, -4*WIDTH,
+ 0, 0xff);
+ }
+ report("bwdif8"); /* executed whether or not the check_func branch above ran */
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index a4e95541f5..6a7d4a1226 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -37,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \
fate-checkasm-v210enc \
fate-checkasm-vc1dsp \
fate-checkasm-vf_blend \
+ fate-checkasm-vf_bwdif \
fate-checkasm-vf_colorspace \
fate-checkasm-vf_eq \
fate-checkasm-vf_gblur \
--
2.39.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
@ 2023-03-11 16:18 ` Thomas Mundt
2023-03-13 11:04 ` James Darnley
0 siblings, 1 reply; 8+ messages in thread
From: Thomas Mundt @ 2023-03-11 16:18 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Hi James,
Am Mo., 20. Feb. 2023 um 20:59 Uhr schrieb James Darnley <jdarnley@obe.tv>:
> ---
> tests/checkasm/Makefile | 1 +
> tests/checkasm/checkasm.c | 3 ++
> tests/checkasm/checkasm.h | 1 +
> tests/checkasm/vf_bwdif.c | 70 +++++++++++++++++++++++++++++++++++++++
> tests/fate/checkasm.mak | 1 +
> 5 files changed, 76 insertions(+)
> create mode 100644 tests/checkasm/vf_bwdif.c
>
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index a6f06c7007..b6a43f181f 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) +=
> $(AVCODECOBJS-yes)
> # libavfilter tests
> AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
> AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
> +AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
> AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
> AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o
> AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index e96d84a7da..5e729cf0e0 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -179,6 +179,9 @@ static const struct {
> #if CONFIG_BLEND_FILTER
> { "vf_blend", checkasm_check_blend },
> #endif
> + #if CONFIG_BWDIF_FILTER
> + { "vf_bwdif", checkasm_check_vf_bwdif },
> + #endif
> #if CONFIG_COLORSPACE_FILTER
> { "vf_colorspace", checkasm_check_colorspace },
> #endif
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index 8744a81218..e9e73c6fa0 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -82,6 +82,7 @@ void checkasm_check_utvideodsp(void);
> void checkasm_check_v210dec(void);
> void checkasm_check_v210enc(void);
> void checkasm_check_vc1dsp(void);
> +void checkasm_check_vf_bwdif(void);
> void checkasm_check_vf_eq(void);
> void checkasm_check_vf_gblur(void);
> void checkasm_check_vf_hflip(void);
> diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
> new file mode 100644
> index 0000000000..e27f9b7494
> --- /dev/null
> +++ b/tests/checkasm/vf_bwdif.c
> @@ -0,0 +1,70 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <string.h>
> +#include "checkasm.h"
> +#include "libavcodec/internal.h"
> +#include "libavfilter/bwdif.h"
> +
> +#define WIDTH 256
> +
> +#define randomize_buffers(buf0, buf1, mask, count) \
> + for (size_t i; i < count; i++) \
> + buf0[i] = buf1[i] = rnd() & mask
> +
> +void checkasm_check_vf_bwdif(void)
> +{
> + BWDIFContext ctx_8, ctx_10, ctx_16;
> +
> + ff_bwdif_init_filter_line(&ctx_8, 8);
> + ff_bwdif_init_filter_line(&ctx_10, 10);
> + ff_bwdif_init_filter_line(&ctx_16, 16);
> +
> + if (check_func(ctx_8.filter_line, "bwdif8")) {
> + uint8_t prev0[9*WIDTH], prev1[9*WIDTH];
> + uint8_t next0[9*WIDTH], next1[9*WIDTH];
> + uint8_t cur0[9*WIDTH], cur1[9*WIDTH];
> + uint8_t dst0[WIDTH], dst1[WIDTH];
> +
> + declare_func(void, void *dst, void *prev, void *cur, void *next,
> + int w, int prefs, int mrefs, int prefs2, int
> mrefs2,
> + int prefs3, int mrefs3, int prefs4, int mrefs4,
> + int parity, int clip_max);
> +
> + randomize_buffers(prev0, prev1, 0xff, 9*WIDTH);
> + randomize_buffers(next0, next1, 0xff, 9*WIDTH);
> + randomize_buffers(cur0, cur1, 0xff, 9*WIDTH);
> +
> + call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH,
> WIDTH,
> + WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH,
> 4*WIDTH, -4*WIDTH,
> + 0, 0xff);
> + call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,
> WIDTH,
> + WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH,
> 4*WIDTH, -4*WIDTH,
> + 0, 0xff);
> +
> + if (memcmp(dst0, dst1, WIDTH)
> + || memcmp(prev0, prev1, sizeof prev0)
> + || memcmp(next0, next1, sizeof next0)
> + || memcmp(cur0, cur1, sizeof cur0))
> + fail();
> + bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,
> WIDTH,
> + WIDTH, -WIDTH, 2*WIDTH, -2*WIDTH, 3*WIDTH, -3*WIDTH,
> 4*WIDTH, -4*WIDTH,
> + 0, 0xff);
> + }
> + report("bwdif8");
> +}
>
I'm not familiar with checkasm tests, but isn't this one limited to a bit
depth of 8?
> diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
> index a4e95541f5..6a7d4a1226 100644
> --- a/tests/fate/checkasm.mak
> +++ b/tests/fate/checkasm.mak
> @@ -37,6 +37,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp
> \
> fate-checkasm-v210enc \
> fate-checkasm-vc1dsp \
> fate-checkasm-vf_blend \
> + fate-checkasm-vf_bwdif \
> fate-checkasm-vf_colorspace \
> fate-checkasm-vf_eq \
> fate-checkasm-vf_gblur \
> --
> 2.39.1
>
Best Regards,
Thomas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif
2023-03-11 16:18 ` Thomas Mundt
@ 2023-03-13 11:04 ` James Darnley
0 siblings, 0 replies; 8+ messages in thread
From: James Darnley @ 2023-03-13 11:04 UTC (permalink / raw)
To: ffmpeg-devel
On 3/11/23 17:18, Thomas Mundt wrote:
>> <snip>
>>
>
> I'm not familiar with checkasm tests, but isn't this one limited to a bit
> depth of 8?
Yes, that was the idea because I was only intending to modify the 8-bit
function, for now. The function pointer is the same for all depths so
you need to initialize it with a different depth. Judging from your
other email I might need to write them anyway.
[re-sending to list]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
@ 2023-02-20 19:57 ` James Darnley
2023-03-11 16:14 ` Thomas Mundt
2023-02-24 1:19 ` [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function Thomas Mundt
2 siblings, 1 reply; 8+ messages in thread
From: James Darnley @ 2023-02-20 19:57 UTC (permalink / raw)
To: ffmpeg-devel
2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
---
libavfilter/x86/vf_bwdif.asm | 29 ++++++++++++++++++++++++-----
libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
index 0b453da53b..5cc61435fd 100644
--- a/libavfilter/x86/vf_bwdif.asm
+++ b/libavfilter/x86/vf_bwdif.asm
@@ -26,18 +26,22 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
+SECTION_RODATA 32
-pw_coefhf: times 4 dw 1016, 5570
-pw_coefhf1: times 8 dw -3801
-pw_coefsp: times 4 dw 5077, -981
-pw_splfdif: times 4 dw -768, 768
+pw_coefhf: times 8 dw 1016, 5570
+pw_coefhf1: times 16 dw -3801
+pw_coefsp: times 8 dw 5077, -981
+pw_splfdif: times 8 dw -768, 768
SECTION .text
%macro LOAD8 2
+ %if mmsize == 32
+ pmovzxbw %1, %2
+ %else
movh %1, %2
punpcklbw %1, m7
+ %endif
%endmacro
%macro LOAD12 2
@@ -45,8 +49,14 @@ SECTION .text
%endmacro
%macro DISP8 0
+ %if mmsize == 32
+ vextracti128 xm1, m2, 1
+ packuswb xm2, xm1
+ movu [dstq], xm2
+ %else
packuswb m2, m2
movh [dstq], m2
+ %endif
%endmacro
%macro DISP12 0
@@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \
prefs, mrefs, prefs2, mrefs2, \
prefs3, mrefs3, prefs4, \
mrefs4, parity, clip_max
+ %if mmsize == 32
+ vpbroadcastd m12, DWORD clip_maxm
+ %else
movd m12, DWORD clip_maxm
SPLATW m12, m12, 0
+ %endif
%else
cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
prefs, mrefs, prefs2, mrefs2, \
@@ -264,3 +278,8 @@ INIT_XMM ssse3
BWDIF
INIT_XMM sse2
BWDIF
+
+%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
+INIT_YMM avx2
+BWDIF
+%endif
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index ba7bc40c3d..f833318c10 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2,
int mrefs2, int prefs3, int mrefs3, int prefs4,
int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2,
+ int mrefs2, int prefs3, int mrefs3, int prefs4,
+ int mrefs4, int parity, int clip_max);
void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2,
@@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
int w, int prefs, int mrefs, int prefs2,
int mrefs2, int prefs3, int mrefs3, int prefs4,
int mrefs4, int parity, int clip_max);
+void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur, void *next,
+ int w, int prefs, int mrefs, int prefs2,
+ int mrefs2, int prefs3, int mrefs3, int prefs4,
+ int mrefs4, int parity, int clip_max);
av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
{
@@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
bwdif->filter_line = ff_bwdif_filter_line_sse2;
if (EXTERNAL_SSSE3(cpu_flags))
bwdif->filter_line = ff_bwdif_filter_line_ssse3;
+ if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+ bwdif->filter_line = ff_bwdif_filter_line_avx2;
} else if (bit_depth <= 12) {
if (EXTERNAL_SSE2(cpu_flags))
bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
if (EXTERNAL_SSSE3(cpu_flags))
bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
+ if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
+ bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
}
}
--
2.39.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
@ 2023-03-11 16:14 ` Thomas Mundt
2023-03-13 11:08 ` James Darnley
0 siblings, 1 reply; 8+ messages in thread
From: Thomas Mundt @ 2023-03-11 16:14 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Hi James,
Am Mo., 20. Feb. 2023 um 20:59 Uhr schrieb James Darnley <jdarnley@obe.tv>:
> 2.24x faster (1925±1.3 vs. 859±2.2 decicycles) compared with ssse3
> ---
> libavfilter/x86/vf_bwdif.asm | 29 ++++++++++++++++++++++++-----
> libavfilter/x86/vf_bwdif_init.c | 12 ++++++++++++
> 2 files changed, 36 insertions(+), 5 deletions(-)
>
> diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
> index 0b453da53b..5cc61435fd 100644
> --- a/libavfilter/x86/vf_bwdif.asm
> +++ b/libavfilter/x86/vf_bwdif.asm
> @@ -26,18 +26,22 @@
>
> %include "libavutil/x86/x86util.asm"
>
> -SECTION_RODATA
> +SECTION_RODATA 32
>
> -pw_coefhf: times 4 dw 1016, 5570
> -pw_coefhf1: times 8 dw -3801
> -pw_coefsp: times 4 dw 5077, -981
> -pw_splfdif: times 4 dw -768, 768
> +pw_coefhf: times 8 dw 1016, 5570
> +pw_coefhf1: times 16 dw -3801
> +pw_coefsp: times 8 dw 5077, -981
> +pw_splfdif: times 8 dw -768, 768
>
> SECTION .text
>
> %macro LOAD8 2
> + %if mmsize == 32
> + pmovzxbw %1, %2
> + %else
> movh %1, %2
> punpcklbw %1, m7
> + %endif
> %endmacro
>
> %macro LOAD12 2
> @@ -45,8 +49,14 @@ SECTION .text
> %endmacro
>
> %macro DISP8 0
> + %if mmsize == 32
> + vextracti128 xm1, m2, 1
> + packuswb xm2, xm1
> + movu [dstq], xm2
> + %else
> packuswb m2, m2
> movh [dstq], m2
> + %endif
> %endmacro
>
> %macro DISP12 0
> @@ -244,8 +254,12 @@ cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst,
> prev, cur, next, w, \
> prefs, mrefs, prefs2,
> mrefs2, \
> prefs3, mrefs3, prefs4, \
> mrefs4, parity, clip_max
> + %if mmsize == 32
> + vpbroadcastd m12, DWORD clip_maxm
>
I get a green pattern at bit depths > 8.
Looks good with:
vpbroadcastw m12, WORD clip_maxm
+ %else
> movd m12, DWORD clip_maxm
> SPLATW m12, m12, 0
> + %endif
> %else
> cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \
> prefs, mrefs, prefs2,
> mrefs2, \
> @@ -264,3 +278,8 @@ INIT_XMM ssse3
> BWDIF
> INIT_XMM sse2
> BWDIF
> +
> +%if HAVE_AVX2_EXTERNAL && ARCH_X86_64
> +INIT_YMM avx2
> +BWDIF
> +%endif
> diff --git a/libavfilter/x86/vf_bwdif_init.c
> b/libavfilter/x86/vf_bwdif_init.c
> index ba7bc40c3d..f833318c10 100644
> --- a/libavfilter/x86/vf_bwdif_init.c
> +++ b/libavfilter/x86/vf_bwdif_init.c
> @@ -32,6 +32,10 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev,
> void *cur, void *next,
> int w, int prefs, int mrefs, int prefs2,
> int mrefs2, int prefs3, int mrefs3, int
> prefs4,
> int mrefs4, int parity, int clip_max);
> +void ff_bwdif_filter_line_avx2(void *dst, void *prev, void *cur, void
> *next,
> + int w, int prefs, int mrefs, int prefs2,
> + int mrefs2, int prefs3, int mrefs3, int
> prefs4,
> + int mrefs4, int parity, int clip_max);
>
> void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur,
> void *next,
> int w, int prefs, int mrefs, int
> prefs2,
> @@ -41,6 +45,10 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void
> *prev, void *cur, void *ne
> int w, int prefs, int mrefs, int
> prefs2,
> int mrefs2, int prefs3, int mrefs3,
> int prefs4,
> int mrefs4, int parity, int
> clip_max);
> +void ff_bwdif_filter_line_12bit_avx2(void *dst, void *prev, void *cur,
> void *next,
> + int w, int prefs, int mrefs, int
> prefs2,
> + int mrefs2, int prefs3, int mrefs3,
> int prefs4,
> + int mrefs4, int parity, int
> clip_max);
>
> av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
> {
> @@ -51,10 +59,14 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif,
> int bit_depth)
> bwdif->filter_line = ff_bwdif_filter_line_sse2;
> if (EXTERNAL_SSSE3(cpu_flags))
> bwdif->filter_line = ff_bwdif_filter_line_ssse3;
> + if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
> + bwdif->filter_line = ff_bwdif_filter_line_avx2;
> } else if (bit_depth <= 12) {
> if (EXTERNAL_SSE2(cpu_flags))
> bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
> if (EXTERNAL_SSSE3(cpu_flags))
> bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
> + if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
> + bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
> }
> }
> --
> 2.39.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function for bwdif
2023-03-11 16:14 ` Thomas Mundt
@ 2023-03-13 11:08 ` James Darnley
0 siblings, 0 replies; 8+ messages in thread
From: James Darnley @ 2023-03-13 11:08 UTC (permalink / raw)
To: ffmpeg-devel
On 3/11/23 17:14, Thomas Mundt wrote:
>> + %if mmsize == 32
>> + vpbroadcastd m12, DWORD clip_maxm
>>
>
> I get a green pattern at bit depths > 8.
> Looks good with:
> vpbroadcastw m12, WORD clip_maxm
>
> + %else
>> movd m12, DWORD clip_maxm
>> SPLATW m12, m12, 0
>> + %endif
Of course it should be a word broadcast!
But why doesn't my checkasm test catch it?
>> bwdif->filter_line = ff_bwdif_filter_line_sse2;
>> if (EXTERNAL_SSSE3(cpu_flags))
>> bwdif->filter_line = ff_bwdif_filter_line_ssse3;
>> + if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
>> + bwdif->filter_line = ff_bwdif_filter_line_avx2;
>> } else if (bit_depth <= 12) {
>> if (EXTERNAL_SSE2(cpu_flags))
>> bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
>> if (EXTERNAL_SSSE3(cpu_flags))
>> bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3;
>> + if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags))
>> + bwdif->filter_line = ff_bwdif_filter_line_12bit_avx2;
>> }
>> }
I was intending to only modify/write the 8-bit function so this is a
mistake.
Thanks. I'll be back with a version 2.
[re-sending to list]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function
2023-02-20 19:57 [FFmpeg-devel] [PATCH 1/3] avfilter: move bwdif's filter_line init into a dedicated function James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 2/3] checkasm: add test for bwdif James Darnley
2023-02-20 19:57 ` [FFmpeg-devel] [PATCH 3/3] avfilter: add avx2 filter_line function " James Darnley
@ 2023-02-24 1:19 ` Thomas Mundt
2 siblings, 0 replies; 8+ messages in thread
From: Thomas Mundt @ 2023-02-24 1:19 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Hi James,
James Darnley <jdarnley@obe.tv> schrieb am Mo., 20. Feb. 2023, 13:59:
> ---
> libavfilter/bwdif.h | 3 ++-
> libavfilter/vf_bwdif.c | 13 +++++++++----
> libavfilter/x86/vf_bwdif_init.c | 4 +---
> 3 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
> index 889ff772ed..5749345f78 100644
> --- a/libavfilter/bwdif.h
> +++ b/libavfilter/bwdif.h
> @@ -37,6 +37,7 @@ typedef struct BWDIFContext {
> int parity, int clip_max, int spat);
> } BWDIFContext;
>
> -void ff_bwdif_init_x86(BWDIFContext *bwdif);
> +void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
> +void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
>
> #endif /* AVFILTER_BWDIF_H */
> diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
> index 65c617ebb3..34e8c5e234 100644
> --- a/libavfilter/vf_bwdif.c
> +++ b/libavfilter/vf_bwdif.c
> @@ -340,7 +340,14 @@ static int config_props(AVFilterLink *link)
>
> yadif->csp = av_pix_fmt_desc_get(link->format);
> yadif->filter = filter;
> - if (yadif->csp->comp[0].depth > 8) {
> + ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
> +
> + return 0;
> +}
> +
> +av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
> +{
> + if (bit_depth > 8) {
> s->filter_intra = filter_intra_16bit;
> s->filter_line = filter_line_c_16bit;
> s->filter_edge = filter_edge_16bit;
> @@ -351,10 +358,8 @@ static int config_props(AVFilterLink *link)
> }
>
> #if ARCH_X86
> - ff_bwdif_init_x86(s);
> + ff_bwdif_init_x86(s, bit_depth);
> #endif
> -
> - return 0;
> }
>
>
> diff --git a/libavfilter/x86/vf_bwdif_init.c
> b/libavfilter/x86/vf_bwdif_init.c
> index e24e5cd9b1..ba7bc40c3d 100644
> --- a/libavfilter/x86/vf_bwdif_init.c
> +++ b/libavfilter/x86/vf_bwdif_init.c
> @@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void
> *prev, void *cur, void *ne
> int mrefs2, int prefs3, int mrefs3,
> int prefs4,
> int mrefs4, int parity, int
> clip_max);
>
> -av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
> +av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
> {
> - YADIFContext *yadif = &bwdif->yadif;
> int cpu_flags = av_get_cpu_flags();
> - int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
>
> if (bit_depth <= 8) {
> if (EXTERNAL_SSE2(cpu_flags))
> --
> 2.39.1
>
I'm travelling at the moment and can only look at your patch set on my
phone. At first glance it looks good. If you are not in a hurry, I would
test it in two to three weeks when I am back home and have time.
Best Regards,
Thomas
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread