From: Niklas Haas <ffmpeg@haasn.xyz> To: ffmpeg-devel@ffmpeg.org Cc: Niklas Haas <git@haasn.dev> Subject: [FFmpeg-devel] [PATCH v2 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Date: Wed, 16 Jul 2025 18:24:57 +0200 Message-ID: <20250716162536.394049-4-ffmpeg@haasn.xyz> (raw) In-Reply-To: <20250716162536.394049-1-ffmpeg@haasn.xyz> From: Niklas Haas <git@haasn.dev> alphadetect8_full_c: 5658.2 ( 1.00x) alphadetect8_full_avx2: 215.1 (26.31x) alphadetect8_full_avx512: 133.5 (42.40x) alphadetect8_limited_c: 7391.5 ( 1.00x) alphadetect8_limited_avx2: 649.3 (11.38x) alphadetect8_limited_avx512: 330.5 (22.36x) alphadetect16_full_c: 3027.4 ( 1.00x) alphadetect16_full_avx2: 209.4 (14.46x) alphadetect16_full_avx512: 141.4 (21.41x) alphadetect16_limited_c: 3880.9 ( 1.00x) alphadetect16_limited_avx2: 734.9 ( 5.28x) alphadetect16_limited_avx512: 349.2 (11.11x) rangedetect8_c: 5854.2 ( 1.00x) rangedetect8_avx2: 138.9 (42.15x) rangedetect8_avx512: 106.2 (55.12x) rangedetect16_c: 4122.0 ( 1.00x) rangedetect16_avx2: 138.6 (29.74x) rangedetect16_avx512: 104.1 (39.60x) --- libavfilter/vf_colordetect.c | 4 + libavfilter/vf_colordetect.h | 3 + libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_colordetect.asm | 154 ++++++++++++++++++++++++++ libavfilter/x86/vf_colordetect_init.c | 105 ++++++++++++++++++ 5 files changed, 268 insertions(+) create mode 100644 libavfilter/x86/vf_colordetect.asm create mode 100644 libavfilter/x86/vf_colordetect_init.c diff --git a/libavfilter/vf_colordetect.c b/libavfilter/vf_colordetect.c index 0fb892634f..642bec62c6 100644 --- a/libavfilter/vf_colordetect.c +++ b/libavfilter/vf_colordetect.c @@ -219,6 +219,10 @@ static av_cold void uninit(AVFilterContext *ctx) av_cold void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth, enum AVColorRange color_range) { +#if ARCH_X86 + ff_color_detect_dsp_init_x86(dsp, depth, color_range); +#endif + if (!dsp->detect_range) dsp->detect_range = depth > 8 ? 
ff_detect_range16_c : ff_detect_range_c; if (!dsp->detect_alpha) { diff --git a/libavfilter/vf_colordetect.h b/libavfilter/vf_colordetect.h index 8998ed83d4..78d296433d 100644 --- a/libavfilter/vf_colordetect.h +++ b/libavfilter/vf_colordetect.h @@ -41,6 +41,9 @@ typedef struct FFColorDetectDSPContext { void ff_color_detect_dsp_init(FFColorDetectDSPContext *dsp, int depth, enum AVColorRange color_range); +void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int depth, + enum AVColorRange color_range); + static inline int ff_detect_range_c(const uint8_t *data, ptrdiff_t stride, ptrdiff_t width, ptrdiff_t height, int mpeg_min, int mpeg_max) diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index 0efe3f8d2c..3a7f761ad4 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -5,6 +5,7 @@ OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o +OBJS-$(CONFIG_COLORDETECT_FILTER) += x86/vf_colordetect_init.o OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o @@ -51,6 +52,7 @@ X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o +X86ASM-OBJS-$(CONFIG_COLORDETECT_FILTER) += x86/vf_colordetect.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o diff --git a/libavfilter/x86/vf_colordetect.asm b/libavfilter/x86/vf_colordetect.asm new file mode 100644 index 0000000000..38ffbff757 --- /dev/null +++ b/libavfilter/x86/vf_colordetect.asm @@ -0,0 +1,154 @@ 
+;***************************************************************************** +;* x86-optimized functions for colordetect filter +;* +;* Copyright (C) 2025 Niklas Haas +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;***************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +%macro detect_range_fn 1 ; suffix +cglobal detect_range%1, 6, 7, 5, data, stride, width, height, mpeg_min, mpeg_max, x + movd xm0, mpeg_mind + movd xm1, mpeg_maxd + vpbroadcast%1 m0, xm0 + vpbroadcast%1 m1, xm1 + add dataq, widthq + neg widthq +.lineloop: + mova m2, m0 + mova m3, m1 + mov xq, widthq + .loop: + movu m4, [dataq + xq] + pminu%1 m2, m4 + pmaxu%1 m3, m4 + add xq, mmsize + jl .loop + + ; test if the data is out of range + pxor m2, m0 +%if cpuflag(avx512) + vpternlogq m2, m3, m1, 0xF6 ; m2 |= m3 ^ m1 + vptestmq k1, m2, m2 + kortestb k1, k1 +%else + pxor m3, m1 + por m2, m3 + ptest m2, m2 +%endif + jnz .end + add dataq, strideq + dec heightq + jg .lineloop +.end: + setnz al + movzx rax, al + RET +%endmacro + +%macro detect_alpha_fn 3 ; suffix, hsuffix, range +cglobal detect_alpha%1_%3, 6, 7, 6, color, color_stride, alpha, alpha_stride, width, height, x + pxor m0, m0 + add colorq, widthq + add
alphaq, widthq + neg widthq +%ifidn %3, limited +%if ARCH_X86_64 + movq xm3, r6mp ; p + movq xm4, r7mp ; q + movq xm5, r8mp ; k +%else + movd xm3, r6mp ; p + movd xm4, r7mp ; q + movd xm5, r8mp ; k +%endif + vpbroadcast%2 m3, xm3 + vpbroadcast%2 m4, xm4 + vpbroadcast%2 m5, xm5 +%endif +.lineloop: + mov xq, widthq + .loop: + %ifidn %3, full + movu m1, [colorq + xq] + movu m2, [alphaq + xq] + pmaxu%1 m1, m2 + %else + pmovzx%1%2 m1, [colorq + xq] + pmovzx%1%2 m2, [alphaq + xq] + pmull%2 m1, m3 + pmull%2 m2, m4 + %ifidn %1, b + psubusw m1, m5 + %else + pmaxu%2 m1, m5 + psub%2 m1, m5 + %endif + pmaxu%2 m1, m2 + %endif + %if cpuflag(avx512) + vpternlogq m0, m1, m2, 0xF6 ; m0 |= m1 ^ m2 + %else + pxor m1, m2 + por m0, m1 + %endif + %ifidn %3, full + add xq, mmsize + %else + add xq, mmsize >> 1 + %endif + jl .loop + +%if cpuflag(avx512) + vptestmq k1, m0, m0 + kortestb k1, k1 +%else + ptest m0, m0 +%endif + jnz .found + + add colorq, color_strideq + add alphaq, alpha_strideq + dec heightq + jg .lineloop + xor rax, rax + RET + +.found: + mov rax, 1 + RET +%endmacro + +INIT_YMM avx2 +detect_range_fn b +detect_range_fn w +detect_alpha_fn b, w, full +detect_alpha_fn w, d, full +detect_alpha_fn b, w, limited +detect_alpha_fn w, d, limited + +INIT_ZMM avx512 +detect_range_fn b +detect_range_fn w +detect_alpha_fn b, w, full +detect_alpha_fn w, d, full +detect_alpha_fn b, w, limited +detect_alpha_fn w, d, limited diff --git a/libavfilter/x86/vf_colordetect_init.c b/libavfilter/x86/vf_colordetect_init.c new file mode 100644 index 0000000000..62a7e87388 --- /dev/null +++ b/libavfilter/x86/vf_colordetect_init.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2025 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/vf_colordetect.h" + +#define DETECT_RANGE_FUNC(FUNC_NAME, ASM_FUNC_NAME, C_FUNC_NAME, SHIFT, MMSIZE) \ +int ASM_FUNC_NAME(const uint8_t *src, ptrdiff_t stride, \ + ptrdiff_t width, ptrdiff_t height, int min, int max); \ + \ +static int FUNC_NAME(const uint8_t *src, ptrdiff_t stride, \ + ptrdiff_t width, ptrdiff_t height, int min, int max) \ +{ \ + ptrdiff_t bytes = (width << SHIFT) & ~(MMSIZE - 1); \ + int ret = ASM_FUNC_NAME(src, stride, bytes, height, min, max); \ + if (ret) \ + return ret; \ + \ + return C_FUNC_NAME(src + bytes, stride, width - (bytes >> SHIFT), \ + height, min, max); \ +} + +#define DETECT_ALPHA_FUNC(FUNC_NAME, ASM_FUNC_NAME, C_FUNC_NAME, SHIFT, MMSIZE) \ +int ASM_FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride, \ + const uint8_t *alpha, ptrdiff_t alpha_stride, \ + ptrdiff_t width, ptrdiff_t height, int p, int q, int k); \ + \ +static int FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride, \ + const uint8_t *alpha, ptrdiff_t alpha_stride, \ + ptrdiff_t width, ptrdiff_t height, int p, int q, int k) \ +{ \ + ptrdiff_t bytes = (width << SHIFT) & ~(MMSIZE - 1); \ + int ret = ASM_FUNC_NAME(color, color_stride, alpha, alpha_stride, \ + bytes, height, p, q, k); \ + if (ret) \ + return ret; \ + \ + return C_FUNC_NAME(color + bytes, color_stride, alpha + bytes, alpha_stride,\ + width - (bytes >> SHIFT), height, p, q, k); \ +} + +#if HAVE_X86ASM +#if HAVE_AVX512_EXTERNAL 
+DETECT_RANGE_FUNC(detect_range_avx512, ff_detect_rangeb_avx512, ff_detect_range_c, 0, 64) +DETECT_RANGE_FUNC(detect_range16_avx512, ff_detect_rangew_avx512, ff_detect_range16_c, 1, 64) +DETECT_ALPHA_FUNC(detect_alpha_full_avx512, ff_detect_alphab_full_avx512, ff_detect_alpha_full_c, 0, 64) +DETECT_ALPHA_FUNC(detect_alpha16_full_avx512, ff_detect_alphaw_full_avx512, ff_detect_alpha16_full_c, 1, 64) +DETECT_ALPHA_FUNC(detect_alpha_limited_avx512, ff_detect_alphab_limited_avx512, ff_detect_alpha_limited_c, 0, 64) +DETECT_ALPHA_FUNC(detect_alpha16_limited_avx512, ff_detect_alphaw_limited_avx512, ff_detect_alpha16_limited_c, 1, 64) +#endif +#if HAVE_AVX2_EXTERNAL +DETECT_RANGE_FUNC(detect_range_avx2, ff_detect_rangeb_avx2, ff_detect_range_c, 0, 32) +DETECT_RANGE_FUNC(detect_range16_avx2, ff_detect_rangew_avx2, ff_detect_range16_c, 1, 32) +DETECT_ALPHA_FUNC(detect_alpha_full_avx2, ff_detect_alphab_full_avx2, ff_detect_alpha_full_c, 0, 32) +DETECT_ALPHA_FUNC(detect_alpha16_full_avx2, ff_detect_alphaw_full_avx2, ff_detect_alpha16_full_c, 1, 32) +DETECT_ALPHA_FUNC(detect_alpha_limited_avx2, ff_detect_alphab_limited_avx2, ff_detect_alpha_limited_c, 0, 32) +DETECT_ALPHA_FUNC(detect_alpha16_limited_avx2, ff_detect_alphaw_limited_avx2, ff_detect_alpha16_limited_c, 1, 32) +#endif +#endif + +av_cold void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int depth, + enum AVColorRange color_range) +{ +#if HAVE_X86ASM + int cpu_flags = av_get_cpu_flags(); +#if HAVE_AVX2_EXTERNAL + if (EXTERNAL_AVX2(cpu_flags)) { + dsp->detect_range = depth > 8 ? detect_range16_avx2 : detect_range_avx2; + if (color_range == AVCOL_RANGE_JPEG) { + dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx2 : detect_alpha_full_avx2; + } else { + dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx2 : detect_alpha_limited_avx2; + } + } +#endif +#if HAVE_AVX512_EXTERNAL + if (EXTERNAL_AVX512(cpu_flags)) { + dsp->detect_range = depth > 8 ? 
detect_range16_avx512 : detect_range_avx512; + if (color_range == AVCOL_RANGE_JPEG) { + dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx512 : detect_alpha_full_avx512; + } else { + dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx512 : detect_alpha_limited_avx512; + } + } +#endif +#endif +} -- 2.50.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-07-16 16:26 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-07-16 16:24 [FFmpeg-devel] [PATCH v2 0/3] avfilter: add vf_colordetect filter Niklas Haas 2025-07-16 16:24 ` [FFmpeg-devel] [PATCH v2 1/3] avfilter/vf_colordetect: add new color range detection filter Niklas Haas 2025-07-16 16:24 ` [FFmpeg-devel] [PATCH v2 2/3] tests/checkasm: add check for vf_colordetect Niklas Haas 2025-07-16 22:48 ` James Almer 2025-07-16 16:24 ` Niklas Haas [this message] 2025-07-16 20:06 ` [FFmpeg-devel] [PATCH v2 3/3] avfilter/vf_colordetect: add x86 SIMD implementation Henrik Gramner via ffmpeg-devel 2025-07-17 9:37 ` Niklas Haas 2025-07-16 20:25 ` James Almer 2025-07-16 21:49 ` Niklas Haas 2025-07-16 22:21 ` James Almer 2025-07-17 9:41 ` Niklas Haas
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20250716162536.394049-4-ffmpeg@haasn.xyz \ --to=ffmpeg@haasn.xyz \ --cc=ffmpeg-devel@ffmpeg.org \ --cc=git@haasn.dev \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git