From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.ffmpeg.org (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id D35EE4A243 for ; Thu, 17 Jul 2025 10:45:38 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id BD46A68ED4C; Thu, 17 Jul 2025 13:45:34 +0300 (EEST) Received: from haasn.dev (haasn.dev [78.46.187.166]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id B320D68ED07 for ; Thu, 17 Jul 2025 13:45:27 +0300 (EEST) Received: from haasn.dev (unknown [10.30.1.1]) by haasn.dev (Postfix) with UTF8SMTP id 7EC0640E94; Thu, 17 Jul 2025 12:45:27 +0200 (CEST) From: Niklas Haas To: ffmpeg-devel@ffmpeg.org Date: Thu, 17 Jul 2025 12:45:24 +0200 Message-ID: <20250717104525.1290708-1-ffmpeg@haasn.xyz> X-Mailer: git-send-email 2.50.1 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 1/2] avfilter/vf_blackdetect: add AVX2 SIMD version X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Niklas Haas Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: From: Niklas Haas Requested by a user. Even with autovectorization enabled, the compiler does quite a poor job of optimizing this function, because it cannot take advantage of the pmaxub + pcmpeqb trick for counting the number of pixels less than or equal to a threshold.
blackdetect8_c: 4625.0 ( 1.00x) blackdetect8_avx2: 155.1 (29.83x) blackdetect16_c: 2529.4 ( 1.00x) blackdetect16_avx2: 163.6 (15.46x) --- libavfilter/vf_blackdetect.c | 33 +++--------- libavfilter/vf_blackdetect.h | 71 ++++++++++++++++++++++++++ libavfilter/x86/Makefile | 2 + libavfilter/x86/vf_blackdetect.asm | 73 +++++++++++++++++++++++++++ libavfilter/x86/vf_blackdetect_init.c | 34 +++++++++++++ 5 files changed, 188 insertions(+), 25 deletions(-) create mode 100644 libavfilter/vf_blackdetect.h create mode 100644 libavfilter/x86/vf_blackdetect.asm create mode 100644 libavfilter/x86/vf_blackdetect_init.c diff --git a/libavfilter/vf_blackdetect.c b/libavfilter/vf_blackdetect.c index 8be33a814d..b233bdfd60 100644 --- a/libavfilter/vf_blackdetect.c +++ b/libavfilter/vf_blackdetect.c @@ -33,6 +33,7 @@ #include "filters.h" #include "formats.h" #include "video.h" +#include "vf_blackdetect.h" typedef struct BlackDetectContext { const AVClass *class; @@ -53,6 +54,8 @@ typedef struct BlackDetectContext { int depth; int nb_threads; unsigned int *counter; + + ff_blackdetect_fn func; } BlackDetectContext; #define OFFSET(x) offsetof(BlackDetectContext, x) @@ -133,6 +136,7 @@ static int config_input(AVFilterLink *inlink) s->time_base = inlink->time_base; s->black_min_duration = s->black_min_duration_time / av_q2d(s->time_base); s->counter = av_calloc(s->nb_threads, sizeof(*s->counter)); + s->func = ff_blackdetect_get_fn(depth); if (!s->counter) return AVERROR(ENOMEM); @@ -160,37 +164,16 @@ static int black_counter(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { BlackDetectContext *s = ctx->priv; - const unsigned int threshold = s->pixel_black_th_i; - unsigned int *counterp = &s->counter[jobnr]; - AVFrame *in = arg; + const AVFrame *in = arg; const int plane = s->alpha ? 
3 : 0; const int linesize = in->linesize[plane]; - const int w = in->width; const int h = in->height; const int start = (h * jobnr) / nb_jobs; const int end = (h * (jobnr+1)) / nb_jobs; - const int size = end - start; - unsigned int counter = 0; - - if (s->depth == 8) { - const uint8_t *p = in->data[plane] + start * linesize; - - for (int i = 0; i < size; i++) { - for (int x = 0; x < w; x++) - counter += p[x] <= threshold; - p += linesize; - } - } else { - const uint16_t *p = (const uint16_t *)(in->data[plane] + start * linesize); - - for (int i = 0; i < size; i++) { - for (int x = 0; x < w; x++) - counter += p[x] <= threshold; - p += linesize / 2; - } - } - *counterp = counter; + s->counter[jobnr] = s->func(in->data[plane] + start * linesize, + linesize, in->width, end - start, + s->pixel_black_th_i); return 0; } diff --git a/libavfilter/vf_blackdetect.h b/libavfilter/vf_blackdetect.h new file mode 100644 index 0000000000..361da2c5bc --- /dev/null +++ b/libavfilter/vf_blackdetect.h @@ -0,0 +1,71 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VF_BLACKDETECT_H +#define AVFILTER_VF_BLACKDETECT_H + +#include <stddef.h> +#include <stdint.h> + +typedef unsigned (*ff_blackdetect_fn)(const uint8_t *src, ptrdiff_t stride, + ptrdiff_t width, ptrdiff_t height, + unsigned threshold); + +ff_blackdetect_fn ff_blackdetect_get_fn_x86(int depth); + +static unsigned count_pixels8_c(const uint8_t *src, ptrdiff_t stride, + ptrdiff_t width, ptrdiff_t height, + unsigned threshold) +{ + unsigned int counter = 0; + while (height--) { + for (int x = 0; x < width; x++) + counter += src[x] <= threshold; + src += stride; + } + return counter; +} + +static unsigned count_pixels16_c(const uint8_t *src, ptrdiff_t stride, + ptrdiff_t width, ptrdiff_t height, + unsigned threshold) +{ + unsigned int counter = 0; + while (height--) { + const uint16_t *src16 = (const uint16_t *) src; + for (int x = 0; x < width; x++) + counter += src16[x] <= threshold; + src += stride; + } + return counter; +} + + +static inline ff_blackdetect_fn ff_blackdetect_get_fn(int depth) +{ + ff_blackdetect_fn fn = NULL; +#if ARCH_X86 + fn = ff_blackdetect_get_fn_x86(depth); +#endif + + if (!fn) + fn = depth == 8 ? 
count_pixels8_c : count_pixels16_c; + return fn; +} + +#endif /* AVFILTER_VF_BLACKDETECT_H */ diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index 0efe3f8d2c..86f7119a7b 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -3,6 +3,7 @@ OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o +OBJS-$(CONFIG_BLACKDETECT_FILTER) += x86/vf_blackdetect_init.o OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o @@ -49,6 +50,7 @@ X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o +X86ASM-OBJS-$(CONFIG_BLACKDETECT_FILTER) += x86/vf_blackdetect.o X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o diff --git a/libavfilter/x86/vf_blackdetect.asm b/libavfilter/x86/vf_blackdetect.asm new file mode 100644 index 0000000000..78c24c5adc --- /dev/null +++ b/libavfilter/x86/vf_blackdetect.asm @@ -0,0 +1,73 @@ +;***************************************************************************** +;* x86-optimized functions for blackdetect filter +;* +;* Copyright (C) 2025 Niklas Haas +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. 
+;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;***************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +%macro count_pixels_fn 1 ; depth +cglobal blackdetect_%1, 5, 7, 2, src, stride, width, height, threshold + movd xm1, thresholdd + %if %1 == 8 + vpbroadcastb m1, xm1 + %else + vpbroadcastw m1, xm1 + shl widthq, 1 + %endif + add srcq, widthq + neg widthq + xor r4, r4 + mov r5, widthq + jmp .start +.loop: + popcnt r6d, r6d + add r4, r6 +.start: + movu m0, [srcq + r5] + %if %1 == 8 + pmaxub m0, m1 + pcmpeqb m0, m1 + %else + pmaxuw m0, m1 + pcmpeqw m0, m1 + %endif + pmovmskb r6d, m0 + add r5, mmsize + jl .loop + ; handle tail by shifting away unused high elements + shlx r6d, r6d, r5d + popcnt r6d, r6d + add r4, r6 + add srcq, strideq + mov r5, widthq + dec heightq + jg .start + %if %1 > 8 + shr r4, 1 + %endif + mov rax, r4 + RET +%endmacro + +INIT_YMM avx2 +count_pixels_fn 8 +count_pixels_fn 16 diff --git a/libavfilter/x86/vf_blackdetect_init.c b/libavfilter/x86/vf_blackdetect_init.c new file mode 100644 index 0000000000..3780072589 --- /dev/null +++ b/libavfilter/x86/vf_blackdetect_init.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/x86/cpu.h" +#include "libavfilter/vf_blackdetect.h" + +unsigned ff_blackdetect_8_avx2(const uint8_t *, ptrdiff_t, ptrdiff_t, ptrdiff_t, unsigned); +unsigned ff_blackdetect_16_avx2(const uint8_t *, ptrdiff_t, ptrdiff_t, ptrdiff_t, unsigned); + +av_cold ff_blackdetect_fn ff_blackdetect_get_fn_x86(int depth) +{ + int cpu_flags = av_get_cpu_flags(); + if (EXTERNAL_AVX2(cpu_flags)) + return depth == 8 ? ff_blackdetect_8_avx2 : ff_blackdetect_16_avx2; + return NULL; +} -- 2.50.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".