From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.ffmpeg.org (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id CDEB64B6AA for ; Wed, 21 May 2025 12:51:59 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 9D1B968D64D; Wed, 21 May 2025 15:48:55 +0300 (EEST) Received: from haasn.dev (haasn.dev [78.46.187.166]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 4BD4468D2E2 for ; Wed, 21 May 2025 15:48:32 +0300 (EEST) Received: from haasn.dev (unknown [10.30.1.1]) by haasn.dev (Postfix) with UTF8SMTP id ED55E436C8; Wed, 21 May 2025 14:48:27 +0200 (CEST) From: Niklas Haas To: ffmpeg-devel@ffmpeg.org Date: Wed, 21 May 2025 14:43:56 +0200 Message-ID: <20250521124824.49657-11-ffmpeg@haasn.xyz> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250521124824.49657-1-ffmpeg@haasn.xyz> References: <20250521124824.49657-1-ffmpeg@haasn.xyz> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 10/17] swscale/optimizer: add packed shuffle solver X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Niklas Haas Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: From: Niklas Haas This can turn any compatible sequence of operations into a single packed shuffle, including packed swizzling, grayscale->RGB conversion, endianness swapping, RGB bit depth conversions, rgb24->rgb0 alpha clearing and more. --- libswscale/ops_internal.h | 17 +++++++ libswscale/ops_optimizer.c | 96 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/libswscale/ops_internal.h b/libswscale/ops_internal.h index 9fd866430b..ab957b0837 100644 --- a/libswscale/ops_internal.h +++ b/libswscale/ops_internal.h @@ -105,4 +105,21 @@ int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, */ int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out); +/** + * "Solve" an op list into a fixed shuffle mask, with an optional ability to + * also directly clear the output value (for e.g. rgb24 -> rgb0). + * + * @param ops The operation list to decompose. + * @param shuffle The output shuffle mask. + * @param size The size (in bytes) of the output shuffle mask. + * @param clear_val If nonzero, this index will be used to clear the output. + * @param read_bytes Returns the number of bytes read per shuffle iteration. + * @param write_bytes Returns the number of bytes written per shuffle iteration. + * + * @return The number of pixels processed per iteration, or a negative error + code; in particular AVERROR(ENOTSUP) for unsupported operations. + */ +int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size, + uint8_t clear_val, int *read_bytes, int *write_bytes); + #endif diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c index d503bf7bf3..9cde60ed58 100644 --- a/libswscale/ops_optimizer.c +++ b/libswscale/ops_optimizer.c @@ -19,9 +19,11 @@ */ #include "libavutil/avassert.h" +#include #include "libavutil/rational.h" #include "ops.h" +#include "ops_internal.h" #define Q(N) ((AVRational) { N, 1 }) @@ -781,3 +783,97 @@ retry: return 0; } + +int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], + int shuffle_size, uint8_t clear_val, + int *out_read_bytes, int *out_write_bytes) +{ + const SwsOp read = ops->ops[0]; + const int read_size = ff_sws_pixel_type_size(read.type); + uint32_t mask[4] = {0}; + + if (!ops->num_ops || read.op != SWS_OP_READ) + return AVERROR(EINVAL); + if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1)) + return AVERROR(ENOTSUP); + + for (int i = 0; i < read.rw.elems; i++) + mask[i] = 0x01010101 * i * read_size + 0x03020100; + + for (int opidx = 1; opidx < ops->num_ops; opidx++) { + const SwsOp *op = &ops->ops[opidx]; + switch (op->op) { + case SWS_OP_SWIZZLE: { + uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; + for (int i = 0; i < 4; i++) + mask[i] = orig[op->swizzle.in[i]]; + break; + } + + case SWS_OP_SWAP_BYTES: + for (int i = 0; i < 4; i++) { + switch (ff_sws_pixel_type_size(op->type)) { + case 2: mask[i] = av_bswap16(mask[i]); break; + case 4: mask[i] = av_bswap32(mask[i]); break; + } + } + break; + + case SWS_OP_CLEAR: + for (int i = 0; i < 4; i++) { + if (!op->c.q4[i].den) + continue; + if (op->c.q4[i].num != 0 || !clear_val) + return AVERROR(ENOTSUP); + mask[i] = 0x1010101ul * clear_val; + } + break; + + case SWS_OP_CONVERT: { + if (!op->convert.expand) + return AVERROR(ENOTSUP); + for (int i = 0; i < 4; i++) { + switch (ff_sws_pixel_type_size(op->type)) { + case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; + case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; + } + } + break; + } + + case SWS_OP_WRITE: { + if (op->rw.frac || !op->rw.packed) + return AVERROR(ENOTSUP); + + /* Initialize to no-op */ + memset(shuffle, clear_val, shuffle_size); + + const int write_size = ff_sws_pixel_type_size(op->type); + const int read_chunk = read.rw.elems * read_size; + const int write_chunk = op->rw.elems * write_size; + const int num_groups = shuffle_size / FFMAX(read_chunk, write_chunk); + for (int n = 0; n < num_groups; n++) { + const int base_in = n * read_chunk; + const int base_out = n * write_chunk; + for (int i = 0; i < op->rw.elems; i++) { + const int offset = base_out + i * write_size; + for (int b = 0; b < write_size; b++) { + const uint8_t idx = mask[i] >> (b * 8); + if (idx != clear_val) + shuffle[offset + b] = base_in + idx; + } + } + } + + *out_read_bytes = num_groups * read_chunk; + *out_write_bytes = num_groups * write_chunk; + return num_groups; + } + + default: + return AVERROR(ENOTSUP); + } + } + + return AVERROR(EINVAL); +} -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".