Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Niklas Haas <ffmpeg@haasn.xyz>
To: ffmpeg-devel@ffmpeg.org
Cc: Niklas Haas <git@haasn.dev>
Subject: [FFmpeg-devel] [PATCH v6 11/18] swscale/optimizer: add packed shuffle solver
Date: Wed, 11 Jun 2025 14:47:17 +0200
Message-ID: <20250611124805.73196-12-ffmpeg@haasn.xyz> (raw)
In-Reply-To: <20250611124805.73196-1-ffmpeg@haasn.xyz>

From: Niklas Haas <git@haasn.dev>

This can turn any compatible sequence of operations into a single packed
shuffle, including packed swizzling, grayscale->RGB conversion, endianness
swapping, RGB bit depth conversions, rgb24->rgb0 alpha clearing and more.
---
 libswscale/ops_internal.h  | 28 +++++++++++
 libswscale/ops_optimizer.c | 96 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+)

diff --git a/libswscale/ops_internal.h b/libswscale/ops_internal.h
index 2fbd8a55d0..e7b6fb1c4c 100644
--- a/libswscale/ops_internal.h
+++ b/libswscale/ops_internal.h
@@ -109,4 +109,32 @@ int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
  */
 int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out);
 
+/**
+ * "Solve" an op list into a fixed shuffle mask, with an optional ability to
+ * also directly clear the output value (for e.g. rgb24 -> rgb0). This can
+ * accept any operation chain that only consists of the following operations:
+ *
+ * - SWS_OP_READ (non-planar, non-fractional)
+ * - SWS_OP_SWIZZLE
+ * - SWS_OP_SWAP_BYTES
+ * - SWS_OP_CLEAR to zero (when clear_val is specified)
+ * - SWS_OP_CONVERT (integer expand)
+ * - SWS_OP_WRITE (non-planar, non-fractional)
+ *
+ * Basically, any operation that consists purely of moving around and
+ * reordering bytes within a single plane can be turned into a shuffle mask.
+ *
+ * @param ops         The operation list to decompose.
+ * @param shuffle     The output shuffle mask.
+ * @param size        The size (in bytes) of the output shuffle mask.
+ * @param clear_val   If nonzero, this index will be used to clear the output.
+ * @param read_bytes  Returns the number of bytes read per shuffle iteration.
+ * @param write_bytes Returns the number of bytes written per shuffle iteration.
+ *
+ * @return  The number of pixels processed per iteration, or a negative error
+ *          code; in particular AVERROR(ENOTSUP) for unsupported operations.
+ */
+int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size,
+                         uint8_t clear_val, int *read_bytes, int *write_bytes);
+
 #endif
diff --git a/libswscale/ops_optimizer.c b/libswscale/ops_optimizer.c
index 80e75ed34b..23c9aae76d 100644
--- a/libswscale/ops_optimizer.c
+++ b/libswscale/ops_optimizer.c
@@ -19,9 +19,11 @@
  */
 
 #include "libavutil/avassert.h"
+#include <libavutil/bswap.h>
 #include "libavutil/rational.h"
 
 #include "ops.h"
+#include "ops_internal.h"
 
 #define Q(N) ((AVRational) { N, 1 })
 
@@ -780,3 +782,97 @@ retry:
 
     return 0;
 }
+
+int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
+                         int shuffle_size, uint8_t clear_val,
+                         int *out_read_bytes, int *out_write_bytes)
+{
+    const SwsOp read = ops->ops[0]; /* NOTE(review): fetched before the num_ops check below; confirm ops[0] is readable for empty lists */
+    const int read_size = ff_sws_pixel_type_size(read.type);
+    uint32_t mask[4] = {0}; /* byte b of mask[i]: input byte offset feeding output byte b of component i */
+
+    if (!ops->num_ops || read.op != SWS_OP_READ)
+        return AVERROR(EINVAL);
+    if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
+        return AVERROR(ENOTSUP);
+
+    for (int i = 0; i < read.rw.elems; i++)
+        mask[i] = 0x01010101 * i * read_size + 0x03020100; /* identity: bytes i * read_size + {0, 1, 2, 3} */
+
+    for (int opidx = 1; opidx < ops->num_ops; opidx++) { /* ops[0] is the read handled above */
+        const SwsOp *op = &ops->ops[opidx];
+        switch (op->op) {
+        case SWS_OP_SWIZZLE: {
+            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; /* snapshot, so the permutation reads unmodified values */
+            for (int i = 0; i < 4; i++)
+                mask[i] = orig[op->swizzle.in[i]];
+            break;
+        }
+
+        case SWS_OP_SWAP_BYTES:
+            for (int i = 0; i < 4; i++) {
+                switch (ff_sws_pixel_type_size(op->type)) { /* any size other than 2 or 4 leaves mask[i] unchanged */
+                case 2: mask[i] = av_bswap16(mask[i]); break;
+                case 4: mask[i] = av_bswap32(mask[i]); break;
+                }
+            }
+            break;
+
+        case SWS_OP_CLEAR:
+            for (int i = 0; i < 4; i++) {
+                if (!op->c.q4[i].den) /* components with a zero denominator are left untouched */
+                    continue;
+                if (op->c.q4[i].num != 0 || !clear_val) /* only clearing to zero, and only with a nonzero clear_val */
+                    return AVERROR(ENOTSUP);
+                mask[i] = 0x1010101ul * clear_val; /* all four index bytes become clear_val */
+            }
+            break;
+
+        case SWS_OP_CONVERT: {
+            if (!op->convert.expand)
+                return AVERROR(ENOTSUP);
+            for (int i = 0; i < 4; i++) {
+                switch (ff_sws_pixel_type_size(op->type)) { /* any size other than 1 or 2 leaves mask[i] unchanged */
+                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break; /* replicate the low index byte into all four bytes */
+                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; /* replicate the low 16 bits into both halves */
+                }
+            }
+            break;
+        }
+
+        case SWS_OP_WRITE: { /* terminal op: emits the shuffle mask and returns */
+            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
+                return AVERROR(ENOTSUP);
+
+            /* Pre-fill every entry with clear_val; only sourced bytes are overwritten below */
+            memset(shuffle, clear_val, shuffle_size);
+
+            const int write_size  = ff_sws_pixel_type_size(op->type);
+            const int read_chunk  = read.rw.elems * read_size; /* input bytes consumed per group */
+            const int write_chunk = op->rw.elems * write_size; /* output bytes produced per group */
+            const int num_groups  = shuffle_size / FFMAX(read_chunk, write_chunk); /* whole groups that fit on both sides */
+            for (int n = 0; n < num_groups; n++) {
+                const int base_in  = n * read_chunk;
+                const int base_out = n * write_chunk;
+                for (int i = 0; i < op->rw.elems; i++) {
+                    const int offset = base_out + i * write_size;
+                    for (int b = 0; b < write_size; b++) {
+                        const uint8_t idx = mask[i] >> (b * 8); /* index byte b of component i */
+                        if (idx != clear_val) /* NOTE(review): with clear_val == 0 this also skips a genuine index 0 - confirm intended */
+                            shuffle[offset + b] = base_in + idx;
+                    }
+                }
+            }
+
+            *out_read_bytes  = num_groups * read_chunk;
+            *out_write_bytes = num_groups * write_chunk;
+            return num_groups;
+        }
+
+        default:
+            return AVERROR(ENOTSUP);
+        }
+    }
+
+    return AVERROR(EINVAL); /* op list ended without reaching SWS_OP_WRITE */
+}
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-06-11 12:49 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-11 12:47 [FFmpeg-devel] [PATCH v6 00/18] swscale: new ops framework Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 01/18] swscale/graph: pass per-pass image pointers to setup() Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 02/18] swscale/format: rename legacy format conversion table Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 03/18] swscale/format: add ff_fmt_clear() Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 04/18] tests/checkasm: increase number of runs in between measurements Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 05/18] tests/checkasm: generalize DEF_CHECKASM_CHECK_FUNC to floats Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 06/18] swscale: add SWS_UNSTABLE flag Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 07/18] swscale/ops: introduce new low level framework Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 08/18] swscale/optimizer: add high-level ops optimizer Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 09/18] swscale/ops_internal: add internal ops backend API Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 10/18] swscale/ops: add dispatch layer Niklas Haas
2025-06-11 12:47 ` Niklas Haas [this message]
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 12/18] swscale/ops_chain: add internal abstraction for kernel linking Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 13/18] swscale/ops_backend: add reference backend basend on C templates Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 14/18] swscale/ops_memcpy: add 'memcpy' backend for plane->plane copies Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 15/18] swscale/x86: add SIMD backend Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 16/18] tests/checkasm: add checkasm tests for swscale ops Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 17/18] swscale/format: add new format decode/encode logic Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 18/18] swscale/graph: allow experimental use of new format handler Niklas Haas
2025-06-11 17:59 ` [FFmpeg-devel] [PATCH v6 00/18] swscale: new ops framework Niklas Haas
2025-06-16 12:31 ` Niklas Haas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250611124805.73196-12-ffmpeg@haasn.xyz \
    --to=ffmpeg@haasn.xyz \
    --cc=ffmpeg-devel@ffmpeg.org \
    --cc=git@haasn.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git