Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Niklas Haas <ffmpeg@haasn.xyz>
To: ffmpeg-devel@ffmpeg.org
Cc: Niklas Haas <git@haasn.dev>
Subject: [FFmpeg-devel] [PATCH v6 14/18] swscale/ops_memcpy: add 'memcpy' backend for plane->plane copies
Date: Wed, 11 Jun 2025 14:47:20 +0200
Message-ID: <20250611124805.73196-15-ffmpeg@haasn.xyz> (raw)
In-Reply-To: <20250611124805.73196-1-ffmpeg@haasn.xyz>

From: Niklas Haas <git@haasn.dev>

Provides a generic fast path for any operation list that can be decomposed
into a series of memcpy and memset operations.

25% faster than the x86 backend for yuv444p -> yuva444p
33% faster than the x86 backend for gray -> yuvj444p
---
 libswscale/Makefile     |   1 +
 libswscale/ops.c        |   2 +
 libswscale/ops_memcpy.c | 132 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 libswscale/ops_memcpy.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
index d3cb7bc555..0fed799542 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -18,6 +18,7 @@ OBJS = alphablend.o                                     \
        ops.o                                            \
        ops_backend.o                                    \
        ops_chain.o                                      \
+       ops_memcpy.o                                     \
        ops_optimizer.o                                  \
        options.o                                        \
        output.o                                         \
diff --git a/libswscale/ops.c b/libswscale/ops.c
index 7115ccee2c..1576e9411b 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -28,8 +28,10 @@
 #include "ops_internal.h"
 
 extern SwsOpBackend backend_c;
+extern SwsOpBackend backend_murder; /* the "memcpy" backend (ops_memcpy.c) */
 
 const SwsOpBackend * const ff_sws_op_backends[] = {
+    &backend_murder, /* listed first; presumably tried before backend_c */
     &backend_c,
     NULL
 };
diff --git a/libswscale/ops_memcpy.c b/libswscale/ops_memcpy.c
new file mode 100644
index 0000000000..ef4784faa4
--- /dev/null
+++ b/libswscale/ops_memcpy.c
@@ -0,0 +1,132 @@
+/**
+ * Copyright (C) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "ops_backend.h"
+
+typedef struct MemcpyPriv { /* per-plane copy/clear plan built by compile() */
+    int num_planes; /* number of output planes written by process() */
+    int index[4]; /* input plane to copy from, or -1 to clear plane */
+    uint8_t clear_value[4]; /* memset() byte for planes whose index is -1 */
+} MemcpyPriv;
+
+/* Memcpy backend for trivial cases: every output plane is either filled
+ * with a constant byte or copied verbatim from one input plane */
+static void process(const SwsOpExec *exec, const void *priv,
+                    int x_start, int y_start, int x_end, int y_end)
+{
+    const MemcpyPriv *p = priv; /* plan produced by compile() */
+    const int lines = y_end - y_start; /* rows in this call */
+    av_assert1(x_start == 0 && x_end == exec->width); /* whole rows only */
+    /* Clears and equal-stride copies use one call; the rest go row by row */
+    for (int i = 0; i < p->num_planes; i++) {
+        uint8_t *out = exec->out[i]; /* presumably row y_start; TODO confirm */
+        const int idx = p->index[i]; /* source plane, negative = clear */
+        if (idx < 0) { /* NOTE(review): bulk paths assume stride >= 0 */
+            memset(out, p->clear_value[i], exec->out_stride[i] * lines);
+        } else if (exec->out_stride[i] == exec->in_stride[idx]) {
+            memcpy(out, exec->in[idx], exec->out_stride[i] * lines);
+        } else { /* strides differ: copy only `bytes` of each row */
+            const int bytes = x_end * exec->block_size_out; /* per row */
+            const uint8_t *in = exec->in[idx];
+            for (int y = y_start; y < y_end; y++) {
+                memcpy(out, in, bytes);
+                out += exec->out_stride[i];
+                in  += exec->in_stride[idx];
+            }
+        }
+    }
+}
+
+static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
+{
+    MemcpyPriv p = {0};
+    /* Build the copy/clear plan for process(); ENOTSUP if not expressible */
+    for (int n = 0; n < ops->num_ops; n++) {
+        const SwsOp *op = &ops->ops[n];
+        switch (op->op) {
+        case SWS_OP_READ:
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac)
+                return AVERROR(ENOTSUP);
+            for (int i = 0; i < op->rw.elems; i++)
+                p.index[i] = i; /* component i starts as input plane i */
+            break;
+
+        case SWS_OP_SWIZZLE: {
+            const MemcpyPriv orig = p;
+            for (int i = 0; i < 4; i++) {
+                /* Duplicated sources are wasteful as memcpy, so reject them;
+                 * otherwise move each source together with its clear byte */
+                for (int j = 0; j < i; j++)
+                    if (op->swizzle.in[i] == op->swizzle.in[j])
+                        return AVERROR(ENOTSUP);
+                p.index[i]       = orig.index[op->swizzle.in[i]];
+                p.clear_value[i] = orig.clear_value[op->swizzle.in[i]];
+            }
+            break;
+        }
+
+        case SWS_OP_CLEAR:
+            for (int i = 0; i < 4; i++) {
+                if (!op->c.q4[i].den)
+                    continue; /* den == 0: leave this component untouched */
+                if (op->c.q4[i].den != 1)
+                    return AVERROR(ENOTSUP); /* need an integer constant */
+
+                /* Ensure all bytes to be cleared are the same, because we
+                 * can't memset on multi-byte sequences */
+                uint8_t val = op->c.q4[i].num & 0xFF;
+                uint32_t ref = val;
+                switch (ff_sws_pixel_type_size(op->type)) {
+                case 2: ref *= 0x101; break;
+                case 4: ref *= 0x1010101; break;
+                }
+                if (ref != op->c.q4[i].num)
+                    return AVERROR(ENOTSUP);
+                p.clear_value[i] = val;
+                p.index[i] = -1;
+            }
+            break;
+
+        case SWS_OP_WRITE:
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac)
+                return AVERROR(ENOTSUP);
+            p.num_planes = op->rw.elems; /* planes that process() will fill */
+            break;
+
+        default: /* anything else cannot be expressed as copy/fill here */
+            return AVERROR(ENOTSUP);
+        }
+    }
+
+    *out = (SwsCompiledOp) {
+        .block_size = 1,
+        .func = process,
+        .priv = av_memdup(&p, sizeof(p)), /* heap copy of the plan */
+        .free = av_free, /* matches the av_memdup() allocation */
+    };
+    return out->priv ? 0 : AVERROR(ENOMEM); /* NULL: av_memdup() failed */
+}
+
+SwsOpBackend backend_murder = { /* declared extern in ops.c's backend list */
+    .name    = "memcpy",
+    .compile = compile, /* returns ENOTSUP for lists it cannot express */
+};
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-06-11 12:50 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-11 12:47 [FFmpeg-devel] [PATCH v6 00/18] swscale: new ops framework Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 01/18] swscale/graph: pass per-pass image pointers to setup() Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 02/18] swscale/format: rename legacy format conversion table Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 03/18] swscale/format: add ff_fmt_clear() Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 04/18] tests/checkasm: increase number of runs in between measurements Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 05/18] tests/checkasm: generalize DEF_CHECKASM_CHECK_FUNC to floats Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 06/18] swscale: add SWS_UNSTABLE flag Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 07/18] swscale/ops: introduce new low level framework Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 08/18] swscale/optimizer: add high-level ops optimizer Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 09/18] swscale/ops_internal: add internal ops backend API Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 10/18] swscale/ops: add dispatch layer Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 11/18] swscale/optimizer: add packed shuffle solver Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 12/18] swscale/ops_chain: add internal abstraction for kernel linking Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 13/18] swscale/ops_backend: add reference backend based on C templates Niklas Haas
2025-06-11 12:47 ` Niklas Haas [this message]
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 15/18] swscale/x86: add SIMD backend Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 16/18] tests/checkasm: add checkasm tests for swscale ops Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 17/18] swscale/format: add new format decode/encode logic Niklas Haas
2025-06-11 12:47 ` [FFmpeg-devel] [PATCH v6 18/18] swscale/graph: allow experimental use of new format handler Niklas Haas
2025-06-11 17:59 ` [FFmpeg-devel] [PATCH v6 00/18] swscale: new ops framework Niklas Haas
2025-06-16 12:31 ` Niklas Haas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250611124805.73196-15-ffmpeg@haasn.xyz \
    --to=ffmpeg@haasn.xyz \
    --cc=ffmpeg-devel@ffmpeg.org \
    --cc=git@haasn.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git