From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
Received: from ffbox0-bg.ffmpeg.org (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by master.gitmailbox.com (Postfix) with ESMTPS id CDF424DBDF
	for <ffmpegdev@gitmailbox.com>; Wed,  4 Jun 2025 10:25:56 +0000 (UTC)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 739C268D37F;
	Wed,  4 Jun 2025 13:23:42 +0300 (EEST)
Received: from haasn.dev (haasn.dev [78.46.187.166])
 by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 96E1968CF7B
 for <ffmpeg-devel@ffmpeg.org>; Wed,  4 Jun 2025 13:23:23 +0300 (EEST)
Received: from haasn.dev (unknown [10.30.1.1])
 by haasn.dev (Postfix) with UTF8SMTP id 51B3C4418B;
 Wed,  4 Jun 2025 12:23:19 +0200 (CEST)
From: Niklas Haas <ffmpeg@haasn.xyz>
To: ffmpeg-devel@ffmpeg.org
Date: Wed,  4 Jun 2025 12:20:15 +0200
Message-ID: <20250604102315.38134-14-ffmpeg@haasn.xyz>
X-Mailer: git-send-email 2.49.0
In-Reply-To: <20250604102315.38134-1-ffmpeg@haasn.xyz>
References: <20250604102315.38134-1-ffmpeg@haasn.xyz>
MIME-Version: 1.0
Subject: [FFmpeg-devel] [PATCH v4 13/17] swscale/ops_memcpy: add 'memcpy'
 backend for plane->plane copies
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: Niklas Haas <git@haasn.dev>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Archived-At: <https://master.gitmailbox.com/ffmpegdev/20250604102315.38134-14-ffmpeg@haasn.xyz/>
List-Archive: <https://master.gitmailbox.com/ffmpegdev/>
List-Post: <mailto:ffmpegdev@gitmailbox.com>

From: Niklas Haas <git@haasn.dev>

Provides a generic fast path for any operation list that can be decomposed
into a series of memcpy and memset operations.

25% faster than the x86 backend for yuv444p -> yuva444p
33% faster than the x86 backend for gray -> yuvj444p
---
 libswscale/Makefile     |   1 +
 libswscale/ops.c        |   2 +
 libswscale/ops_memcpy.c | 132 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 libswscale/ops_memcpy.c

diff --git a/libswscale/Makefile b/libswscale/Makefile
index 6e5696c5a6..136d33f6bc 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -18,6 +18,7 @@ OBJS = alphablend.o                                     \
        ops.o                                            \
        ops_backend.o                                    \
        ops_chain.o                                      \
+       ops_memcpy.o                                     \
        ops_optimizer.o                                  \
        options.o                                        \
        output.o                                         \
diff --git a/libswscale/ops.c b/libswscale/ops.c
index fe7ea6a565..c7bdbd305c 100644
--- a/libswscale/ops.c
+++ b/libswscale/ops.c
@@ -28,8 +28,10 @@
 #include "ops_internal.h"
 
 extern SwsOpBackend backend_c;
+extern SwsOpBackend backend_murder;
 
 const SwsOpBackend * const ff_sws_op_backends[] = {
+    &backend_murder,
     &backend_c,
     NULL
 };
diff --git a/libswscale/ops_memcpy.c b/libswscale/ops_memcpy.c
new file mode 100644
index 0000000000..ef4784faa4
--- /dev/null
+++ b/libswscale/ops_memcpy.c
@@ -0,0 +1,132 @@
+/**
+ * Copyright (C) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "ops_backend.h"
+
+typedef struct MemcpyPriv { /* per-plane copy/clear plan: filled in by compile(), read by process() */
+    int num_planes; /* number of output planes to produce (taken from the write op's elems) */
+    int index[4]; /* input plane copied into each output plane, or -1 to clear plane */
+    uint8_t clear_value[4]; /* byte handed to memset() for planes whose index is -1 */
+} MemcpyPriv;
+
+/* Memcpy backend for trivial cases: each output plane is either copied from an input plane or filled with a constant byte */
+
+static void process(const SwsOpExec *exec, const void *priv, /* priv is the MemcpyPriv plan; fills exec->out[] plane by plane */
+                    int x_start, int y_start, int x_end, int y_end) /* region bounds; only the row count and x_end are used */
+{
+    const MemcpyPriv *p = priv;
+    const int lines = y_end - y_start; /* number of rows to process */
+    av_assert1(x_start == 0 && x_end == exec->width); /* only full-width regions are expected */
+
+    for (int i = 0; i < p->num_planes; i++) {
+        uint8_t *out = exec->out[i];
+        const int idx = p->index[i]; /* source plane, or negative when the plane is cleared */
+        if (idx < 0) { /* cleared plane: fill with the constant byte */
+            memset(out, p->clear_value[i], exec->out_stride[i] * lines); /* spans whole strides; NOTE(review): assumes non-negative stride -- confirm */
+        } else if (exec->out_stride[i] == exec->in_stride[idx]) { /* equal strides: one bulk copy for all rows */
+            memcpy(out, exec->in[idx], exec->out_stride[i] * lines);
+        } else { /* differing strides: copy row by row */
+            const int bytes = x_end * exec->block_size_out; /* NOTE(review): presumably bytes per output row, given block_size = 1 -- confirm */
+            const uint8_t *in = exec->in[idx];
+            for (int y = y_start; y < y_end; y++) {
+                memcpy(out, in, bytes);
+                out += exec->out_stride[i]; /* advance each pointer by its own stride */
+                in  += exec->in_stride[idx];
+            }
+        }
+    }
+}
+
+static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out) /* returns 0, AVERROR(ENOTSUP) for unsupported op lists, or AVERROR(ENOMEM) */
+{
+    MemcpyPriv p = {0}; /* plan under construction; duplicated onto the heap at the end */
+
+    for (int n = 0; n < ops->num_ops; n++) { /* fold every op into the per-plane plan */
+        const SwsOp *op = &ops->ops[n];
+        switch (op->op) {
+        case SWS_OP_READ:
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac) /* multi-element packed or fractional reads are rejected */
+                return AVERROR(ENOTSUP);
+            for (int i = 0; i < op->rw.elems; i++)
+                p.index[i] = i; /* identity mapping: output plane i <- input plane i */
+            break;
+
+        case SWS_OP_SWIZZLE: {
+            const MemcpyPriv orig = p; /* snapshot, so the permutation reads the pre-swizzle indices */
+            for (int i = 0; i < 4; i++) {
+                /* Explicitly exclude swizzle masks that contain duplicates,
+                 * because these are wasteful to implement as a memcpy */
+                for (int j = 0; j < i; j++) {
+                    if (op->swizzle.in[i] == op->swizzle.in[j])
+                        return AVERROR(ENOTSUP);
+                }
+                p.index[i] = orig.index[op->swizzle.in[i]]; /* may also carry over a -1 (cleared) marker */
+            }
+            break;
+        }
+
+        case SWS_OP_CLEAR:
+            for (int i = 0; i < 4; i++) {
+                if (!op->c.q4[i].den) /* zero denominator: leave this component untouched */
+                    continue;
+                if (op->c.q4[i].den != 1) /* only integer-valued clear constants are accepted */
+                    return AVERROR(ENOTSUP);
+
+                /* Ensure all bytes to be cleared are the same, because we
+                 * can't memset on multi-byte sequences */
+                uint8_t val = op->c.q4[i].num & 0xFF; /* low byte of the clear value */
+                uint32_t ref = val;
+                switch (ff_sws_pixel_type_size(op->type)) { /* replicate the byte across a 2- or 4-byte element */
+                case 2: ref *= 0x101; break;
+                case 4: ref *= 0x1010101; break;
+                }
+                if (ref != op->c.q4[i].num) /* NOTE(review): compares a byte pattern with the numeric value; presumably only meaningful for integer pixel types -- confirm */
+                    return AVERROR(ENOTSUP);
+                p.clear_value[i] = val;
+                p.index[i] = -1; /* mark the plane as memset-only */
+            }
+            break;
+
+        case SWS_OP_WRITE:
+            if ((op->rw.packed && op->rw.elems != 1) || op->rw.frac) /* same restrictions as for reads */
+                return AVERROR(ENOTSUP);
+            p.num_planes = op->rw.elems; /* one output plane per written element */
+            break;
+
+        default: /* any other op cannot be expressed by this backend */
+            return AVERROR(ENOTSUP);
+        }
+    }
+
+    *out = (SwsCompiledOp) {
+        .block_size = 1,
+        .func = process,
+        .priv = av_memdup(&p, sizeof(p)), /* heap copy of the plan; presumably released by the caller through .free */
+        .free = av_free,
+    };
+    return out->priv ? 0 : AVERROR(ENOMEM); /* a failed av_memdup() is reported here */
+}
+
+SwsOpBackend backend_murder = { /* backend descriptor; declared extern in ops.c and listed before backend_c in ff_sws_op_backends[] */
+    .name    = "memcpy",
+    .compile = compile, /* the only hook set in this initializer */
+};
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".