Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d_opencl Initial support for OpenCL implementation of vf_lut3d.
       [not found] <20250501170711.60035-1-jendas1.ref@yahoo.com>
@ 2025-05-01 17:07 ` Jan Studený via ffmpeg-devel
  2025-05-09 10:20   ` Jan Studený via ffmpeg-devel
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Studený via ffmpeg-devel @ 2025-05-01 17:07 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Jan Studený

The comile error is fixed by adding opencl dependency to configure.

---
 configure                     |   1 +
 libavfilter/Makefile          |   1 +
 libavfilter/allfilters.c      |   1 +
 libavfilter/opencl/lut3d.cl   | 177 +++++++++++++
 libavfilter/opencl_source.h   |   2 +
 libavfilter/vf_lut3d_opencl.c | 460 ++++++++++++++++++++++++++++++++++
 6 files changed, 642 insertions(+)
 create mode 100644 libavfilter/opencl/lut3d.cl
 create mode 100644 libavfilter/vf_lut3d_opencl.c

diff --git a/configure b/configure
index ee270b770c..9b9ea3b39b 100755
--- a/configure
+++ b/configure
@@ -3934,6 +3934,7 @@ ladspa_filter_deps="ladspa libdl"
 lcevc_filter_deps="liblcevc_dec"
 lensfun_filter_deps="liblensfun version3"
 libplacebo_filter_deps="libplacebo vulkan"
+lut3d_opencl_filter_deps="opencl"
 lv2_filter_deps="lv2"
 mcdeint_filter_deps="avcodec gpl"
 metadata_filter_deps="avformat"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 7c0d879ec9..6524d0f91a 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -378,6 +378,7 @@ OBJS-$(CONFIG_LUT1D_FILTER)                  += vf_lut3d.o
 OBJS-$(CONFIG_LUT_FILTER)                    += vf_lut.o
 OBJS-$(CONFIG_LUT2_FILTER)                   += vf_lut2.o framesync.o
 OBJS-$(CONFIG_LUT3D_FILTER)                  += vf_lut3d.o framesync.o
+OBJS-$(CONFIG_LUT3D_OPENCL_FILTER)           += vf_lut3d_opencl.o opencl.o opencl/lut3d.o
 OBJS-$(CONFIG_LUTRGB_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_LUTYUV_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_MASKEDCLAMP_FILTER)            += vf_maskedclamp.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 740d9ab265..72c2f48ac4 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -353,6 +353,7 @@ extern const FFFilter ff_vf_lut;
 extern const FFFilter ff_vf_lut1d;
 extern const FFFilter ff_vf_lut2;
 extern const FFFilter ff_vf_lut3d;
+extern const FFFilter ff_vf_lut3d_opencl;
 extern const FFFilter ff_vf_lutrgb;
 extern const FFFilter ff_vf_lutyuv;
 extern const FFFilter ff_vf_maskedclamp;
diff --git a/libavfilter/opencl/lut3d.cl b/libavfilter/opencl/lut3d.cl
new file mode 100644
index 0000000000..16dfecdc4e
--- /dev/null
+++ b/libavfilter/opencl/lut3d.cl
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2025 Jan Studeny
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+typedef struct rgbvec {
+    float r, g, b, a;
+} rgbvec;
+
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
+#define NEAR(x) ((int)((x) + .5))
+#define PREV(x) ((int)(x))
+#define NEXT(x) (MIN((int)(x) + 1, lut_edge_size - 1))
+
+/**
+ * Get the nearest defined point
+ */
+static rgbvec interp_nearest(float4 px, __global const rgbvec *lut, int lut_edge_size)
+{
+    int r = NEAR(px[0]);
+    int g = NEAR(px[1]);
+    int b = NEAR(px[2]);
+    int index = r * lut_edge_size * lut_edge_size + g * lut_edge_size + b;
+    return lut[index];
+}
+
+static float lerpf(float v0, float v1, float f)
+{
+    return v0 + (v1 - v0) * f;
+}
+
+static rgbvec lerp(const rgbvec *v0, const rgbvec *v1, float f)
+{
+    rgbvec v = {
+        lerpf(v0->r, v1->r, f), lerpf(v0->g, v1->g, f), lerpf(v0->b, v1->b, f)
+    };
+    return v;
+}
+/**
+ * Interpolate using the 8 vertices of a cube
+ * @see https://en.wikipedia.org/wiki/Trilinear_interpolation
+ */
+static rgbvec interp_trilinear(float4 px, __global const rgbvec *lut, int lut_edge_size)
+{
+    const int lutsize2 = lut_edge_size * lut_edge_size;
+    const int lutsize  = lut_edge_size;
+
+    const int prev[] = { PREV(px[0]), PREV(px[1]), PREV(px[2]) };
+    const int next[] = { NEXT(px[0]), NEXT(px[1]), NEXT(px[2]) };
+
+    const rgbvec d = {
+        px[0] - prev[0],
+        px[1] - prev[1],
+        px[2] - prev[2]
+    };
+
+    const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+    const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
+    const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
+    const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
+    const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+    const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
+    const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
+    const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
+
+    const rgbvec c00  = lerp(&c000, &c100, d.r);
+    const rgbvec c10  = lerp(&c010, &c110, d.r);
+    const rgbvec c01  = lerp(&c001, &c101, d.r);
+    const rgbvec c11  = lerp(&c011, &c111, d.r);
+
+    const rgbvec c0   = lerp(&c00,  &c10,  d.g);
+    const rgbvec c1   = lerp(&c01,  &c11,  d.g);
+
+    return lerp(&c0, &c1, d.b);
+}
+
+/**
+ * Tetrahedral interpolation. Based on code found in Truelight Software Library paper.
+ * @see http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf
+ */
+
+static rgbvec interp_tetrahedral(float4 px, __global const rgbvec *lut, int lut_edge_size)
+{
+    const int lutsize2 = lut_edge_size*lut_edge_size;
+    const int lutsize  = lut_edge_size;
+    const int prev[] = {PREV(px[0]), PREV(px[1]), PREV(px[2])};
+    const int next[] = {NEXT(px[0]), NEXT(px[1]), NEXT(px[2])};
+    const rgbvec d = {px[0] - prev[0], px[1] - prev[1], px[2] - prev[2]};
+    const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+    const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
+    rgbvec c;
+    if (d.r > d.g) {
+        if (d.g > d.b) {
+            const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+            const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
+            c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r;
+            c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g;
+            c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b;
+        } else if (d.r > d.b) {
+            const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
+            const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
+            c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r;
+            c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g;
+            c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b;
+        } else {
+            const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
+            const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
+            c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r;
+            c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g;
+            c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b;
+        }
+    } else {
+        if (d.b > d.g) {
+            const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
+            const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
+            c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r;
+            c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g;
+            c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b;
+        } else if (d.b > d.r) {
+            const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
+            const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
+            c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r;
+            c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g;
+            c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b;
+        } else {
+            const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
+            const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
+            c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r;
+            c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g;
+            c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b;
+        }
+    }
+    return c;
+}
+
+#define LUT3D_KERNEL(INTERP_FUNC)                                      \
+__kernel void lut3d_##INTERP_FUNC(                                     \
+    __read_only  image2d_t src,                                        \
+    __write_only image2d_t dst,                                        \
+    __global const rgbvec* lut,                                        \
+    int lut_edge_size)                                                 \
+{                                                                      \
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |           \
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |           \
+                               CLK_FILTER_NEAREST);                    \
+                                                                       \
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));             \
+    float4 px  = read_imagef(src, sampler, loc);                       \
+                                                                       \
+    for (int i = 0; i < 3; i++) {                                      \
+        px[i] *= (lut_edge_size - 1);                                  \
+    }                                                                  \
+                                                                       \
+    rgbvec lutpx = INTERP_FUNC(px, lut, lut_edge_size);                \
+                                                                       \
+    write_imagef(dst, loc, (float4)(lutpx.r, lutpx.g, lutpx.b, 0.0f)); \
+}
+
+LUT3D_KERNEL(interp_nearest)
+LUT3D_KERNEL(interp_trilinear)
+LUT3D_KERNEL(interp_tetrahedral)
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index b6930fb686..d143286d21 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -26,6 +26,7 @@ extern const char *ff_source_convolution_cl;
 extern const char *ff_source_deshake_cl;
 extern const char *ff_source_neighbor_cl;
 extern const char *ff_source_nlmeans_cl;
+extern const char *ff_source_lut3d_cl;
 extern const char *ff_source_overlay_cl;
 extern const char *ff_source_pad_cl;
 extern const char *ff_source_remap_cl;
@@ -34,4 +35,5 @@ extern const char *ff_source_transpose_cl;
 extern const char *ff_source_unsharp_cl;
 extern const char *ff_source_xfade_cl;
 
+
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_lut3d_opencl.c b/libavfilter/vf_lut3d_opencl.c
new file mode 100644
index 0000000000..bb7d10ed37
--- /dev/null
+++ b/libavfilter/vf_lut3d_opencl.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright (c) 2025 Jan Studeny
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+
+#include "libavutil/file_open.h"
+
+#include "avfilter.h"
+#include "filters.h"
+#include "opencl.h"
+#include "drawutils.h"
+#include "opencl_source.h"
+#include "video.h"
+
+
+#define MAX_LINE_SIZE 512
+
+enum interp_mode {
+    INTERPOLATE_NEAREST,
+    INTERPOLATE_TRILINEAR,
+    INTERPOLATE_TETRAHEDRAL,
+    INTERPOLATE_PYRAMID,
+    INTERPOLATE_PRISM,
+    NB_INTERP_MODE
+};
+
+typedef struct rgbvec {
+    cl_float r, g, b, a;
+} rgbvec;
+
+#define MAX_LEVEL 256
+
+
+typedef struct LUT3DOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+    cl_mem lut3d_buf;
+
+    struct rgbvec *lut;
+    int lutsize;
+    int lutsize2;
+    struct rgbvec scale;
+    int interpolation;          ///<interp_mode
+    char *file;
+} LUT3DOpenCLContext;
+
+static int allocate_3dlut(AVFilterContext *ctx, int lutsize)
+{
+    LUT3DOpenCLContext *lut3d = ctx->priv;
+    if (lutsize < 2 || lutsize > MAX_LEVEL) {
+        av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n");
+        return AVERROR(EINVAL);
+    }
+
+    av_freep(&lut3d->lut);
+    lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, sizeof(*lut3d->lut));
+    if (!lut3d->lut)
+        return AVERROR(ENOMEM);
+
+    lut3d->lutsize = lutsize;
+    lut3d->lutsize2 = lutsize * lutsize;
+    return 0;
+}
+
+static int set_identity_matrix(AVFilterContext *ctx, int size)
+{
+    LUT3DOpenCLContext *lut3d = ctx->priv;
+    int ret, i, j, k;
+    const int size2 = size * size;
+    const float c = 1. / (size - 1);
+
+    ret = allocate_3dlut(ctx, size);
+    if (ret < 0)
+        return ret;
+
+    for (k = 0; k < size; k++) {
+        for (j = 0; j < size; j++) {
+            for (i = 0; i < size; i++) {
+                struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
+                vec->r = k * c;
+                vec->g = j * c;
+                vec->b = i * c;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int skip_line(const char *p)
+{
+    while (*p && av_isspace(*p))
+        p++;
+    return !*p || *p == '#';
+}
+
+#define NEXT_LINE(loop_cond) do {                           \
+    if (!fgets(line, sizeof(line), f)) {                    \
+        av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n");      \
+        return AVERROR_INVALIDDATA;                         \
+    }                                                       \
+} while (loop_cond)
+
+static int parse_cube(AVFilterContext *ctx, FILE *f)
+{
+    LUT3DOpenCLContext *lut3d = ctx->priv;
+    char line[MAX_LINE_SIZE];
+
+    while (fgets(line, sizeof(line), f)) {
+        if (!strncmp(line, "LUT_3D_SIZE", 11)) {
+            int ret, i, j, k;
+            const int size = strtol(line + 12, NULL, 0);
+            const int size2 = size * size;
+
+            ret = allocate_3dlut(ctx, size);
+            if (ret < 0)
+                return ret;
+
+            for (k = 0; k < size; k++) {
+                for (j = 0; j < size; j++) {
+                    for (i = 0; i < size; i++) {
+                        struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
+
+                        do {
+try_again:
+                            NEXT_LINE(0);
+                            if (!strncmp(line, "DOMAIN_", 7)) {
+                                av_log(ctx, AV_LOG_ERROR, "Min/max not supported in this format\n");
+                                return AVERROR_INVALIDDATA;
+                            } else if (!strncmp(line, "TITLE", 5)) {
+                                goto try_again;
+                            }
+                        } while (skip_line(line));
+                        if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
+                            return AVERROR_INVALIDDATA;
+                    }
+                }
+            }
+            break;
+        }
+    }
+
+    return 0;
+}
+
+static int lut3d_opencl_init_device(AVFilterContext *avctx)
+{
+    int err;
+    LUT3DOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+
+   size_t n = ctx->lutsize;                    
+   size_t total = n * n * n;                 
+
+
+   cl_mem lut3d_buf = clCreateBuffer(ctx->ocf.hwctx->context,
+       CL_MEM_READ_ONLY |
+       CL_MEM_COPY_HOST_PTR |
+       CL_MEM_HOST_NO_ACCESS,
+       sizeof(rgbvec) * total,
+       ctx->lut, &cle);
+
+   if (!lut3d_buf) {
+       av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: "
+              "%d.\n", cle);
+       return AVERROR(EIO);
+   }
+   ctx->lut3d_buf = lut3d_buf;
+
+   av_log(avctx, AV_LOG_DEBUG, "LUT3D data loaded onto host\n");
+
+
+
+
+   err = ff_opencl_filter_load_program(avctx, &ff_source_lut3d_cl, 1);
+   if (err < 0)
+       return err;
+
+   ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                             ctx->ocf.hwctx->device_id,
+                                             0, &cle);
+   CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                    "command queue %d.\n", cle);
+
+    const char *kernel_name;
+    switch (ctx->interpolation) {
+        case INTERPOLATE_NEAREST:     kernel_name = "lut3d_interp_nearest";        break;
+        case INTERPOLATE_TRILINEAR:   kernel_name = "lut3d_interp_trilinear";      break;
+        case INTERPOLATE_TETRAHEDRAL: kernel_name = "lut3d_interp_tetrahedral";    break;
+        default:
+            av_assert0(0);
+        }
+   ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
+   CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                    "kernel %d.\n", cle);
+
+   ctx->initialised = 1;
+   return 0;
+   fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int lut3d_opencl_init(AVFilterContext *avctx)
+{
+
+    av_log(avctx, AV_LOG_DEBUG, "Starting intialization of LUT3D OpenCL\n");
+    LUT3DOpenCLContext *ctx = avctx->priv;
+    int err = 0;
+
+    ff_opencl_filter_init(avctx);
+
+    av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL filter initialized\n");
+
+
+    FILE *f;
+    const char *ext;
+
+    if (!ctx->file) {
+        return set_identity_matrix(avctx, 32);
+    }
+    else {
+        ext = strrchr(ctx->file, '.');
+        if (!ext) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
+            err = AVERROR_INVALIDDATA;
+            return err;
+        }
+        ext++;
+        if (!av_strcasecmp(ext, "cube")) {
+            f = avpriv_fopen_utf8(ctx->file, "r");
+            if (!f) {
+                err = AVERROR(errno);
+                av_log(avctx, AV_LOG_ERROR, "%s: %s\n", ctx->file, av_err2str(err));
+                return err;
+            }
+            err = parse_cube(avctx, f);
+            fclose(f);
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
+            err = AVERROR(EINVAL);
+            return err;
+        }
+        if (!err && !ctx->lutsize) {
+            av_log(avctx, AV_LOG_ERROR, "3D LUT is empty\n");
+            err = AVERROR_INVALIDDATA;
+            return err;
+        }
+        
+    }
+    av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL data loaded\n");
+    return err;
+}
+
+static int lut3d_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext *avctx = inlink->dst;
+    AVFilterLink *outlink = avctx->outputs[0];
+    LUT3DOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, p;
+    
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        AVHWFramesContext *input_ctx =
+            (AVHWFramesContext*)input->hw_frames_ctx->data;
+        int fmt = input_ctx->sw_format;
+
+        // Make sure the input is a format we support
+        if (fmt != AV_PIX_FMT_ARGB &&
+            fmt != AV_PIX_FMT_RGBA &&
+            fmt != AV_PIX_FMT_ABGR &&
+            fmt != AV_PIX_FMT_BGRA
+        ) {
+            av_log(avctx, AV_LOG_ERROR, "unsupported (non-RGB) format in lut3d_opencl.\n");
+            err = AVERROR(ENOSYS);
+            goto fail;
+        }
+
+
+        err = lut3d_opencl_init_device(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+
+        if (!dst)
+            break;
+
+        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &src);
+        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &dst);
+        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem,   &ctx->lut3d_buf);
+        CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int,   &ctx->lutsize);
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+        if (err < 0)
+            goto fail;
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                p, global_work[0], global_work[1]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                        global_work, NULL,
+                                        0, NULL, NULL);
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
+                            "kernel: %d.\n", cle);
+    }
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void lut3d_opencl_uninit(AVFilterContext *avctx)
+{
+    LUT3DOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    clReleaseMemObject(ctx->lut3d_buf);
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    av_freep(&ctx->lut);
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+static const AVFilterPad lut3d_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &lut3d_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+};
+
+static const AVFilterPad lut3d_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+};
+
+#define OFFSET(x) offsetof(LUT3DOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+#define TFLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
+
+
+
+#if CONFIG_LUT3D_OPENCL_FILTER
+
+
+static const AVOption lut3d_opencl_options[] = {
+    { "file", "set 3D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
+    { "interp", "select interpolation mode", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, NB_INTERP_MODE-1, TFLAGS, .unit = "interp_mode" }, 
+        { "nearest",     "use values from the nearest defined points",            0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_NEAREST},     0, 0, TFLAGS, .unit = "interp_mode" }, 
+        { "trilinear",   "interpolate values using the 8 points defining a cube", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TRILINEAR},   0, 0, TFLAGS, .unit = "interp_mode" }, 
+        { "tetrahedral", "interpolate values using a tetrahedron",                0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, 0, TFLAGS, .unit = "interp_mode" }, \
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(lut3d_opencl);
+
+const FFFilter ff_vf_lut3d_opencl = {
+    .p.name         = "lut3d_opencl",
+    .p.description  = NULL_IF_CONFIG_SMALL("Adjust colors using a 3D LUT."),
+    .p.priv_class   = &lut3d_opencl_class,
+    .p.flags        = AVFILTER_FLAG_HWDEVICE,
+    .priv_size      = sizeof(LUT3DOpenCLContext),
+    .init           = &lut3d_opencl_init,
+    .uninit         = &lut3d_opencl_uninit,
+    FILTER_INPUTS(lut3d_opencl_inputs),
+    FILTER_OUTPUTS(lut3d_opencl_outputs),
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_LUT3D_OPENCL_FILTER */
-- 
2.39.5 (Apple Git-154)

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d_opencl Initial support for OpenCL implementation of vf_lut3d.
  2025-05-01 17:07 ` [FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d_opencl Initial support for OpenCL implementation of vf_lut3d Jan Studený via ffmpeg-devel
@ 2025-05-09 10:20   ` Jan Studený via ffmpeg-devel
  0 siblings, 0 replies; 2+ messages in thread
From: Jan Studený via ffmpeg-devel @ 2025-05-09 10:20 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Jan Studený

Hi,
Since this is my first FFmpeg patch I’m not sure if I should include anything extra to make the review easier, like logs, test results, or more explanation.

Please let me know if there’s anything I can add or do differently.

Best regards,

Jan Studený
On May 1, 2025 at 20:13 +0300, Jan Studený <jendas1@yahoo.com>, wrote:

> The comile error is fixed by adding opencl dependency to configure.
>
> ---
> configure | 1 +
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/opencl/lut3d.cl | 177 +++++++++++++
> libavfilter/opencl_source.h | 2 +
> libavfilter/vf_lut3d_opencl.c | 460 ++++++++++++++++++++++++++++++++++
> 6 files changed, 642 insertions(+)
> create mode 100644 libavfilter/opencl/lut3d.cl
> create mode 100644 libavfilter/vf_lut3d_opencl.c
>
> diff --git a/configure b/configure
> index ee270b770c..9b9ea3b39b 100755
> --- a/configure
> +++ b/configure
> @@ -3934,6 +3934,7 @@ ladspa_filter_deps="ladspa libdl"
> lcevc_filter_deps="liblcevc_dec"
> lensfun_filter_deps="liblensfun version3"
> libplacebo_filter_deps="libplacebo vulkan"
> +lut3d_opencl_filter_deps="opencl"
> lv2_filter_deps="lv2"
> mcdeint_filter_deps="avcodec gpl"
> metadata_filter_deps="avformat"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 7c0d879ec9..6524d0f91a 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -378,6 +378,7 @@ OBJS-$(CONFIG_LUT1D_FILTER) += vf_lut3d.o
> OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o
> OBJS-$(CONFIG_LUT2_FILTER) += vf_lut2.o framesync.o
> OBJS-$(CONFIG_LUT3D_FILTER) += vf_lut3d.o framesync.o
> +OBJS-$(CONFIG_LUT3D_OPENCL_FILTER) += vf_lut3d_opencl.o opencl.o opencl/lut3d.o
> OBJS-$(CONFIG_LUTRGB_FILTER) += vf_lut.o
> OBJS-$(CONFIG_LUTYUV_FILTER) += vf_lut.o
> OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += vf_maskedclamp.o framesync.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 740d9ab265..72c2f48ac4 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -353,6 +353,7 @@ extern const FFFilter ff_vf_lut;
> extern const FFFilter ff_vf_lut1d;
> extern const FFFilter ff_vf_lut2;
> extern const FFFilter ff_vf_lut3d;
> +extern const FFFilter ff_vf_lut3d_opencl;
> extern const FFFilter ff_vf_lutrgb;
> extern const FFFilter ff_vf_lutyuv;
> extern const FFFilter ff_vf_maskedclamp;
> diff --git a/libavfilter/opencl/lut3d.cl b/libavfilter/opencl/lut3d.cl
> new file mode 100644
> index 0000000000..16dfecdc4e
> --- /dev/null
> +++ b/libavfilter/opencl/lut3d.cl
> @@ -0,0 +1,177 @@
> +/*
> + * Copyright (c) 2025 Jan Studeny
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +typedef struct rgbvec {
> + float r, g, b, a;
> +} rgbvec;
> +
> +#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
> +
> +#define NEAR(x) ((int)((x) + .5))
> +#define PREV(x) ((int)(x))
> +#define NEXT(x) (MIN((int)(x) + 1, lut_edge_size - 1))
> +
> +/**
> + * Get the nearest defined point
> + */
> +static rgbvec interp_nearest(float4 px, __global const rgbvec *lut, int lut_edge_size)
> +{
> + int r = NEAR(px[0]);
> + int g = NEAR(px[1]);
> + int b = NEAR(px[2]);
> + int index = r * lut_edge_size * lut_edge_size + g * lut_edge_size + b;
> + return lut[index];
> +}
> +
> +static float lerpf(float v0, float v1, float f)
> +{
> + return v0 + (v1 - v0) * f;
> +}
> +
> +static rgbvec lerp(const rgbvec *v0, const rgbvec *v1, float f)
> +{
> + rgbvec v = {
> + lerpf(v0->r, v1->r, f), lerpf(v0->g, v1->g, f), lerpf(v0->b, v1->b, f)
> + };
> + return v;
> +}
> +/**
> + * Interpolate using the 8 vertices of a cube
> + * @see https://en.wikipedia.org/wiki/Trilinear_interpolation
> + */
> +static rgbvec interp_trilinear(float4 px, __global const rgbvec *lut, int lut_edge_size)
> +{
> + const int lutsize2 = lut_edge_size * lut_edge_size;
> + const int lutsize = lut_edge_size;
> +
> + const int prev[] = { PREV(px[0]), PREV(px[1]), PREV(px[2]) };
> + const int next[] = { NEXT(px[0]), NEXT(px[1]), NEXT(px[2]) };
> +
> + const rgbvec d = {
> + px[0] - prev[0],
> + px[1] - prev[1],
> + px[2] - prev[2]
> + };
> +
> + const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
> + const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
> + const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
> + const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
> + const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
> + const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
> + const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
> + const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
> +
> + const rgbvec c00 = lerp(&c000, &c100, d.r);
> + const rgbvec c10 = lerp(&c010, &c110, d.r);
> + const rgbvec c01 = lerp(&c001, &c101, d.r);
> + const rgbvec c11 = lerp(&c011, &c111, d.r);
> +
> + const rgbvec c0 = lerp(&c00, &c10, d.g);
> + const rgbvec c1 = lerp(&c01, &c11, d.g);
> +
> + return lerp(&c0, &c1, d.b);
> +}
> +
> +/**
> + * Tetrahedral interpolation. Based on code found in Truelight Software Library paper.
> + * @see http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf
> + */
> +
> +static rgbvec interp_tetrahedral(float4 px, __global const rgbvec *lut, int lut_edge_size)
> +{
> + const int lutsize2 = lut_edge_size*lut_edge_size;
> + const int lutsize = lut_edge_size;
> + const int prev[] = {PREV(px[0]), PREV(px[1]), PREV(px[2])};
> + const int next[] = {NEXT(px[0]), NEXT(px[1]), NEXT(px[2])};
> + const rgbvec d = {px[0] - prev[0], px[1] - prev[1], px[2] - prev[2]};
> + const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]];
> + const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]];
> + rgbvec c;
> + if (d.r > d.g) {
> + if (d.g > d.b) {
> + const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
> + const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
> + c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r;
> + c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g;
> + c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b;
> + } else if (d.r > d.b) {
> + const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]];
> + const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
> + c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r;
> + c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g;
> + c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b;
> + } else {
> + const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
> + const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]];
> + c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r;
> + c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g;
> + c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b;
> + }
> + } else {
> + if (d.b > d.g) {
> + const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]];
> + const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
> + c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r;
> + c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g;
> + c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b;
> + } else if (d.b > d.r) {
> + const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
> + const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]];
> + c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r;
> + c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g;
> + c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b;
> + } else {
> + const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]];
> + const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]];
> + c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r;
> + c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g;
> + c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b;
> + }
> + }
> + return c;
> +}
> +
> +#define LUT3D_KERNEL(INTERP_FUNC) \
> +__kernel void lut3d_##INTERP_FUNC( \
> + __read_only image2d_t src, \
> + __write_only image2d_t dst, \
> + __global const rgbvec* lut, \
> + int lut_edge_size) \
> +{ \
> + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | \
> + CLK_ADDRESS_CLAMP_TO_EDGE | \
> + CLK_FILTER_NEAREST); \
> + \
> + int2 loc = (int2)(get_global_id(0), get_global_id(1)); \
> + float4 px = read_imagef(src, sampler, loc); \
> + \
> + for (int i = 0; i < 3; i++) { \
> + px[i] *= (lut_edge_size - 1); \
> + } \
> + \
> + rgbvec lutpx = INTERP_FUNC(px, lut, lut_edge_size); \
> + \
> + write_imagef(dst, loc, (float4)(lutpx.r, lutpx.g, lutpx.b, 0.0f)); \
> +}
> +
> +LUT3D_KERNEL(interp_nearest)
> +LUT3D_KERNEL(interp_trilinear)
> +LUT3D_KERNEL(interp_tetrahedral)
> diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
> index b6930fb686..d143286d21 100644
> --- a/libavfilter/opencl_source.h
> +++ b/libavfilter/opencl_source.h
> @@ -26,6 +26,7 @@ extern const char *ff_source_convolution_cl;
> extern const char *ff_source_deshake_cl;
> extern const char *ff_source_neighbor_cl;
> extern const char *ff_source_nlmeans_cl;
> +extern const char *ff_source_lut3d_cl;
> extern const char *ff_source_overlay_cl;
> extern const char *ff_source_pad_cl;
> extern const char *ff_source_remap_cl;
> @@ -34,4 +35,5 @@ extern const char *ff_source_transpose_cl;
> extern const char *ff_source_unsharp_cl;
> extern const char *ff_source_xfade_cl;
>
> +
> #endif /* AVFILTER_OPENCL_SOURCE_H */
> diff --git a/libavfilter/vf_lut3d_opencl.c b/libavfilter/vf_lut3d_opencl.c
> new file mode 100644
> index 0000000000..bb7d10ed37
> --- /dev/null
> +++ b/libavfilter/vf_lut3d_opencl.c
> @@ -0,0 +1,460 @@
> +/*
> + * Copyright (c) 2025 Jan Studeny
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "config_components.h"
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/common.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/avstring.h"
> +
> +#include "libavutil/file_open.h"
> +
> +#include "avfilter.h"
> +#include "filters.h"
> +#include "opencl.h"
> +#include "drawutils.h"
> +#include "opencl_source.h"
> +#include "video.h"
> +
> +
> +#define MAX_LINE_SIZE 512
> +
> +enum interp_mode {
> + INTERPOLATE_NEAREST,
> + INTERPOLATE_TRILINEAR,
> + INTERPOLATE_TETRAHEDRAL,
> + INTERPOLATE_PYRAMID,
> + INTERPOLATE_PRISM,
> + NB_INTERP_MODE
> +};
> +
> +typedef struct rgbvec {
> + cl_float r, g, b, a;
> +} rgbvec;
> +
> +#define MAX_LEVEL 256
> +
> +
> +typedef struct LUT3DOpenCLContext {
> + OpenCLFilterContext ocf;
> +
> + int initialised;
> + cl_kernel kernel;
> + cl_command_queue command_queue;
> + cl_mem lut3d_buf;
> +
> + struct rgbvec *lut;
> + int lutsize;
> + int lutsize2;
> + struct rgbvec scale;
> + int interpolation; ///<interp_mode
> + char *file;
> +} LUT3DOpenCLContext;
> +
> +static int allocate_3dlut(AVFilterContext *ctx, int lutsize)
> +{
> + LUT3DOpenCLContext *lut3d = ctx->priv;
> + if (lutsize < 2 || lutsize > MAX_LEVEL) {
> + av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n");
> + return AVERROR(EINVAL);
> + }
> +
> + av_freep(&lut3d->lut);
> + lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, sizeof(*lut3d->lut));
> + if (!lut3d->lut)
> + return AVERROR(ENOMEM);
> +
> + lut3d->lutsize = lutsize;
> + lut3d->lutsize2 = lutsize * lutsize;
> + return 0;
> +}
> +
> +static int set_identity_matrix(AVFilterContext *ctx, int size)
> +{
> + LUT3DOpenCLContext *lut3d = ctx->priv;
> + int ret, i, j, k;
> + const int size2 = size * size;
> + const float c = 1. / (size - 1);
> +
> + ret = allocate_3dlut(ctx, size);
> + if (ret < 0)
> + return ret;
> +
> + for (k = 0; k < size; k++) {
> + for (j = 0; j < size; j++) {
> + for (i = 0; i < size; i++) {
> + struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i];
> + vec->r = k * c;
> + vec->g = j * c;
> + vec->b = i * c;
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int skip_line(const char *p)
> +{
> + while (*p && av_isspace(*p))
> + p++;
> + return !*p || *p == '#';
> +}
> +
> +#define NEXT_LINE(loop_cond) do { \
> + if (!fgets(line, sizeof(line), f)) { \
> + av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n"); \
> + return AVERROR_INVALIDDATA; \
> + } \
> +} while (loop_cond)
> +
> +static int parse_cube(AVFilterContext *ctx, FILE *f)
> +{
> + LUT3DOpenCLContext *lut3d = ctx->priv;
> + char line[MAX_LINE_SIZE];
> +
> + while (fgets(line, sizeof(line), f)) {
> + if (!strncmp(line, "LUT_3D_SIZE", 11)) {
> + int ret, i, j, k;
> + const int size = strtol(line + 12, NULL, 0);
> + const int size2 = size * size;
> +
> + ret = allocate_3dlut(ctx, size);
> + if (ret < 0)
> + return ret;
> +
> + for (k = 0; k < size; k++) {
> + for (j = 0; j < size; j++) {
> + for (i = 0; i < size; i++) {
> + struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k];
> +
> + do {
> +try_again:
> + NEXT_LINE(0);
> + if (!strncmp(line, "DOMAIN_", 7)) {
> + av_log(ctx, AV_LOG_ERROR, "Min/max not supported in this format\n");
> + return AVERROR_INVALIDDATA;
> + } else if (!strncmp(line, "TITLE", 5)) {
> + goto try_again;
> + }
> + } while (skip_line(line));
> + if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
> + return AVERROR_INVALIDDATA;
> + }
> + }
> + }
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int lut3d_opencl_init_device(AVFilterContext *avctx)
> +{
> + int err;
> + LUT3DOpenCLContext *ctx = avctx->priv;
> + cl_int cle;
> +
> +
> + size_t n = ctx->lutsize;
> + size_t total = n * n * n;
> +
> +
> + cl_mem lut3d_buf = clCreateBuffer(ctx->ocf.hwctx->context,
> + CL_MEM_READ_ONLY |
> + CL_MEM_COPY_HOST_PTR |
> + CL_MEM_HOST_NO_ACCESS,
> + sizeof(rgbvec) * total,
> + ctx->lut, &cle);
> +
> + if (!lut3d_buf) {
> + av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: "
> + "%d.\n", cle);
> + return AVERROR(EIO);
> + }
> + ctx->lut3d_buf = lut3d_buf;
> +
> + av_log(avctx, AV_LOG_DEBUG, "LUT3D data loaded onto host\n");
> +
> +
> +
> +
> + err = ff_opencl_filter_load_program(avctx, &ff_source_lut3d_cl, 1);
> + if (err < 0)
> + return err;
> +
> + ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
> + ctx->ocf.hwctx->device_id,
> + 0, &cle);
> + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
> + "command queue %d.\n", cle);
> +
> + const char *kernel_name;
> + switch (ctx->interpolation) {
> + case INTERPOLATE_NEAREST: kernel_name = "lut3d_interp_nearest"; break;
> + case INTERPOLATE_TRILINEAR: kernel_name = "lut3d_interp_trilinear"; break;
> + case INTERPOLATE_TETRAHEDRAL: kernel_name = "lut3d_interp_tetrahedral"; break;
> + default:
> + av_assert0(0);
> + }
> + ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
> + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
> + "kernel %d.\n", cle);
> +
> + ctx->initialised = 1;
> + return 0;
> + fail:
> + if (ctx->command_queue)
> + clReleaseCommandQueue(ctx->command_queue);
> + if (ctx->kernel)
> + clReleaseKernel(ctx->kernel);
> + return err;
> +}
> +
> +static int lut3d_opencl_init(AVFilterContext *avctx)
> +{
> +
> + av_log(avctx, AV_LOG_DEBUG, "Starting intialization of LUT3D OpenCL\n");
> + LUT3DOpenCLContext *ctx = avctx->priv;
> + int err = 0;
> +
> + ff_opencl_filter_init(avctx);
> +
> + av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL filter initialized\n");
> +
> +
> + FILE *f;
> + const char *ext;
> +
> + if (!ctx->file) {
> + return set_identity_matrix(avctx, 32);
> + }
> + else {
> + ext = strrchr(ctx->file, '.');
> + if (!ext) {
> + av_log(avctx, AV_LOG_ERROR, "Unable to guess the format from the extension\n");
> + err = AVERROR_INVALIDDATA;
> + return err;
> + }
> + ext++;
> + if (!av_strcasecmp(ext, "cube")) {
> + f = avpriv_fopen_utf8(ctx->file, "r");
> + if (!f) {
> + err = AVERROR(errno);
> + av_log(avctx, AV_LOG_ERROR, "%s: %s\n", ctx->file, av_err2str(err));
> + return err;
> + }
> + err = parse_cube(avctx, f);
> + fclose(f);
> + } else {
> + av_log(avctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext);
> + err = AVERROR(EINVAL);
> + return err;
> + }
> + if (!err && !ctx->lutsize) {
> + av_log(avctx, AV_LOG_ERROR, "3D LUT is empty\n");
> + err = AVERROR_INVALIDDATA;
> + return err;
> + }
> +
> + }
> + av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL data loaded\n");
> + return err;
> +}
> +
> +static int lut3d_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
> +{
> + AVFilterContext *avctx = inlink->dst;
> + AVFilterLink *outlink = avctx->outputs[0];
> + LUT3DOpenCLContext *ctx = avctx->priv;
> + AVFrame *output = NULL;
> + cl_int cle;
> + size_t global_work[2];
> + cl_mem src, dst;
> + int err, p;
> +
> + av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
> + av_get_pix_fmt_name(input->format),
> + input->width, input->height, input->pts);
> +
> + if (!input->hw_frames_ctx)
> + return AVERROR(EINVAL);
> +
> + if (!ctx->initialised) {
> + AVHWFramesContext *input_ctx =
> + (AVHWFramesContext*)input->hw_frames_ctx->data;
> + int fmt = input_ctx->sw_format;
> +
> + // Make sure the input is a format we support
> + if (fmt != AV_PIX_FMT_ARGB &&
> + fmt != AV_PIX_FMT_RGBA &&
> + fmt != AV_PIX_FMT_ABGR &&
> + fmt != AV_PIX_FMT_BGRA
> + ) {
> + av_log(avctx, AV_LOG_ERROR, "unsupported (non-RGB) format in lut3d_opencl.\n");
> + err = AVERROR(ENOSYS);
> + goto fail;
> + }
> +
> +
> + err = lut3d_opencl_init_device(avctx);
> + if (err < 0)
> + goto fail;
> + }
> +
> + output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> + if (!output) {
> + err = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> +
> + for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> + src = (cl_mem) input->data[p];
> + dst = (cl_mem)output->data[p];
> +
> + if (!dst)
> + break;
> +
> + CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &src);
> + CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &dst);
> + CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem, &ctx->lut3d_buf);
> + CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int, &ctx->lutsize);
> +
> + err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
> + if (err < 0)
> + goto fail;
> +
> + av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> + "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> + p, global_work[0], global_work[1]);
> +
> + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
> + global_work, NULL,
> + 0, NULL, NULL);
> + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue "
> + "kernel: %d.\n", cle);
> + }
> +
> + cle = clFinish(ctx->command_queue);
> + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
> +
> + err = av_frame_copy_props(output, input);
> + if (err < 0)
> + goto fail;
> +
> + av_frame_free(&input);
> +
> + av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
> + av_get_pix_fmt_name(output->format),
> + output->width, output->height, output->pts);
> +
> + return ff_filter_frame(outlink, output);
> +
> +fail:
> + clFinish(ctx->command_queue);
> + av_frame_free(&input);
> + av_frame_free(&output);
> + return err;
> +}
> +
> +static av_cold void lut3d_opencl_uninit(AVFilterContext *avctx)
> +{
> + LUT3DOpenCLContext *ctx = avctx->priv;
> + cl_int cle;
> +
> + clReleaseMemObject(ctx->lut3d_buf);
> +
> + if (ctx->kernel) {
> + cle = clReleaseKernel(ctx->kernel);
> + if (cle != CL_SUCCESS)
> + av_log(avctx, AV_LOG_ERROR, "Failed to release "
> + "kernel: %d.\n", cle);
> + }
> +
> + if (ctx->command_queue) {
> + cle = clReleaseCommandQueue(ctx->command_queue);
> + if (cle != CL_SUCCESS)
> + av_log(avctx, AV_LOG_ERROR, "Failed to release "
> + "command queue: %d.\n", cle);
> + }
> +
> + av_freep(&ctx->lut);
> +
> + ff_opencl_filter_uninit(avctx);
> +}
> +
> +static const AVFilterPad lut3d_opencl_inputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .filter_frame = &lut3d_opencl_filter_frame,
> + .config_props = &ff_opencl_filter_config_input,
> + },
> +};
> +
> +static const AVFilterPad lut3d_opencl_outputs[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = &ff_opencl_filter_config_output,
> + },
> +};
> +
> +#define OFFSET(x) offsetof(LUT3DOpenCLContext, x)
> +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +#define TFLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
> +
> +
> +
> +#if CONFIG_LUT3D_OPENCL_FILTER
> +
> +
> +static const AVOption lut3d_opencl_options[] = {
> + { "file", "set 3D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
> + { "interp", "select interpolation mode", OFFSET(interpolation), AV_OPT_TYPE_INT, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, NB_INTERP_MODE-1, TFLAGS, .unit = "interp_mode" },
> + { "nearest", "use values from the nearest defined points", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_NEAREST}, 0, 0, TFLAGS, .unit = "interp_mode" },
> + { "trilinear", "interpolate values using the 8 points defining a cube", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TRILINEAR}, 0, 0, TFLAGS, .unit = "interp_mode" },
> + { "tetrahedral", "interpolate values using a tetrahedron", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, 0, TFLAGS, .unit = "interp_mode" }, \
> + { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(lut3d_opencl);
> +
> +const FFFilter ff_vf_lut3d_opencl = {
> + .p.name = "lut3d_opencl",
> + .p.description = NULL_IF_CONFIG_SMALL("Adjust colors using a 3D LUT."),
> + .p.priv_class = &lut3d_opencl_class,
> + .p.flags = AVFILTER_FLAG_HWDEVICE,
> + .priv_size = sizeof(LUT3DOpenCLContext),
> + .init = &lut3d_opencl_init,
> + .uninit = &lut3d_opencl_uninit,
> + FILTER_INPUTS(lut3d_opencl_inputs),
> + FILTER_OUTPUTS(lut3d_opencl_outputs),
> + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),
> + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> +
> +#endif /* CONFIG_LUT3D_OPENCL_FILTER */
> --
> 2.39.5 (Apple Git-154)
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-05-09 10:21 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20250501170711.60035-1-jendas1.ref@yahoo.com>
2025-05-01 17:07 ` [FFmpeg-devel] [PATCH v2] avfilter/vf_lut3d_opencl Initial support for OpenCL implementation of vf_lut3d Jan Studený via ffmpeg-devel
2025-05-09 10:20   ` Jan Studený via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git