[FFmpeg-devel] [PATCH] libavfilter/vf_remap_opencl: make kernel handle subsampled chroma

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

From: Matthias Welwarsky via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Matthias Welwarsky <ffmpeg@welwarsky.de>
Subject: [FFmpeg-devel] [PATCH] libavfilter/vf_remap_opencl: make kernel handle subsampled chroma
Date: Fri, 05 Sep 2025 16:21:44 +0200
Message-ID: <2756493.vuYhMxLoTh@linux.fritz.box> (raw)

This patch fixes handling YUV frames with subsampled chroma.
Before this patch, the remap kernel was already aware of multiple planes,
but it treated the subsampled chroma incorrectly. The maps were read
based on destination coordinates and the resulting source coordinates
were also not correctly transformed back to the source image.

Also, for both Y and UV, the wrong vector components were used.

With this patch, it is possible to have a decode->remap->encode
chain entirely on the GPU. I'm using it to reframe videos from an
insta360 camera. On a Lenovo T480 with the built-in UHD 620 GPU, it
can achieve about 5x real-time (VAAPI decode -> remap_opencl
-> VAAPI encode).

Signed-off-by: Matthias Welwarsky <ffmpeg@welwarsky.de>
---
 libavfilter/opencl/remap.cl   | 128 ++++++++++++++++++++++++----------
 libavfilter/vf_remap_opencl.c |  43 ++++++++++--
 2 files changed, 130 insertions(+), 41 deletions(-)

diff --git a/libavfilter/opencl/remap.cl b/libavfilter/opencl/remap.cl
index fba82d134e..1a18f1afff 100644
--- a/libavfilter/opencl/remap.cl
+++ b/libavfilter/opencl/remap.cl
@@ -22,52 +22,108 @@ const sampler_t linear_sampler = 
(CLK_NORMALIZED_COORDS_FALSE |
 const sampler_t nearest_sampler = (CLK_NORMALIZED_COORDS_FALSE |
                                    CLK_FILTER_NEAREST);
 
-__kernel void remap_near(__write_only image2d_t dst,
-                         __read_only  image2d_t src,
-                         __read_only  image2d_t xmapi,
-                         __read_only  image2d_t ymapi,
-                         float4 fill_color)
+__kernel void remap_near(
+    __write_only image2d_t dst,
+    __read_only  image2d_t src,
+    __read_only  image2d_t xmapi,
+    __read_only  image2d_t ymapi,
+    float4 fill_color,
+    float4 scale,
+    int4 swizzle)
 {
     int2 p = (int2)(get_global_id(0), get_global_id(1));
-    int2 dimi = get_image_dim(src);
-    float2 dimf = (float2)(dimi.x, dimi.y);
-    float4 val;
-    int2 mi;
-    float m;
 
-    float4 xmap = read_imagef(xmapi, nearest_sampler, p);
-    float4 ymap = read_imagef(ymapi, nearest_sampler, p);
-    float2 pos  = (float2)(xmap.x, ymap.x);
-    pos.xy = pos.xy * 65535.f;
+    /* image dimensions */
+    int2 src_dim = get_image_dim(src);
+    int2 dst_dim = get_image_dim(dst);
+    int2 map_dim = get_image_dim(xmapi);
 
-    mi = ((pos >= (float2)(0.f, 0.f)) * (pos < dimf) * (p <= dimi));
-    m = mi.x && mi.y;
-    val = mix(fill_color, read_imagef(src, nearest_sampler, pos), m);
+    float2 src_dimf = (float2)(src_dim.x, src_dim.y);
+    float2 dst_dimf = (float2)(dst_dim.x, dst_dim.y);
+    float2 map_dimf = (float2)(map_dim.x, map_dim.y);
+
+    /* compute map scaling to full-res */
+    float2 map_scale = map_dimf / dst_dimf;
+
+    /* scaled position to fetch from the maps */
+    float2 map_p = (float2)(p.x, p.y) * map_scale;
+
+    /* read mapping coordinates from full-res maps */
+    float4 xmap = read_imagef(xmapi, nearest_sampler, map_p);
+    float4 ymap = read_imagef(ymapi, nearest_sampler, map_p);
+    float2 pos  = (float2)(xmap.x, ymap.x) * 65535.f;
+
+    pos /= map_scale;
+
+    /* check bounds */
+    int2 mi = ((pos >= (float2)(0.f,0.f)) * (pos < src_dimf));
+    float m = mi.x && mi.y;
+
+    /* read source and apply swizzle + scale */
+    float4 src_val = read_imagef(src, nearest_sampler, pos);
+
+    float tmp[4];
+    vstore4(src_val, 0, tmp);
+    src_val = (float4)(tmp[swizzle.x] * scale.x,
+                       tmp[swizzle.y] * scale.y,
+                       tmp[swizzle.z] * scale.z,
+                       tmp[swizzle.w] * scale.w);
+
+    /* mix with fill color if out-of-bounds */
+    float4 val = mix(fill_color, src_val, m);
 
     write_imagef(dst, p, val);
 }
 
-__kernel void remap_linear(__write_only image2d_t dst,
-                           __read_only  image2d_t src,
-                           __read_only  image2d_t xmapi,
-                           __read_only  image2d_t ymapi,
-                           float4 fill_color)
+__kernel void remap_linear(
+    __write_only image2d_t dst,
+    __read_only  image2d_t src,
+    __read_only  image2d_t xmapi,
+    __read_only  image2d_t ymapi,
+    float4 fill_color,
+    float4 scale,
+    int4 swizzle)
 {
     int2 p = (int2)(get_global_id(0), get_global_id(1));
-    int2 dimi = get_image_dim(src);
-    float2 dimf = (float2)(dimi.x, dimi.y);
-    float4 val;
-    int2 mi;
-    float m;
-    float4 xmap = read_imagef(xmapi, nearest_sampler, p);
-    float4 ymap = read_imagef(ymapi, nearest_sampler, p);
-    float2 pos  = (float2)(xmap.x, ymap.x);
-    pos.xy = pos.xy * 65535.f;
-
-    mi = ((pos >= (float2)(0.f, 0.f)) * (pos < dimf) * (p <= dimi));
-    m = mi.x && mi.y;
-    val = mix(fill_color, read_imagef(src, linear_sampler, pos), m);
+
+    int2 src_dim = get_image_dim(src);
+    int2 dst_dim = get_image_dim(dst);
+    int2 map_dim = get_image_dim(xmapi);
+
+    float2 src_dimf = (float2)(src_dim.x, src_dim.y);
+    float2 dst_dimf = (float2)(dst_dim.x, dst_dim.y);
+    float2 map_dimf = (float2)(map_dim.x, map_dim.y);
+
+    float2 map_scale = map_dimf / dst_dimf;
+
+    float2 map_p = (float2)(p.x, p.y) * map_scale;
+
+    float4 xmap = read_imagef(xmapi, nearest_sampler, map_p);
+    float4 ymap = read_imagef(ymapi, nearest_sampler, map_p);
+    float2 pos  = (float2)(xmap.x, ymap.x) * 65535.f;
+
+    pos /= map_scale;
+
+    int2 mi = ((pos >= (float2)(0.f,0.f)) * (pos < src_dimf));
+    float m = mi.x && mi.y;
+    float4 src_val = read_imagef(src, linear_sampler, pos);
+
+    float tmp[4];
+    vstore4(src_val, 0, tmp);
+    src_val = (float4)(tmp[swizzle.x] * scale.x,
+                       tmp[swizzle.y] * scale.y,
+                       tmp[swizzle.z] * scale.z,
+                       tmp[swizzle.w] * scale.w);
+
+    float4 val = mix(fill_color, src_val, m);
 
     write_imagef(dst, p, val);
 }
diff --git a/libavfilter/vf_remap_opencl.c b/libavfilter/vf_remap_opencl.c
index bb83944b2f..f6c5211f73 100644
--- a/libavfilter/vf_remap_opencl.c
+++ b/libavfilter/vf_remap_opencl.c
@@ -29,10 +29,18 @@
 #include "opencl_source.h"
 #include "video.h"
+#define MAX_PLANES 4
+
+typedef struct PlaneParams {
+    cl_float4 scale;
+    cl_int4   swizzle;
+} PlaneParams;
+
 typedef struct RemapOpenCLContext {
     OpenCLFilterContext ocf;
 
     int nb_planes;
+    PlaneParams plane[MAX_PLANES];
     int interp;
     uint8_t fill_rgba[4];
     cl_float4 cl_fill_color;
@@ -41,6 +49,7 @@ typedef struct RemapOpenCLContext {
     cl_kernel        kernel;
     cl_command_queue command_queue;
 
     FFFrameSync fs;
 } RemapOpenCLContext;
 
@@ -73,10 +82,9 @@ static int remap_opencl_load(AVFilterContext *avctx,
     cl_int cle;
     const char *source = ff_source_remap_cl;
     const char *kernel = kernels[ctx->interp];
-    const AVPixFmtDescriptor *main_desc;
     int err, main_planes;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(main_format);
-    int is_rgb = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+    const AVPixFmtDescriptor *main_desc = av_pix_fmt_desc_get(main_format);
+    int is_rgb = !!(main_desc->flags & AV_PIX_FMT_FLAG_RGB);
     const float scale = 1.f / 255.f;
     uint8_t rgba_map[4];
 
@@ -94,8 +102,6 @@ static int remap_opencl_load(AVFilterContext *avctx,
         ctx->cl_fill_color.s[3] = ctx->fill_rgba[3] * scale;
     }
-    main_desc = av_pix_fmt_desc_get(main_format);
-
     main_planes = 0;
     for (int i = 0; i < main_desc->nb_components; i++)
         main_planes = FFMAX(main_planes,
@@ -103,6 +109,26 @@ static int remap_opencl_load(AVFilterContext *avctx,
 
     ctx->nb_planes = main_planes;
 
+    for (int p = 0; p < ctx->nb_planes; p++) {
+        PlaneParams *pp = &ctx->plane[p];
+
+        if (is_rgb) {
+            // RGB plane (single plane)
+            pp->scale = (cl_float4){{1.0f, 1.0f, 1.0f, 1.0f}};
+            pp->swizzle = (cl_int4){{0, 1, 2, 3}};
+        } else {
+                pp->scale = (cl_float4){{1.0f, 0.0f, 0.0f, 1.0f}};
+                pp->swizzle = (cl_int4){{0, 0, 0, 3}};
+            } else {
+                pp->scale = (cl_float4){{1.0f, 1.0f, 0.0f, 1.0f}};
+                pp->swizzle = (cl_int4){{0, 1, 0, 3}};
+            }
     err = ff_opencl_filter_load_program(avctx, &source, 1);
     if (err < 0)
         goto fail;
@@ -132,6 +158,7 @@ static int remap_opencl_process_frame(FFFrameSync *fs)
     AVFilterContext *avctx = fs->parent;
     AVFilterLink *outlink = avctx->outputs[0];
     RemapOpenCLContext *ctx = avctx->priv;
+
     AVFrame *input_main, *input_xmap, *input_ymap;
     AVFrame *output;
     cl_mem mem;
@@ -199,6 +226,12 @@ static int remap_opencl_process_frame(FFFrameSync *fs)
         CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_float4, &cl_fill_color);
         kernel_arg++;
 
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_float4, &(ctx-
>plane[plane].scale));
+        kernel_arg++;
+
+        CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int4, &(ctx-
>plane[plane].swizzle));
+        kernel_arg++;
+
         err = ff_opencl_filter_work_size_from_image(avctx, global_work,
                                                     output, plane, 0);


_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2025-09-05 14:22 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2756493.vuYhMxLoTh@linux.fritz.box \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=ffmpeg@welwarsky.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git