From: ddesouza via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: ddesouza <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling (PR #20958)
Date: Tue, 18 Nov 2025 16:29:23 -0000
Message-ID: <176348336430.25.16743169341643234132@2cb04c0e5124> (raw)
PR #20958 opened by ddesouza
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20958
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20958.patch
This is a revised version of PR #20562 that addresses the review comments from @haasn and @mkver.
>From a14bccc09b9af29672c141a447999fd3d99a1ba1 Mon Sep 17 00:00:00 2001
From: Diego de Souza <ddesouza@nvidia.com>
Date: Wed, 12 Nov 2025 20:08:45 +0100
Subject: [PATCH 1/3] avutil/hwcontext_cuda: Expands pixel formats support
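Add support for additional pixel formats in CUDA hardware context:
- Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
Note: the semiplanar formats (nv16, p210, p216) are not added by this
patch; the diff below inserts only the four planar formats, and p210/p216
already appear in the list as unchanged context.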
Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
---
libavutil/hwcontext_cuda.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 10d3399537..b0b65b2446 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -50,6 +50,10 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_P016,
AV_PIX_FMT_P210,
AV_PIX_FMT_P216,
+ AV_PIX_FMT_YUV422P,
+ AV_PIX_FMT_YUV420P10,
+ AV_PIX_FMT_YUV422P10,
+ AV_PIX_FMT_YUV444P10,
AV_PIX_FMT_YUV444P10MSB,
AV_PIX_FMT_YUV444P12MSB,
AV_PIX_FMT_YUV444P16,
--
2.49.1
>From 9d85f56a1a109a897a40f0993f2c6b13f1b81dcf Mon Sep 17 00:00:00 2001
From: Diego de Souza <ddesouza@nvidia.com>
Date: Thu, 13 Nov 2025 09:49:45 +0100
Subject: [PATCH 2/3] avfilter/hwupload_cuda: Expands pixel formats support
Add support for uploading additional pixel formats to NVIDIA GPUs:
- Planar formats (yuv420p10, yuv422p, yuv422p10, yuv444p10)
- Semiplanar formats (nv16, p210, p216)
- Packed RGB formats (rgb32, bgr32)
Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
---
libavfilter/vf_hwupload_cuda.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c
index b505f8b298..34f959ca50 100644
--- a/libavfilter/vf_hwupload_cuda.c
+++ b/libavfilter/vf_hwupload_cuda.c
@@ -59,9 +59,9 @@ static int cudaupload_query_formats(const AVFilterContext *ctx,
int ret;
static const enum AVPixelFormat input_pix_fmts[] = {
- AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV444P,
- AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16,
- AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32,
+ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_NV16, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_P210, AV_PIX_FMT_P216, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P16,
+ AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, AV_PIX_FMT_RGB32, AV_PIX_FMT_BGR32,
#if CONFIG_VULKAN
AV_PIX_FMT_VULKAN,
#endif
--
2.49.1
>From 92d47b6b570e96e8ea234d1cd7bb2dd5b5d289a7 Mon Sep 17 00:00:00 2001
From: Diego de Souza <ddesouza@nvidia.com>
Date: Tue, 18 Nov 2025 17:16:43 +0100
Subject: [PATCH 3/3] avfilter/scale_cuda: Add support for 4:2:2 chroma
subsampling
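Group the supported YUV pixel formats into planar and semi-planar
classes per bit depth (planar8/10/16, semiplanar8/10/16) and name the
CUDA kernels after these classes instead of after individual pixel
formats. This approach reduces the number of CUDA kernels needed to
cover all pixel formats.
This patch:
1. Adds support for YUV 4:2:2 planar and semi-planar formats:
yuv422p, yuv422p10, nv16, p210, p216
2. Implements new conversion structures and kernel definitions
for planar and semi-planar formats
3. Adds the 10-bit planar formats yuv420p10 and yuv444p10
4. Passes the input color range to the kernels: when widening the
bit depth, samples are only shifted for limited (MPEG) range, while
for other ranges the high bits are also replicated into the low bits.
An unspecified range is treated as limited.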
Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
---
libavfilter/vf_scale_cuda.c | 91 ++-
libavfilter/vf_scale_cuda.cu | 1151 +++++++++++++++++-----------------
libavfilter/vf_scale_cuda.h | 16 +
3 files changed, 672 insertions(+), 586 deletions(-)
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 88a6e20610..4e66593489 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -39,17 +39,29 @@
#include "cuda/load_helper.h"
#include "vf_scale_cuda.h"
-static const enum AVPixelFormat supported_formats[] = {
- AV_PIX_FMT_YUV420P,
- AV_PIX_FMT_NV12,
- AV_PIX_FMT_YUV444P,
- AV_PIX_FMT_P010,
- AV_PIX_FMT_P016,
- AV_PIX_FMT_YUV444P16,
- AV_PIX_FMT_0RGB32,
- AV_PIX_FMT_0BGR32,
- AV_PIX_FMT_RGB32,
- AV_PIX_FMT_BGR32,
+struct format_entry {
+ enum AVPixelFormat format;
+ char name[13];
+};
+
+static const struct format_entry supported_formats[] = {
+ {AV_PIX_FMT_YUV420P, "planar8"},
+ {AV_PIX_FMT_YUV422P, "planar8"},
+ {AV_PIX_FMT_YUV444P, "planar8"},
+ {AV_PIX_FMT_YUV420P10,"planar10"},
+ {AV_PIX_FMT_YUV422P10,"planar10"},
+ {AV_PIX_FMT_YUV444P10,"planar10"},
+ {AV_PIX_FMT_YUV444P16,"planar16"},
+ {AV_PIX_FMT_NV12, "semiplanar8"},
+ {AV_PIX_FMT_NV16, "semiplanar8"},
+ {AV_PIX_FMT_P010, "semiplanar10"},
+ {AV_PIX_FMT_P210, "semiplanar10"},
+ {AV_PIX_FMT_P016, "semiplanar16"},
+ {AV_PIX_FMT_P216, "semiplanar16"},
+ {AV_PIX_FMT_0RGB32, "bgr0"},
+ {AV_PIX_FMT_0BGR32, "rgb0"},
+ {AV_PIX_FMT_RGB32, "bgra"},
+ {AV_PIX_FMT_BGR32, "rgba"},
};
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -187,11 +199,21 @@ static int format_is_supported(enum AVPixelFormat fmt)
int i;
for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
- if (supported_formats[i] == fmt)
+ if (supported_formats[i].format == fmt)
return 1;
return 0;
}
+static const char* get_format_name(enum AVPixelFormat fmt)
+{
+ int i;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+ if (supported_formats[i].format == fmt)
+ return supported_formats[i].name;
+ return NULL;
+}
+
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
{
CUDAScaleContext *s = ctx->priv;
@@ -284,8 +306,8 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx)
char buf[128];
int ret;
- const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
- const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
+ const char *in_fmt_name = get_format_name(s->in_fmt);
+ const char *out_fmt_name = get_format_name(s->out_fmt);
const char *function_infix = "";
@@ -335,11 +357,13 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx)
ret = AVERROR(ENOSYS);
goto fail;
}
+ av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
if (ret < 0)
goto fail;
+ av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -416,26 +440,35 @@ fail:
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height,
- AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
+ AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch, int color_range)
{
CUDAScaleContext *s = ctx->priv;
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
- CUdeviceptr dst_devptr[4] = {
- (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
- (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
+ CUDAScaleKernelParams params = {
+ .src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]},
+ .dst = {
+ (CUdeviceptr)out_frame->data[0],
+ (CUdeviceptr)out_frame->data[1],
+ (CUdeviceptr)out_frame->data[2],
+ (CUdeviceptr)out_frame->data[3]
+ },
+ .dst_width = dst_width,
+ .dst_height = dst_height,
+ .dst_pitch = dst_pitch,
+ .src_left = src_left,
+ .src_top = src_top,
+ .src_width = src_width,
+ .src_height = src_height,
+ .param = s->param,
+ .color_range = color_range
};
- void *args_uchar[] = {
- &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
- &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
- &dst_width, &dst_height, &dst_pitch,
- &src_left, &src_top, &src_width, &src_height, &s->param
- };
+ void *args[] = { &params };
return CHECK_CU(cu->cuLaunchKernel(func,
DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
- BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
+ BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
}
static int scalecuda_resize(AVFilterContext *ctx,
@@ -486,10 +519,14 @@ static int scalecuda_resize(AVFilterContext *ctx,
goto exit;
}
+ // Normalize color range: treat AVCOL_RANGE_UNSPECIFIED as limited range (MPEG)
+ // This follows the convention used in other FFmpeg filters (e.g., vf_zscale)
+ int color_range = (in->color_range == AVCOL_RANGE_UNSPECIFIED) ? AVCOL_RANGE_MPEG : in->color_range;
+
// scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
ret = call_resize_kernel(ctx, s->cu_func,
tex, in->crop_left, in->crop_top, crop_width, crop_height,
- out, out->width, out->height, out->linesize[0]);
+ out, out->width, out->height, out->linesize[0], color_range);
if (ret < 0)
goto exit;
@@ -503,7 +540,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
out,
AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
- out->linesize[1]);
+ out->linesize[1], color_range);
if (ret < 0)
goto exit;
}
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 271b55cd5d..ed5432a53f 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -35,9 +35,18 @@ using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo,
static const ushort mask_10bit = 0xFFC0;
static const ushort mask_16bit = 0xFFFF;
-static inline __device__ ushort conv_8to16(uchar in, ushort mask)
+static inline __device__ ushort conv_8to16(uchar in, ushort mask, int color_range)
{
- return ((ushort)in | ((ushort)in << 8)) & mask;
+ ushort shifted = (ushort)in << 8;
+ // AVCOL_RANGE_MPEG = 1 (limited range)
+ return (color_range == 1) ? shifted : ((shifted | ((ushort)in )) & mask);
+}
+
+static inline __device__ ushort conv_8to10pl(uchar in, int color_range)
+{
+ ushort shifted = (ushort)in << 2;
+ // AVCOL_RANGE_MPEG = 1 (limited range)
+ return (color_range == 1) ? shifted : (shifted | ((ushort)in >> 6));
}
static inline __device__ uchar conv_16to8(ushort in)
@@ -50,9 +59,23 @@ static inline __device__ uchar conv_10to8(ushort in)
return in >> 8;
}
-static inline __device__ ushort conv_10to16(ushort in)
+static inline __device__ uchar conv_10to8pl(ushort in)
{
- return in | (in >> 10);
+ return in >> 2;
+}
+
+static inline __device__ ushort conv_10to16(ushort in, int color_range)
+{
+ ushort shifted = (in >> 10);
+ // AVCOL_RANGE_MPEG = 1 (limited range)
+ return (color_range == 1) ? in : (in | shifted);
+}
+
+static inline __device__ ushort conv_10to16pl(ushort in, int color_range)
+{
+ ushort shifted = (in << 6);
+ // AVCOL_RANGE_MPEG = 1 (limited range)
+ return (color_range == 1) ? shifted : (shifted | (in >> 4));
}
static inline __device__ ushort conv_16to10(ushort in)
@@ -60,12 +83,18 @@ static inline __device__ ushort conv_16to10(ushort in)
return in & mask_10bit;
}
+static inline __device__ ushort conv_16to10pl(ushort in)
+{
+ return in >> 6;
+}
+
#define DEF_F(N, T) \
template<subsample_function_t<in_T> subsample_func_y, \
subsample_function_t<in_T_uv> subsample_func_uv> \
__device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \
int dst_width, int dst_height, int dst_pitch, \
- int src_left, int src_top, int src_width, int src_height, float param)
+ int src_left, int src_top, int src_width, int src_height, \
+ float param, int color_range)
#define SUB_F(m, plane) \
subsample_func_##m(src_tex[plane], xo, yo, \
@@ -81,9 +110,9 @@ static inline __device__ ushort conv_16to10(ushort in)
#define DEFAULT_DST(n) \
dst[n][yo*FIXED_PITCH+xo]
-// yuv420p->X
+// planar8->X
-struct Convert_yuv420p_yuv420p
+struct Convert_planar8_planar8
{
static const int in_bit_depth = 8;
typedef uchar in_T;
@@ -103,7 +132,47 @@ struct Convert_yuv420p_yuv420p
}
};
-struct Convert_yuv420p_nv12
+struct Convert_planar8_planar10
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = conv_8to10pl(SUB_F(uv, 1), color_range);
+ DEFAULT_DST(2) = conv_8to10pl(SUB_F(uv, 2), color_range);
+ }
+};
+
+struct Convert_planar8_planar16
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit, color_range);
+ DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit, color_range);
+ }
+};
+
+struct Convert_planar8_semiplanar8
{
static const int in_bit_depth = 8;
typedef uchar in_T;
@@ -125,14 +194,82 @@ struct Convert_yuv420p_nv12
}
};
-struct Convert_yuv420p_yuv444p
+struct Convert_planar8_semiplanar10
{
static const int in_bit_depth = 8;
typedef uchar in_T;
typedef uchar in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_ushort2(
+ conv_8to16(SUB_F(uv, 1), mask_10bit, color_range),
+ conv_8to16(SUB_F(uv, 2), mask_10bit, color_range)
+ );
+ }
+};
+
+struct Convert_planar8_semiplanar16
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_ushort2(
+ conv_8to16(SUB_F(uv, 1), mask_16bit, color_range),
+ conv_8to16(SUB_F(uv, 2), mask_16bit, color_range)
+ );
+ }
+};
+
+
+
+// planar10->X
+
+struct Convert_planar10_planar8
+{
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef uchar out_T;
typedef uchar out_T_uv;
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = conv_10to8pl(SUB_F(uv, 1));
+ DEFAULT_DST(2) = conv_10to8pl(SUB_F(uv, 2));
+ }
+};
+
+struct Convert_planar10_planar10
+{
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
DEF_F(Convert, out_T)
{
DEFAULT_DST(0) = SUB_F(y, 0);
@@ -145,249 +282,227 @@ struct Convert_yuv420p_yuv444p
}
};
-struct Convert_yuv420p_p010le
+struct Convert_planar10_planar16
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_10bit),
- conv_8to16(SUB_F(uv, 2), mask_10bit)
- );
- }
-};
-
-struct Convert_yuv420p_p016le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_16bit),
- conv_8to16(SUB_F(uv, 2), mask_16bit)
- );
- }
-};
-
-struct Convert_yuv420p_yuv444p16le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef ushort out_T;
typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), color_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
- DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+ DEFAULT_DST(1) = conv_10to16pl(SUB_F(uv, 1), color_range);
+ DEFAULT_DST(2) = conv_10to16pl(SUB_F(uv, 2), color_range);
}
};
-// nv12->X
-
-struct Convert_nv12_yuv420p
+struct Convert_planar10_semiplanar8
{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = res.x;
- DEFAULT_DST(2) = res.y;
- }
-};
-
-struct Convert_nv12_nv12
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
typedef uchar out_T;
typedef uchar2 out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = SUB_F(uv, 1);
- }
-};
-
-struct Convert_nv12_yuv444p
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = res.x;
- DEFAULT_DST(2) = res.y;
- }
-};
-
-struct Convert_nv12_p010le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(res.x, mask_10bit),
- conv_8to16(res.y, mask_10bit)
- );
- }
-};
-
-struct Convert_nv12_p016le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(res.x, mask_16bit),
- conv_8to16(res.y, mask_16bit)
- );
- }
-};
-
-struct Convert_nv12_yuv444p16le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar2 in_T_uv;
- typedef ushort out_T;
- typedef ushort out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit);
- DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit);
- }
-};
-
-// yuv444p->X
-
-struct Convert_yuv444p_yuv420p
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = SUB_F(uv, 1);
- DEFAULT_DST(2) = SUB_F(uv, 2);
- }
-};
-
-struct Convert_yuv444p_nv12
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
+ DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0));
}
DEF_F(Convert_uv, out_T_uv)
{
DEFAULT_DST(1) = make_uchar2(
+ conv_10to8pl(SUB_F(uv, 1)),
+ conv_10to8pl(SUB_F(uv, 2))
+ );
+ }
+};
+
+struct Convert_planar10_semiplanar10
+{
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = (SUB_F(y, 0) << 6);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_ushort2(
+ (SUB_F(uv, 1) << 6),
+ (SUB_F(uv, 2) << 6)
+ );
+ }
+};
+
+struct Convert_planar10_semiplanar16
+{
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0), color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_ushort2(
+ conv_10to16pl(SUB_F(uv, 1), color_range),
+ conv_10to16pl(SUB_F(uv, 2), color_range)
+ );
+ }
+};
+
+// planar16->X
+
+struct Convert_planar16_planar8
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef uchar out_T;
+ typedef uchar out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
+ DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
+ }
+};
+
+struct Convert_planar16_planar10
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = conv_16to10pl(SUB_F(uv, 1));
+ DEFAULT_DST(2) = conv_16to10pl(SUB_F(uv, 2));
+ }
+};
+
+struct Convert_planar16_planar16
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = SUB_F(y, 0);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = SUB_F(uv, 1);
+ DEFAULT_DST(2) = SUB_F(uv, 2);
+ }
+};
+
+struct Convert_planar16_semiplanar8
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_uchar2(
+ conv_16to8(SUB_F(uv, 1)),
+ conv_16to8(SUB_F(uv, 2))
+ );
+ }
+};
+
+struct Convert_planar16_semiplanar10
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_ushort2(
+ conv_16to10(SUB_F(uv, 1)),
+ conv_16to10(SUB_F(uv, 2))
+ );
+ }
+};
+
+struct Convert_planar16_semiplanar16
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = SUB_F(y, 0);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = make_ushort2(
SUB_F(uv, 1),
SUB_F(uv, 2)
);
}
};
-struct Convert_yuv444p_yuv444p
+// semiplanar8->X
+
+struct Convert_semiplanar8_planar8
{
static const int in_bit_depth = 8;
typedef uchar in_T;
- typedef uchar in_T_uv;
+ typedef uchar2 in_T_uv;
typedef uchar out_T;
typedef uchar out_T_uv;
@@ -398,78 +513,122 @@ struct Convert_yuv444p_yuv444p
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = SUB_F(uv, 1);
- DEFAULT_DST(2) = SUB_F(uv, 2);
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = res.x;
+ DEFAULT_DST(2) = res.y;
}
};
-struct Convert_yuv444p_p010le
+struct Convert_semiplanar8_planar10
{
static const int in_bit_depth = 8;
typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_10bit),
- conv_8to16(SUB_F(uv, 2), mask_10bit)
- );
- }
-};
-
-struct Convert_yuv444p_p016le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_ushort2(
- conv_8to16(SUB_F(uv, 1), mask_16bit),
- conv_8to16(SUB_F(uv, 2), mask_16bit)
- );
- }
-};
-
-struct Convert_yuv444p_yuv444p16le
-{
- static const int in_bit_depth = 8;
- typedef uchar in_T;
- typedef uchar in_T_uv;
+ typedef uchar2 in_T_uv;
typedef ushort out_T;
typedef ushort out_T_uv;
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit);
+ DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0), color_range);
}
DEF_F(Convert_uv, out_T_uv)
{
- DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit);
- DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit);
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = conv_8to10pl(res.x, color_range);
+ DEFAULT_DST(2) = conv_8to10pl(res.y, color_range);
}
};
-// p010le->X
+struct Convert_semiplanar8_planar16
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
-struct Convert_p010le_yuv420p
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit, color_range);
+ DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit, color_range);
+ }
+};
+
+struct Convert_semiplanar8_semiplanar8
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef uchar out_T;
+ typedef uchar2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = SUB_F(y, 0);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ DEFAULT_DST(1) = SUB_F(uv, 1);
+ }
+};
+
+struct Convert_semiplanar8_semiplanar10
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit, color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = make_ushort2(
+ conv_8to16(res.x, mask_10bit, color_range),
+ conv_8to16(res.y, mask_10bit, color_range)
+ );
+ }
+};
+
+struct Convert_semiplanar8_semiplanar16
+{
+ static const int in_bit_depth = 8;
+ typedef uchar in_T;
+ typedef uchar2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort2 out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit, color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = make_ushort2(
+ conv_8to16(res.x, mask_16bit, color_range),
+ conv_8to16(res.y, mask_16bit, color_range)
+ );
+ }
+};
+
+// semiplanar10->X
+
+struct Convert_semiplanar10_planar8
{
static const int in_bit_depth = 10;
typedef ushort in_T;
@@ -490,7 +649,49 @@ struct Convert_p010le_yuv420p
}
};
-struct Convert_p010le_nv12
+struct Convert_semiplanar10_planar10
+{
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = SUB_F(y, 0) >> 6;
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = res.x >> 6;
+ DEFAULT_DST(2) = res.y >> 6;
+ }
+};
+
+struct Convert_semiplanar10_planar16
+{
+ static const int in_bit_depth = 10;
+ typedef ushort in_T;
+ typedef ushort2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), color_range);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = conv_10to16(res.x, color_range);
+ DEFAULT_DST(2) = conv_10to16(res.y, color_range);
+ }
+};
+
+struct Convert_semiplanar10_semiplanar8
{
static const int in_bit_depth = 10;
typedef ushort in_T;
@@ -513,28 +714,7 @@ struct Convert_p010le_nv12
}
};
-struct Convert_p010le_yuv444p
-{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0));
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_10to8(res.x);
- DEFAULT_DST(2) = conv_10to8(res.y);
- }
-};
-
-struct Convert_p010le_p010le
+struct Convert_semiplanar10_semiplanar10
{
static const int in_bit_depth = 10;
typedef ushort in_T;
@@ -553,7 +733,7 @@ struct Convert_p010le_p010le
}
};
-struct Convert_p010le_p016le
+struct Convert_semiplanar10_semiplanar16
{
static const int in_bit_depth = 10;
typedef ushort in_T;
@@ -563,43 +743,23 @@ struct Convert_p010le_p016le
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
+ DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0), color_range);
}
DEF_F(Convert_uv, out_T_uv)
{
in_T_uv res = SUB_F(uv, 1);
DEFAULT_DST(1) = make_ushort2(
- conv_10to16(res.x),
- conv_10to16(res.y)
+ conv_10to16(res.x, color_range),
+ conv_10to16(res.y, color_range)
);
}
};
-struct Convert_p010le_yuv444p16le
-{
- static const int in_bit_depth = 10;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef ushort out_T;
- typedef ushort out_T_uv;
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0));
- }
+// semiplanar16->X
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_10to16(res.x);
- DEFAULT_DST(2) = conv_10to16(res.y);
- }
-};
-
-// p016le->X
-
-struct Convert_p016le_yuv420p
+struct Convert_semiplanar16_planar8
{
static const int in_bit_depth = 16;
typedef ushort in_T;
@@ -620,7 +780,49 @@ struct Convert_p016le_yuv420p
}
};
-struct Convert_p016le_nv12
+struct Convert_semiplanar16_planar10
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0));
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = conv_16to10pl(res.x);
+ DEFAULT_DST(2) = conv_16to10pl(res.y);
+ }
+};
+
+struct Convert_semiplanar16_planar16
+{
+ static const int in_bit_depth = 16;
+ typedef ushort in_T;
+ typedef ushort2 in_T_uv;
+ typedef ushort out_T;
+ typedef ushort out_T_uv;
+
+ DEF_F(Convert, out_T)
+ {
+ DEFAULT_DST(0) = SUB_F(y, 0);
+ }
+
+ DEF_F(Convert_uv, out_T_uv)
+ {
+ in_T_uv res = SUB_F(uv, 1);
+ DEFAULT_DST(1) = res.x;
+ DEFAULT_DST(2) = res.y;
+ }
+};
+
+struct Convert_semiplanar16_semiplanar8
{
static const int in_bit_depth = 16;
typedef ushort in_T;
@@ -643,28 +845,7 @@ struct Convert_p016le_nv12
}
};
-struct Convert_p016le_yuv444p
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = conv_16to8(res.x);
- DEFAULT_DST(2) = conv_16to8(res.y);
- }
-};
-
-struct Convert_p016le_p010le
+struct Convert_semiplanar16_semiplanar10
{
static const int in_bit_depth = 16;
typedef ushort in_T;
@@ -687,7 +868,7 @@ struct Convert_p016le_p010le
}
};
-struct Convert_p016le_p016le
+struct Convert_semiplanar16_semiplanar16
{
static const int in_bit_depth = 16;
typedef ushort in_T;
@@ -706,155 +887,6 @@ struct Convert_p016le_p016le
}
};
-struct Convert_p016le_yuv444p16le
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort2 in_T_uv;
- typedef ushort out_T;
- typedef ushort out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- in_T_uv res = SUB_F(uv, 1);
- DEFAULT_DST(1) = res.x;
- DEFAULT_DST(2) = res.y;
- }
-};
-
-// yuv444p16le->X
-
-struct Convert_yuv444p16le_yuv420p
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
- DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
- }
-};
-
-struct Convert_yuv444p16le_nv12
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef uchar out_T;
- typedef uchar2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_uchar2(
- conv_16to8(SUB_F(uv, 1)),
- conv_16to8(SUB_F(uv, 2))
- );
- }
-};
-
-struct Convert_yuv444p16le_yuv444p
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef uchar out_T;
- typedef uchar out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0));
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1));
- DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2));
- }
-};
-
-struct Convert_yuv444p16le_p010le
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0));
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_ushort2(
- conv_16to10(SUB_F(uv, 1)),
- conv_16to10(SUB_F(uv, 2))
- );
- }
-};
-
-struct Convert_yuv444p16le_p016le
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef ushort out_T;
- typedef ushort2 out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = make_ushort2(
- SUB_F(uv, 1),
- SUB_F(uv, 2)
- );
- }
-};
-
-struct Convert_yuv444p16le_yuv444p16le
-{
- static const int in_bit_depth = 16;
- typedef ushort in_T;
- typedef ushort in_T_uv;
- typedef ushort out_T;
- typedef ushort out_T_uv;
-
- DEF_F(Convert, out_T)
- {
- DEFAULT_DST(0) = SUB_F(y, 0);
- }
-
- DEF_F(Convert_uv, out_T_uv)
- {
- DEFAULT_DST(1) = SUB_F(uv, 1);
- DEFAULT_DST(2) = SUB_F(uv, 2);
- }
-};
-
#define DEF_CONVERT_IDENTITY(fmt1, fmt2)\
\
struct Convert_##fmt1##_##fmt2 \
@@ -930,7 +962,7 @@ struct Convert_bgr0_bgra
res.x,
res.y,
res.z,
- 1
+ 0xFF
);
}
@@ -954,7 +986,7 @@ struct Convert_bgr0_rgba
res.z,
res.y,
res.x,
- 1
+ 0xFF
);
}
@@ -978,7 +1010,7 @@ struct Convert_rgb0_bgra
res.z,
res.y,
res.x,
- 1
+ 0xFF
);
}
@@ -1002,7 +1034,7 @@ struct Convert_rgb0_rgba
res.x,
res.y,
res.z,
- 1
+ 0xFF
);
}
@@ -1147,25 +1179,26 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
/// --- FUNCTION EXPORTS ---
-#define KERNEL_ARGS(T) \
- cudaTextureObject_t src_tex_0, cudaTextureObject_t src_tex_1, \
- cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
- T *dst_0, T *dst_1, T *dst_2, T *dst_3, \
- int dst_width, int dst_height, int dst_pitch, \
- int src_left, int src_top, int src_width, int src_height, float param
+#define KERNEL_ARGS(T) CUDAScaleKernelParams params
#define SUBSAMPLE(Convert, T) \
- cudaTextureObject_t src_tex[4] = \
- { src_tex_0, src_tex_1, src_tex_2, src_tex_3 }; \
- T *dst[4] = { dst_0, dst_1, dst_2, dst_3 }; \
+ cudaTextureObject_t src_tex[4] = { \
+ params.src_tex[0], params.src_tex[1], \
+ params.src_tex[2], params.src_tex[3] \
+ }; \
+ T *dst[4] = { \
+ (T*)params.dst[0], (T*)params.dst[1], \
+ (T*)params.dst[2], (T*)params.dst[3] \
+ }; \
int xo = blockIdx.x * blockDim.x + threadIdx.x; \
int yo = blockIdx.y * blockDim.y + threadIdx.y; \
- if (yo >= dst_height || xo >= dst_width) return; \
+ if (yo >= params.dst_height || xo >= params.dst_width) return; \
Convert( \
src_tex, dst, xo, yo, \
- dst_width, dst_height, dst_pitch, \
- src_left, src_top, \
- src_width, src_height, param);
+ params.dst_width, params.dst_height, params.dst_pitch, \
+ params.src_left, params.src_top, \
+ params.src_width, params.src_height, \
+ params.param, params.color_range);
extern "C" {
@@ -1184,12 +1217,12 @@ extern "C" {
NEAREST_KERNEL(C,_uv)
#define NEAREST_KERNELS(C) \
- NEAREST_KERNEL_RAW(yuv420p_ ## C) \
- NEAREST_KERNEL_RAW(nv12_ ## C) \
- NEAREST_KERNEL_RAW(yuv444p_ ## C) \
- NEAREST_KERNEL_RAW(p010le_ ## C) \
- NEAREST_KERNEL_RAW(p016le_ ## C) \
- NEAREST_KERNEL_RAW(yuv444p16le_ ## C)
+ NEAREST_KERNEL_RAW(planar8_ ## C) \
+ NEAREST_KERNEL_RAW(planar10_ ## C) \
+ NEAREST_KERNEL_RAW(planar16_ ## C) \
+ NEAREST_KERNEL_RAW(semiplanar8_ ## C) \
+ NEAREST_KERNEL_RAW(semiplanar10_ ## C) \
+ NEAREST_KERNEL_RAW(semiplanar16_ ## C)
#define NEAREST_KERNELS_RGB(C) \
NEAREST_KERNEL_RAW(rgb0_ ## C) \
@@ -1197,12 +1230,12 @@ extern "C" {
NEAREST_KERNEL_RAW(rgba_ ## C) \
NEAREST_KERNEL_RAW(bgra_ ## C) \
-NEAREST_KERNELS(yuv420p)
-NEAREST_KERNELS(nv12)
-NEAREST_KERNELS(yuv444p)
-NEAREST_KERNELS(p010le)
-NEAREST_KERNELS(p016le)
-NEAREST_KERNELS(yuv444p16le)
+NEAREST_KERNELS(planar8)
+NEAREST_KERNELS(planar10)
+NEAREST_KERNELS(planar16)
+NEAREST_KERNELS(semiplanar8)
+NEAREST_KERNELS(semiplanar10)
+NEAREST_KERNELS(semiplanar16)
NEAREST_KERNELS_RGB(rgb0)
NEAREST_KERNELS_RGB(bgr0)
@@ -1224,12 +1257,12 @@ NEAREST_KERNELS_RGB(bgra)
BILINEAR_KERNEL(C,_uv)
#define BILINEAR_KERNELS(C) \
- BILINEAR_KERNEL_RAW(yuv420p_ ## C) \
- BILINEAR_KERNEL_RAW(nv12_ ## C) \
- BILINEAR_KERNEL_RAW(yuv444p_ ## C) \
- BILINEAR_KERNEL_RAW(p010le_ ## C) \
- BILINEAR_KERNEL_RAW(p016le_ ## C) \
- BILINEAR_KERNEL_RAW(yuv444p16le_ ## C)
+ BILINEAR_KERNEL_RAW(planar8_ ## C) \
+ BILINEAR_KERNEL_RAW(planar10_ ## C) \
+ BILINEAR_KERNEL_RAW(planar16_ ## C) \
+ BILINEAR_KERNEL_RAW(semiplanar8_ ## C) \
+ BILINEAR_KERNEL_RAW(semiplanar10_ ## C) \
+ BILINEAR_KERNEL_RAW(semiplanar16_ ## C)
#define BILINEAR_KERNELS_RGB(C) \
BILINEAR_KERNEL_RAW(rgb0_ ## C) \
@@ -1237,12 +1270,12 @@ NEAREST_KERNELS_RGB(bgra)
BILINEAR_KERNEL_RAW(rgba_ ## C) \
BILINEAR_KERNEL_RAW(bgra_ ## C)
-BILINEAR_KERNELS(yuv420p)
-BILINEAR_KERNELS(nv12)
-BILINEAR_KERNELS(yuv444p)
-BILINEAR_KERNELS(p010le)
-BILINEAR_KERNELS(p016le)
-BILINEAR_KERNELS(yuv444p16le)
+BILINEAR_KERNELS(planar8)
+BILINEAR_KERNELS(planar10)
+BILINEAR_KERNELS(planar16)
+BILINEAR_KERNELS(semiplanar8)
+BILINEAR_KERNELS(semiplanar10)
+BILINEAR_KERNELS(semiplanar16)
BILINEAR_KERNELS_RGB(rgb0)
BILINEAR_KERNELS_RGB(bgr0)
@@ -1264,12 +1297,12 @@ BILINEAR_KERNELS_RGB(bgra)
BICUBIC_KERNEL(C,_uv)
#define BICUBIC_KERNELS(C) \
- BICUBIC_KERNEL_RAW(yuv420p_ ## C) \
- BICUBIC_KERNEL_RAW(nv12_ ## C) \
- BICUBIC_KERNEL_RAW(yuv444p_ ## C) \
- BICUBIC_KERNEL_RAW(p010le_ ## C) \
- BICUBIC_KERNEL_RAW(p016le_ ## C) \
- BICUBIC_KERNEL_RAW(yuv444p16le_ ## C)
+ BICUBIC_KERNEL_RAW(planar8_ ## C) \
+ BICUBIC_KERNEL_RAW(planar10_ ## C) \
+ BICUBIC_KERNEL_RAW(planar16_ ## C) \
+ BICUBIC_KERNEL_RAW(semiplanar8_ ## C) \
+ BICUBIC_KERNEL_RAW(semiplanar10_ ## C) \
+ BICUBIC_KERNEL_RAW(semiplanar16_ ## C)
#define BICUBIC_KERNELS_RGB(C) \
BICUBIC_KERNEL_RAW(rgb0_ ## C) \
@@ -1277,12 +1310,12 @@ BILINEAR_KERNELS_RGB(bgra)
BICUBIC_KERNEL_RAW(rgba_ ## C) \
BICUBIC_KERNEL_RAW(bgra_ ## C)
-BICUBIC_KERNELS(yuv420p)
-BICUBIC_KERNELS(nv12)
-BICUBIC_KERNELS(yuv444p)
-BICUBIC_KERNELS(p010le)
-BICUBIC_KERNELS(p016le)
-BICUBIC_KERNELS(yuv444p16le)
+BICUBIC_KERNELS(planar8)
+BICUBIC_KERNELS(planar10)
+BICUBIC_KERNELS(planar16)
+BICUBIC_KERNELS(semiplanar8)
+BICUBIC_KERNELS(semiplanar10)
+BICUBIC_KERNELS(semiplanar16)
BICUBIC_KERNELS_RGB(rgb0)
BICUBIC_KERNELS_RGB(bgr0)
@@ -1304,12 +1337,12 @@ BICUBIC_KERNELS_RGB(bgra)
LANCZOS_KERNEL(C,_uv)
#define LANCZOS_KERNELS(C) \
- LANCZOS_KERNEL_RAW(yuv420p_ ## C) \
- LANCZOS_KERNEL_RAW(nv12_ ## C) \
- LANCZOS_KERNEL_RAW(yuv444p_ ## C) \
- LANCZOS_KERNEL_RAW(p010le_ ## C) \
- LANCZOS_KERNEL_RAW(p016le_ ## C) \
- LANCZOS_KERNEL_RAW(yuv444p16le_ ## C)
+ LANCZOS_KERNEL_RAW(planar8_ ## C) \
+ LANCZOS_KERNEL_RAW(planar10_ ## C) \
+ LANCZOS_KERNEL_RAW(planar16_ ## C) \
+ LANCZOS_KERNEL_RAW(semiplanar8_ ## C) \
+ LANCZOS_KERNEL_RAW(semiplanar10_ ## C) \
+ LANCZOS_KERNEL_RAW(semiplanar16_ ## C)
#define LANCZOS_KERNELS_RGB(C) \
LANCZOS_KERNEL_RAW(rgb0_ ## C) \
@@ -1317,12 +1350,12 @@ BICUBIC_KERNELS_RGB(bgra)
LANCZOS_KERNEL_RAW(rgba_ ## C) \
LANCZOS_KERNEL_RAW(bgra_ ## C)
-LANCZOS_KERNELS(yuv420p)
-LANCZOS_KERNELS(nv12)
-LANCZOS_KERNELS(yuv444p)
-LANCZOS_KERNELS(p010le)
-LANCZOS_KERNELS(p016le)
-LANCZOS_KERNELS(yuv444p16le)
+LANCZOS_KERNELS(planar8)
+LANCZOS_KERNELS(planar10)
+LANCZOS_KERNELS(planar16)
+LANCZOS_KERNELS(semiplanar8)
+LANCZOS_KERNELS(semiplanar10)
+LANCZOS_KERNELS(semiplanar16)
LANCZOS_KERNELS_RGB(rgb0)
LANCZOS_KERNELS_RGB(bgr0)
diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h
index 40d5b9cfac..1fb3498ee8 100644
--- a/libavfilter/vf_scale_cuda.h
+++ b/libavfilter/vf_scale_cuda.h
@@ -23,6 +23,22 @@
#ifndef AVFILTER_SCALE_CUDA_H
#define AVFILTER_SCALE_CUDA_H
+#include <ffnvcodec/dynlink_cuda.h>
+
#define SCALE_CUDA_PARAM_DEFAULT 999999.0f
+typedef struct __attribute__((aligned(16))) { /* All scale-kernel arguments bundled in one 16-byte-aligned struct; the .cu KERNEL_ARGS macro takes it by value as "params". NOTE(review): __attribute__ is GCC/Clang syntax - confirm every compiler that includes this header accepts it. */
+ CUtexObject src_tex[4]; /* source texture objects; kernels copy all four slots into a local array */
+ CUdeviceptr dst[4]; /* destination addresses; kernels cast each slot to a pointer to their output sample type */
+ int dst_width; /* threads with xo >= dst_width return without writing */
+ int dst_height; /* threads with yo >= dst_height return without writing */
+ int dst_pitch; /* forwarded to the conversion routine (NOTE(review): units not visible here - confirm) */
+ int src_left; /* forwarded to the conversion routine; presumably the left edge of the source region - confirm */
+ int src_top; /* forwarded to the conversion routine; presumably the top edge of the source region - confirm */
+ int src_width; /* forwarded to the conversion routine; presumably the source region width - confirm */
+ int src_height; /* forwarded to the conversion routine; presumably the source region height - confirm */
+ float param; /* forwarded to the conversion routine; meaning depends on the scaling algorithm - confirm */
+ int color_range; /* forwarded as the last conversion argument (NOTE(review): presumably an AVColorRange value - confirm) */
+} CUDAScaleKernelParams;
+
#endif
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-11-18 16:30 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=176348336430.25.16743169341643234132@2cb04c0e5124 \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git