From: Diego Felix de Souza via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: <ffmpeg-devel@ffmpeg.org> Cc: ddesouza@nvidia.com Subject: [FFmpeg-devel] [PATCH 3/3] avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling Date: Thu, 3 Jul 2025 16:25:20 +0200 Message-ID: <mailman.5310.1751552831.1384.ffmpeg-devel@ffmpeg.org> (raw) In-Reply-To: <20250703142520.16586-1-ddesouza@nvidia.com> [-- Attachment #1: Type: message/rfc822, Size: 44570 bytes --] From: <ddesouza@nvidia.com> To: <ffmpeg-devel@ffmpeg.org> Cc: Diego de Souza <ddesouza@nvidia.com> Subject: [PATCH 3/3] avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling Date: Thu, 3 Jul 2025 16:25:20 +0200 Message-ID: <20250703142520.16586-3-ddesouza@nvidia.com> From: Diego de Souza <ddesouza@nvidia.com> The supported YUV pixel formats were separated between planar and semiplanar. This approach reduces the number of CUDA kernels for all pixel formats. This patch: 1. Adds support for YUV 4:2:2 planar and semi-planar formats: yuv422p, yuv422p10, nv16, p210, p216 2. Implements new conversion structures and kernel definitions for planar and semi-planar formats Signed-off-by: Diego de Souza <ddesouza@nvidia.com> --- libavfilter/vf_scale_cuda.c | 52 ++- libavfilter/vf_scale_cuda.cu | 637 ++++++++++++++++++----------------- 2 files changed, 368 insertions(+), 321 deletions(-) diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 44eef207ca..560f901730 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -39,17 +39,29 @@ #include "cuda/load_helper.h" #include "vf_scale_cuda.h" -static const enum AVPixelFormat supported_formats[] = { - AV_PIX_FMT_YUV420P, - AV_PIX_FMT_NV12, - AV_PIX_FMT_YUV444P, - AV_PIX_FMT_P010, - AV_PIX_FMT_P016, - AV_PIX_FMT_YUV444P16, - AV_PIX_FMT_0RGB32, - AV_PIX_FMT_0BGR32, - AV_PIX_FMT_RGB32, - AV_PIX_FMT_BGR32, +struct format_entry { + enum AVPixelFormat format; + const char *name; +}; + +static const struct format_entry supported_formats[] = { + {AV_PIX_FMT_YUV420P, "planar8"}, + {AV_PIX_FMT_YUV422P, "planar8"}, + {AV_PIX_FMT_YUV444P, "planar8"}, + {AV_PIX_FMT_YUV420P10,"planar10"}, + {AV_PIX_FMT_YUV422P10,"planar10"}, + {AV_PIX_FMT_YUV444P10,"planar10"}, + {AV_PIX_FMT_YUV444P16,"planar16"}, + {AV_PIX_FMT_NV12, "semiplanar8"}, + {AV_PIX_FMT_NV16, "semiplanar8"}, + {AV_PIX_FMT_P010, "semiplanar10"}, + {AV_PIX_FMT_P210, "semiplanar10"}, + {AV_PIX_FMT_P016, "semiplanar16"}, + {AV_PIX_FMT_P216, "semiplanar16"}, + {AV_PIX_FMT_0RGB32, "bgr0"}, + {AV_PIX_FMT_0BGR32, "rgb0"}, + {AV_PIX_FMT_RGB32, "bgra"}, + {AV_PIX_FMT_BGR32, "rgba"}, }; #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) @@ -187,11 +199,21 @@ static int format_is_supported(enum AVPixelFormat fmt) int i; for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) - if (supported_formats[i] == fmt) + if (supported_formats[i].format == fmt) return 1; return 0; } +static const char* get_format_name(enum AVPixelFormat fmt) +{ + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) + if (supported_formats[i].format == fmt) + return supported_formats[i].name; + return NULL; +} + static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format) { CUDAScaleContext *s = ctx->priv; @@ -284,8 +306,8 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx) char buf[128]; int ret; - const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt); - const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt); + const char *in_fmt_name = get_format_name(s->in_fmt); + const char *out_fmt_name = get_format_name(s->out_fmt); const char *function_infix = ""; @@ -335,11 +357,13 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx) ret = AVERROR(ENOSYS); goto fail; } + av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt)); snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name); ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf)); if (ret < 0) goto fail; + av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt)); fail: CHECK_CU(cu->cuCtxPopCurrent(&dummy)); diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu index 271b55cd5d..62d1649a25 100644 --- a/libavfilter/vf_scale_cuda.cu +++ b/libavfilter/vf_scale_cuda.cu @@ -40,6 +40,11 @@ static inline __device__ ushort conv_8to16(uchar in, ushort mask) return ((ushort)in | ((ushort)in << 8)) & mask; } +static inline __device__ ushort conv_8to10pl(uchar in) +{ + return ((ushort)in << 2) | ((ushort)in >> 6); +} + static inline __device__ uchar conv_16to8(ushort in) { return in >> 8; @@ -50,16 +55,31 @@ static inline __device__ uchar conv_10to8(ushort in) return in >> 8; } +static inline __device__ uchar conv_10to8pl(ushort in) +{ + return in >> 2; +} + static inline __device__ ushort conv_10to16(ushort in) { return in | (in >> 10); } +static inline __device__ ushort conv_10to16pl(ushort in) +{ + return (in << 6) | (in >> 4); +} + static inline __device__ ushort conv_16to10(ushort in) { return in & mask_10bit; } +static inline __device__ ushort conv_16to10pl(ushort in) +{ + return in >> 6; +} + #define DEF_F(N, T) \ template<subsample_function_t<in_T> subsample_func_y, \ subsample_function_t<in_T_uv> subsample_func_uv> \ @@ -81,9 +101,9 @@ static inline __device__ ushort conv_16to10(ushort in) #define DEFAULT_DST(n) \ dst[n][yo*FIXED_PITCH+xo] -// yuv420p->X +// planar8->X -struct Convert_yuv420p_yuv420p +struct Convert_planar8_planar8 { static const int in_bit_depth = 8; typedef uchar in_T; @@ -103,71 +123,69 @@ struct Convert_yuv420p_yuv420p } }; -struct Convert_yuv420p_nv12 +struct Convert_planar8_planar10 { static const int in_bit_depth = 8; typedef uchar in_T; typedef uchar in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = make_uchar2( - SUB_F(uv, 1), - SUB_F(uv, 2) - ); + DEFAULT_DST(1) = conv_8to10pl(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_8to10pl(SUB_F(uv, 2)); } }; -struct Convert_yuv420p_yuv444p +struct Convert_planar8_planar16 { static const int in_bit_depth = 8; typedef uchar in_T; typedef uchar in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = SUB_F(uv, 1); - DEFAULT_DST(2) = SUB_F(uv, 2); + DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); + DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); } }; -struct Convert_yuv420p_p010le +struct Convert_planar8_semiplanar8 { static const int in_bit_depth = 8; typedef uchar in_T; typedef uchar in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); + DEFAULT_DST(0) = SUB_F(y, 0); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_10bit), - conv_8to16(SUB_F(uv, 2), mask_10bit) + DEFAULT_DST(1) = make_uchar2( + SUB_F(uv, 1), + SUB_F(uv, 2) ); } }; -struct Convert_yuv420p_p016le +struct Convert_planar8_semiplanar10 { static const int in_bit_depth = 8; typedef uchar in_T; @@ -177,25 +195,25 @@ struct Convert_yuv420p_p016le DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); } DEF_F(Convert_uv, out_T_uv) { DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_16bit), - conv_8to16(SUB_F(uv, 2), mask_16bit) + conv_8to16(SUB_F(uv, 1), mask_10bit), + conv_8to16(SUB_F(uv, 2), mask_10bit) ); } }; -struct Convert_yuv420p_yuv444p16le +struct Convert_planar8_semiplanar16 { static const int in_bit_depth = 8; typedef uchar in_T; typedef uchar in_T_uv; typedef ushort out_T; - typedef ushort out_T_uv; + typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { @@ -204,41 +222,44 @@ struct Convert_yuv420p_yuv444p16le DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); - DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); + DEFAULT_DST(1) = make_ushort2( + conv_8to16(SUB_F(uv, 1), mask_16bit), + conv_8to16(SUB_F(uv, 2), mask_16bit) + ); } }; -// nv12->X -struct Convert_nv12_yuv420p + +// planar10->X + +struct Convert_planar10_planar8 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; typedef uchar out_T; typedef uchar out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = res.x; - DEFAULT_DST(2) = res.y; + DEFAULT_DST(1) = conv_10to8pl(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_10to8pl(SUB_F(uv, 2)); } }; -struct Convert_nv12_nv12 +struct Convert_planar10_planar10 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { @@ -248,148 +269,145 @@ struct Convert_nv12_nv12 DEF_F(Convert_uv, out_T_uv) { DEFAULT_DST(1) = SUB_F(uv, 1); + DEFAULT_DST(2) = SUB_F(uv, 2); } }; -struct Convert_nv12_yuv444p +struct Convert_planar10_planar16 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = res.x; - DEFAULT_DST(2) = res.y; + DEFAULT_DST(1) = conv_10to16pl(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_10to16pl(SUB_F(uv, 2)); } }; -struct Convert_nv12_p010le +struct Convert_planar10_semiplanar8 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); + DEFAULT_DST(0) = conv_10to8pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = make_ushort2( - conv_8to16(res.x, mask_10bit), - conv_8to16(res.y, mask_10bit) + DEFAULT_DST(1) = make_uchar2( + conv_10to8pl(SUB_F(uv, 1)), + conv_10to8pl(SUB_F(uv, 2)) ); } }; -struct Convert_nv12_p016le +struct Convert_planar10_semiplanar10 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; typedef ushort out_T; typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = (SUB_F(y, 0) << 6); } DEF_F(Convert_uv, out_T_uv) { - in_T_uv res = SUB_F(uv, 1); DEFAULT_DST(1) = make_ushort2( - conv_8to16(res.x, mask_16bit), - conv_8to16(res.y, mask_16bit) + (SUB_F(uv, 1) << 6), + (SUB_F(uv, 2) << 6) ); } }; -struct Convert_nv12_yuv444p16le +struct Convert_planar10_semiplanar16 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar2 in_T_uv; + static const int in_bit_depth = 10; + typedef ushort in_T; + typedef ushort in_T_uv; typedef ushort out_T; - typedef ushort out_T_uv; + typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = conv_10to16pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit); - DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit); + DEFAULT_DST(1) = make_ushort2( + conv_10to16pl(SUB_F(uv, 1)), + conv_10to16pl(SUB_F(uv, 2)) + ); } }; -// yuv444p->X +// planar16->X -struct Convert_yuv444p_yuv420p +struct Convert_planar16_planar8 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; typedef uchar out_T; typedef uchar out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = SUB_F(uv, 1); - DEFAULT_DST(2) = SUB_F(uv, 2); + DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); } }; -struct Convert_yuv444p_nv12 +struct Convert_planar16_planar10 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = make_uchar2( - SUB_F(uv, 1), - SUB_F(uv, 2) - ); + DEFAULT_DST(1) = conv_16to10pl(SUB_F(uv, 1)); + DEFAULT_DST(2) = conv_16to10pl(SUB_F(uv, 2)); } }; -struct Convert_yuv444p_yuv444p +struct Convert_planar16_planar16 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { @@ -403,144 +421,144 @@ struct Convert_yuv444p_yuv444p } }; -struct Convert_yuv444p_p010le +struct Convert_planar16_semiplanar8 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); + DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_10bit), - conv_8to16(SUB_F(uv, 2), mask_10bit) + DEFAULT_DST(1) = make_uchar2( + conv_16to8(SUB_F(uv, 1)), + conv_16to8(SUB_F(uv, 2)) ); } }; -struct Convert_yuv444p_p016le +struct Convert_planar16_semiplanar10 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; typedef ushort out_T; typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { DEFAULT_DST(1) = make_ushort2( - conv_8to16(SUB_F(uv, 1), mask_16bit), - conv_8to16(SUB_F(uv, 2), mask_16bit) + conv_16to10(SUB_F(uv, 1)), + conv_16to10(SUB_F(uv, 2)) ); } }; -struct Convert_yuv444p_yuv444p16le +struct Convert_planar16_semiplanar16 { - static const int in_bit_depth = 8; - typedef uchar in_T; - typedef uchar in_T_uv; + static const int in_bit_depth = 16; + typedef ushort in_T; + typedef ushort in_T_uv; typedef ushort out_T; - typedef ushort out_T_uv; + typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); + DEFAULT_DST(0) = SUB_F(y, 0); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = conv_8to16(SUB_F(uv, 1), mask_16bit); - DEFAULT_DST(2) = conv_8to16(SUB_F(uv, 2), mask_16bit); + DEFAULT_DST(1) = make_ushort2( + SUB_F(uv, 1), + SUB_F(uv, 2) + ); } }; -// p010le->X +// semiplanar8->X -struct Convert_p010le_yuv420p +struct Convert_semiplanar8_planar8 { - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; typedef uchar out_T; typedef uchar out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); + DEFAULT_DST(0) = SUB_F(y, 0); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_10to8(res.x); - DEFAULT_DST(2) = conv_10to8(res.y); + DEFAULT_DST(1) = res.x; + DEFAULT_DST(2) = res.y; } }; -struct Convert_p010le_nv12 +struct Convert_semiplanar8_planar10 { - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_8to10pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = make_uchar2( - conv_10to8(res.x), - conv_10to8(res.y) - ); + DEFAULT_DST(1) = conv_8to10pl(res.x); + DEFAULT_DST(2) = conv_8to10pl(res.y); } }; -struct Convert_p010le_yuv444p +struct Convert_semiplanar8_planar16 { - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_10to8(res.x); - DEFAULT_DST(2) = conv_10to8(res.y); + DEFAULT_DST(1) = conv_8to16(res.x, mask_16bit); + DEFAULT_DST(2) = conv_8to16(res.y, mask_16bit); } }; -struct Convert_p010le_p010le +struct Convert_semiplanar8_semiplanar8 { - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { @@ -553,55 +571,57 @@ struct Convert_p010le_p010le } }; -struct Convert_p010le_p016le +struct Convert_semiplanar8_semiplanar10 { - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; typedef ushort out_T; typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_10bit); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); DEFAULT_DST(1) = make_ushort2( - conv_10to16(res.x), - conv_10to16(res.y) + conv_8to16(res.x, mask_10bit), + conv_8to16(res.y, mask_10bit) ); } }; -struct Convert_p010le_yuv444p16le +struct Convert_semiplanar8_semiplanar16 { - static const int in_bit_depth = 10; - typedef ushort in_T; - typedef ushort2 in_T_uv; + static const int in_bit_depth = 8; + typedef uchar in_T; + typedef uchar2 in_T_uv; typedef ushort out_T; - typedef ushort out_T_uv; + typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_8to16(SUB_F(y, 0), mask_16bit); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_10to16(res.x); - DEFAULT_DST(2) = conv_10to16(res.y); + DEFAULT_DST(1) = make_ushort2( + conv_8to16(res.x, mask_16bit), + conv_8to16(res.y, mask_16bit) + ); } }; -// p016le->X +// semiplanar10->X -struct Convert_p016le_yuv420p +struct Convert_semiplanar10_planar8 { - static const int in_bit_depth = 16; + static const int in_bit_depth = 10; typedef ushort in_T; typedef ushort2 in_T_uv; typedef uchar out_T; @@ -609,87 +629,85 @@ struct Convert_p016le_yuv420p DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_16to8(res.x); - DEFAULT_DST(2) = conv_16to8(res.y); + DEFAULT_DST(1) = conv_10to8(res.x); + DEFAULT_DST(2) = conv_10to8(res.y); } }; -struct Convert_p016le_nv12 +struct Convert_semiplanar10_planar10 { - static const int in_bit_depth = 16; + static const int in_bit_depth = 10; typedef ushort in_T; typedef ushort2 in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + DEFAULT_DST(0) = SUB_F(y, 0) >> 6; } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = make_uchar2( - conv_16to8(res.x), - conv_16to8(res.y) - ); + DEFAULT_DST(1) = res.x >> 6; + DEFAULT_DST(2) = res.y >> 6; } }; -struct Convert_p016le_yuv444p +struct Convert_semiplanar10_planar16 { - static const int in_bit_depth = 16; + static const int in_bit_depth = 10; typedef ushort in_T; typedef ushort2 in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = conv_16to8(res.x); - DEFAULT_DST(2) = conv_16to8(res.y); + DEFAULT_DST(1) = conv_10to16(res.x); + DEFAULT_DST(2) = conv_10to16(res.y); } }; -struct Convert_p016le_p010le +struct Convert_semiplanar10_semiplanar8 { - static const int in_bit_depth = 16; + static const int in_bit_depth = 10; typedef ushort in_T; typedef ushort2 in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_10to8(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = make_ushort2( - conv_16to10(res.x), - conv_16to10(res.y) + DEFAULT_DST(1) = make_uchar2( + conv_10to8(res.x), + conv_10to8(res.y) ); } }; -struct Convert_p016le_p016le +struct Convert_semiplanar10_semiplanar10 { - static const int in_bit_depth = 16; + static const int in_bit_depth = 10; typedef ushort in_T; typedef ushort2 in_T_uv; typedef ushort out_T; @@ -706,34 +724,37 @@ struct Convert_p016le_p016le } }; -struct Convert_p016le_yuv444p16le +struct Convert_semiplanar10_semiplanar16 { - static const int in_bit_depth = 16; + static const int in_bit_depth = 10; typedef ushort in_T; typedef ushort2 in_T_uv; typedef ushort out_T; - typedef ushort out_T_uv; + typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_10to16(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { in_T_uv res = SUB_F(uv, 1); - DEFAULT_DST(1) = res.x; - DEFAULT_DST(2) = res.y; + DEFAULT_DST(1) = make_ushort2( + conv_10to16(res.x), + conv_10to16(res.y) + ); } }; -// yuv444p16le->X -struct Convert_yuv444p16le_yuv420p +// semiplanar16->X + +struct Convert_semiplanar16_planar8 { static const int in_bit_depth = 16; typedef ushort in_T; - typedef ushort in_T_uv; + typedef ushort2 in_T_uv; typedef uchar out_T; typedef uchar out_T_uv; @@ -744,104 +765,107 @@ struct Convert_yuv444p16le_yuv420p DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); - DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = conv_16to8(res.x); + DEFAULT_DST(2) = conv_16to8(res.y); } }; -struct Convert_yuv444p16le_nv12 +struct Convert_semiplanar16_planar10 { static const int in_bit_depth = 16; typedef ushort in_T; - typedef ushort in_T_uv; - typedef uchar out_T; - typedef uchar2 out_T_uv; + typedef ushort2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_16to10pl(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = make_uchar2( - conv_16to8(SUB_F(uv, 1)), - conv_16to8(SUB_F(uv, 2)) - ); + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = conv_16to10pl(res.x); + DEFAULT_DST(2) = conv_16to10pl(res.y); } }; -struct Convert_yuv444p16le_yuv444p +struct Convert_semiplanar16_planar16 { static const int in_bit_depth = 16; typedef ushort in_T; - typedef ushort in_T_uv; - typedef uchar out_T; - typedef uchar out_T_uv; + typedef ushort2 in_T_uv; + typedef ushort out_T; + typedef ushort out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); + DEFAULT_DST(0) = SUB_F(y, 0); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = conv_16to8(SUB_F(uv, 1)); - DEFAULT_DST(2) = conv_16to8(SUB_F(uv, 2)); + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = res.x; + DEFAULT_DST(2) = res.y; } }; -struct Convert_yuv444p16le_p010le +struct Convert_semiplanar16_semiplanar8 { static const int in_bit_depth = 16; typedef ushort in_T; - typedef ushort in_T_uv; - typedef ushort out_T; - typedef ushort2 out_T_uv; + typedef ushort2 in_T_uv; + typedef uchar out_T; + typedef uchar2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); + DEFAULT_DST(0) = conv_16to8(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { - DEFAULT_DST(1) = make_ushort2( - conv_16to10(SUB_F(uv, 1)), - conv_16to10(SUB_F(uv, 2)) + in_T_uv res = SUB_F(uv, 1); + DEFAULT_DST(1) = make_uchar2( + conv_16to8(res.x), + conv_16to8(res.y) ); } }; -struct Convert_yuv444p16le_p016le +struct Convert_semiplanar16_semiplanar10 { static const int in_bit_depth = 16; typedef ushort in_T; - typedef ushort in_T_uv; + typedef ushort2 in_T_uv; typedef ushort out_T; typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + DEFAULT_DST(0) = conv_16to10(SUB_F(y, 0)); } DEF_F(Convert_uv, out_T_uv) { + in_T_uv res = SUB_F(uv, 1); DEFAULT_DST(1) = make_ushort2( - SUB_F(uv, 1), - SUB_F(uv, 2) + conv_16to10(res.x), + conv_16to10(res.y) ); } }; -struct Convert_yuv444p16le_yuv444p16le +struct Convert_semiplanar16_semiplanar16 { static const int in_bit_depth = 16; typedef ushort in_T; - typedef ushort in_T_uv; + typedef ushort2 in_T_uv; typedef ushort out_T; - typedef ushort out_T_uv; + typedef ushort2 out_T_uv; DEF_F(Convert, out_T) { @@ -851,7 +875,6 @@ struct Convert_yuv444p16le_yuv444p16le DEF_F(Convert_uv, out_T_uv) { DEFAULT_DST(1) = SUB_F(uv, 1); - DEFAULT_DST(2) = SUB_F(uv, 2); } }; @@ -1184,12 +1207,12 @@ extern "C" { NEAREST_KERNEL(C,_uv) #define NEAREST_KERNELS(C) \ - NEAREST_KERNEL_RAW(yuv420p_ ## C) \ - NEAREST_KERNEL_RAW(nv12_ ## C) \ - NEAREST_KERNEL_RAW(yuv444p_ ## C) \ - NEAREST_KERNEL_RAW(p010le_ ## C) \ - NEAREST_KERNEL_RAW(p016le_ ## C) \ - NEAREST_KERNEL_RAW(yuv444p16le_ ## C) + NEAREST_KERNEL_RAW(planar8_ ## C) \ + NEAREST_KERNEL_RAW(planar10_ ## C) \ + NEAREST_KERNEL_RAW(planar16_ ## C) \ + NEAREST_KERNEL_RAW(semiplanar8_ ## C) \ + NEAREST_KERNEL_RAW(semiplanar10_ ## C) \ + NEAREST_KERNEL_RAW(semiplanar16_ ## C) #define NEAREST_KERNELS_RGB(C) \ NEAREST_KERNEL_RAW(rgb0_ ## C) \ @@ -1197,12 +1220,12 @@ extern "C" { NEAREST_KERNEL_RAW(rgba_ ## C) \ NEAREST_KERNEL_RAW(bgra_ ## C) \ -NEAREST_KERNELS(yuv420p) -NEAREST_KERNELS(nv12) -NEAREST_KERNELS(yuv444p) -NEAREST_KERNELS(p010le) -NEAREST_KERNELS(p016le) -NEAREST_KERNELS(yuv444p16le) +NEAREST_KERNELS(planar8) +NEAREST_KERNELS(planar10) +NEAREST_KERNELS(planar16) +NEAREST_KERNELS(semiplanar8) +NEAREST_KERNELS(semiplanar10) +NEAREST_KERNELS(semiplanar16) NEAREST_KERNELS_RGB(rgb0) NEAREST_KERNELS_RGB(bgr0) @@ -1224,12 +1247,12 @@ NEAREST_KERNELS_RGB(bgra) BILINEAR_KERNEL(C,_uv) #define BILINEAR_KERNELS(C) \ - BILINEAR_KERNEL_RAW(yuv420p_ ## C) \ - BILINEAR_KERNEL_RAW(nv12_ ## C) \ - BILINEAR_KERNEL_RAW(yuv444p_ ## C) \ - BILINEAR_KERNEL_RAW(p010le_ ## C) \ - BILINEAR_KERNEL_RAW(p016le_ ## C) \ - BILINEAR_KERNEL_RAW(yuv444p16le_ ## C) + BILINEAR_KERNEL_RAW(planar8_ ## C) \ + BILINEAR_KERNEL_RAW(planar10_ ## C) \ + BILINEAR_KERNEL_RAW(planar16_ ## C) \ + BILINEAR_KERNEL_RAW(semiplanar8_ ## C) \ + BILINEAR_KERNEL_RAW(semiplanar10_ ## C) \ + BILINEAR_KERNEL_RAW(semiplanar16_ ## C) #define BILINEAR_KERNELS_RGB(C) \ BILINEAR_KERNEL_RAW(rgb0_ ## C) \ @@ -1237,12 +1260,12 @@ NEAREST_KERNELS_RGB(bgra) BILINEAR_KERNEL_RAW(rgba_ ## C) \ BILINEAR_KERNEL_RAW(bgra_ ## C) -BILINEAR_KERNELS(yuv420p) -BILINEAR_KERNELS(nv12) -BILINEAR_KERNELS(yuv444p) -BILINEAR_KERNELS(p010le) -BILINEAR_KERNELS(p016le) -BILINEAR_KERNELS(yuv444p16le) +BILINEAR_KERNELS(planar8) +BILINEAR_KERNELS(planar10) +BILINEAR_KERNELS(planar16) +BILINEAR_KERNELS(semiplanar8) +BILINEAR_KERNELS(semiplanar10) +BILINEAR_KERNELS(semiplanar16) BILINEAR_KERNELS_RGB(rgb0) BILINEAR_KERNELS_RGB(bgr0) @@ -1264,12 +1287,12 @@ BILINEAR_KERNELS_RGB(bgra) BICUBIC_KERNEL(C,_uv) #define BICUBIC_KERNELS(C) \ - BICUBIC_KERNEL_RAW(yuv420p_ ## C) \ - BICUBIC_KERNEL_RAW(nv12_ ## C) \ - BICUBIC_KERNEL_RAW(yuv444p_ ## C) \ - BICUBIC_KERNEL_RAW(p010le_ ## C) \ - BICUBIC_KERNEL_RAW(p016le_ ## C) \ - BICUBIC_KERNEL_RAW(yuv444p16le_ ## C) + BICUBIC_KERNEL_RAW(planar8_ ## C) \ + BICUBIC_KERNEL_RAW(planar10_ ## C) \ + BICUBIC_KERNEL_RAW(planar16_ ## C) \ + BICUBIC_KERNEL_RAW(semiplanar8_ ## C) \ + BICUBIC_KERNEL_RAW(semiplanar10_ ## C) \ + BICUBIC_KERNEL_RAW(semiplanar16_ ## C) #define BICUBIC_KERNELS_RGB(C) \ BICUBIC_KERNEL_RAW(rgb0_ ## C) \ @@ -1277,12 +1300,12 @@ BILINEAR_KERNELS_RGB(bgra) BICUBIC_KERNEL_RAW(rgba_ ## C) \ BICUBIC_KERNEL_RAW(bgra_ ## C) -BICUBIC_KERNELS(yuv420p) -BICUBIC_KERNELS(nv12) -BICUBIC_KERNELS(yuv444p) -BICUBIC_KERNELS(p010le) -BICUBIC_KERNELS(p016le) -BICUBIC_KERNELS(yuv444p16le) +BICUBIC_KERNELS(planar8) +BICUBIC_KERNELS(planar10) +BICUBIC_KERNELS(planar16) +BICUBIC_KERNELS(semiplanar8) +BICUBIC_KERNELS(semiplanar10) +BICUBIC_KERNELS(semiplanar16) BICUBIC_KERNELS_RGB(rgb0) BICUBIC_KERNELS_RGB(bgr0) @@ -1304,12 +1327,12 @@ BICUBIC_KERNELS_RGB(bgra) LANCZOS_KERNEL(C,_uv) #define LANCZOS_KERNELS(C) \ - LANCZOS_KERNEL_RAW(yuv420p_ ## C) \ - LANCZOS_KERNEL_RAW(nv12_ ## C) \ - LANCZOS_KERNEL_RAW(yuv444p_ ## C) \ - LANCZOS_KERNEL_RAW(p010le_ ## C) \ - LANCZOS_KERNEL_RAW(p016le_ ## C) \ - LANCZOS_KERNEL_RAW(yuv444p16le_ ## C) + LANCZOS_KERNEL_RAW(planar8_ ## C) \ + LANCZOS_KERNEL_RAW(planar10_ ## C) \ + LANCZOS_KERNEL_RAW(planar16_ ## C) \ + LANCZOS_KERNEL_RAW(semiplanar8_ ## C) \ + LANCZOS_KERNEL_RAW(semiplanar10_ ## C) \ + LANCZOS_KERNEL_RAW(semiplanar16_ ## C) #define LANCZOS_KERNELS_RGB(C) \ LANCZOS_KERNEL_RAW(rgb0_ ## C) \ @@ -1317,12 +1340,12 @@ BICUBIC_KERNELS_RGB(bgra) LANCZOS_KERNEL_RAW(rgba_ ## C) \ LANCZOS_KERNEL_RAW(bgra_ ## C) -LANCZOS_KERNELS(yuv420p) -LANCZOS_KERNELS(nv12) -LANCZOS_KERNELS(yuv444p) -LANCZOS_KERNELS(p010le) -LANCZOS_KERNELS(p016le) -LANCZOS_KERNELS(yuv444p16le) +LANCZOS_KERNELS(planar8) +LANCZOS_KERNELS(planar10) +LANCZOS_KERNELS(planar16) +LANCZOS_KERNELS(semiplanar8) +LANCZOS_KERNELS(semiplanar10) +LANCZOS_KERNELS(semiplanar16) LANCZOS_KERNELS_RGB(rgb0) LANCZOS_KERNELS_RGB(bgr0) -- 2.39.5 (Apple Git-154) ----------------------------------------------------------------------------------- NVIDIA GmbH Wuerselen Amtsgericht Aachen HRB 8361 Managing Directors: Rebecca Peters, Donald Robertson, Janet Hall, Ludwig von Reiche ----------------------------------------------------------------------------------- This email message is for the sole use of the intended recipient(s) and may contain confidential information. Any unauthorized review, use, disclosure or distribution is prohibited. If you are not the intended recipient, please contact the sender by reply email and destroy all copies of the original message. ----------------------------------------------------------------------------------- [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
prev parent reply other threads:[~2025-07-03 14:27 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top [not found] <20250703142520.16586-1-ddesouza@nvidia.com> 2025-07-03 14:25 ` [FFmpeg-devel] [PATCH 2/3] avfilter/hwupload_cuda: Expands pixel formats support Diego Felix de Souza via ffmpeg-devel 2025-07-03 14:25 ` Diego Felix de Souza via ffmpeg-devel [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=mailman.5310.1751552831.1384.ffmpeg-devel@ffmpeg.org \ --to=ffmpeg-devel@ffmpeg.org \ --cc=ddesouza@nvidia.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git