From: Philip Langdale <philipl@overt.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Philip Langdale <philipl@overt.org>
Subject: [FFmpeg-devel] [PATCH] avfilter/scale_cuda: add support for rgb32/bgr32 conversions
Date: Thu, 15 Jun 2023 22:15:18 -0700
Message-ID: <20230616051518.949854-1-philipl@overt.org> (raw)
As we are introducing two new formats and supporting conversions
between them, and also with the existing 0rgb32/0bgr32 formats, we get
a combinatorial explosion of kernels. I introduced a few new macros to
keep the things mostly managable.
The conversions are all simple, following existing patterns, with four
specific exceptions. When converting from 0rgb32/0bgr32 to rgb32/bgr32,
we need to ensure the alpha value is set to 1. In all other cases, it
can just be passed through, either to be used or ignored.
---
libavfilter/vf_scale_cuda.c | 2 +
libavfilter/vf_scale_cuda.cu | 175 ++++++++++++++++++++++++++++-------
2 files changed, 146 insertions(+), 31 deletions(-)
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 1c99befec8..370cb1d9cd 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -51,6 +51,8 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_YUV444P16,
AV_PIX_FMT_0RGB32,
AV_PIX_FMT_0BGR32,
+ AV_PIX_FMT_RGB32,
+ AV_PIX_FMT_BGR32,
};
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index c9c6cafdb6..c82649e84f 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -853,9 +853,67 @@ struct Convert_yuv444p16le_yuv444p16le
}
};
-// bgr0->X
-
-struct Convert_bgr0_bgr0
+#define DEF_CONVERT_IDENTITY(fmt1, fmt2)\
+ \
+struct Convert_##fmt1##_##fmt2 \
+{ \
+ static const int in_bit_depth = 8; \
+ typedef uchar4 in_T; \
+ typedef uchar in_T_uv; \
+ typedef uchar4 out_T; \
+ typedef uchar out_T_uv; \
+ \
+ DEF_F(Convert, out_T) \
+ { \
+ DEFAULT_DST(0) = SUB_F(y, 0); \
+ } \
+ \
+ DEF_F(Convert_uv, out_T_uv) \
+ { \
+ } \
+}; \
+
+#define DEF_CONVERT_REORDER(fmt1, fmt2) \
+ \
+struct Convert_##fmt1##_##fmt2 \
+{ \
+ static const int in_bit_depth = 8; \
+ typedef uchar4 in_T; \
+ typedef uchar in_T_uv; \
+ typedef uchar4 out_T; \
+ typedef uchar out_T_uv; \
+ \
+ DEF_F(Convert, out_T) \
+ { \
+ uchar4 res = SUB_F(y, 0); \
+ DEFAULT_DST(0) = make_uchar4( \
+ res.z, \
+ res.y, \
+ res.x, \
+ res.w \
+ ); \
+ } \
+ \
+ DEF_F(Convert_uv, out_T_uv) \
+ { \
+ } \
+}; \
+
+#define DEF_CONVERT_RGB(fmt1, fmt2) \
+ \
+DEF_CONVERT_IDENTITY(fmt1, fmt1) \
+DEF_CONVERT_REORDER (fmt1, fmt2) \
+DEF_CONVERT_REORDER (fmt2, fmt1) \
+DEF_CONVERT_IDENTITY(fmt2, fmt2)
+
+DEF_CONVERT_RGB(rgb0, bgr0)
+DEF_CONVERT_RGB(rgba, bgra)
+DEF_CONVERT_IDENTITY(rgba, rgb0)
+DEF_CONVERT_IDENTITY(bgra, bgr0)
+DEF_CONVERT_REORDER(rgba, bgr0)
+DEF_CONVERT_REORDER(bgra, rgb0)
+
+struct Convert_bgr0_bgra
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@@ -865,7 +923,13 @@ struct Convert_bgr0_bgr0
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ uchar4 res = SUB_F(y, 0);
+ DEFAULT_DST(0) = make_uchar4(
+ res.x,
+ res.y,
+ res.z,
+ 1
+ );
}
DEF_F(Convert_uv, out_T_uv)
@@ -873,7 +937,7 @@ struct Convert_bgr0_bgr0
}
};
-struct Convert_bgr0_rgb0
+struct Convert_bgr0_rgba
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@@ -888,7 +952,7 @@ struct Convert_bgr0_rgb0
res.z,
res.y,
res.x,
- res.w
+ 1
);
}
@@ -897,9 +961,7 @@ struct Convert_bgr0_rgb0
}
};
-// rgb0->X
-
-struct Convert_rgb0_bgr0
+struct Convert_rgb0_bgra
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@@ -914,7 +976,7 @@ struct Convert_rgb0_bgr0
res.z,
res.y,
res.x,
- res.w
+ 1
);
}
@@ -923,7 +985,7 @@ struct Convert_rgb0_bgr0
}
};
-struct Convert_rgb0_rgb0
+struct Convert_rgb0_rgba
{
static const int in_bit_depth = 8;
typedef uchar4 in_T;
@@ -933,7 +995,13 @@ struct Convert_rgb0_rgb0
DEF_F(Convert, out_T)
{
- DEFAULT_DST(0) = SUB_F(y, 0);
+ uchar4 res = SUB_F(y, 0);
+ DEFAULT_DST(0) = make_uchar4(
+ res.x,
+ res.y,
+ res.z,
+ 1
+ );
}
DEF_F(Convert_uv, out_T_uv)
@@ -1117,6 +1185,12 @@ extern "C" {
NEAREST_KERNEL_RAW(p016le_ ## C) \
NEAREST_KERNEL_RAW(yuv444p16le_ ## C)
+#define NEAREST_KERNELS_RGB(fmt1, fmt2) \
+ NEAREST_KERNEL_RAW(fmt1##_##fmt1) \
+ NEAREST_KERNEL_RAW(fmt1##_##fmt2) \
+ NEAREST_KERNEL_RAW(fmt2##_##fmt1) \
+ NEAREST_KERNEL_RAW(fmt2##_##fmt2)
+
NEAREST_KERNELS(yuv420p)
NEAREST_KERNELS(nv12)
NEAREST_KERNELS(yuv444p)
@@ -1124,11 +1198,16 @@ NEAREST_KERNELS(p010le)
NEAREST_KERNELS(p016le)
NEAREST_KERNELS(yuv444p16le)
-NEAREST_KERNEL_RAW(bgr0_bgr0)
-NEAREST_KERNEL_RAW(rgb0_rgb0)
-NEAREST_KERNEL_RAW(bgr0_rgb0)
-NEAREST_KERNEL_RAW(rgb0_bgr0)
-
+NEAREST_KERNELS_RGB(rgb0, bgr0)
+NEAREST_KERNELS_RGB(rgba, bgra)
+NEAREST_KERNEL_RAW(rgb0_rgba)
+NEAREST_KERNEL_RAW(rgb0_bgra)
+NEAREST_KERNEL_RAW(bgr0_rgba)
+NEAREST_KERNEL_RAW(bgr0_bgra)
+NEAREST_KERNEL_RAW(rgba_rgb0)
+NEAREST_KERNEL_RAW(rgba_bgr0)
+NEAREST_KERNEL_RAW(bgra_rgb0)
+NEAREST_KERNEL_RAW(bgra_bgr0)
#define BILINEAR_KERNEL(C, S) \
__global__ void Subsample_Bilinear_##C##S( \
@@ -1152,6 +1231,12 @@ NEAREST_KERNEL_RAW(rgb0_bgr0)
BILINEAR_KERNEL_RAW(p016le_ ## C) \
BILINEAR_KERNEL_RAW(yuv444p16le_ ## C)
+#define BILINEAR_KERNELS_RGB(fmt1, fmt2)\
+ BILINEAR_KERNEL_RAW(fmt1##_##fmt1) \
+ BILINEAR_KERNEL_RAW(fmt1##_##fmt2) \
+ BILINEAR_KERNEL_RAW(fmt2##_##fmt1) \
+ BILINEAR_KERNEL_RAW(fmt2##_##fmt2)
+
BILINEAR_KERNELS(yuv420p)
BILINEAR_KERNELS(nv12)
BILINEAR_KERNELS(yuv444p)
@@ -1159,10 +1244,16 @@ BILINEAR_KERNELS(p010le)
BILINEAR_KERNELS(p016le)
BILINEAR_KERNELS(yuv444p16le)
-BILINEAR_KERNEL_RAW(bgr0_bgr0)
-BILINEAR_KERNEL_RAW(rgb0_rgb0)
-BILINEAR_KERNEL_RAW(bgr0_rgb0)
-BILINEAR_KERNEL_RAW(rgb0_bgr0)
+BILINEAR_KERNELS_RGB(rgb0, bgr0)
+BILINEAR_KERNELS_RGB(rgba, bgra)
+BILINEAR_KERNEL_RAW(rgb0_rgba)
+BILINEAR_KERNEL_RAW(rgb0_bgra)
+BILINEAR_KERNEL_RAW(bgr0_rgba)
+BILINEAR_KERNEL_RAW(bgr0_bgra)
+BILINEAR_KERNEL_RAW(rgba_rgb0)
+BILINEAR_KERNEL_RAW(rgba_bgr0)
+BILINEAR_KERNEL_RAW(bgra_rgb0)
+BILINEAR_KERNEL_RAW(bgra_bgr0)
#define BICUBIC_KERNEL(C, S) \
__global__ void Subsample_Bicubic_##C##S( \
@@ -1186,6 +1277,12 @@ BILINEAR_KERNEL_RAW(rgb0_bgr0)
BICUBIC_KERNEL_RAW(p016le_ ## C) \
BICUBIC_KERNEL_RAW(yuv444p16le_ ## C)
+#define BICUBIC_KERNELS_RGB(fmt1, fmt2) \
+ BICUBIC_KERNEL_RAW(fmt1##_##fmt1) \
+ BICUBIC_KERNEL_RAW(fmt1##_##fmt2) \
+ BICUBIC_KERNEL_RAW(fmt2##_##fmt1) \
+ BICUBIC_KERNEL_RAW(fmt2##_##fmt2)
+
BICUBIC_KERNELS(yuv420p)
BICUBIC_KERNELS(nv12)
BICUBIC_KERNELS(yuv444p)
@@ -1193,11 +1290,16 @@ BICUBIC_KERNELS(p010le)
BICUBIC_KERNELS(p016le)
BICUBIC_KERNELS(yuv444p16le)
-BICUBIC_KERNEL_RAW(bgr0_bgr0)
-BICUBIC_KERNEL_RAW(rgb0_rgb0)
-BICUBIC_KERNEL_RAW(bgr0_rgb0)
-BICUBIC_KERNEL_RAW(rgb0_bgr0)
-
+BICUBIC_KERNELS_RGB(rgb0, bgr0)
+BICUBIC_KERNELS_RGB(rgba, bgra)
+BICUBIC_KERNEL_RAW(rgb0_rgba)
+BICUBIC_KERNEL_RAW(rgb0_bgra)
+BICUBIC_KERNEL_RAW(bgr0_rgba)
+BICUBIC_KERNEL_RAW(bgr0_bgra)
+BICUBIC_KERNEL_RAW(rgba_rgb0)
+BICUBIC_KERNEL_RAW(rgba_bgr0)
+BICUBIC_KERNEL_RAW(bgra_rgb0)
+BICUBIC_KERNEL_RAW(bgra_bgr0)
#define LANCZOS_KERNEL(C, S) \
__global__ void Subsample_Lanczos_##C##S( \
@@ -1221,6 +1323,12 @@ BICUBIC_KERNEL_RAW(rgb0_bgr0)
LANCZOS_KERNEL_RAW(p016le_ ## C) \
LANCZOS_KERNEL_RAW(yuv444p16le_ ## C)
+#define LANCZOS_KERNELS_RGB(fmt1, fmt2) \
+ LANCZOS_KERNEL_RAW(fmt1##_##fmt1) \
+ LANCZOS_KERNEL_RAW(fmt1##_##fmt2) \
+ LANCZOS_KERNEL_RAW(fmt2##_##fmt1) \
+ LANCZOS_KERNEL_RAW(fmt2##_##fmt2)
+
LANCZOS_KERNELS(yuv420p)
LANCZOS_KERNELS(nv12)
LANCZOS_KERNELS(yuv444p)
@@ -1228,9 +1336,14 @@ LANCZOS_KERNELS(p010le)
LANCZOS_KERNELS(p016le)
LANCZOS_KERNELS(yuv444p16le)
-LANCZOS_KERNEL_RAW(bgr0_bgr0)
-LANCZOS_KERNEL_RAW(rgb0_rgb0)
-LANCZOS_KERNEL_RAW(bgr0_rgb0)
-LANCZOS_KERNEL_RAW(rgb0_bgr0)
-
+LANCZOS_KERNELS_RGB(rgb0, bgr0)
+LANCZOS_KERNELS_RGB(rgba, bgra)
+LANCZOS_KERNEL_RAW(rgb0_rgba)
+LANCZOS_KERNEL_RAW(rgb0_bgra)
+LANCZOS_KERNEL_RAW(bgr0_rgba)
+LANCZOS_KERNEL_RAW(bgr0_bgra)
+LANCZOS_KERNEL_RAW(rgba_rgb0)
+LANCZOS_KERNEL_RAW(rgba_bgr0)
+LANCZOS_KERNEL_RAW(bgra_rgb0)
+LANCZOS_KERNEL_RAW(bgra_bgr0)
}
--
2.39.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2023-06-16 5:15 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230616051518.949854-1-philipl@overt.org \
--to=philipl@overt.org \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git