* [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout Lynne
` (11 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
NVIDIA's support for it is a disaster.
Of no benefit to other vendors.
NVIDIA are working on fixing it, but it may take time.
---
libavutil/hwcontext_vulkan.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 894bc3dae1..5a9b995eb9 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -613,7 +613,6 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
{ VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
{ VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE },
- { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME, FF_VK_EXT_HOST_IMAGE_COPY },
#ifdef VK_KHR_shader_expect_assume
{ VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME },
#endif
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
` (10 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/hwcontext_vulkan.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5a9b995eb9..c20ebde36d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -280,6 +280,7 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics);
COPY_VAL(vulkan_1_2.vulkanMemoryModel);
COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);
+ COPY_VAL(vulkan_1_2.uniformBufferStandardLayout);
COPY_VAL(vulkan_1_3.dynamicRendering);
COPY_VAL(vulkan_1_3.maintenance4);
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths Lynne
` (9 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/hwcontext_vulkan.c | 3 +++
libavutil/vulkan.c | 5 +++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c20ebde36d..e818099fdb 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -376,6 +376,9 @@ static const struct FFVkFormatEntry {
{ VK_FORMAT_R32_UINT, AV_PIX_FMT_GBRAP32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } },
{ VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ /* Bayer */
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_BAYER_RGGB16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
+
/* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
{ VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
{ VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f36f2bb7cf..5cfb634a62 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1542,7 +1542,7 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 ||
pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 ||
pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96 ||
- pix_fmt == AV_PIX_FMT_GBRP)
+ pix_fmt == AV_PIX_FMT_GBRP || pix_fmt == AV_PIX_FMT_BAYER_RGGB16)
return 1;
return 0;
}
@@ -1699,7 +1699,8 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
case AV_PIX_FMT_YUVA422P16:
case AV_PIX_FMT_YUVA444P10:
case AV_PIX_FMT_YUVA444P12:
- case AV_PIX_FMT_YUVA444P16: {
+ case AV_PIX_FMT_YUVA444P16:
+ case AV_PIX_FMT_BAYER_RGGB16: {
const char *rep_tab[] = {
[FF_VK_REP_NATIVE] = "r16ui",
[FF_VK_REP_FLOAT] = "r16f",
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (2 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW Lynne
` (8 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This makes left_bits return useful data rather than overflowing, and
also saves some 64-bit integer operations, which, sadly, is still always a plus.
---
libavcodec/vulkan/common.comp | 22 +++++++++++-----------
libavcodec/vulkan/ffv1_dec_setup.comp | 2 +-
libavcodec/vulkan/ffv1_vlc.comp | 4 ++--
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index 10af9c0623..6825693fa3 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -193,8 +193,8 @@ struct GetBitContext {
uint64_t buf_end;
uint64_t bits;
- uint bits_valid;
- uint size_in_bits;
+ int bits_valid;
+ int size_in_bits;
};
#define LOAD64() \
@@ -216,11 +216,11 @@ struct GetBitContext {
gb.bits_valid += 32; \
}
-void init_get_bits(inout GetBitContext gb, u8buf data, uint64_t len)
+void init_get_bits(inout GetBitContext gb, u8buf data, int len)
{
gb.buf = gb.buf_start = uint64_t(data);
gb.buf_end = uint64_t(data) + len;
- gb.size_in_bits = uint(len) * 8;
+ gb.size_in_bits = len * 8;
/* Preload */
LOAD64()
@@ -237,7 +237,7 @@ bool get_bit(inout GetBitContext gb)
return val;
}
-uint get_bits(inout GetBitContext gb, uint n)
+uint get_bits(inout GetBitContext gb, int n)
{
if (n == 0)
return 0;
@@ -251,7 +251,7 @@ uint get_bits(inout GetBitContext gb, uint n)
return val;
}
-uint show_bits(inout GetBitContext gb, uint n)
+uint show_bits(inout GetBitContext gb, int n)
{
if (n > gb.bits_valid)
RELOAD32()
@@ -259,7 +259,7 @@ uint show_bits(inout GetBitContext gb, uint n)
return uint(gb.bits >> (64 - n));
}
-void skip_bits(inout GetBitContext gb, uint n)
+void skip_bits(inout GetBitContext gb, int n)
{
if (n > gb.bits_valid)
RELOAD32()
@@ -268,12 +268,12 @@ void skip_bits(inout GetBitContext gb, uint n)
gb.bits_valid -= n;
}
-uint tell_bits(in GetBitContext gb)
+int tell_bits(in GetBitContext gb)
{
- return uint(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
+ return int(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
}
-uint left_bits(in GetBitContext gb)
+int left_bits(in GetBitContext gb)
{
- return gb.size_in_bits - uint(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
+ return gb.size_in_bits - int(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
}
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
index 671f28e7e7..5da09df21c 100644
--- a/libavcodec/vulkan/ffv1_dec_setup.comp
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -107,7 +107,7 @@ void golomb_init(inout SliceContext sc)
uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
- sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count);
+ int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));
}
void main(void)
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
index d374e5a069..32a6ca9f37 100644
--- a/libavcodec/vulkan/ffv1_vlc.comp
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -121,7 +121,7 @@ Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
return set_sr_golomb(code, k, 12, bits);
}
-uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+uint get_ur_golomb(inout GetBitContext gb, int k, int limit, int esc_len)
{
for (uint i = 0; i < 12; i++)
if (get_bit(gb))
@@ -130,7 +130,7 @@ uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
return get_bits(gb, esc_len) + 11;
}
-int get_sr_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+int get_sr_golomb(inout GetBitContext gb, int k, int limit, int esc_len)
{
int v = int(get_ur_golomb(gb, k, limit, esc_len));
return (v >> 1) ^ -(v & 1);
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (3 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser Lynne
` (7 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/codec_desc.c | 8 ++++++++
libavcodec/codec_id.h | 1 +
libavcodec/defs.h | 3 +++
libavcodec/profiles.c | 6 ++++++
libavcodec/profiles.h | 1 +
libavformat/isom_tags.c | 4 ++++
6 files changed, 23 insertions(+)
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index dae2296689..36cbaf288e 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1992,6 +1992,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("Advanced Professional Video"),
.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
},
+ {
+ .id = AV_CODEC_ID_PRORES_RAW,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "prores_raw",
+ .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes RAW"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_prores_raw_profiles),
+ },
/* various PCM "codecs" */
{
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index d00d3fe121..adf263f6b0 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -330,6 +330,7 @@ enum AVCodecID {
AV_CODEC_ID_RV60,
AV_CODEC_ID_JPEGXL_ANIM,
AV_CODEC_ID_APV,
+ AV_CODEC_ID_PRORES_RAW,
/* various PCM "codecs" */
AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs
diff --git a/libavcodec/defs.h b/libavcodec/defs.h
index 8ce5d424c9..b13e983b13 100644
--- a/libavcodec/defs.h
+++ b/libavcodec/defs.h
@@ -185,6 +185,9 @@
#define AV_PROFILE_PRORES_4444 4
#define AV_PROFILE_PRORES_XQ 5
+#define AV_PROFILE_PRORES_RAW 0
+#define AV_PROFILE_PRORES_RAW_HQ 1
+
#define AV_PROFILE_ARIB_PROFILE_A 0
#define AV_PROFILE_ARIB_PROFILE_C 1
diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
index 991f24135d..2cf733b0a2 100644
--- a/libavcodec/profiles.c
+++ b/libavcodec/profiles.c
@@ -182,6 +182,12 @@ const AVProfile ff_prores_profiles[] = {
{ AV_PROFILE_UNKNOWN }
};
+const AVProfile ff_prores_raw_profiles[] = {
+ { AV_PROFILE_PRORES_RAW, "RAW" },
+ { AV_PROFILE_PRORES_RAW_HQ, "HQ" },
+ { AV_PROFILE_UNKNOWN }
+};
+
const AVProfile ff_mjpeg_profiles[] = {
{ AV_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT, "Baseline" },
{ AV_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT, "Sequential" },
diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
index 4892388149..6f4011ff0c 100644
--- a/libavcodec/profiles.h
+++ b/libavcodec/profiles.h
@@ -74,6 +74,7 @@ extern const AVProfile ff_vp9_profiles[];
extern const AVProfile ff_av1_profiles[];
extern const AVProfile ff_sbc_profiles[];
extern const AVProfile ff_prores_profiles[];
+extern const AVProfile ff_prores_raw_profiles[];
extern const AVProfile ff_mjpeg_profiles[];
extern const AVProfile ff_arib_caption_profiles[];
extern const AVProfile ff_evc_profiles[];
diff --git a/libavformat/isom_tags.c b/libavformat/isom_tags.c
index 69174b4a3f..151c42e9e6 100644
--- a/libavformat/isom_tags.c
+++ b/libavformat/isom_tags.c
@@ -240,6 +240,10 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
{ AV_CODEC_ID_PRORES, MKTAG('a', 'p', 'c', 'o') }, /* Apple ProRes 422 Proxy */
{ AV_CODEC_ID_PRORES, MKTAG('a', 'p', '4', 'h') }, /* Apple ProRes 4444 */
{ AV_CODEC_ID_PRORES, MKTAG('a', 'p', '4', 'x') }, /* Apple ProRes 4444 XQ */
+
+ { AV_CODEC_ID_PRORES_RAW, MKTAG('a', 'p', 'r', 'n') }, /* Apple ProRes RAW */
+ { AV_CODEC_ID_PRORES_RAW, MKTAG('a', 'p', 'r', 'h') }, /* Apple ProRes RAW HQ */
+
{ AV_CODEC_ID_FLIC, MKTAG('f', 'l', 'i', 'c') },
{ AV_CODEC_ID_AIC, MKTAG('i', 'c', 'o', 'd') },
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (4 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder Lynne
` (6 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
Simple parser that only parses frame information.
This helps avoid requiring the software decoder on init to decode a
single frame, since the decoder can be quite slow.
---
libavcodec/Makefile | 1 +
libavcodec/parsers.c | 1 +
libavcodec/prores_raw_parser.c | 72 ++++++++++++++++++++++++++++++++++
libavformat/mov.c | 1 +
4 files changed, 75 insertions(+)
create mode 100644 libavcodec/prores_raw_parser.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 215577f7c9..78e099ce5d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1250,6 +1250,7 @@ OBJS-$(CONFIG_MPEGVIDEO_PARSER) += mpegvideo_parser.o \
OBJS-$(CONFIG_OPUS_PARSER) += vorbis_data.o
OBJS-$(CONFIG_PNG_PARSER) += png_parser.o
OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o
+OBJS-$(CONFIG_PRORES_RAW_PARSER) += prores_raw_parser.o
OBJS-$(CONFIG_QOI_PARSER) += qoi_parser.o
OBJS-$(CONFIG_RV34_PARSER) += rv34_parser.o
OBJS-$(CONFIG_SBC_PARSER) += sbc_parser.o
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index 21164f3751..b12c48f79f 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -68,6 +68,7 @@ extern const AVCodecParser ff_mpegvideo_parser;
extern const AVCodecParser ff_opus_parser;
extern const AVCodecParser ff_png_parser;
extern const AVCodecParser ff_pnm_parser;
+extern const AVCodecParser ff_prores_raw_parser;
extern const AVCodecParser ff_qoi_parser;
extern const AVCodecParser ff_rv34_parser;
extern const AVCodecParser ff_sbc_parser;
diff --git a/libavcodec/prores_raw_parser.c b/libavcodec/prores_raw_parser.c
new file mode 100644
index 0000000000..a286d674b2
--- /dev/null
+++ b/libavcodec/prores_raw_parser.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "parser.h"
+#include "bytestream.h"
+
+static int prores_raw_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+ const uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size)
+{
+ GetByteContext gb;
+ uint32_t header_size;
+ int version;
+
+ bytestream2_init(&gb, buf, buf_size);
+ if (bytestream2_get_be32(&gb) != buf_size) /* Packet size */
+ return buf_size;
+
+ if (bytestream2_get_le32(&gb) != MKTAG('p','r','r','f')) /* Frame header */
+ return buf_size;
+
+ header_size = bytestream2_get_be16(&gb) + 8;
+ version = bytestream2_get_be16(&gb);
+ if (version > 1) {
+ avpriv_request_sample(avctx, "Version %d", version);
+ return buf_size;
+ }
+
+ if (header_size < (version == 0 ? 144 : 96))
+ return buf_size;
+
+ /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atomos) */
+ bytestream2_skip(&gb, 4);
+
+ s->width = bytestream2_get_be16(&gb);
+ s->height = bytestream2_get_be16(&gb);
+ s->coded_width = FFALIGN(s->width, 16);
+ s->coded_height = FFALIGN(s->height, 16);
+ s->format = AV_PIX_FMT_BAYER_RGGB16;
+ s->key_frame = 1;
+ s->pict_type = AV_PICTURE_TYPE_I;
+ s->field_order = AV_FIELD_PROGRESSIVE;
+ s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
+
+ /* This parser only performs analysis */
+ *poutbuf = buf;
+ *poutbuf_size = buf_size;
+
+ return buf_size;
+}
+
+const AVCodecParser ff_prores_raw_parser = {
+ .codec_ids = { AV_CODEC_ID_PRORES_RAW },
+ .parser_parse = prores_raw_parse,
+};
diff --git a/libavformat/mov.c b/libavformat/mov.c
index c935bbf0bf..39c1d6c286 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2987,6 +2987,7 @@ static int mov_finalize_stsd_codec(MOVContext *c, AVIOContext *pb,
case AV_CODEC_ID_VP9:
sti->need_parsing = AVSTREAM_PARSE_FULL;
break;
+ case AV_CODEC_ID_PRORES_RAW:
case AV_CODEC_ID_APV:
case AV_CODEC_ID_EVC:
case AV_CODEC_ID_AV1:
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (5 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel Lynne
` (5 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
configure | 1 +
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/prores_raw.c | 525 ++++++++++++++++++++++++++++++++++++++++
libavcodec/prores_raw.h | 60 +++++
5 files changed, 588 insertions(+)
create mode 100644 libavcodec/prores_raw.c
create mode 100644 libavcodec/prores_raw.h
diff --git a/configure b/configure
index 6df8fa4deb..66e76cd47c 100755
--- a/configure
+++ b/configure
@@ -3087,6 +3087,7 @@ prores_decoder_select="blockdsp idctdsp"
prores_encoder_select="fdctdsp"
prores_aw_encoder_select="fdctdsp"
prores_ks_encoder_select="fdctdsp"
+prores_raw_decoder_select="blockdsp idctdsp"
qcelp_decoder_select="lsp"
qdm2_decoder_select="mpegaudiodsp"
ra_144_decoder_select="audiodsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 78e099ce5d..b825d19e9d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -629,6 +629,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o proresdsp.o proresdata.o
OBJS-$(CONFIG_PRORES_ENCODER) += proresenc_anatoliy.o proresdata.o
OBJS-$(CONFIG_PRORES_AW_ENCODER) += proresenc_anatoliy.o proresdata.o
OBJS-$(CONFIG_PRORES_KS_ENCODER) += proresenc_kostya.o proresdata.o
+OBJS-$(CONFIG_PRORES_RAW_DECODER) += prores_raw.o
OBJS-$(CONFIG_PRORES_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o
OBJS-$(CONFIG_PROSUMER_DECODER) += prosumer.o
OBJS-$(CONFIG_PSD_DECODER) += psd.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 7b01453ca2..dcf399e810 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -269,6 +269,7 @@ extern const FFCodec ff_prores_encoder;
extern const FFCodec ff_prores_decoder;
extern const FFCodec ff_prores_aw_encoder;
extern const FFCodec ff_prores_ks_encoder;
+extern const FFCodec ff_prores_raw_decoder;
extern const FFCodec ff_prosumer_decoder;
extern const FFCodec ff_psd_decoder;
extern const FFCodec ff_ptx_decoder;
diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c
new file mode 100644
index 0000000000..b4382ef573
--- /dev/null
+++ b/libavcodec/prores_raw.c
@@ -0,0 +1,525 @@
+/*
+ * ProRes RAW decoder
+ * Copyright (c) 2023-2025 Paul B Mahol
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/mem.h"
+
+#define CACHED_BITSTREAM_READER !ARCH_X86_32
+
+#include "config_components.h"
+#include "avcodec.h"
+#include "bytestream.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "get_bits.h"
+#include "idctdsp.h"
+#include "proresdata.h"
+#include "thread.h"
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+
+#include "prores_raw.h"
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+ ProResRAWContext *s = avctx->priv_data;
+ uint8_t idct_permutation[64];
+
+ avctx->bits_per_raw_sample = 12;
+ avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
+ avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
+ avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
+
+ s->pix_fmt = AV_PIX_FMT_NONE;
+
+ ff_blockdsp_init(&s->bdsp);
+ ff_proresdsp_init(&s->prodsp, avctx->bits_per_raw_sample);
+
+ ff_init_scantable_permutation(idct_permutation,
+ s->prodsp.idct_permutation_type);
+
+ ff_permute_scantable(s->scan, ff_prores_interlaced_scan, idct_permutation);
+
+ return 0;
+}
+
+static int16_t get_value(GetBitContext *gb, int16_t codebook)
+{
+ const int16_t switch_bits = codebook >> 8;
+ const int16_t rice_order = codebook & 0xf;
+ const int16_t exp_order = (codebook >> 4) & 0xf;
+ int16_t q, bits;
+
+ uint32_t b = show_bits_long(gb, 32);
+ if (!b)
+ return 0;
+ q = ff_clz(b);
+
+ if (b & 0x80000000) {
+ skip_bits_long(gb, 1 + rice_order);
+ return (b & 0x7FFFFFFF) >> (31 - rice_order);
+ }
+
+ if (q <= switch_bits) {
+ skip_bits_long(gb, 1 + rice_order + q);
+ return (q << rice_order) +
+ (((b << (q + 1)) >> 1) >> (31 - rice_order));
+ }
+
+ bits = exp_order + (q << 1) - switch_bits;
+ skip_bits_long(gb, bits);
+ return (b >> (32 - bits)) +
+ ((switch_bits + 1) << rice_order) -
+ (1 << exp_order);
+}
+
+#define TODCCODEBOOK(x) ((x + 1) >> 1)
+
+static const uint8_t align_tile_w[16] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+};
+
+const uint8_t ff_prores_raw_dc_cb[13] = {
+ 16, 33, 50, 51, 51, 51, 68, 68, 68, 68, 68, 68, 118,
+};
+
+const int16_t ff_prores_raw_ac_cb[95] = {
+ 0, 529, 273, 273, 546, 546, 546, 290, 290, 290, 563, 563,
+ 563, 563, 563, 563, 563, 563, 307, 307, 580, 580, 580, 580,
+ 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580,
+ 580, 580, 580, 580, 580, 580, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 358
+};
+
+const int16_t ff_prores_raw_rn_cb[28] = {
+ 512, 256, 0, 0, 529, 529, 273, 273, 17, 17, 33, 33, 546,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 50, 50, 68,
+};
+
+const int16_t ff_prores_raw_ln_cb[15] = {
+ 256, 273, 546, 546, 290, 290, 1075, 1075, 563, 563, 563, 563, 563, 563, 51
+};
+
+static int decode_comp(AVCodecContext *avctx, TileContext *tile,
+ AVFrame *frame, const uint8_t *data, int size,
+ int component, int16_t *qmat)
+{
+ int ret;
+ ProResRAWContext *s = avctx->priv_data;
+ const ptrdiff_t linesize = frame->linesize[0] >> 1;
+ uint16_t *dst = (uint16_t *)(frame->data[0] + tile->y*frame->linesize[0] + 2*tile->x);
+
+ int idx;
+ const int w = FFMIN(s->tw, avctx->width - tile->x) / 2;
+ const int nb_blocks = w / 8;
+ const int log2_nb_blocks = 31 - ff_clz(nb_blocks);
+ const int block_mask = (1 << log2_nb_blocks) - 1;
+ const int nb_codes = 64 * nb_blocks;
+
+ LOCAL_ALIGNED_32(int16_t, block, [64*16]);
+ LOCAL_ALIGNED_32(int16_t, out, [64]);
+
+ int16_t dc;
+ int16_t prev_dc = 0;
+ int16_t sign = 0;
+ int16_t dc_add = 0;
+ int16_t dc_codebook;
+
+ int16_t ac, rn, ln;
+ int16_t ac_codebook = 49;
+ int16_t rn_codebook = 0;
+ int16_t ln_codebook = 66;
+
+ const uint8_t *scan = s->scan;
+ GetBitContext gb;
+
+ if (component > 1)
+ dst += linesize;
+ dst += component & 1;
+
+ if ((ret = init_get_bits8(&gb, data, size)) < 0)
+ return ret;
+
+ for (int n = 0; n < nb_blocks; n++)
+ s->bdsp.clear_block(block + n*64);
+
+ /* Special handling for first block */
+ dc = get_value(&gb, 700);
+ prev_dc = (dc >> 1) ^ -(dc & 1);
+ block[0] = prev_dc;
+
+ for (int n = 1; n < nb_blocks; n++) {
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ if ((n & 15) == 1)
+ dc_codebook = 100;
+ else
+ dc_codebook = ff_prores_raw_dc_cb[FFMIN(TODCCODEBOOK(dc),
+ FF_ARRAY_ELEMS(ff_prores_raw_dc_cb) - 1)];
+
+ dc = get_value(&gb, dc_codebook);
+
+ sign = sign ^ dc & 1;
+ dc_add = (-sign ^ TODCCODEBOOK(dc)) + sign;
+ sign = dc_add < 0;
+ prev_dc += dc_add;
+
+ block[n*64] = prev_dc;
+ }
+
+ for (int n = nb_blocks; n <= nb_codes;) {
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ ln = get_value(&gb, ln_codebook);
+
+ for (int i = 0; i < ln; i++) {
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ if ((n + i) >= nb_codes)
+ break;
+
+ ac = get_value(&gb, ac_codebook);
+ ac_codebook = ff_prores_raw_ac_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ac_cb) - 1)];
+ sign = -get_bits1(&gb);
+
+ idx = scan[(n + i) >> log2_nb_blocks] + (((n + i) & block_mask) << 6);
+ block[idx] = ((ac + 1) ^ sign) - sign;
+ }
+
+ n += ln;
+ if (n >= nb_codes)
+ break;
+
+ rn = get_value(&gb, rn_codebook);
+ rn_codebook = ff_prores_raw_rn_cb[FFMIN(rn, FF_ARRAY_ELEMS(ff_prores_raw_rn_cb) - 1)];
+
+ n += rn + 1;
+ if (n >= nb_codes)
+ break;
+
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ ac = get_value(&gb, ac_codebook);
+ sign = -get_bits1(&gb);
+
+ idx = scan[n >> log2_nb_blocks] + ((n & block_mask) << 6);
+ block[idx] = ((ac + 1) ^ sign) - sign;
+
+ ac_codebook = ff_prores_raw_ac_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ac_cb) - 1)];
+ ln_codebook = ff_prores_raw_ln_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ln_cb) - 1)];
+
+ n++;
+ }
+
+ for (int n = 0; n < nb_blocks; n++) {
+ uint16_t *ptr = dst + n*16;
+
+ s->prodsp.idct_put(out, 16, block + n*64, qmat);
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++)
+ ptr[j * 2] = out[8*i+j] << 4; // 4 bits of LSB padding
+ ptr += 2 * linesize;
+ }
+ }
+
+ return 0;
+}
+
+static int decode_tile(AVCodecContext *avctx, TileContext *tile,
+ AVFrame *frame)
+{
+ int ret;
+ GetByteContext *gb = &tile->gb;
+
+ uint16_t qscale;
+ LOCAL_ALIGNED_32(int16_t, qmat, [64]);
+ int size[4];
+
+ if (tile->x >= avctx->width)
+ return 0;
+
+ /* Tile header */
+ qscale = bytestream2_get_be16(gb);
+ size[0] = bytestream2_get_be16(gb);
+ size[1] = bytestream2_get_be16(gb);
+ size[2] = bytestream2_get_be16(gb);
+ size[3] = bytestream2_size(gb) - size[0] - size[1] - size[2] - 8;
+ if (size[3] < 0)
+ return AVERROR_INVALIDDATA;
+
+ for (int i = 0; i < 64; i++)
+ qmat[i] = (qscale - 16384) >> 1;
+
+ ret = decode_comp(avctx, tile, frame, gb->buffer,
+ size[0], 2, qmat);
+ if (ret < 0)
+ goto fail;
+
+ ret = decode_comp(avctx, tile, frame, gb->buffer + size[0],
+ size[1], 1, qmat);
+ if (ret < 0)
+ goto fail;
+
+ ret = decode_comp(avctx, tile, frame, gb->buffer + size[0] + size[1],
+ size[2], 3, qmat);
+ if (ret < 0)
+ goto fail;
+
+ ret = decode_comp(avctx, tile, frame, gb->buffer + size[0] + size[1] + size[2],
+ size[3], 0, qmat);
+ if (ret < 0)
+ goto fail;
+
+ return 0;
+fail:
+ av_log(avctx, AV_LOG_ERROR, "tile %d/%d decoding error\n", tile->x, tile->y);
+ return ret;
+}
+
+static int decode_tiles(AVCodecContext *avctx, void *arg,
+ int n, int thread_nb)
+{
+ ProResRAWContext *s = avctx->priv_data;
+ TileContext *tile = &s->tiles[n];
+ AVFrame *frame = arg;
+
+ return decode_tile(avctx, tile, frame);
+}
+
+static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
+ enum AVPixelFormat pix_fmt)
+{
+ enum AVPixelFormat pix_fmts[] = {
+ pix_fmt,
+ AV_PIX_FMT_NONE,
+ };
+
+ return ff_get_format(avctx, pix_fmts);
+}
+
+/**
+ * Decode one ProRes RAW packet.
+ *
+ * Parses the frame header, updates dimensions and pixel format when they
+ * change, builds one byte reader per tile from the tile size table and then
+ * decodes all tiles, either through the active hwaccel or on the CPU via
+ * execute2().
+ *
+ * @param avctx         codec context
+ * @param frame         output frame, allocated here via ff_thread_get_buffer()
+ * @param got_frame_ptr set to 1 once a frame has been produced
+ * @param avpkt         input packet; its first 32-bit field must equal its size
+ * @return avpkt->size on success, a negative AVERROR code on failure
+ */
+static int decode_frame(AVCodecContext *avctx,
+                        AVFrame *frame, int *got_frame_ptr,
+                        AVPacket *avpkt)
+{
+    enum AVPixelFormat pix_fmt;
+    int header_size, ret, w, h, aa, flags;
+    ProResRAWContext *s = avctx->priv_data;
+    GetByteContext gb;
+    uint32_t offset;
+
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
+    if (bytestream2_get_be32(&gb) != avpkt->size)
+        return AVERROR_INVALIDDATA;
+
+    /* ProRes RAW frame */
+    if (bytestream2_get_le32(&gb) != MKTAG('p','r','r','f'))
+        return AVERROR_INVALIDDATA;
+
+    header_size = bytestream2_get_be16(&gb) + 8;
+    s->version = bytestream2_get_be16(&gb);
+    if (s->version > 1) {
+        avpriv_request_sample(avctx, "Version %d", s->version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* The fixed-layout parsing below consumes exactly this many bytes,
+     * so a shorter header cannot be valid. */
+    if (header_size < (s->version == 0 ? 144 : 96))
+        return AVERROR_INVALIDDATA;
+
+    /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atomos) */
+    bytestream2_skip(&gb, 4);
+
+    w = bytestream2_get_be16(&gb);
+    h = bytestream2_get_be16(&gb);
+
+    avctx->coded_width  = FFALIGN(w, 16);
+    avctx->coded_height = FFALIGN(h, 16);
+
+    if (w != avctx->width || h != avctx->height) {
+        av_log(avctx, AV_LOG_WARNING, "picture resolution change: %dx%d -> %dx%d\n",
+               avctx->width, avctx->height, w, h);
+        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
+            return ret;
+    }
+
+    pix_fmt = AV_PIX_FMT_BAYER_RGGB16;
+    if (pix_fmt != s->pix_fmt) {
+        s->pix_fmt = pix_fmt;
+
+        ret = get_pixel_format(avctx, pix_fmt);
+        if (ret < 0)
+            return ret;
+
+        avctx->pix_fmt = ret;
+    }
+
+    /* Skip the header fields this decoder does not use; only the flags
+     * word is read, and only its bits 1..3 (aa) are kept. */
+    if (s->version == 0) {
+        bytestream2_skip(&gb, 1 * 4);
+        bytestream2_skip(&gb, 2);
+        bytestream2_skip(&gb, 2);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 3 * 3 * 4);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 2);
+
+        flags = bytestream2_get_be16(&gb);
+        aa = (flags >> 1) & 7;
+        bytestream2_skip(&gb, 64);
+    } else {
+        bytestream2_skip(&gb, 10);
+        bytestream2_skip(&gb, 48);
+
+        flags = bytestream2_get_be16(&gb);
+        aa = (flags >> 1) & 7;
+        bytestream2_skip(&gb, 16);
+    }
+
+    /* Jump over any remaining header bytes to the tile size table */
+    bytestream2_skip(&gb, header_size - bytestream2_tell(&gb));
+
+    /* Tile grid: rows are 16 lines high; the number of columns is derived
+     * from the number of 16-pixel units, aa and the align_tile_w table. */
+    s->nb_tw = (w + 15) >> 4;
+    s->nb_th = (h + 15) >> 4;
+    s->nb_tw = (s->nb_tw >> aa) + align_tile_w[~(-1 * (1 << aa)) & s->nb_tw];
+    s->nb_tiles = s->nb_tw * s->nb_th;
+    av_log(avctx, AV_LOG_DEBUG, "%dx%d | nb_tiles: %d\n", s->nb_tw, s->nb_th, s->nb_tiles);
+
+    s->tw = s->version == 0 ? 128 : 256;
+    s->th = 16;
+    av_log(avctx, AV_LOG_DEBUG, "tile_size: %dx%d\n", s->tw, s->th);
+
+    av_fast_mallocz(&s->tiles, &s->tiles_size, s->nb_tiles * sizeof(*s->tiles));
+    if (!s->tiles)
+        return AVERROR(ENOMEM);
+
+    /* The size table holds one 16-bit entry per tile */
+    if (bytestream2_get_bytes_left(&gb) < s->nb_tiles * 2)
+        return AVERROR_INVALIDDATA;
+
+    /* Read tile data offsets */
+    offset = bytestream2_tell(&gb) + s->nb_tiles * 2;
+    for (int n = 0; n < s->nb_tiles; n++) {
+        TileContext *tile = &s->tiles[n];
+
+        int size = bytestream2_get_be16(&gb);
+        if (offset >= avpkt->size)
+            return AVERROR_INVALIDDATA;
+        if (size >= avpkt->size)
+            return AVERROR_INVALIDDATA;
+        if (offset > avpkt->size - size)
+            return AVERROR_INVALIDDATA;
+
+        bytestream2_init(&tile->gb, avpkt->data + offset, size);
+
+        /* Tiles are stored in raster order */
+        tile->y = (n / s->nb_tw) * s->th;
+        tile->x = (n % s->nb_tw) * s->tw;
+
+        offset += size;
+    }
+
+    ret = ff_thread_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    s->frame = frame;
+
+    /* Start */
+    if (avctx->hwaccel) {
+        const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
+
+        ret = ff_hwaccel_frame_priv_alloc(avctx, &s->hwaccel_picture_private);
+        if (ret < 0)
+            return ret;
+
+        ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto hwaccel_fail;
+
+        for (int n = 0; n < s->nb_tiles; n++) {
+            TileContext *tile = &s->tiles[n];
+            ret = hwaccel->decode_slice(avctx, tile->gb.buffer,
+                                        tile->gb.buffer_end - tile->gb.buffer);
+            if (ret < 0)
+                goto hwaccel_fail;
+        }
+
+        ret = hwaccel->end_frame(avctx);
+        if (ret < 0)
+            goto hwaccel_fail;
+
+        av_refstruct_unref(&s->hwaccel_picture_private);
+    } else {
+        /* Per-tile errors are logged by decode_tile() but do not fail the frame */
+        avctx->execute2(avctx, decode_tiles, frame, NULL, s->nb_tiles);
+    }
+
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->flags |= AV_FRAME_FLAG_KEY;
+
+    *got_frame_ptr = 1;
+
+    return avpkt->size;
+
+hwaccel_fail:
+    /* Release the per-frame hwaccel state so that a failed frame does not
+     * leave a stale allocation behind for the next call. */
+    av_refstruct_unref(&s->hwaccel_picture_private);
+    return ret;
+}
+
+/* Codec close callback: drops any pending hwaccel frame state and the tile array. */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    ProResRAWContext *ctx = avctx->priv_data;
+
+    av_refstruct_unref(&ctx->hwaccel_picture_private);
+    av_freep(&ctx->tiles);
+
+    return 0;
+}
+
+#if HAVE_THREADS
+/* Frame-threading hook: copies the cached pixel format from src's context to dst's. */
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    const ProResRAWContext *from = src->priv_data;
+    ProResRAWContext *to = dst->priv_data;
+
+    to->pix_fmt = from->pix_fmt;
+
+    return 0;
+}
+#endif
+
+/*
+ * Codec registration for the ProRes RAW decoder. It advertises direct
+ * rendering plus frame and slice threading; the hw_configs list is empty
+ * (NULL-terminated) at this point.
+ */
+const FFCodec ff_prores_raw_decoder = {
+ .p.name = "prores_raw",
+ CODEC_LONG_NAME("Apple ProRes RAW"),
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_PRORES_RAW,
+ .priv_data_size = sizeof(ProResRAWContext),
+ .init = decode_init,
+ .close = decode_end,
+ FF_CODEC_DECODE_CB(decode_frame),
+ UPDATE_THREAD_CONTEXT(update_thread_context),
+ .p.capabilities = AV_CODEC_CAP_DR1 |
+ AV_CODEC_CAP_FRAME_THREADS |
+ AV_CODEC_CAP_SLICE_THREADS,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
+ FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
+ .hw_configs = (const AVCodecHWConfigInternal *const []) {
+ NULL
+ },
+};
diff --git a/libavcodec/prores_raw.h b/libavcodec/prores_raw.h
new file mode 100644
index 0000000000..f8a54b89db
--- /dev/null
+++ b/libavcodec/prores_raw.h
@@ -0,0 +1,60 @@
+/*
+ * ProRes RAW decoder
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PRORES_RAW_H
+#define AVCODEC_PRORES_RAW_H
+
+#include "libavutil/mem_internal.h"
+
+#include "bytestream.h"
+#include "blockdsp.h"
+#include "proresdsp.h"
+
+/* Per-tile state, rebuilt by the decoder for every packet. */
+typedef struct TileContext {
+ GetByteContext gb; /* reader over this tile's bytes inside the packet */
+ unsigned x, y; /* tile origin: column index * tile width, row index * tile height */
+} TileContext;
+
+/* Private decoder context (AVCodecContext.priv_data) of the ProRes RAW decoder. */
+typedef struct ProResRAWContext {
+ /* DSP contexts; NOTE(review): presumably set up in decode_init(), confirm there */
+ ProresDSPContext prodsp;
+ BlockDSPContext bdsp;
+
+ TileContext *tiles; /* nb_tiles entries, (re)allocated with av_fast_mallocz() */
+ unsigned int tiles_size; /* allocation size bookkeeping for av_fast_mallocz() */
+ int nb_tiles; /* nb_tw * nb_th */
+ int tw, th; /* tile width (128 for version 0, 256 for version 1) and height (16) */
+ int nb_tw, nb_th; /* number of tile columns and tile rows */
+
+ enum AVPixelFormat pix_fmt; /* format for which get_format was last run */
+ AVFrame *frame; /* frame currently being decoded; read by the hwaccel */
+ void *hwaccel_picture_private; /* per-frame hwaccel state, allocated and released per packet */
+
+ int version; /* bitstream version from the frame header (0 or 1) */
+
+ /* coefficient scan table; NOTE(review): filled outside the code shown here, confirm */
+ uint8_t scan[64];
+} ProResRAWContext;
+
+extern const uint8_t ff_prores_raw_dc_cb[13];
+extern const int16_t ff_prores_raw_ac_cb[95];
+extern const int16_t ff_prores_raw_rn_cb[28];
+extern const int16_t ff_prores_raw_ln_cb[15];
+
+#endif /* AVCODEC_PRORES_RAW_H */
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (6 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization Lynne
` (4 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This commit adds a ProRes RAW hardware implementation written in Vulkan.
Both version 0 and version 1 streams are supported.
The implementation is highly parallelized, with 512 invocations dispatched
for every tile, and typically around 4k tiles on a 5.8k stream.
Thanks to unlord for the 8-point iDCT.
Benchmark for a generic 5.8k RAW HQ file:
6900XT: 63fps
7900XTX: 84fps
6000 Ada: 120fps
Intel: 9fps
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/hwaccels.h | 1 +
libavcodec/prores_raw.c | 6 +
libavcodec/vulkan/Makefile | 3 +
libavcodec/vulkan/prores_raw.comp | 347 +++++++++++++++++++++
libavcodec/vulkan_decode.c | 9 +-
libavcodec/vulkan_prores_raw.c | 503 ++++++++++++++++++++++++++++++
8 files changed, 871 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan/prores_raw.comp
create mode 100644 libavcodec/vulkan_prores_raw.c
diff --git a/configure b/configure
index 66e76cd47c..eeb81d7aa3 100755
--- a/configure
+++ b/configure
@@ -3290,6 +3290,8 @@ mpeg4_videotoolbox_hwaccel_deps="videotoolbox"
mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder"
prores_videotoolbox_hwaccel_deps="videotoolbox"
prores_videotoolbox_hwaccel_select="prores_decoder"
+prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
+prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
vc1_d3d11va_hwaccel_deps="d3d11va"
vc1_d3d11va_hwaccel_select="vc1_decoder"
vc1_d3d11va2_hwaccel_deps="d3d11va"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b825d19e9d..19fdaa9ad3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1080,6 +1080,7 @@ OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
+OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o reverse.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 0b2c725247..fb9b850233 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -67,6 +67,7 @@ extern const struct FFHWAccel ff_mpeg4_vaapi_hwaccel;
extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
+extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;
diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c
index b4382ef573..cc237c44a7 100644
--- a/libavcodec/prores_raw.c
+++ b/libavcodec/prores_raw.c
@@ -317,6 +317,9 @@ static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
enum AVPixelFormat pix_fmt)
{
enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+ AV_PIX_FMT_VULKAN,
+#endif
pix_fmt,
AV_PIX_FMT_NONE,
};
@@ -520,6 +523,9 @@ const FFCodec ff_prores_raw_decoder = {
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
.hw_configs = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+ HWACCEL_VULKAN(prores_raw),
+#endif
NULL
},
};
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 729cb4f15c..d8e1471fa6 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -14,6 +14,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
+OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
+ vulkan/prores_raw.o
+
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/prores_raw.comp b/libavcodec/vulkan/prores_raw.comp
new file mode 100644
index 0000000000..f5dee35e35
--- /dev/null
+++ b/libavcodec/vulkan/prores_raw.comp
@@ -0,0 +1,347 @@
+/*
+ * ProRes RAW decoder
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Shorthand for an explicit conversion to int16_t */
+#define I16(x) (int16_t(x))
+
+/* Role of each local invocation axis: z = component, y = block within the
+ * tile, x = row of the 8x8 block */
+#define COMP_ID (gl_LocalInvocationID.z)
+#define BLOCK_ID (gl_LocalInvocationID.y)
+#define ROW_ID (gl_LocalInvocationID.x)
+
+/* Per-invocation bit reader, used by the entropy decoding functions */
+GetBitContext gb;
+/* Decoded coefficients (and later the IDCT intermediate), indexed
+ * [component][block][coefficient]; declared with an empty initializer */
+shared float btemp[gl_WorkGroupSize.z][16][64] = { };
+/* Dequantized coefficients and final IDCT output, same indexing as btemp */
+shared float block[gl_WorkGroupSize.z][16][64];
+
+/*
+ * First IDCT pass: runs an 8-point transform over the 8 consecutive values
+ * block[COMP_ID][BLOCK_ID][8*row_id .. 8*row_id + 7] and stores the 8 results
+ * in btemp transposed (result k goes to index k*8 + row_id).
+ */
+void idct8_horiz(const uint row_id)
+{
+ float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+ float u0, u1, u2, u3, u4, u5, u6, u7;
+
+ /* Input */
+ t0 = block[COMP_ID][BLOCK_ID][8*row_id + 0];
+ u4 = block[COMP_ID][BLOCK_ID][8*row_id + 1];
+ t2 = block[COMP_ID][BLOCK_ID][8*row_id + 2];
+ u6 = block[COMP_ID][BLOCK_ID][8*row_id + 3];
+ t1 = block[COMP_ID][BLOCK_ID][8*row_id + 4];
+ u5 = block[COMP_ID][BLOCK_ID][8*row_id + 5];
+ t3 = block[COMP_ID][BLOCK_ID][8*row_id + 6];
+ u7 = block[COMP_ID][BLOCK_ID][8*row_id + 7];
+
+ /* Embedded scaled inverse 4-point Type-II DCT */
+ u0 = t0 + t1;
+ u1 = t0 - t1;
+ u3 = t2 + t3;
+ u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+ t0 = u0 + u3;
+ t3 = u0 - u3;
+ t1 = u1 + u2;
+ t2 = u1 - u2;
+
+ /* Embedded scaled inverse 4-point Type-IV DST */
+ t5 = u5 + u6;
+ t6 = u5 - u6;
+ t7 = u4 + u7;
+ t4 = u4 - u7;
+ u7 = t7 + t5;
+ u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+ u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+ u4 = u8 - t4*(1.0823922002923939687994464107328f);
+ u6 = u8 - t6*(2.6131259297527530557132863468544f);
+ t7 = u7;
+ t6 = t7 - u6;
+ t5 = t6 + u5;
+ t4 = t5 - u4;
+
+ /* Butterflies */
+ u0 = t0 + t7;
+ u7 = t0 - t7;
+ u6 = t1 + t6;
+ u1 = t1 - t6;
+ u2 = t2 + t5;
+ u5 = t2 - t5;
+ u4 = t3 + t4;
+ u3 = t3 - t4;
+
+ /* Output */
+ btemp[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;
+ btemp[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;
+ btemp[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;
+ btemp[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;
+ btemp[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;
+ btemp[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;
+ btemp[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;
+ btemp[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;
+}
+
+/*
+ * Second IDCT pass: same 8-point transform as idct8_horiz(), but reading
+ * btemp[COMP_ID][BLOCK_ID][8*row_id .. 8*row_id + 7] and writing the results
+ * to block transposed (result k goes to index k*8 + row_id). Because the
+ * first pass already stored its output transposed, this pass works along
+ * the other dimension of the 8x8 block.
+ */
+void idct8_vert(const uint row_id)
+{
+ float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+ float u0, u1, u2, u3, u4, u5, u6, u7;
+
+ /* Input */
+ /* The 0.5 added to the first input ends up in all 8 outputs;
+ * NOTE(review): presumably a rounding bias, confirm against the C decoder */
+ t0 = btemp[COMP_ID][BLOCK_ID][8*row_id + 0] + 0.5f; // NOTE
+ u4 = btemp[COMP_ID][BLOCK_ID][8*row_id + 1];
+ t2 = btemp[COMP_ID][BLOCK_ID][8*row_id + 2];
+ u6 = btemp[COMP_ID][BLOCK_ID][8*row_id + 3];
+ t1 = btemp[COMP_ID][BLOCK_ID][8*row_id + 4];
+ u5 = btemp[COMP_ID][BLOCK_ID][8*row_id + 5];
+ t3 = btemp[COMP_ID][BLOCK_ID][8*row_id + 6];
+ u7 = btemp[COMP_ID][BLOCK_ID][8*row_id + 7];
+
+ /* Embedded scaled inverse 4-point Type-II DCT */
+ u0 = t0 + t1;
+ u1 = t0 - t1;
+ u3 = t2 + t3;
+ u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+ t0 = u0 + u3;
+ t3 = u0 - u3;
+ t1 = u1 + u2;
+ t2 = u1 - u2;
+
+ /* Embedded scaled inverse 4-point Type-IV DST */
+ t5 = u5 + u6;
+ t6 = u5 - u6;
+ t7 = u4 + u7;
+ t4 = u4 - u7;
+ u7 = t7 + t5;
+ u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+ u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+ u4 = u8 - t4*(1.0823922002923939687994464107328f);
+ u6 = u8 - t6*(2.6131259297527530557132863468544f);
+ t7 = u7;
+ t6 = t7 - u6;
+ t5 = t6 + u5;
+ t4 = t5 - u4;
+
+ /* Butterflies */
+ u0 = t0 + t7;
+ u7 = t0 - t7;
+ u6 = t1 + t6;
+ u1 = t1 - t6;
+ u2 = t2 + t5;
+ u5 = t2 - t5;
+ u4 = t3 + t4;
+ u3 = t3 - t4;
+
+ /* Output */
+ block[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;
+ block[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;
+ block[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;
+ block[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;
+ block[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;
+ block[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;
+ block[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;
+ block[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;
+}
+
+/*
+ * Read one variable-length code from the global bit reader gb.
+ *
+ * codebook packs three parameters: bits 0-3 are rice_order, bits 4-7 are
+ * exp_order and the bits from 8 upwards are switch_bits. With q being the
+ * number of leading zero bits in the next 32 bits, the code takes one of
+ * three forms:
+ *   - first bit set: the rice_order bits that follow are the value;
+ *   - q <= switch_bits: value is (q << rice_order) plus rice_order more bits;
+ *   - otherwise: a longer code of exp_order + 2*q - switch_bits bits.
+ * Returns 0 without consuming anything if the next 32 bits are all zero.
+ */
+int16_t get_value(int16_t codebook)
+{
+ const int16_t switch_bits = codebook >> 8;
+ const int16_t rice_order = codebook & I16(0xf);
+ const int16_t exp_order = (codebook >> 4) & I16(0xf);
+
+ uint32_t b = show_bits(gb, 32);
+ if (expectEXT(b == 0, false))
+ return I16(0);
+ /* q = count of leading zero bits in b */
+ int16_t q = I16(31) - I16(findMSB(b));
+
+ if ((b & 0x80000000) != 0) {
+ skip_bits(gb, 1 + rice_order);
+ return I16((b & 0x7FFFFFFF) >> (31 - rice_order));
+ }
+
+ if (q <= switch_bits) {
+ skip_bits(gb, q + rice_order + 1);
+ /* drop the q zeros and the terminating 1, keep the next rice_order bits */
+ return I16((q << rice_order) +
+ (((b << (q + 1)) >> 1) >> (31 - rice_order)));
+ }
+
+ int16_t bits = exp_order + (q << 1) - switch_bits;
+ skip_bits(gb, bits);
+ return I16((b >> (32 - bits)) +
+ ((switch_bits + 1) << rice_order) -
+ (1 << exp_order));
+}
+
+#define TODCCODEBOOK(x) ((x + 1) >> 1)
+
+/*
+ * Decode the first coefficient of each of the nb_blocks blocks of component
+ * COMP_ID from gb and store it in btemp[COMP_ID][n][0]. The first block is
+ * coded directly; every following block is coded as a signed difference
+ * that is accumulated into prev_dc. Stops early when the bit reader runs
+ * out of data.
+ */
+void read_dc_vals(const uint nb_blocks)
+{
+ int16_t dc, dc_add;
+ int16_t prev_dc = I16(0), sign = I16(0);
+
+ /* Special handling for first block */
+ dc = get_value(I16(700));
+ /* map the unsigned code to a signed value: even -> dc/2, odd -> -(dc/2) - 1 */
+ prev_dc = (dc >> 1) ^ -(dc & I16(1));
+ btemp[COMP_ID][0][0] = prev_dc;
+
+ for (uint n = 1; n < nb_blocks; n++) {
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ /* Blocks 1, 17, 33, ... use a fixed codebook; all others pick one
+ * from dc_cb based on the previously decoded code */
+ uint8_t dc_codebook;
+ if ((n & 15) == 1)
+ dc_codebook = uint8_t(100);
+ else
+ dc_codebook = dc_cb[min(TODCCODEBOOK(dc), 13 - 1)];
+
+ dc = get_value(dc_codebook);
+
+ /* the low bit of the code flips the sign relative to the previous
+ * difference; sign then tracks whether this difference is negative */
+ sign = sign ^ dc & int16_t(1);
+ dc_add = (-sign ^ I16(TODCCODEBOOK(dc))) + sign;
+ sign = I16(dc_add < 0);
+ prev_dc += dc_add;
+
+ btemp[COMP_ID][n][0] = prev_dc;
+ }
+}
+
+/*
+ * Decode the remaining coefficients of component COMP_ID from gb into btemp.
+ *
+ * Coefficients are addressed by a single running position n that starts at
+ * nb_blocks: the block index is n & block_mask and the coefficient index is
+ * n >> log2_nb_blocks, so coefficients of equal index are interleaved across
+ * blocks. Each iteration reads a count ln of consecutive coded values, then
+ * a count rn of positions to skip (left untouched in btemp), then one more
+ * coded value. The three codebooks adapt to the values just decoded.
+ * NOTE(review): the block/coefficient split assumes nb_blocks is a power of
+ * two; confirm against the callers.
+ */
+void read_ac_vals(const uint nb_blocks)
+{
+ const uint nb_codes = nb_blocks << 6;
+ const uint log2_nb_blocks = findMSB(nb_blocks);
+ const uint block_mask = (1 << log2_nb_blocks) - 1;
+
+ int16_t ac, rn, ln;
+ int16_t ac_codebook = I16(49);
+ int16_t rn_codebook = I16( 0);
+ int16_t ln_codebook = I16(66);
+ int16_t sign;
+ int16_t val;
+
+ for (uint n = nb_blocks; n <= nb_codes;) {
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ /* run of ln consecutive coded values */
+ ln = get_value(ln_codebook);
+ for (uint i = 0; i < ln; i++) {
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ if (expectEXT(n >= nb_codes, false))
+ break;
+
+ ac = get_value(ac_codebook);
+ ac_codebook = ac_cb[min(ac, 95 - 1)];
+ /* sign is 0 or -1; (x ^ sign) - sign negates x when sign is -1 */
+ sign = -int16_t(get_bit(gb));
+
+ val = ((ac + I16(1)) ^ sign) - sign;
+ btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;
+
+ n++;
+ }
+
+ if (expectEXT(n >= nb_codes, false))
+ break;
+
+ /* skip rn + 1 positions */
+ rn = get_value(rn_codebook);
+ rn_codebook = rn_cb[min(rn, 28 - 1)];
+
+ n += rn + 1;
+ if (expectEXT(n >= nb_codes, false))
+ break;
+
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ /* the coded value that follows the skipped positions */
+ ac = get_value(ac_codebook);
+ sign = -int16_t(get_bit(gb));
+
+ val = ((ac + I16(1)) ^ sign) - sign;
+ btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;
+
+ ac_codebook = ac_cb[min(ac, 95 - 1)];
+ ln_codebook = ln_cb[min(ac, 15 - 1)];
+
+ n++;
+ }
+}
+
+/*
+ * Shader entry point: one workgroup decodes one tile.
+ *
+ * Steps: read the tile header, let one invocation per component run the
+ * entropy decoder into btemp, dequantize and reorder into block, run the
+ * two IDCT passes, then write the samples to the output image. Component
+ * COMP_ID is written at pixel offset (COMP_ID & 1, COMP_ID >> 1) with a
+ * stride of 2 in both directions.
+ */
+void main(void)
+{
+ const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+ TileData td = tile_data[tile_idx];
+
+ /* Tiles that start beyond the frame width carry nothing to decode */
+ if (expectEXT(td.pos.x >= frame_size.x, false))
+ return;
+
+ /* Tile header: four big-endian 16-bit words (qscale, then three sizes) */
+ uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
+ u8vec2buf hdr_data = u8vec2buf(pkt_offset);
+ int qscale = int(pack16(hdr_data[0].v.yx));
+
+ /* size[] is indexed by component id. The header stores the sizes of
+ * components 2, 1 and 3 in that order; component 0 gets whatever is left
+ * of the tile after those three and the 8 header bytes. */
+ ivec4 size = ivec4(td.size,
+ pack16(hdr_data[2].v.yx),
+ pack16(hdr_data[1].v.yx),
+ pack16(hdr_data[3].v.yx));
+ size[0] = size[0] - size[1] - size[2] - size[3] - 8;
+ if (expectEXT(size[0] < 0, false))
+ return;
+
+ const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
+ /* common dequantization factor derived from the tile's qscale */
+ const float n = float(qscale - 16384) / (2.0f*4096.0f);
+ /* per-component width of the (possibly clipped) tile, and its 8-wide block count */
+ const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
+ const uint nb_blocks = w / 8;
+
+ /* Byte offset of each component's data after the header; the payload
+ * order in the tile is component 2, 1, 3, 0 */
+ const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],
+ size[2],
+ 0,
+ size[2] + size[1]);
+
+ /* Entropy decoding is sequential: one invocation per component does it */
+ if (BLOCK_ID == 0 && ROW_ID == 0) {
+ init_get_bits(gb, u8buf(pkt_offset + 8 + comp_offset[COMP_ID]),
+ size[COMP_ID]);
+ read_dc_vals(nb_blocks);
+ read_ac_vals(nb_blocks);
+ }
+
+ barrier();
+
+ /* Reorder through the scan table and apply the dequantization factor and
+ * the per-coefficient IDCT scale */
+ [[unroll]]
+ for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)
+ block[COMP_ID][BLOCK_ID][i] = btemp[COMP_ID][BLOCK_ID][scan[i]] *
+ n * idct_8x8_scales[i];
+
+ barrier();
+
+#ifdef PARALLEL_ROWS
+ /* one invocation per row of the 8x8 block */
+ idct8_horiz(ROW_ID);
+
+ barrier();
+
+ idct8_vert(ROW_ID);
+#else
+ /* a single invocation per block handles all 8 rows */
+ for (uint j = 0; j < 8; j++)
+ idct8_horiz(j);
+
+ barrier();
+
+ for (uint j = 0; j < 8; j++)
+ idct8_vert(j);
+#endif
+
+ barrier();
+
+ [[unroll]]
+ for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)
+ imageStore(dst,
+ offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),
+ vec4(block[COMP_ID][BLOCK_ID][i]));
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 7310ba1547..857f16bc0a 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -25,7 +25,8 @@
#include "libavutil/vulkan_loader.h"
#define DECODER_IS_SDR(codec_id) \
- ((codec_id) == AV_CODEC_ID_FFV1)
+ (((codec_id) == AV_CODEC_ID_FFV1) || \
+ ((codec_id) == AV_CODEC_ID_PRORES_RAW))
#if CONFIG_H264_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@@ -39,6 +40,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
#if CONFIG_FFV1_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
#endif
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
+#endif
static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_H264_VULKAN_HWACCEL
@@ -53,6 +57,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_FFV1_VULKAN_HWACCEL
&ff_vk_dec_ffv1_desc,
#endif
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+ &ff_vk_dec_prores_raw_desc,
+#endif
};
static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c
new file mode 100644
index 0000000000..12571c0e2c
--- /dev/null
+++ b/libavcodec/vulkan_prores_raw.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "prores_raw.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_prores_raw_comp;
+
+/* Decoder descriptor listed in dec_descs[] of vulkan_decode.c; the decoder
+ * runs on a compute queue. */
+const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc = {
+ .codec_id = AV_CODEC_ID_PRORES_RAW,
+ .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+ .queue_flags = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* Per-frame hwaccel state, reached through ProResRAWContext.hwaccel_picture_private. */
+typedef struct ProResRAWVulkanDecodePicture {
+ FFVulkanDecodePicture vp;
+
+ AVBufferRef *tile_data; /* pooled buffer holding one TileData entry per tile */
+ uint32_t nb_tiles; /* number of tiles queued so far by decode_slice */
+} ProResRAWVulkanDecodePicture;
+
+/* Decoder-wide Vulkan state, stored in FFVulkanDecodeShared.sd_ctx. */
+typedef struct ProResRAWVulkanDecodeContext {
+ FFVulkanShader decode[2]; /* decode shader, indexed by bitstream version (0 or 1) */
+
+ AVBufferPool *tile_data_pool; /* pool backing the per-frame tile_data buffers */
+
+ FFVkBuffer uniform_buf; /* buffer holding the constant tables used by the shaders */
+} ProResRAWVulkanDecodeContext;
+
+/* Push constants of the decode shader; the layout has to match the
+ * pushConstants block emitted in init_decode_shader(). */
+typedef struct DecodePushData {
+ VkDeviceAddress tile_data; /* device address of the TileData array */
+ VkDeviceAddress pkt_data; /* device address of the slice/packet buffer */
+ uint32_t frame_size[2]; /* frame width, height */
+ uint32_t tile_size[2]; /* tile width, height */
+} DecodePushData;
+
+/* Per-tile record read by the shader; mirrors the GLSL TileData buffer
+ * reference declared in init_decode_shader(). */
+typedef struct TileData {
+ int32_t pos[2]; /* tile origin x, y */
+ uint32_t offset; /* byte offset of the tile relative to pkt_data */
+ uint32_t size; /* tile size in bytes, header included */
+} TileData;
+
+/**
+ * hwaccel start_frame callback.
+ *
+ * Tries to host-map the packet buffer (when the external host memory
+ * extension is available), takes a tile metadata buffer from the pool and
+ * prepares the output frame for decoding.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_prores_raw_start_frame(AVCodecContext *avctx,
+                                     const AVBufferRef *buffer_ref,
+                                     av_unused const uint8_t *buffer,
+                                     av_unused uint32_t size)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *shared = dec->shared_ctx;
+    ProResRAWVulkanDecodeContext *vkctx = shared->sd_ctx;
+    ProResRAWContext *prr = avctx->priv_data;
+    ProResRAWVulkanDecodePicture *pic = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pic->vp;
+    int err;
+
+    /* Host map the input tile data if supported; the result is not checked,
+     * decode_slice copies the data when no mapping exists */
+    if (shared->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        ff_vk_host_map_buffer(&shared->s, &vp->slices_buf, buffer_ref->data,
+                              buffer_ref,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+
+    /* One TileData entry per tile of the frame */
+    err = ff_vk_get_pooled_buffer(&shared->s, &vkctx->tile_data_pool,
+                                  &pic->tile_data,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, prr->nb_tiles*sizeof(TileData),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        return err;
+
+    /* Get the output frame ready */
+    err = ff_vk_decode_prepare_frame_sdr(dec, prr->frame, vp, 1,
+                                         FF_VK_REP_FLOAT, 0);
+
+    return err < 0 ? err : 0;
+}
+
+/**
+ * hwaccel decode_slice callback: records position, size and data offset of
+ * the next tile in the per-frame TileData array. When the packet is
+ * host-mapped the offset points into the mapping; otherwise the tile bytes
+ * are appended to the slice buffer.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_prores_raw_decode_slice(AVCodecContext *avctx,
+                                      const uint8_t *data,
+                                      uint32_t size)
+{
+    ProResRAWContext *prr = avctx->priv_data;
+    ProResRAWVulkanDecodePicture *pic = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pic->vp;
+
+    FFVkBuffer *tile_buf = (FFVkBuffer *)pic->tile_data->data;
+    FFVkBuffer *pkt_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
+
+    /* Entry and source tile for the slot about to be filled */
+    TileData *cur = (TileData *)tile_buf->mapped_mem + pic->nb_tiles;
+    const TileContext *tile = &prr->tiles[pic->nb_tiles];
+    int err;
+
+    cur->pos[0] = tile->x;
+    cur->pos[1] = tile->y;
+    cur->size = size;
+
+    if (vp->slices_buf && pkt_buf->host_ref) {
+        /* Packet is mapped as-is: address the tile inside the mapping */
+        cur->offset = data - pkt_buf->mapped_mem;
+        pic->nb_tiles++;
+        return 0;
+    }
+
+    /* Copy path: the tile lands at the current end of the slice buffer;
+     * ff_vk_decode_add_slice() also advances the tile counter */
+    cur->offset = vp->slices_size;
+    err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                 &pic->nb_tiles, NULL);
+
+    return err < 0 ? err : 0;
+}
+
+/**
+ * hwaccel end_frame callback: records and submits the compute dispatch that
+ * decodes all tiles queued for the current frame.
+ *
+ * Ownership of the tile metadata buffer and of the slice buffer passes to
+ * the execution context once they have been added as dependencies.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_prores_raw_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    ProResRAWContext *prr = avctx->priv_data;
+    ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx;
+
+    DecodePushData pd_decode;
+    FFVulkanShader *decode_shader;
+
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* Fetched before the references are handed to the exec context below */
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *tile_data = (FFVkBuffer *)pp->tile_data->data;
+
+    VkImageMemoryBarrier2 img_bar[8];
+    int nb_img_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, prr->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                    prr->frame));
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->tile_data, 1, 0));
+    pp->tile_data = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+
+    /* Move the output image to the general layout for shader writes */
+    ff_vk_frame_barrier(&ctx->s, exec, prr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+    });
+    nb_img_bar = 0;
+
+    /* The shader variant depends on the bitstream version */
+    decode_shader = &prv->decode[prr->version];
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  prr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    pd_decode = (DecodePushData) {
+        .tile_data = tile_data->address,
+        .pkt_data = slices_buf->address,
+        .frame_size[0] = avctx->width,
+        .frame_size[1] = avctx->height,
+        .tile_size[0] = prr->tw,
+        .tile_size[1] = prr->th,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_decode), &pd_decode);
+
+    /* One workgroup per tile */
+    vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1);
+
+    RET(ff_vk_exec_submit(&ctx->s, exec));
+
+    return 0;
+
+fail:
+    /* Report the failure to the caller instead of claiming success */
+    return err;
+}
+
+/**
+ * Build, compile and register the decode compute shader for one bitstream
+ * version.
+ *
+ * @param prr     decoder context (not referenced in this function)
+ * @param s       Vulkan context
+ * @param pool    execution pool the shader gets registered with
+ * @param spv     SPIR-V compiler used to compile the generated GLSL
+ * @param shd     shader object to initialize
+ * @param version bitstream version; selects 8 (version 0) or 16 blocks per
+ *                workgroup
+ * @return the result of the last step on success, a negative AVERROR code
+ *         on failure
+ */
+static int init_decode_shader(ProResRAWContext *prr, FFVulkanContext *s,
+ FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+ FFVulkanShader *shd, int version)
+{
+ int err;
+ FFVulkanDescriptorSetBinding *desc_set;
+ int parallel_rows = 1;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+
+ /* Fall back to one invocation per block when the device cannot run 512
+ * invocations per workgroup, and on integrated GPUs */
+ if (s->props.properties.limits.maxComputeWorkGroupInvocations < 512 ||
+ s->props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)
+ parallel_rows = 0;
+
+ /* Workgroup size: x = rows per block, y = blocks per tile, z = components */
+ RET(ff_vk_shader_init(s, shd, "prores_raw",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2",
+ "GL_EXT_null_initializer" }, 3,
+ parallel_rows ? 8 : 1 /* 8x8 transforms, 8-point width */,
+ version == 0 ? 8 : 16 /* Horizontal blocks */,
+ 4 /* Components */,
+ 0));
+
+ if (parallel_rows)
+ GLSLC(0, #define PARALLEL_ROWS );
+
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+
+ /* GLSL side of the C TileData and DecodePushData structs */
+ GLSLC(0, layout(buffer_reference, buffer_reference_align = 16) buffer TileData { );
+ GLSLC(1, ivec2 pos; );
+ GLSLC(1, uint offset; );
+ GLSLC(1, uint size; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, TileData tile_data; );
+ GLSLC(1, u8buf pkt_data; );
+ GLSLC(1, uvec2 frame_size; );
+ GLSLC(1, uvec2 tile_size; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ /* First descriptor set: the output image */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "dst",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "r16",
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
+
+ /* Second descriptor set: constant tables (IDCT scales, scan order and
+ * the four codebook tables) */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "dct_scale_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "float idct_8x8_scales[64];",
+ },
+ {
+ .name = "scan_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t scan[64];",
+ },
+ {
+ .name = "dc_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t dc_cb[13];",
+ },
+ {
+ .name = "ac_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t ac_cb[95];",
+ },
+ {
+ .name = "rn_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t rn_cb[28];",
+ },
+ {
+ .name = "ln_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t ln_cb[15];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 6, 1, 0));
+
+ /* Shader body */
+ GLSLD(ff_source_prores_raw_comp);
+
+ RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+ /* Reached on success as well: release the compiler's output */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/**
+ * Free the decoder-wide Vulkan state stored in ctx->sd_ctx: both decode
+ * shaders, the uniform buffer, the tile metadata pool and the state
+ * structure itself.
+ */
+static void vk_decode_prores_raw_uninit(FFVulkanDecodeShared *ctx)
+{
+    ProResRAWVulkanDecodeContext *fv = ctx->sd_ctx;
+
+    ff_vk_shader_free(&ctx->s, &fv->decode[0]);
+    ff_vk_shader_free(&ctx->s, &fv->decode[1]);
+
+    ff_vk_free_buf(&ctx->s, &fv->uniform_buf);
+
+    av_buffer_pool_uninit(&fv->tile_data_pool);
+
+    /* Free through the owning pointer so that ctx->sd_ctx is reset to NULL
+     * instead of being left dangling */
+    av_freep(&ctx->sd_ctx);
+}
+
+/**
+ * Initialise the ProRes RAW Vulkan hwaccel.
+ *
+ * Compiles both decode shader variants, creates one host-visible buffer that
+ * holds every constant table the shaders read (IDCT scales, the inverse of
+ * prr->scan and the four codebooks) and binds the sub-ranges of that buffer
+ * to descriptor set 1 of each shader.
+ *
+ * @param avctx codec context; priv_data is the ProResRAWContext and
+ *              internal->hwaccel_priv_data the FFVulkanDecodeContext
+ * @return a non-negative value on success, a negative AVERROR code on failure
+ */
+static int vk_decode_prores_raw_init(AVCodecContext *avctx)
+{
+    int err;
+    ProResRAWContext *prr = avctx->priv_data;
+
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    size_t ua;
+
+    ProResRAWVulkanDecodeContext *prv;
+    FFVkSPIRVCompiler *spv;
+
+    /* Sizes of the two fixed tables at the start of the uniform buffer */
+    const size_t dct_scale_size = 64*sizeof(float);
+    const size_t scan_size      = 64*sizeof(uint8_t);
+
+    uint8_t *uniform_buf;
+    float *dct_scale_buf;
+    double idct_8_scales[8] = {
+        cos(4.0*M_PI/16.0)/2.0,
+        cos(1.0*M_PI/16.0)/2.0,
+        cos(2.0*M_PI/16.0)/2.0,
+        cos(3.0*M_PI/16.0)/2.0,
+        cos(4.0*M_PI/16.0)/2.0,
+        cos(5.0*M_PI/16.0)/2.0,
+        cos(6.0*M_PI/16.0)/2.0,
+        cos(7.0*M_PI/16.0)/2.0,
+    };
+    uint8_t *scan_buf;
+    /* Byte sizes of the dc/ac/rn/ln codebooks, matching the array lengths
+     * declared in the shader's descriptor set */
+    size_t cb_size[5] = {
+        13*sizeof(uint8_t),
+        95*sizeof(int16_t),
+        28*sizeof(int16_t),
+        15*sizeof(int16_t),
+    };
+    size_t cb_offset[5];
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on every failure must go through "fail" so that the SPIR-V
+     * compiler is released. */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+    ctx = dec->shared_ctx;
+
+    prv = ctx->sd_ctx = av_mallocz(sizeof(*prv));
+    if (!prv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_prores_raw_uninit;
+
+    /* Every sub-range bound as a uniform buffer must start at a multiple of
+     * minUniformBufferOffsetAlignment. The scan table starts at 256 bytes,
+     * which satisfies any permitted alignment; the first codebook follows the
+     * 64-byte scan table and therefore has to be aligned explicitly. */
+    ua = ctx->s.props.properties.limits.minUniformBufferOffsetAlignment;
+    cb_offset[0] = FFALIGN(dct_scale_size + scan_size, ua);
+    cb_offset[1] = cb_offset[0] + FFALIGN(cb_size[0], ua);
+    cb_offset[2] = cb_offset[1] + FFALIGN(cb_size[1], ua);
+    cb_offset[3] = cb_offset[2] + FFALIGN(cb_size[2], ua);
+    cb_offset[4] = cb_offset[3] + FFALIGN(cb_size[3], ua);
+
+    /* Setup decode shader */
+    RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[0], 0));
+    RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[1], 1));
+
+    /* cb_offset[4] is the end of the last codebook, so this size always
+     * covers all tables (with some slack). */
+    RET(ff_vk_create_buf(&ctx->s, &prv->uniform_buf,
+                         dct_scale_size + scan_size + cb_offset[4] + 256,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    RET(ff_vk_map_buffer(&ctx->s, &prv->uniform_buf, &uniform_buf, 0));
+
+    /* 8x8 table of separable IDCT scale factors (row scale * column scale) */
+    dct_scale_buf = (float *)uniform_buf;
+    for (int i = 0; i < 64; i++)
+        dct_scale_buf[i] = (float)(idct_8_scales[i >> 3] *
+                                   idct_8_scales[i & 7]);
+
+    /* Inverse permutation of prr->scan */
+    scan_buf = uniform_buf + dct_scale_size;
+    for (int i = 0; i < 64; i++)
+        scan_buf[prr->scan[i]] = i;
+
+    memcpy(uniform_buf + cb_offset[0], ff_prores_raw_dc_cb,
+           sizeof(ff_prores_raw_dc_cb));
+    memcpy(uniform_buf + cb_offset[1], ff_prores_raw_ac_cb,
+           sizeof(ff_prores_raw_ac_cb));
+    memcpy(uniform_buf + cb_offset[2], ff_prores_raw_rn_cb,
+           sizeof(ff_prores_raw_rn_cb));
+    memcpy(uniform_buf + cb_offset[3], ff_prores_raw_ln_cb,
+           sizeof(ff_prores_raw_ln_cb));
+
+    RET(ff_vk_unmap_buffer(&ctx->s, &prv->uniform_buf, 1));
+
+    /* Bind the six table ranges to descriptor set 1 of both shader variants:
+     * binding 0 = IDCT scales, 1 = scan, 2..5 = codebooks. */
+    for (int i = 0; i < 2; i++) {
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                            &prv->decode[i], 1, 0, 0,
+                                            &prv->uniform_buf,
+                                            0, dct_scale_size,
+                                            VK_FORMAT_UNDEFINED));
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                            &prv->decode[i], 1, 1, 0,
+                                            &prv->uniform_buf,
+                                            dct_scale_size, scan_size,
+                                            VK_FORMAT_UNDEFINED));
+        for (int j = 0; j < 4; j++) {
+            RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                &prv->decode[i], 1, 2 + j, 0,
+                                                &prv->uniform_buf,
+                                                cb_offset[j], cb_size[j],
+                                                VK_FORMAT_UNDEFINED));
+        }
+    }
+
+fail:
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/**
+ * RefStruct free callback for the per-frame hwaccel private data.
+ *
+ * @param _hwctx opaque whose non-const pointer is the AVHWDeviceContext
+ * @param data   the ProResRAWVulkanDecodePicture being released
+ */
+static void vk_prores_raw_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    ProResRAWVulkanDecodePicture *pic = data;
+    AVHWDeviceContext *device = _hwctx.nc;
+
+    /* Only the embedded generic Vulkan picture is passed on for release. */
+    ff_vk_decode_free_frame(device, &pic->vp);
+}
+
+const FFHWAccel ff_prores_raw_vulkan_hwaccel = { /* hwaccel descriptor tying AV_CODEC_ID_PRORES_RAW to the Vulkan callbacks below */
+ .p.name = "prores_raw_vulkan",
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_PRORES_RAW,
+ .p.pix_fmt = AV_PIX_FMT_VULKAN,
+ .start_frame = &vk_prores_raw_start_frame,
+ .decode_slice = &vk_prores_raw_decode_slice,
+ .end_frame = &vk_prores_raw_end_frame,
+ .free_frame_priv = &vk_prores_raw_free_frame_priv, /* hands the embedded FFVulkanDecodePicture to ff_vk_decode_free_frame() */
+ .frame_priv_data_size = sizeof(ProResRAWVulkanDecodePicture),
+ .init = &vk_decode_prores_raw_init, /* builds both decode shaders and fills the uniform table buffer */
+ .update_thread_context = &ff_vk_update_thread_context, /* this and the remaining ff_vk_* callbacks are not ProRes RAW specific functions */
+ .decode_params = &ff_vk_params_invalidate,
+ .flush = &ff_vk_decode_flush,
+ .uninit = &ff_vk_decode_uninit,
+ .frame_params = &ff_vk_frame_params,
+ .priv_data_size = sizeof(FFVulkanDecodeContext), /* type read back from hwaccel_priv_data in the init callback */
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (7 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering Lynne
` (3 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavfilter/vf_scale_vulkan.c | 160 ++++++++++++++++++----------------
1 file changed, 85 insertions(+), 75 deletions(-)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 1d6492e213..46b1476933 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -112,6 +112,78 @@ static const char write_444[] = {
C(0, } )
};
+/**
+ * Append the GLSL of the scaling shader (with optional RGB->YUV conversion)
+ * to shd and, when converting, fill s->opts.yuv_matrix.
+ *
+ * @param s    filter context (formats, scaler, output range, push constants)
+ * @param shd  shader whose source the GLSL* macros append to
+ * @param desc descriptor bindings; desc[1].elems bounds the per-plane loop
+ * @param in   input frame supplying the dimensions and the colorspace
+ * @return 0 on success, AVERROR(EINVAL) for an unsupported conversion output
+ *         format or colorspace
+ */
+static int init_scale_shader(ScaleVulkanContext *s, FFVulkanShader *shd,
+                             FFVulkanDescriptorSetBinding *desc, AVFrame *in)
+{
+    /* A differing output format means the RGB->YUV path is taken. */
+    const int convert = s->vkctx.output_format != s->vkctx.input_format;
+
+    GLSLD(   scale_bilinear                                                  );
+
+    if (convert) {
+        GLSLD(   rgb2yuv                                                     );
+    }
+
+    /* Pull in the plane writer matching the output format, if there is one. */
+    if (s->vkctx.output_format == AV_PIX_FMT_NV12) {
+        GLSLD(write_nv12);
+    } else if (s->vkctx.output_format == AV_PIX_FMT_YUV420P) {
+        GLSLD( write_420);
+    } else if (s->vkctx.output_format == AV_PIX_FMT_YUV444P) {
+        GLSLD( write_444);
+    }
+
+    GLSLC(0, void main()                                                     );
+    GLSLC(0, {                                                               );
+    GLSLC(1,     ivec2 size;                                                 );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                );
+    GLSLF(1,     vec2 in_d = vec2(%i, %i);             ,in->width, in->height);
+    GLSLC(1,     vec2 c_r = vec2(crop_w, crop_h) / in_d;                     );
+    GLSLC(1,     vec2 c_o = vec2(crop_x, crop_y) / in_d;                     );
+    GLSLC(0,                                                                 );
+
+    if (convert) {
+        /* Sample plane 0, convert, then hand over to the format's writer. */
+        GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o);                );
+        GLSLF(1, res = rgb2yuv(res, %i);    ,s->out_range == AVCOL_RANGE_JPEG);
+        switch (s->vkctx.output_format) {
+        case AV_PIX_FMT_NV12:
+            GLSLC(1, write_nv12(res, pos); );
+            break;
+        case AV_PIX_FMT_YUV420P:
+            GLSLC(1, write_420(res, pos); );
+            break;
+        case AV_PIX_FMT_YUV444P:
+            GLSLC(1, write_444(res, pos); );
+            break;
+        default:
+            return AVERROR(EINVAL);
+        }
+    } else {
+        /* Same format in and out: scale every plane independently. */
+        for (int plane = 0; plane < desc[1].elems; plane++) {
+            GLSLF(1,  size = imageSize(output_img[%i]);                      ,plane);
+            GLSLC(1,  if (IS_WITHIN(pos, size)) {                            );
+            if (s->scaler == F_NEAREST || s->scaler == F_BILINEAR) {
+                GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o);       ,plane);
+                GLSLF(2, imageStore(output_img[%i], pos, res);               ,plane);
+            }
+            GLSLC(1, }                                                       );
+        }
+    }
+
+    GLSLC(0, }                                                               );
+
+    if (convert) {
+        double mat[3][3];
+        const AVLumaCoefficients *coeffs =
+            av_csp_luma_coeffs_from_avcsp(in->colorspace);
+
+        if (!coeffs) {
+            av_log(s, AV_LOG_ERROR, "Unsupported colorspace\n");
+            return AVERROR(EINVAL);
+        }
+
+        ff_fill_rgb2yuv_table(coeffs, mat);
+
+        /* Copy the 3x3 table into the top-left of the 4x4 push constant. */
+        for (int i = 0; i < 3; i++)
+            for (int j = 0; j < 3; j++)
+                s->opts.yuv_matrix[i][j] = mat[i][j];
+        s->opts.yuv_matrix[3][3] = 1.0;
+    }
+
+    return 0;
+}
+
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
@@ -157,18 +229,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
32, 32, 1,
0));
- GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
- GLSLC(1, mat4 yuv_matrix; );
- GLSLC(1, int crop_x; );
- GLSLC(1, int crop_y; );
- GLSLC(1, int crop_w; );
- GLSLC(1, int crop_h; );
- GLSLC(0, }; );
- GLSLC(0, );
-
- ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_img",
@@ -191,71 +251,21 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
- GLSLD( scale_bilinear );
-
- if (s->vkctx.output_format != s->vkctx.input_format) {
- GLSLD( rgb2yuv );
- }
-
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
- case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
- case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
- default: break;
- }
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
- GLSLC(1, vec2 c_r = vec2(crop_w, crop_h) / in_d; );
- GLSLC(1, vec2 c_o = vec2(crop_x, crop_y) / in_d; );
- GLSLC(0, );
-
- if (s->vkctx.output_format == s->vkctx.input_format) {
- for (int i = 0; i < desc[1].elems; i++) {
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (s->scaler) {
- case F_NEAREST:
- case F_BILINEAR:
- GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- break;
- };
- GLSLC(1, } );
- }
- } else {
- GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
- GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
- case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
- case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
- default: return AVERROR(EINVAL);
- }
- }
-
- GLSLC(0, } );
-
- if (s->vkctx.output_format != s->vkctx.input_format) {
- const AVLumaCoefficients *lcoeffs;
- double tmp_mat[3][3];
-
- lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
- if (!lcoeffs) {
- av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
- return AVERROR(EINVAL);
- }
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, mat4 yuv_matrix; );
+ GLSLC(1, int crop_x; );
+ GLSLC(1, int crop_y; );
+ GLSLC(1, int crop_w; );
+ GLSLC(1, int crop_h; );
+ GLSLC(0, }; );
+ GLSLC(0, );
- ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
+ ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
- for (int y = 0; y < 3; y++)
- for (int x = 0; x < 3; x++)
- s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
- s->opts.yuv_matrix[3][3] = 1.0;
- }
+ err = init_scale_shader(s, shd, desc, in);
+ if (err < 0)
+ goto fail;
RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (8 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header Lynne
` (2 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavfilter/vf_scale_vulkan.c | 60 +++++++++++++++++--
libavfilter/vulkan/Makefile | 1 +
libavfilter/vulkan/debayer.comp | 102 ++++++++++++++++++++++++++++++++
3 files changed, 159 insertions(+), 4 deletions(-)
create mode 100644 libavfilter/vulkan/debayer.comp
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 46b1476933..5cb09ac385 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -27,6 +27,8 @@
#include "colorspace.h"
#include "video.h"
+extern const char *ff_source_debayer_comp;
+
enum ScalerFunc {
F_BILINEAR = 0,
F_NEAREST,
@@ -34,6 +36,13 @@ enum ScalerFunc {
F_NB,
};
+enum DebayerFunc { /* debayering algorithm selected by the "debayer" filter option */
+ DB_BILINEAR = 0, /* emits a call to debayer_bilinear() */
+ DB_BILINEAR_HQ, /* emits a call to debayer_bilinear_hq(); the option's default */
+
+ DB_NB, /* number of algorithms; not a valid selection itself */
+};
+
typedef struct ScaleVulkanContext {
FFVulkanContext vkctx;
@@ -58,6 +67,7 @@ typedef struct ScaleVulkanContext {
enum ScalerFunc scaler;
enum AVColorRange out_range;
+ enum DebayerFunc debayer;
} ScaleVulkanContext;
static const char scale_bilinear[] = {
@@ -184,6 +194,25 @@ static int init_scale_shader(ScaleVulkanContext *s, FFVulkanShader *shd,
return 0;
}
+/**
+ * Append the debayering shader to shd: the contents of debayer.comp followed
+ * by a main() that calls the routine selected by s->debayer.
+ *
+ * @param s    filter context; only s->debayer is read
+ * @param shd  shader whose source the GLSL* macros append to
+ * @param desc unused
+ * @param in   unused
+ * @return always 0
+ */
+static int init_debayer_shader(ScaleVulkanContext *s, FFVulkanShader *shd,
+                               FFVulkanDescriptorSetBinding *desc, AVFrame *in)
+{
+    GLSLD(ff_source_debayer_comp);
+
+    GLSLC(0, void main(void));
+    GLSLC(0, {                );
+    /* Any other value leaves main() empty. */
+    switch (s->debayer) {
+    case DB_BILINEAR:
+        GLSLC(1, debayer_bilinear(););
+        break;
+    case DB_BILINEAR_HQ:
+        GLSLC(1, debayer_bilinear_hq(););
+        break;
+    default:
+        break;
+    }
+    GLSLC(0, }                );
+
+    /* Double the recorded X and Y workgroup sizes. Each invocation in
+     * debayer.comp handles a 2x2 pixel cell, so presumably this makes the
+     * dispatch cover the frame in cells -- confirm against the dispatch
+     * code. */
+    for (int dim = 0; dim < 2; dim++)
+        shd->lg_size[dim] <<= 1;
+
+    return 0;
+}
+
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
@@ -197,6 +226,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
FFVkSPIRVCompiler *spv;
FFVulkanDescriptorSetBinding *desc;
+ int debayer = s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16;
int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
switch (s->scaler) {
@@ -222,7 +252,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
- RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+
+ if (!debayer)
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+
RET(ff_vk_shader_init(vkctx, &s->shd, "scale",
VK_SHADER_STAGE_COMPUTE_BIT,
NULL, 0,
@@ -232,7 +265,13 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .type = debayer ?
+ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE :
+ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .mem_layout = debayer ?
+ ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT) :
+ NULL,
+ .mem_quali = "readonly",
.dimensions = 2,
.elems = in_planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -263,7 +302,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
VK_SHADER_STAGE_COMPUTE_BIT);
- err = init_scale_shader(s, shd, desc, in);
+ if (debayer)
+ err = init_debayer_shader(s, shd, desc, in);
+ else
+ err = init_scale_shader(s, shd, desc, in);
if (err < 0)
goto fail;
@@ -361,7 +403,14 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
s->vkctx.output_format = s->vkctx.input_format;
}
- if (s->vkctx.output_format != s->vkctx.input_format) {
+ if (s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16) {
+ if (s->vkctx.output_format == s->vkctx.input_format) {
+ s->vkctx.output_format = AV_PIX_FMT_RGBA64;
+ } else if (!ff_vk_mt_is_np_rgb(s->vkctx.output_format)) {
+ av_log(avctx, AV_LOG_ERROR, "Unsupported output format for debayer\n");
+ return AVERROR(EINVAL);
+ }
+ } else if (s->vkctx.output_format != s->vkctx.input_format) {
if (!ff_vk_mt_is_np_rgb(s->vkctx.input_format)) {
av_log(avctx, AV_LOG_ERROR, "Unsupported input format for conversion\n");
return AVERROR(EINVAL);
@@ -406,6 +455,9 @@ static const AVOption scale_vulkan_options[] = {
{ "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, .unit = "scaler" },
{ "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "scaler" },
{ "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, .unit = "scaler" },
+ { "debayer", "Debayer algorithm to use", OFFSET(debayer), AV_OPT_TYPE_INT, {.i64 = DB_BILINEAR_HQ}, 0, DB_NB, .flags = FLAGS, .unit = "debayer" },
+ { "bilinear", "Bilinear debayering (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "debayer" },
+ { "bilinear_hq", "Bilinear debayering (high quality)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR_HQ}, 0, 0, .flags = FLAGS, .unit = "debayer" },
{ "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
{ "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" },
{ "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" },
diff --git a/libavfilter/vulkan/Makefile b/libavfilter/vulkan/Makefile
index 573eee32c7..c77aaf4f6b 100644
--- a/libavfilter/vulkan/Makefile
+++ b/libavfilter/vulkan/Makefile
@@ -4,6 +4,7 @@ clean::
$(RM) $(GEN_CLEANSUFFIXES:%=libavfilter/vulkan/%)
OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.o
+OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.o
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
diff --git a/libavfilter/vulkan/debayer.comp b/libavfilter/vulkan/debayer.comp
new file mode 100644
index 0000000000..c86c2f5eec
--- /dev/null
+++ b/libavfilter/vulkan/debayer.comp
@@ -0,0 +1,102 @@
+/*
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define LD(xo, yo) \
+ (imageLoad(input_img[0], pos + ivec2((xo), (yo))).r)
+
+void debayer_bilinear(void) /* Bilinear debayering of one 2x2 cell per invocation.
+ * LD(xo, yo) above reads the .r channel of input_img[0] at pos + (xo, yo) and
+ * needs a variable named pos in scope. The host code selects this path for
+ * RGGB input, so (0, 0) is R, (1, 0) and (0, 1) are G and (1, 1) is B.
+ * NOTE(review): taps of cells on the frame border fall outside the image and
+ * are not clamped here. */
+{
+ ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1;
+
+ /* R site (0, 0): R read directly, G = mean of the 4 edge neighbours,
+ * B = mean of the 4 diagonal neighbours */
+ vec4 tl = vec4(LD(0, 0),
+ (LD(1, 0) + LD(-1, 0) + LD(0, 1) + LD(0, -1)) / 4.0f,
+ (LD(-1, -1) + LD(1, 1) + LD(-1, 1) + LD(1, -1)) / 4.0f,
+ 1.0f);
+ imageStore(output_img[0], pos, tl);
+
+ /* G1 site (1, 0): R = mean of left/right, G read directly,
+ * B = mean of above/below */
+ vec4 tr = vec4((LD(2, 0) + LD(0, 0)) / 2.0f,
+ LD(1, 0),
+ (LD(1, 1) + LD(1, -1)) / 2.0f,
+ 1.0f);
+ imageStore(output_img[0], pos + ivec2(1, 0), tr);
+
+ /* G2 site (0, 1): R = mean of above/below, G read directly,
+ * B = mean of left/right */
+ vec4 bl = vec4((LD(0, 2) + LD(0, 0)) / 2.0f,
+ LD(0, 1),
+ (LD(1, 1) + LD(-1, 1)) / 2.0f,
+ 1.0f);
+ imageStore(output_img[0], pos + ivec2(0, 1), bl);
+
+ /* B site (1, 1): R = mean of the 4 diagonal neighbours,
+ * G = mean of the 4 edge neighbours, B read directly */
+ vec4 br = vec4((LD(0, 0) + LD(2, 2) + LD(0, 2) + LD(2, 0)) / 4.0f,
+ (LD(2, 1) + LD(0, 1) + LD(1, 2) + LD(1, 0)) / 4.0f,
+ LD(1, 1),
+ 1.0f);
+ imageStore(output_img[0], pos + ivec2(1, 1), br);
+}
+
+void debayer_bilinear_hq(void) /* Higher-quality debayering of one 2x2 RGGB cell per invocation.
+ * Each missing channel is the bilinear estimate plus a correction term built
+ * from samples of the site's own colour up to 2 pixels away, so the taps span
+ * a 5x5 window. The weights appear to match the Malvar-He-Cutler linear
+ * filters -- TODO confirm against the paper.
+ * NOTE(review): results are not clamped here and the correction terms can go
+ * negative; taps of border cells fall outside the image. */
+{
+ ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1;
+
+ /* R site (0, 0): R read directly; G and B are corrected with the R samples
+ * at distance 2 */
+ vec4 tl = vec4(LD(0, 0),
+ (4.0f*LD(0, 0) + 2.0f*(LD(0, -1) + LD(0, 1) + LD(-1, 0) + LD(1, 0)) -
+ (LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 8.0f,
+ (12.0f*LD(0, 0) + 4.0f*(LD(-1, -1) + LD(-1, 1) + LD(1, -1) + LD(1, 1)) -
+ 3.0f*(LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 16.0f,
+ 1.0f);
+ imageStore(output_img[0], pos, tl);
+
+ /* G1 site (1, 0): R from the left/right pair, B from the above/below pair,
+ * each corrected with the surrounding G samples; G read directly */
+ vec4 tr = vec4((10.0f*LD(1, 0) + 8.0f*(LD(0, 0) + LD(2, 0)) -
+ 2.0f*(LD(0, -1) + LD(2, 1) + LD(0, 1) + LD(2, -1) + LD(-1, 0) + LD(3, 0)) +
+ LD(1, -2) + LD(1, 2)) / 16.0f,
+ LD(1, 0),
+ (10.0f*LD(1, 0) + 8.0f*(LD(1, -1) + LD(1, 1)) -
+ 2.0f*(LD(0, -1) + LD(0, 1) + LD(2, -1) + LD(2, 1) + LD(1, -2) + LD(1, 2)) +
+ LD(-1, 0) + LD(3, 0)) / 16.0f,
+ 1.0f);
+ imageStore(output_img[0], pos + ivec2(1, 0), tr);
+
+
+ /* G2 site (0, 1): R from the above/below pair, B from the left/right pair,
+ * each corrected with the surrounding G samples; G read directly */
+ vec4 bl = vec4((10.0f*LD(0, 1) + 8.0f*(LD(0, 0) + LD(0, 2)) -
+ 2.0f*(LD(-1, 0) + LD(-1, 2) + LD(1, 0) + LD(1, 2) + LD(0, -1) + LD(0, 3)) +
+ LD(-2, 1) + LD(2, 1)) / 16.0f,
+ LD(0, 1),
+ (10.0f*LD(0, 1) + 8.0f*(LD(-1, 1) + LD(1, 1)) -
+ 2.0f*(LD(-1, 0) + LD(1, 2) + LD(-1, 2) + LD(1, 0) + LD(-2, 1) + LD(2, 1)) +
+ LD(0, -1) + LD(0, 3)) / 16.0f,
+ 1.0f);
+ imageStore(output_img[0], pos + ivec2(0, 1), bl);
+
+ /* B site (1, 1): B read directly; R and G are corrected with the B samples
+ * at distance 2 */
+ vec4 br = vec4((12.0f*LD(1, 1) + 4.0f*(LD(0, 0) + LD(0, 2) + LD(2, 0) + LD(2, 2)) -
+ 3.0f*(LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 16.0f,
+ (4.0f*LD(1, 1) + 2.0f*(LD(1, 0) + LD(1, 2) + LD(0, 1) + LD(2, 1)) -
+ (LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 8.0f,
+ LD(1, 1),
+ 1.0f);
+ imageStore(output_img[0], pos + ivec2(1, 1), br);
+}
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (9 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-13 18:15 ` Andreas Rheinhardt
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel Lynne
2025-07-13 22:10 ` [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Niklas Haas
12 siblings, 1 reply; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
configure | 2 +-
libavcodec/vp9.c | 40 ++++++++++++++++++++++++++++++++++++++++
libavcodec/vp9dec.h | 6 ++++++
libavcodec/vp9shared.h | 4 ++++
4 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
index eeb81d7aa3..92ee54c7a6 100755
--- a/configure
+++ b/configure
@@ -3153,7 +3153,7 @@ vp6a_decoder_select="vp6_decoder"
vp6f_decoder_select="vp6_decoder"
vp7_decoder_select="h264pred videodsp vp8dsp"
vp8_decoder_select="h264pred videodsp vp8dsp"
-vp9_decoder_select="videodsp vp9_parser vp9_superframe_split_bsf"
+vp9_decoder_select="videodsp vp9_parser cbs_vp9 vp9_superframe_split_bsf"
vvc_decoder_select="cabac cbs_h266 golomb videodsp vvc_sei"
wcmv_decoder_select="inflate_wrapper"
webp_decoder_select="vp8_decoder exif"
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 141f0941b4..a385956f4f 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -97,6 +97,7 @@ static void vp9_tile_data_free(VP9TileData *td)
static void vp9_frame_unref(VP9Frame *f)
{
ff_progress_frame_unref(&f->tf);
+ av_refstruct_unref(&f->header_ref);
av_refstruct_unref(&f->extradata);
av_refstruct_unref(&f->hwaccel_picture_private);
f->segmentation_map = NULL;
@@ -145,6 +146,9 @@ fail:
static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
{
+ av_refstruct_replace(&dst->header_ref, src->header_ref);
+ dst->frame_header = src->frame_header;
+
ff_progress_frame_replace(&dst->tf, &src->tf);
av_refstruct_replace(&dst->extradata, src->extradata);
@@ -1255,6 +1259,11 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
av_freep(&s->entries);
ff_pthread_free(s, vp9_context_offsets);
#endif
+
+ av_refstruct_unref(&s->header_ref);
+ ff_cbs_fragment_free(&s->current_frag);
+ ff_cbs_close(&s->cbc);
+
av_freep(&s->td);
return 0;
}
@@ -1557,11 +1566,27 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int size = pkt->size;
VP9Context *s = avctx->priv_data;
int ret, i, j, ref;
+ CodedBitstreamUnit *unit;
+ VP9RawFrame *rf;
+
int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
(!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
const VP9Frame *src;
AVFrame *f;
+ ret = ff_cbs_read_packet(s->cbc, &s->current_frag, pkt);
+ if (ret < 0) {
+ ff_cbs_fragment_reset(&s->current_frag);
+ av_log(avctx, AV_LOG_ERROR, "Failed to read frame header.\n");
+ return ret;
+ }
+
+ unit = &s->current_frag.units[0];
+ rf = unit->content;
+
+ av_refstruct_replace(&s->header_ref, unit->content_ref);
+ s->frame_header = &rf->header;
+
if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
return ret;
} else if (ret == 0) {
@@ -1592,6 +1617,10 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
vp9_frame_unref(&s->s.frames[CUR_FRAME]);
if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
return ret;
+
+ s->s.frames[CUR_FRAME].header_ref = av_refstruct_ref(s->header_ref);
+ s->s.frames[CUR_FRAME].frame_header = s->frame_header;
+
f = s->s.frames[CUR_FRAME].tf.f;
if (s->s.h.keyframe)
f->flags |= AV_FRAME_FLAG_KEY;
@@ -1779,6 +1808,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
for (i = 0; i < 8; i++)
ff_progress_frame_unref(&s->s.refs[i]);
+ ff_cbs_fragment_reset(&s->current_frag);
+ ff_cbs_flush(s->cbc);
+
if (FF_HW_HAS_CB(avctx, flush))
FF_HW_SIMPLE_CALL(avctx, flush);
}
@@ -1791,6 +1823,10 @@ static av_cold int vp9_decode_init(AVCodecContext *avctx)
s->last_bpp = 0;
s->s.h.filter.sharpness = -1;
+ ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_VP9, avctx);
+ if (ret < 0)
+ return ret;
+
#if HAVE_THREADS
if (avctx->active_thread_type & FF_THREAD_SLICE) {
ret = ff_pthread_init(s, vp9_context_offsets);
@@ -1814,6 +1850,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
av_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
+ av_refstruct_replace(&s->header_ref, ssrc->header_ref);
+ s->frame_header = ssrc->frame_header;
+ memcpy(s->cbc->priv_data, ssrc->cbc->priv_data, sizeof(CodedBitstreamVP9Context));
+
s->s.h.invisible = ssrc->s.h.invisible;
s->s.h.keyframe = ssrc->s.h.keyframe;
s->s.h.intraonly = ssrc->s.h.intraonly;
diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h
index e41f47a82a..c3ad2bbcdb 100644
--- a/libavcodec/vp9dec.h
+++ b/libavcodec/vp9dec.h
@@ -38,6 +38,7 @@
#include "vp9dsp.h"
#include "vp9shared.h"
#include "vpx_rac.h"
+#include "cbs_vp9.h"
#define REF_INVALID_SCALE 0xFFFF
@@ -97,6 +98,11 @@ typedef struct VP9Context {
VP9SharedContext s;
VP9TileData *td;
+ CodedBitstreamContext *cbc;
+ CodedBitstreamFragment current_frag;
+ VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
+ VP9RawFrameHeader *frame_header;
+
VP9DSPContext dsp;
VideoDSPContext vdsp;
GetBitContext gb;
diff --git a/libavcodec/vp9shared.h b/libavcodec/vp9shared.h
index 8a450c26a6..d2226e0072 100644
--- a/libavcodec/vp9shared.h
+++ b/libavcodec/vp9shared.h
@@ -30,6 +30,7 @@
#include "libavutil/mem_internal.h"
#include "progressframe.h"
+#include "cbs_vp9.h"
#include "vp9.h"
enum BlockPartition {
@@ -63,6 +64,9 @@ typedef struct VP9mvrefPair {
} VP9mvrefPair;
typedef struct VP9Frame {
+ VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
+ VP9RawFrameHeader *frame_header;
+
ProgressFrame tf;
void *extradata; ///< RefStruct reference
uint8_t *segmentation_map;
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header Lynne
@ 2025-07-13 18:15 ` Andreas Rheinhardt
2025-07-14 5:06 ` Lynne
0 siblings, 1 reply; 16+ messages in thread
From: Andreas Rheinhardt @ 2025-07-13 18:15 UTC (permalink / raw)
To: ffmpeg-devel
Lynne:
> ---
> configure | 2 +-
> libavcodec/vp9.c | 40 ++++++++++++++++++++++++++++++++++++++++
> libavcodec/vp9dec.h | 6 ++++++
> libavcodec/vp9shared.h | 4 ++++
> 4 files changed, 51 insertions(+), 1 deletion(-)
>
> diff --git a/configure b/configure
> index eeb81d7aa3..92ee54c7a6 100755
> --- a/configure
> +++ b/configure
> @@ -3153,7 +3153,7 @@ vp6a_decoder_select="vp6_decoder"
> vp6f_decoder_select="vp6_decoder"
> vp7_decoder_select="h264pred videodsp vp8dsp"
> vp8_decoder_select="h264pred videodsp vp8dsp"
> -vp9_decoder_select="videodsp vp9_parser vp9_superframe_split_bsf"
> +vp9_decoder_select="videodsp vp9_parser cbs_vp9 vp9_superframe_split_bsf"
> vvc_decoder_select="cabac cbs_h266 golomb videodsp vvc_sei"
> wcmv_decoder_select="inflate_wrapper"
> webp_decoder_select="vp8_decoder exif"
> diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> index 141f0941b4..a385956f4f 100644
> --- a/libavcodec/vp9.c
> +++ b/libavcodec/vp9.c
> @@ -97,6 +97,7 @@ static void vp9_tile_data_free(VP9TileData *td)
> static void vp9_frame_unref(VP9Frame *f)
> {
> ff_progress_frame_unref(&f->tf);
> + av_refstruct_unref(&f->header_ref);
> av_refstruct_unref(&f->extradata);
> av_refstruct_unref(&f->hwaccel_picture_private);
> f->segmentation_map = NULL;
> @@ -145,6 +146,9 @@ fail:
>
> static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
> {
> + av_refstruct_replace(&dst->header_ref, src->header_ref);
> + dst->frame_header = src->frame_header;
> +
> ff_progress_frame_replace(&dst->tf, &src->tf);
>
> av_refstruct_replace(&dst->extradata, src->extradata);
> @@ -1255,6 +1259,11 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
> av_freep(&s->entries);
> ff_pthread_free(s, vp9_context_offsets);
> #endif
> +
> + av_refstruct_unref(&s->header_ref);
> + ff_cbs_fragment_free(&s->current_frag);
> + ff_cbs_close(&s->cbc);
> +
> av_freep(&s->td);
> return 0;
> }
> @@ -1557,11 +1566,27 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
> int size = pkt->size;
> VP9Context *s = avctx->priv_data;
> int ret, i, j, ref;
> + CodedBitstreamUnit *unit;
> + VP9RawFrame *rf;
> +
> int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
> (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
> const VP9Frame *src;
> AVFrame *f;
>
> + ret = ff_cbs_read_packet(s->cbc, &s->current_frag, pkt);
> + if (ret < 0) {
> + ff_cbs_fragment_reset(&s->current_frag);
> + av_log(avctx, AV_LOG_ERROR, "Failed to read frame header.\n");
> + return ret;
> + }
> +
> + unit = &s->current_frag.units[0];
> + rf = unit->content;
> +
> + av_refstruct_replace(&s->header_ref, unit->content_ref);
> + s->frame_header = &rf->header;
> +
> if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
> return ret;
> } else if (ret == 0) {
> @@ -1592,6 +1617,10 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
> vp9_frame_unref(&s->s.frames[CUR_FRAME]);
> if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
> return ret;
> +
> + s->s.frames[CUR_FRAME].header_ref = av_refstruct_ref(s->header_ref);
> + s->s.frames[CUR_FRAME].frame_header = s->frame_header;
> +
> f = s->s.frames[CUR_FRAME].tf.f;
> if (s->s.h.keyframe)
> f->flags |= AV_FRAME_FLAG_KEY;
> @@ -1779,6 +1808,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
> for (i = 0; i < 8; i++)
> ff_progress_frame_unref(&s->s.refs[i]);
>
> + ff_cbs_fragment_reset(&s->current_frag);
> + ff_cbs_flush(s->cbc);
> +
> if (FF_HW_HAS_CB(avctx, flush))
> FF_HW_SIMPLE_CALL(avctx, flush);
> }
> @@ -1791,6 +1823,10 @@ static av_cold int vp9_decode_init(AVCodecContext *avctx)
> s->last_bpp = 0;
> s->s.h.filter.sharpness = -1;
>
> + ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_VP9, avctx);
> + if (ret < 0)
> + return ret;
> +
> #if HAVE_THREADS
> if (avctx->active_thread_type & FF_THREAD_SLICE) {
> ret = ff_pthread_init(s, vp9_context_offsets);
> @@ -1814,6 +1850,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
> av_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
> s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
>
> + av_refstruct_replace(&s->header_ref, ssrc->header_ref);
> + s->frame_header = ssrc->frame_header;
> + memcpy(s->cbc->priv_data, ssrc->cbc->priv_data, sizeof(CodedBitstreamVP9Context));
> +
> s->s.h.invisible = ssrc->s.h.invisible;
> s->s.h.keyframe = ssrc->s.h.keyframe;
> s->s.h.intraonly = ssrc->s.h.intraonly;
> diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h
> index e41f47a82a..c3ad2bbcdb 100644
> --- a/libavcodec/vp9dec.h
> +++ b/libavcodec/vp9dec.h
> @@ -38,6 +38,7 @@
> #include "vp9dsp.h"
> #include "vp9shared.h"
> #include "vpx_rac.h"
> +#include "cbs_vp9.h"
>
> #define REF_INVALID_SCALE 0xFFFF
>
> @@ -97,6 +98,11 @@ typedef struct VP9Context {
> VP9SharedContext s;
> VP9TileData *td;
>
> + CodedBitstreamContext *cbc;
> + CodedBitstreamFragment current_frag;
> + VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
> + VP9RawFrameHeader *frame_header;
> +
> VP9DSPContext dsp;
> VideoDSPContext vdsp;
> GetBitContext gb;
> diff --git a/libavcodec/vp9shared.h b/libavcodec/vp9shared.h
> index 8a450c26a6..d2226e0072 100644
> --- a/libavcodec/vp9shared.h
> +++ b/libavcodec/vp9shared.h
> @@ -30,6 +30,7 @@
> #include "libavutil/mem_internal.h"
>
> #include "progressframe.h"
> +#include "cbs_vp9.h"
> #include "vp9.h"
>
> enum BlockPartition {
> @@ -63,6 +64,9 @@ typedef struct VP9mvrefPair {
> } VP9mvrefPair;
>
> typedef struct VP9Frame {
> + VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
> + VP9RawFrameHeader *frame_header;
> +
> ProgressFrame tf;
> void *extradata; ///< RefStruct reference
> uint8_t *segmentation_map;
My expectation for a patch that uses CBS to parse the frame header is
that the other header parsing code would be removed in said patch. Why
is this not true here? What is the benefit of this patch then?
- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header
2025-07-13 18:15 ` Andreas Rheinhardt
@ 2025-07-14 5:06 ` Lynne
0 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-14 5:06 UTC (permalink / raw)
To: ffmpeg-devel
On 14/07/2025 03:15, Andreas Rheinhardt wrote:
> Lynne:
>> ---
>> configure | 2 +-
>> libavcodec/vp9.c | 40 ++++++++++++++++++++++++++++++++++++++++
>> libavcodec/vp9dec.h | 6 ++++++
>> libavcodec/vp9shared.h | 4 ++++
>> 4 files changed, 51 insertions(+), 1 deletion(-)
>>
>> diff --git a/configure b/configure
>> index eeb81d7aa3..92ee54c7a6 100755
>> --- a/configure
>> +++ b/configure
>> @@ -3153,7 +3153,7 @@ vp6a_decoder_select="vp6_decoder"
>> vp6f_decoder_select="vp6_decoder"
>> vp7_decoder_select="h264pred videodsp vp8dsp"
>> vp8_decoder_select="h264pred videodsp vp8dsp"
>> -vp9_decoder_select="videodsp vp9_parser vp9_superframe_split_bsf"
>> +vp9_decoder_select="videodsp vp9_parser cbs_vp9 vp9_superframe_split_bsf"
>> vvc_decoder_select="cabac cbs_h266 golomb videodsp vvc_sei"
>> wcmv_decoder_select="inflate_wrapper"
>> webp_decoder_select="vp8_decoder exif"
>> diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
>> index 141f0941b4..a385956f4f 100644
>> --- a/libavcodec/vp9.c
>> +++ b/libavcodec/vp9.c
>> @@ -97,6 +97,7 @@ static void vp9_tile_data_free(VP9TileData *td)
>> static void vp9_frame_unref(VP9Frame *f)
>> {
>> ff_progress_frame_unref(&f->tf);
>> + av_refstruct_unref(&f->header_ref);
>> av_refstruct_unref(&f->extradata);
>> av_refstruct_unref(&f->hwaccel_picture_private);
>> f->segmentation_map = NULL;
>> @@ -145,6 +146,9 @@ fail:
>>
>> static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
>> {
>> + av_refstruct_replace(&dst->header_ref, src->header_ref);
>> + dst->frame_header = src->frame_header;
>> +
>> ff_progress_frame_replace(&dst->tf, &src->tf);
>>
>> av_refstruct_replace(&dst->extradata, src->extradata);
>> @@ -1255,6 +1259,11 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
>> av_freep(&s->entries);
>> ff_pthread_free(s, vp9_context_offsets);
>> #endif
>> +
>> + av_refstruct_unref(&s->header_ref);
>> + ff_cbs_fragment_free(&s->current_frag);
>> + ff_cbs_close(&s->cbc);
>> +
>> av_freep(&s->td);
>> return 0;
>> }
>> @@ -1557,11 +1566,27 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
>> int size = pkt->size;
>> VP9Context *s = avctx->priv_data;
>> int ret, i, j, ref;
>> + CodedBitstreamUnit *unit;
>> + VP9RawFrame *rf;
>> +
>> int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
>> (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
>> const VP9Frame *src;
>> AVFrame *f;
>>
>> + ret = ff_cbs_read_packet(s->cbc, &s->current_frag, pkt);
>> + if (ret < 0) {
>> + ff_cbs_fragment_reset(&s->current_frag);
>> + av_log(avctx, AV_LOG_ERROR, "Failed to read frame header.\n");
>> + return ret;
>> + }
>> +
>> + unit = &s->current_frag.units[0];
>> + rf = unit->content;
>> +
>> + av_refstruct_replace(&s->header_ref, unit->content_ref);
>> + s->frame_header = &rf->header;
>> +
>> if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
>> return ret;
>> } else if (ret == 0) {
>> @@ -1592,6 +1617,10 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
>> vp9_frame_unref(&s->s.frames[CUR_FRAME]);
>> if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
>> return ret;
>> +
>> + s->s.frames[CUR_FRAME].header_ref = av_refstruct_ref(s->header_ref);
>> + s->s.frames[CUR_FRAME].frame_header = s->frame_header;
>> +
>> f = s->s.frames[CUR_FRAME].tf.f;
>> if (s->s.h.keyframe)
>> f->flags |= AV_FRAME_FLAG_KEY;
>> @@ -1779,6 +1808,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
>> for (i = 0; i < 8; i++)
>> ff_progress_frame_unref(&s->s.refs[i]);
>>
>> + ff_cbs_fragment_reset(&s->current_frag);
>> + ff_cbs_flush(s->cbc);
>> +
>> if (FF_HW_HAS_CB(avctx, flush))
>> FF_HW_SIMPLE_CALL(avctx, flush);
>> }
>> @@ -1791,6 +1823,10 @@ static av_cold int vp9_decode_init(AVCodecContext *avctx)
>> s->last_bpp = 0;
>> s->s.h.filter.sharpness = -1;
>>
>> + ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_VP9, avctx);
>> + if (ret < 0)
>> + return ret;
>> +
>> #if HAVE_THREADS
>> if (avctx->active_thread_type & FF_THREAD_SLICE) {
>> ret = ff_pthread_init(s, vp9_context_offsets);
>> @@ -1814,6 +1850,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
>> av_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
>> s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
>>
>> + av_refstruct_replace(&s->header_ref, ssrc->header_ref);
>> + s->frame_header = ssrc->frame_header;
>> + memcpy(s->cbc->priv_data, ssrc->cbc->priv_data, sizeof(CodedBitstreamVP9Context));
>> +
>> s->s.h.invisible = ssrc->s.h.invisible;
>> s->s.h.keyframe = ssrc->s.h.keyframe;
>> s->s.h.intraonly = ssrc->s.h.intraonly;
>> diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h
>> index e41f47a82a..c3ad2bbcdb 100644
>> --- a/libavcodec/vp9dec.h
>> +++ b/libavcodec/vp9dec.h
>> @@ -38,6 +38,7 @@
>> #include "vp9dsp.h"
>> #include "vp9shared.h"
>> #include "vpx_rac.h"
>> +#include "cbs_vp9.h"
>>
>> #define REF_INVALID_SCALE 0xFFFF
>>
>> @@ -97,6 +98,11 @@ typedef struct VP9Context {
>> VP9SharedContext s;
>> VP9TileData *td;
>>
>> + CodedBitstreamContext *cbc;
>> + CodedBitstreamFragment current_frag;
>> + VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
>> + VP9RawFrameHeader *frame_header;
>> +
>> VP9DSPContext dsp;
>> VideoDSPContext vdsp;
>> GetBitContext gb;
>> diff --git a/libavcodec/vp9shared.h b/libavcodec/vp9shared.h
>> index 8a450c26a6..d2226e0072 100644
>> --- a/libavcodec/vp9shared.h
>> +++ b/libavcodec/vp9shared.h
>> @@ -30,6 +30,7 @@
>> #include "libavutil/mem_internal.h"
>>
>> #include "progressframe.h"
>> +#include "cbs_vp9.h"
>> #include "vp9.h"
>>
>> enum BlockPartition {
>> @@ -63,6 +64,9 @@ typedef struct VP9mvrefPair {
>> } VP9mvrefPair;
>>
>> typedef struct VP9Frame {
>> + VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
>> + VP9RawFrameHeader *frame_header;
>> +
>> ProgressFrame tf;
>> void *extradata; ///< RefStruct reference
>> uint8_t *segmentation_map;
>
> My expectation for a patch that uses CBS to parse the frame header is
> that the other header parsing code would be removed in said patch. Why
> is this not true here? What is the benefit of this patch then?
It allows us to implement the vulkan_vp9 hwaccel without needing to
implement parsing dozens of more fields.
The plan is to gradually switch the decoder to use the CBS structs,
which have different names and sometimes different definitions.
I don't mind doing this first, but it would delay the vulkan_vp9
hwaccel, which users have been waiting for quite a long time, and I
wanted to hear whether switching to CBS was acceptable at all.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (10 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header Lynne
@ 2025-07-12 18:51 ` Lynne
2025-07-13 22:10 ` [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Niklas Haas
12 siblings, 0 replies; 16+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/hwaccels.h | 1 +
libavcodec/vp9.c | 18 +-
libavcodec/vulkan_decode.c | 24 +++
libavcodec/vulkan_decode.h | 1 +
libavcodec/vulkan_vp9.c | 366 +++++++++++++++++++++++++++++++++++
libavutil/hwcontext_vulkan.c | 15 ++
libavutil/vulkan_functions.h | 3 +-
libavutil/vulkan_loader.h | 3 +
10 files changed, 432 insertions(+), 2 deletions(-)
create mode 100644 libavcodec/vulkan_vp9.c
diff --git a/configure b/configure
index 92ee54c7a6..d1c54c0b8e 100755
--- a/configure
+++ b/configure
@@ -3326,6 +3326,8 @@ vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9"
vp9_vdpau_hwaccel_select="vp9_decoder"
vp9_videotoolbox_hwaccel_deps="videotoolbox"
vp9_videotoolbox_hwaccel_select="vp9_decoder"
+vp9_vulkan_hwaccel_deps="vulkan"
+vp9_vulkan_hwaccel_select="vp9_decoder"
vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC"
vvc_vaapi_hwaccel_select="vvc_decoder"
wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 19fdaa9ad3..fcd1ae2a9e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1078,6 +1078,7 @@ OBJS-$(CONFIG_VP9_NVDEC_HWACCEL) += nvdec_vp9.o
OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o
OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
+OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan_decode.o vulkan_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index fb9b850233..4b205d386e 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -85,6 +85,7 @@ extern const struct FFHWAccel ff_vp9_nvdec_hwaccel;
extern const struct FFHWAccel ff_vp9_vaapi_hwaccel;
extern const struct FFHWAccel ff_vp9_vdpau_hwaccel;
extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel;
+extern const struct FFHWAccel ff_vp9_vulkan_hwaccel;
extern const struct FFHWAccel ff_vvc_vaapi_hwaccel;
extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel;
extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index a385956f4f..31792962b4 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -169,7 +169,8 @@ static int update_size(AVCodecContext *avctx, int w, int h)
CONFIG_VP9_NVDEC_HWACCEL + \
CONFIG_VP9_VAAPI_HWACCEL + \
CONFIG_VP9_VDPAU_HWACCEL + \
- CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
+ CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \
+ CONFIG_VP9_VULKAN_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
VP9Context *s = avctx->priv_data;
uint8_t *p;
@@ -206,6 +207,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
#endif
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
*fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+ *fmtp++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV420P12:
@@ -217,6 +221,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
*fmtp++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+ *fmtp++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV444P:
@@ -224,6 +231,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
case AV_PIX_FMT_YUV444P12:
#if CONFIG_VP9_VAAPI_HWACCEL
*fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+ *fmtp++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_GBRP:
@@ -231,6 +241,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
case AV_PIX_FMT_GBRP12:
#if CONFIG_VP9_VAAPI_HWACCEL
*fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+ *fmtp++ = AV_PIX_FMT_VULKAN;
#endif
break;
}
@@ -1919,6 +1932,9 @@ const FFCodec ff_vp9_decoder = {
#endif
#if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(vp9),
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+ HWACCEL_VULKAN(vp9),
#endif
NULL
},
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 857f16bc0a..dea25d93aa 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -34,6 +34,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
#if CONFIG_HEVC_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc;
#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc;
+#endif
#if CONFIG_AV1_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
#endif
@@ -51,6 +54,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_HEVC_VULKAN_HWACCEL
&ff_vk_dec_hevc_desc,
#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+ &ff_vk_dec_vp9_desc,
+#endif
#if CONFIG_AV1_VULKAN_HWACCEL
&ff_vk_dec_av1_desc,
#endif
@@ -78,6 +84,7 @@ static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx,
VkStructureType profile_struct_type =
codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR :
codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR :
+ codec_id == AV_CODEC_ID_VP9 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR :
codec_id == AV_CODEC_ID_AV1 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR :
VK_STRUCTURE_TYPE_MAX_ENUM;
if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM)
@@ -688,6 +695,7 @@ static VkResult vulkan_setup_profile(AVCodecContext *avctx,
const FFVulkanDecodeDescriptor *vk_desc,
VkVideoDecodeH264CapabilitiesKHR *h264_caps,
VkVideoDecodeH265CapabilitiesKHR *h265_caps,
+ VkVideoDecodeVP9CapabilitiesKHR *vp9_caps,
VkVideoDecodeAV1CapabilitiesKHR *av1_caps,
VkVideoCapabilitiesKHR *caps,
VkVideoDecodeCapabilitiesKHR *dec_caps,
@@ -699,6 +707,7 @@ static VkResult vulkan_setup_profile(AVCodecContext *avctx,
VkVideoDecodeH264ProfileInfoKHR *h264_profile = &prof->h264_profile;
VkVideoDecodeH265ProfileInfoKHR *h265_profile = &prof->h265_profile;
+ VkVideoDecodeVP9ProfileInfoKHR *vp9_profile = &prof->vp9_profile;
VkVideoDecodeAV1ProfileInfoKHR *av1_profile = &prof->av1_profile;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -724,6 +733,11 @@ static VkResult vulkan_setup_profile(AVCodecContext *avctx,
usage->pNext = h265_profile;
h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
h265_profile->stdProfileIdc = cur_profile;
+ } else if (avctx->codec_id == AV_CODEC_ID_VP9) {
+ dec_caps->pNext = vp9_caps;
+ usage->pNext = vp9_profile;
+ vp9_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR;
+ vp9_profile->stdProfile = cur_profile;
} else if (avctx->codec_id == AV_CODEC_ID_AV1) {
dec_caps->pNext = av1_caps;
usage->pNext = av1_profile;
@@ -784,6 +798,9 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
VkVideoDecodeH265CapabilitiesKHR h265_caps = {
.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
};
+ VkVideoDecodeVP9CapabilitiesKHR vp9_caps = {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR,
+ };
VkVideoDecodeAV1CapabilitiesKHR av1_caps = {
.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR,
};
@@ -804,12 +821,14 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
cur_profile = avctx->profile;
base_profile = avctx->codec_id == AV_CODEC_ID_H264 ? AV_PROFILE_H264_CONSTRAINED_BASELINE :
avctx->codec_id == AV_CODEC_ID_H265 ? AV_PROFILE_HEVC_MAIN :
+ avctx->codec_id == AV_CODEC_ID_VP9 ? STD_VIDEO_VP9_PROFILE_0 :
avctx->codec_id == AV_CODEC_ID_AV1 ? STD_VIDEO_AV1_PROFILE_MAIN :
0;
ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc,
&h264_caps,
&h265_caps,
+ &vp9_caps,
&av1_caps,
caps,
dec_caps,
@@ -826,6 +845,7 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc,
&h264_caps,
&h265_caps,
+ &vp9_caps,
&av1_caps,
caps,
dec_caps,
@@ -852,6 +872,7 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
max_level = avctx->codec_id == AV_CODEC_ID_H264 ? ff_vk_h264_level_to_av(h264_caps.maxLevelIdc) :
avctx->codec_id == AV_CODEC_ID_H265 ? ff_vk_h265_level_to_av(h265_caps.maxLevelIdc) :
+ avctx->codec_id == AV_CODEC_ID_VP9 ? vp9_caps.maxLevel :
avctx->codec_id == AV_CODEC_ID_AV1 ? av1_caps.maxLevel :
0;
@@ -1175,6 +1196,9 @@ static int create_empty_session_parameters(AVCodecContext *avctx,
.videoSession = ctx->common.session,
};
+ if (avctx->codec_id == AV_CODEC_ID_VP9)
+ return 0;
+
ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
s->hwctx->alloc, &ctx->empty_session_params);
if (ret != VK_SUCCESS) {
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index bf6506f280..bf51d5a170 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -38,6 +38,7 @@ typedef struct FFVulkanDecodeDescriptor {
typedef struct FFVulkanDecodeProfileData {
VkVideoDecodeH264ProfileInfoKHR h264_profile;
VkVideoDecodeH265ProfileInfoKHR h265_profile;
+ VkVideoDecodeVP9ProfileInfoKHR vp9_profile;
VkVideoDecodeAV1ProfileInfoKHR av1_profile;
VkVideoDecodeUsageInfoKHR usage;
VkVideoProfileInfoKHR profile;
diff --git a/libavcodec/vulkan_vp9.c b/libavcodec/vulkan_vp9.c
new file mode 100644
index 0000000000..6713ab2218
--- /dev/null
+++ b/libavcodec/vulkan_vp9.c
@@ -0,0 +1,366 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp9shared.h"
+
+#include "vulkan_decode.h"
+
+const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc = {
+ .codec_id = AV_CODEC_ID_VP9,
+ .decode_extension = FF_VK_EXT_VIDEO_DECODE_VP9,
+ .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
+ .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR,
+ .ext_props = {
+ .extensionName = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME,
+ .specVersion = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION,
+ },
+};
+
+typedef struct VP9VulkanDecodePicture {
+ FFVulkanDecodePicture vp;
+
+ /* TODO: investigate if this can be removed to make decoding completely
+ * independent. */
+ FFVulkanDecodeContext *dec;
+
+ /* Current picture */
+ StdVideoVP9ColorConfig color_config;
+ StdVideoVP9Segmentation segmentation;
+ StdVideoVP9LoopFilter loop_filter;
+ StdVideoDecodeVP9PictureInfo std_pic_info;
+ VkVideoDecodeVP9PictureInfoKHR vp9_pic_info;
+
+ const VP9Frame *ref_src[8];
+
+ uint8_t frame_id_set;
+ uint8_t frame_id;
+ uint8_t ref_frame_sign_bias_mask;
+} VP9VulkanDecodePicture;
+
+static int vk_vp9_fill_pict(AVCodecContext *avctx, const VP9Frame **ref_src,
+ VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */
+ VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */
+ const VP9Frame *pic, int is_current)
+{
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ VP9VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+ FFVulkanDecodePicture *vkpic = &hp->vp;
+
+ int err = ff_vk_decode_prepare_frame(dec, pic->tf.f, vkpic, is_current,
+ dec->dedicated_dpb);
+ if (err < 0)
+ return err;
+
+ *ref = (VkVideoPictureResourceInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+ .codedOffset = (VkOffset2D){ 0, 0 },
+ .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height },
+ .baseArrayLayer = (dec->dedicated_dpb && ctx->common.layered_dpb) ?
+ hp->frame_id : 0,
+ .imageViewBinding = vkpic->view.ref[0],
+ };
+
+ *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+ .slotIndex = hp->frame_id,
+ .pPictureResource = ref,
+ };
+
+ if (ref_src)
+ *ref_src = pic;
+
+ return 0;
+}
+
+static enum StdVideoVP9InterpolationFilter remap_interp(uint8_t is_filter_switchable,
+ uint8_t raw_interpolation_filter_type)
+{
+ static const enum StdVideoVP9InterpolationFilter remap[] = {
+ STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH,
+ STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP,
+ STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP,
+ STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR,
+ };
+ if (is_filter_switchable)
+ return STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE;
+ return remap[raw_interpolation_filter_type];
+}
+
+static int vk_vp9_start_frame(AVCodecContext *avctx,
+ av_unused const AVBufferRef *buffer_ref,
+ av_unused const uint8_t *buffer,
+ av_unused uint32_t size)
+{
+ int err;
+ int ref_count = 0;
+ const VP9SharedContext *s = avctx->priv_data;
+
+ const VP9Frame *pic = &s->frames[CUR_FRAME];
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ uint8_t profile = (pic->frame_header->profile_high_bit << 1) | pic->frame_header->profile_low_bit;
+
+ VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &ap->vp;
+
+ if (!ap->frame_id_set) {
+ unsigned slot_idx = 0;
+ for (unsigned i = 0; i < 32; i++) {
+ if (!(dec->frame_id_alloc_mask & (1 << i))) {
+ slot_idx = i;
+ break;
+ }
+ }
+ ap->frame_id = slot_idx;
+ ap->frame_id_set = 1;
+ dec->frame_id_alloc_mask |= (1 << slot_idx);
+ }
+
+ for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+ const int idx = pic->frame_header->ref_frame_idx[i];
+ const VP9Frame *ref_frame = &s->frames[idx];
+ VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private;
+ int found = 0;
+
+ if (!ref_frame->tf.f)
+ continue;
+
+ for (int j = 0; j < ref_count; j++) {
+ if (vp->ref_slots[j].slotIndex == hp->frame_id) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
+ err = vk_vp9_fill_pict(avctx, &ap->ref_src[ref_count],
+ &vp->ref_slots[ref_count], &vp->refs[ref_count],
+ ref_frame, 0);
+ if (err < 0)
+ return err;
+
+ ref_count++;
+ }
+
+ err = vk_vp9_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+ pic, 1);
+ if (err < 0)
+ return err;
+
+ ap->loop_filter = (StdVideoVP9LoopFilter) {
+ .flags = (StdVideoVP9LoopFilterFlags) {
+ .loop_filter_delta_enabled = pic->frame_header->loop_filter_delta_enabled,
+ .loop_filter_delta_update = pic->frame_header->loop_filter_delta_update,
+ },
+ .loop_filter_level = pic->frame_header->loop_filter_level,
+ .loop_filter_sharpness = pic->frame_header->loop_filter_sharpness,
+ .update_ref_delta = 0x0,
+ .update_mode_delta = 0x0,
+ };
+
+ for (int i = 0; i < 2; i++)
+ ap->loop_filter.update_mode_delta |= pic->frame_header->update_mode_delta[i];
+
+ for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) {
+ ap->loop_filter.loop_filter_ref_deltas[i] = pic->frame_header->loop_filter_ref_deltas[i];
+ ap->loop_filter.update_ref_delta |= pic->frame_header->update_ref_delta[i];
+ }
+ for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++)
+ ap->loop_filter.loop_filter_mode_deltas[i] = pic->frame_header->loop_filter_mode_deltas[i];
+
+ ap->segmentation = (StdVideoVP9Segmentation) {
+ .flags = (StdVideoVP9SegmentationFlags) {
+ .segmentation_update_map = pic->frame_header->segmentation_update_map,
+ .segmentation_temporal_update = pic->frame_header->segmentation_temporal_update,
+ .segmentation_update_data = pic->frame_header->segmentation_update_data,
+ .segmentation_abs_or_delta_update = pic->frame_header->segmentation_abs_or_delta_update,
+ },
+ };
+
+ for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++)
+ ap->segmentation.segmentation_tree_probs[i] = pic->frame_header->segmentation_tree_probs[i];
+ for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++)
+ ap->segmentation.segmentation_pred_prob[i] = pic->frame_header->segmentation_pred_prob[i];
+ for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) {
+ ap->segmentation.FeatureEnabled[i] = 0x0;
+ for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) {
+ ap->segmentation.FeatureEnabled[i] |= pic->frame_header->feature_enabled[i][j];
+ ap->segmentation.FeatureData[i][j] = pic->frame_header->feature_sign[i][j] ?
+ -pic->frame_header->feature_value[i][j] :
+ +pic->frame_header->feature_value[i][j];
+ }
+ }
+
+ ap->color_config = (StdVideoVP9ColorConfig) {
+ .flags = (StdVideoVP9ColorConfigFlags) {
+ .color_range = pic->frame_header->color_range,
+ },
+ .BitDepth = profile < 2 ? 8 :
+ pic->frame_header->ten_or_twelve_bit ? 12 : 10,
+ .subsampling_x = pic->frame_header->subsampling_x,
+ .subsampling_y = pic->frame_header->subsampling_y,
+ .color_space = pic->frame_header->color_space,
+ };
+
+ ap->std_pic_info = (StdVideoDecodeVP9PictureInfo) {
+ .flags = (StdVideoDecodeVP9PictureInfoFlags) {
+ .error_resilient_mode = pic->frame_header->error_resilient_mode,
+ .intra_only = pic->frame_header->intra_only,
+ .allow_high_precision_mv = pic->frame_header->allow_high_precision_mv,
+ .refresh_frame_context = pic->frame_header->refresh_frame_context,
+ .frame_parallel_decoding_mode = pic->frame_header->frame_parallel_decoding_mode,
+ .segmentation_enabled = pic->frame_header->segmentation_enabled,
+ .show_frame = pic->frame_header->show_frame,
+ .UsePrevFrameMvs = s->h.use_last_frame_mvs,
+ },
+ .profile = profile,
+ .frame_type = pic->frame_header->frame_type,
+ .frame_context_idx = pic->frame_header->frame_context_idx,
+ .reset_frame_context = pic->frame_header->reset_frame_context,
+ .refresh_frame_flags = pic->frame_header->refresh_frame_flags,
+ .ref_frame_sign_bias_mask = 0x0,
+ .interpolation_filter = remap_interp(pic->frame_header->is_filter_switchable,
+ pic->frame_header->raw_interpolation_filter_type),
+ .base_q_idx = pic->frame_header->base_q_idx,
+ .delta_q_y_dc = pic->frame_header->delta_q_y_dc,
+ .delta_q_uv_dc = pic->frame_header->delta_q_uv_dc,
+ .delta_q_uv_ac = pic->frame_header->delta_q_uv_ac,
+ .tile_cols_log2 = pic->frame_header->tile_cols_log2,
+ .tile_rows_log2 = pic->frame_header->tile_rows_log2,
+ /* Reserved */
+ .pColorConfig = &ap->color_config,
+ .pLoopFilter = &ap->loop_filter,
+ .pSegmentation = &ap->segmentation,
+ };
+
+ for (int i = 0; i < 3; i++)
+ ap->std_pic_info.ref_frame_sign_bias_mask |= pic->frame_header->ref_frame_sign_bias[i] << i;
+
+ ap->vp9_pic_info = (VkVideoDecodeVP9PictureInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR,
+ .pStdPictureInfo = &ap->std_pic_info,
+ .uncompressedHeaderOffset = 0,
+ .compressedHeaderOffset = s->h.uncompressed_header_size,
+ .tilesOffset = s->h.uncompressed_header_size +
+ s->h.compressed_header_size,
+ };
+
+ for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+ const int idx = pic->frame_header->ref_frame_idx[i];
+ const VP9Frame *ref_frame = &s->frames[idx];
+ VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private;
+
+ if (!ref_frame->tf.f)
+ ap->vp9_pic_info.referenceNameSlotIndices[i] = -1;
+ else
+ ap->vp9_pic_info.referenceNameSlotIndices[i] = hp->frame_id;
+ }
+
+ vp->decode_info = (VkVideoDecodeInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+ .pNext = &ap->vp9_pic_info,
+ .flags = 0x0,
+ .pSetupReferenceSlot = &vp->ref_slot,
+ .referenceSlotCount = ref_count,
+ .pReferenceSlots = vp->ref_slots,
+ .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+ .codedOffset = (VkOffset2D){ 0, 0 },
+ .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height },
+ .baseArrayLayer = 0,
+ .imageViewBinding = vp->view.out[0],
+ },
+ };
+
+ ap->dec = dec;
+
+ return 0;
+}
+
+static int vk_vp9_decode_slice(AVCodecContext *avctx,
+ const uint8_t *data,
+ uint32_t size)
+{
+ int err;
+ const VP9SharedContext *s = avctx->priv_data;
+ VP9VulkanDecodePicture *ap = s->frames[CUR_FRAME].hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &ap->vp;
+
+ err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, NULL, NULL);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int vk_vp9_end_frame(AVCodecContext *avctx)
+{
+ const VP9SharedContext *s = avctx->priv_data;
+
+ const VP9Frame *pic = &s->frames[CUR_FRAME];
+ VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &ap->vp;
+ FFVulkanDecodePicture *rvp[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 };
+ AVFrame *rav[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 };
+
+ for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+ const VP9Frame *rp = ap->ref_src[i];
+ VP9VulkanDecodePicture *rhp = rp->hwaccel_picture_private;
+
+ rvp[i] = &rhp->vp;
+ rav[i] = ap->ref_src[i]->tf.f;
+ }
+
+ av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %"SIZE_SPECIFIER" bytes\n",
+ vp->slices_size);
+
+ return ff_vk_decode_frame(avctx, pic->tf.f, vp, rav, rvp);
+}
+
+static void vk_vp9_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+ AVHWDeviceContext *hwctx = _hwctx.nc;
+ VP9VulkanDecodePicture *ap = data;
+
+ /* Workaround for a spec issue. */
+ if (ap->frame_id_set)
+ ap->dec->frame_id_alloc_mask &= ~(1 << ap->frame_id);
+
+ /* Free frame resources, this also destroys the session parameters. */
+ ff_vk_decode_free_frame(hwctx, &ap->vp);
+}
+
+const FFHWAccel ff_vp9_vulkan_hwaccel = {
+ .p.name = "vp9_vulkan",
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_VP9,
+ .p.pix_fmt = AV_PIX_FMT_VULKAN,
+ .start_frame = &vk_vp9_start_frame,
+ .decode_slice = &vk_vp9_decode_slice,
+ .end_frame = &vk_vp9_end_frame,
+ .free_frame_priv = &vk_vp9_free_frame_priv,
+ .frame_priv_data_size = sizeof(VP9VulkanDecodePicture),
+ .init = &ff_vk_decode_init,
+ .update_thread_context = &ff_vk_update_thread_context,
+ .flush = &ff_vk_decode_flush,
+ .uninit = &ff_vk_decode_uninit,
+ .frame_params = &ff_vk_frame_params,
+ .priv_data_size = sizeof(FFVulkanDecodeContext),
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
+};
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index e818099fdb..2c1c38ba66 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -90,6 +90,9 @@ typedef struct VulkanDeviceFeatures {
#ifdef VK_KHR_video_maintenance2
VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2;
#endif
+#ifdef VK_KHR_video_decode_vp9
+ VkPhysicalDeviceVideoDecodeVP9FeaturesKHR vp9_decode;
+#endif
VkPhysicalDeviceShaderObjectFeaturesEXT shader_object;
VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix;
@@ -227,6 +230,10 @@ static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *f
FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_2, FF_VK_EXT_VIDEO_MAINTENANCE_2,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR);
#endif
+#ifdef VK_KHR_video_decode_vp9
+ FF_VK_STRUCT_EXT(s, &feats->device, &feats->vp9_decode, FF_VK_EXT_VIDEO_DECODE_VP9,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR);
+#endif
FF_VK_STRUCT_EXT(s, &feats->device, &feats->shader_object, FF_VK_EXT_SHADER_OBJECT,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
@@ -299,6 +306,10 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
COPY_VAL(video_maintenance_2.videoMaintenance2);
#endif
+#ifdef VK_KHR_video_decode_vp9
+ COPY_VAL(vp9_decode.videoDecodeVP9);
+#endif
+
COPY_VAL(shader_object.shaderObject);
COPY_VAL(cooperative_matrix.cooperativeMatrix);
@@ -644,6 +655,9 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
{ VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 },
{ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
+#ifdef VK_KHR_video_decode_vp9
+ { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_VP9 },
+#endif
{ VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 },
};
@@ -1548,6 +1562,7 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
+ PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR);
PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
av_free(qf);
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 68fa7b802d..9fd646fa4e 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -59,7 +59,8 @@ typedef uint64_t FFVulkanExtensions;
#define FF_VK_EXT_VIDEO_DECODE_QUEUE (1ULL << 40) /* VK_KHR_video_decode_queue */
#define FF_VK_EXT_VIDEO_DECODE_H264 (1ULL << 41) /* VK_KHR_video_decode_h264 */
#define FF_VK_EXT_VIDEO_DECODE_H265 (1ULL << 42) /* VK_KHR_video_decode_h265 */
-#define FF_VK_EXT_VIDEO_DECODE_AV1 (1ULL << 43) /* VK_KHR_video_decode_av1 */
+#define FF_VK_EXT_VIDEO_DECODE_VP9 (1ULL << 43) /* VK_KHR_video_decode_vp9 */
+#define FF_VK_EXT_VIDEO_DECODE_AV1 (1ULL << 44) /* VK_KHR_video_decode_av1 */
#define FF_VK_EXT_VIDEO_ENCODE_QUEUE (1ULL << 50) /* VK_KHR_video_encode_queue */
#define FF_VK_EXT_VIDEO_ENCODE_H264 (1ULL << 51) /* VK_KHR_video_encode_h264 */
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 7e805fdd4c..37a3731feb 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -76,6 +76,9 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
{ VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 },
{ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
+#ifdef VK_KHR_video_decode_vp9
+ { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_VP9 },
+#endif
{ VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 },
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
#ifdef VK_KHR_shader_expect_assume
--
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (11 preceding siblings ...)
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel Lynne
@ 2025-07-13 22:10 ` Niklas Haas
12 siblings, 0 replies; 16+ messages in thread
From: Niklas Haas @ 2025-07-13 22:10 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
On Sun, 13 Jul 2025 03:51:10 +0900 Lynne <dev@lynne.ee> wrote:
> This makes it possible to apply Adobe .cube files to inputs.
> ---
> doc/filters.texi | 30 ++++++++++++++++++++++++++++++
> libavfilter/vf_libplacebo.c | 36 ++++++++++++++++++++++++++++++++++++
> 2 files changed, 66 insertions(+)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index ed2956fe75..13add0ff01 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -16321,6 +16321,36 @@ Render frames with rounded corners. The value, given as a float ranging from
> square to fully circular. In other words, it gives the radius divided by half
> the smaller side length. Defaults to @code{0.0}.
>
> +@item lut
> +Specifies a custom LUT (in Adobe .cube format) to apply to the colors
> +as part of color conversion. The exact interpretation depends on the value
> +of @option{lut_type}.
> +
> +@item lut_type
> +Controls the interpretation of color values fed to and from the LUT
> +specified as @option{lut}. Valid values are:
> +
> +@table @samp
> +@item auto
> +Chooses the interpretation of the LUT automatically from tagged
> +metadata, and otherwise falls back to @samp{native}. (Default)
> +
> +@item native
> +Applied to raw image contents in its native RGB colorspace (non-linear
> +light), before conversion to the output color space.
> +
> +@item normalized
> +Applied to the normalized RGB image contents, in linear light, before
> +conversion to the output color space.
> +
> +@item conversion
> +Fully replaces the conversion from the image color space to the output
> +color space. If such a LUT is present, it has the highest priority, and
> +overrides any ICC profiles, as well as options related to tone mapping
> +and output colorimetry (@option{color_primaries}, @option{color_trc}).
> +
> +@end table
> +
> @item extra_opts
> Pass extra libplacebo internal configuration options. These can be specified
> as a list of @var{key}=@var{value} pairs separated by ':'. The following example
> diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c
> index 475030c80d..cbdbe3e665 100644
> --- a/libavfilter/vf_libplacebo.c
> +++ b/libavfilter/vf_libplacebo.c
> @@ -159,6 +159,7 @@ typedef struct LibplaceboContext {
> pl_vulkan vulkan;
> pl_gpu gpu;
> pl_tex tex[4];
> + struct pl_custom_lut *lut;
>
> /* input state */
> LibplaceboInput *inputs;
> @@ -184,6 +185,8 @@ typedef struct LibplaceboContext {
> AVExpr *pos_x_pexpr, *pos_y_pexpr, *pos_w_pexpr, *pos_h_pexpr;
> float pad_crop_ratio;
> float corner_rounding;
> + char *lut_filename;
> + enum pl_lut_type lut_type;
> int force_original_aspect_ratio;
> int force_divisible_by;
> int reset_sar;
> @@ -371,6 +374,26 @@ static int find_scaler(AVFilterContext *avctx,
> return AVERROR(EINVAL);
> }
>
> +static int parse_custom_lut(LibplaceboContext *s)
> +{
> + int ret;
> + uint8_t *lutbuf;
> + size_t lutbuf_size;
> +
> + if ((ret = av_file_map(s->lut_filename, &lutbuf, &lutbuf_size, 0, s)) < 0) {
> + av_log(s, AV_LOG_ERROR,
> + "The LUT file '%s' could not be read: %s\n",
> + s->lut_filename, av_err2str(ret));
> + return ret;
> + }
> +
> + s->lut = pl_lut_parse_cube(s->log, lutbuf, lutbuf_size);
> + av_file_unmap(lutbuf, lutbuf_size);
> + if (!s->lut)
> + return AVERROR(EINVAL);
> + return 0;
> +}
> +
> static int update_settings(AVFilterContext *ctx)
> {
> int err = 0;
> @@ -729,6 +752,9 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct
> RET(parse_shader(avctx, buf, buf_len));
> }
>
> + if (s->lut_filename)
> + RET(parse_custom_lut(s));
> +
> /* Initialize inputs */
> s->inputs = av_calloc(s->nb_inputs, sizeof(*s->inputs));
> if (!s->inputs)
> @@ -757,6 +783,7 @@ static void libplacebo_uninit(AVFilterContext *avctx)
> av_freep(&s->inputs);
> }
>
> + pl_lut_free(&s->lut);
> #if PL_API_VER >= 351
> pl_cache_destroy(&s->cache);
> #endif
> @@ -1005,6 +1032,8 @@ static bool map_frame(pl_gpu gpu, pl_tex *tex,
> .tex = tex,
> .map_dovi = s->apply_dovi,
> ));
> + out->lut = s->lut;
> + out->lut_type = s->lut_type;
>
> if (!s->apply_filmgrain)
> out->film_grain.type = PL_FILM_GRAIN_NONE;
> @@ -1406,6 +1435,13 @@ static const AVOption libplacebo_options[] = {
> { "pad_crop_ratio", "ratio between padding and cropping when normalizing SAR (0=pad, 1=crop)", OFFSET(pad_crop_ratio), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, 1.0, DYNAMIC },
> { "fillcolor", "Background fill color", OFFSET(fillcolor), AV_OPT_TYPE_COLOR, {.str = "black@0"}, .flags = DYNAMIC },
> { "corner_rounding", "Corner rounding radius", OFFSET(corner_rounding), AV_OPT_TYPE_FLOAT, {.dbl = 0.0}, 0.0, 1.0, .flags = DYNAMIC },
> + { "lut", "Path to custom LUT file to apply", OFFSET(lut_filename), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = STATIC },
> + { "lut_type", "Application mode of the custom LUT", OFFSET(lut_type), AV_OPT_TYPE_INT, { .i64 = PL_LUT_UNKNOWN }, 0, PL_LUT_CONVERSION, STATIC, .unit = "lut_type" },
> + { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_UNKNOWN }, 0, 0, STATIC, .unit = "lut_type" },
> + { "native", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_NATIVE }, 0, 0, STATIC, .unit = "lut_type" },
> + { "normalized", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_NORMALIZED }, 0, 0, STATIC, .unit = "lut_type" },
> + { "conversion", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_CONVERSION }, 0, 0, STATIC, .unit = "lut_type" },
> +
> { "extra_opts", "Pass extra libplacebo-specific options using a :-separated list of key=value pairs", OFFSET(extra_opts), AV_OPT_TYPE_DICT, .flags = DYNAMIC },
> #if PL_API_VER >= 351
> { "shader_cache", "Set shader cache path", OFFSET(shader_cache), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = STATIC },
> --
> 2.50.0
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Commit LGTM.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 16+ messages in thread