* [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input
@ 2025-07-10 15:13 Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 2/7] hwcontext_vulkan: temporarily disable host_image_copy Lynne
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This makes it possible to apply Adobe .cube files to inputs.
---
libavfilter/vf_libplacebo.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c
index 475030c80d..d74a6e1bf7 100644
--- a/libavfilter/vf_libplacebo.c
+++ b/libavfilter/vf_libplacebo.c
@@ -159,6 +159,7 @@ typedef struct LibplaceboContext {
pl_vulkan vulkan;
pl_gpu gpu;
pl_tex tex[4];
+ struct pl_custom_lut *lut;
/* input state */
LibplaceboInput *inputs;
@@ -184,6 +185,7 @@ typedef struct LibplaceboContext {
AVExpr *pos_x_pexpr, *pos_y_pexpr, *pos_w_pexpr, *pos_h_pexpr;
float pad_crop_ratio;
float corner_rounding;
+ char *lut_filename;
int force_original_aspect_ratio;
int force_divisible_by;
int reset_sar;
@@ -371,6 +373,28 @@ static int find_scaler(AVFilterContext *avctx,
return AVERROR(EINVAL);
}
+/**
+ * Reload the input LUT selected through the "lut" option.
+ *
+ * Any previously parsed LUT is released first. When no file name is set the
+ * filter runs without a LUT.
+ *
+ * @param s filter context; reads s->lut_filename and s->log, writes s->lut
+ * @return 0 on success (including "no LUT requested"), a negative AVERROR
+ *         code if the file cannot be mapped or cannot be parsed
+ */
+static int update_lut(LibplaceboContext *s)
+{
+    int ret;
+    uint8_t *lutbuf;
+    size_t lutbuf_size;
+
+    pl_lut_free(&s->lut);
+
+    /* The option defaults to NULL; not asking for a LUT is not an error. */
+    if (!s->lut_filename)
+        return 0;
+
+    ret = av_file_map(s->lut_filename, &lutbuf, &lutbuf_size, 0, s);
+    if (ret < 0) {
+        av_log(s, AV_LOG_ERROR,
+               "The LUT file '%s' could not be read: %s\n",
+               s->lut_filename, av_err2str(ret));
+        return ret;
+    }
+
+    s->lut = pl_lut_parse_cube(s->log, (const char *)lutbuf, lutbuf_size);
+
+    av_file_unmap(lutbuf, lutbuf_size);
+
+    /* Report a file that was read but rejected by the parser instead of
+     * silently rendering without the requested LUT. */
+    if (!s->lut) {
+        av_log(s, AV_LOG_ERROR, "The LUT file '%s' could not be parsed\n",
+               s->lut_filename);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
static int update_settings(AVFilterContext *ctx)
{
int err = 0;
@@ -468,6 +492,7 @@ static int update_settings(AVFilterContext *ctx)
RET(find_scaler(ctx, &opts->params.upscaler, s->upscaler, 0));
RET(find_scaler(ctx, &opts->params.downscaler, s->downscaler, 0));
RET(find_scaler(ctx, &opts->params.frame_mixer, s->frame_mixer, 1));
+ RET(update_lut(s));
#if PL_API_VER >= 309
while ((e = av_dict_get(s->extra_opts, "", e, AV_DICT_IGNORE_SUFFIX))) {
@@ -757,6 +782,7 @@ static void libplacebo_uninit(AVFilterContext *avctx)
av_freep(&s->inputs);
}
+ pl_lut_free(&s->lut);
#if PL_API_VER >= 351
pl_cache_destroy(&s->cache);
#endif
@@ -1005,6 +1031,7 @@ static bool map_frame(pl_gpu gpu, pl_tex *tex,
.tex = tex,
.map_dovi = s->apply_dovi,
));
+ out->lut = s->lut;
if (!s->apply_filmgrain)
out->film_grain.type = PL_FILM_GRAIN_NONE;
@@ -1406,6 +1433,7 @@ static const AVOption libplacebo_options[] = {
{ "pad_crop_ratio", "ratio between padding and cropping when normalizing SAR (0=pad, 1=crop)", OFFSET(pad_crop_ratio), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, 1.0, DYNAMIC },
{ "fillcolor", "Background fill color", OFFSET(fillcolor), AV_OPT_TYPE_COLOR, {.str = "black@0"}, .flags = DYNAMIC },
{ "corner_rounding", "Corner rounding radius", OFFSET(corner_rounding), AV_OPT_TYPE_FLOAT, {.dbl = 0.0}, 0.0, 1.0, .flags = DYNAMIC },
+ { "lut", "Apply a look-up table", OFFSET(lut_filename), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = DYNAMIC },
{ "extra_opts", "Pass extra libplacebo-specific options using a :-separated list of key=value pairs", OFFSET(extra_opts), AV_OPT_TYPE_DICT, .flags = DYNAMIC },
#if PL_API_VER >= 351
{ "shader_cache", "Set shader cache path", OFFSET(shader_cache), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = STATIC },
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 2/7] hwcontext_vulkan: temporarily disable host_image_copy
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
@ 2025-07-10 15:13 ` Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 3/7] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
NVIDIA's support for it is a disaster.
Of no benefit to other vendors.
NVIDIA are working on fixing it, but it may take time.
---
libavutil/hwcontext_vulkan.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 894bc3dae1..5a9b995eb9 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -613,7 +613,6 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
{ VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
{ VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE },
- { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME, FF_VK_EXT_HOST_IMAGE_COPY },
#ifdef VK_KHR_shader_expect_assume
{ VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME },
#endif
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 3/7] vulkan: add support for 16-bit RGGB Bayer pixfmt
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 2/7] hwcontext_vulkan: temporarily disable host_image_copy Lynne
@ 2025-07-10 15:13 ` Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 4/7] lavc/vulkan/common: sign-ify lengths Lynne
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/hwcontext_vulkan.c | 3 +++
libavutil/vulkan.c | 5 +++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5a9b995eb9..4551867b70 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -375,6 +375,9 @@ static const struct FFVkFormatEntry {
{ VK_FORMAT_R32_UINT, AV_PIX_FMT_GBRAP32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } },
{ VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ /* Bayer */
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_BAYER_RGGB16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
+
/* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
{ VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
{ VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f36f2bb7cf..5cfb634a62 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1542,7 +1542,7 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 ||
pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 ||
pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96 ||
- pix_fmt == AV_PIX_FMT_GBRP)
+ pix_fmt == AV_PIX_FMT_GBRP || pix_fmt == AV_PIX_FMT_BAYER_RGGB16)
return 1;
return 0;
}
@@ -1699,7 +1699,8 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
case AV_PIX_FMT_YUVA422P16:
case AV_PIX_FMT_YUVA444P10:
case AV_PIX_FMT_YUVA444P12:
- case AV_PIX_FMT_YUVA444P16: {
+ case AV_PIX_FMT_YUVA444P16:
+ case AV_PIX_FMT_BAYER_RGGB16: {
const char *rep_tab[] = {
[FF_VK_REP_NATIVE] = "r16ui",
[FF_VK_REP_FLOAT] = "r16f",
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 4/7] lavc/vulkan/common: sign-ify lengths
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 2/7] hwcontext_vulkan: temporarily disable host_image_copy Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 3/7] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
@ 2025-07-10 15:13 ` Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 5/7] lavc: add codec ID and profiles for ProRes RAW Lynne
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This makes left_bits return useful data rather than overflowing, and
also saves some 64-bit integer operations, which is still always a plus sadly.
---
libavcodec/vulkan/common.comp | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index 10af9c0623..6825693fa3 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -193,8 +193,8 @@ struct GetBitContext {
uint64_t buf_end;
uint64_t bits;
- uint bits_valid;
- uint size_in_bits;
+ int bits_valid;
+ int size_in_bits;
};
#define LOAD64() \
@@ -216,11 +216,11 @@ struct GetBitContext {
gb.bits_valid += 32; \
}
-void init_get_bits(inout GetBitContext gb, u8buf data, uint64_t len)
+void init_get_bits(inout GetBitContext gb, u8buf data, int len)
{
gb.buf = gb.buf_start = uint64_t(data);
gb.buf_end = uint64_t(data) + len;
- gb.size_in_bits = uint(len) * 8;
+ gb.size_in_bits = len * 8;
/* Preload */
LOAD64()
@@ -237,7 +237,7 @@ bool get_bit(inout GetBitContext gb)
return val;
}
-uint get_bits(inout GetBitContext gb, uint n)
+uint get_bits(inout GetBitContext gb, int n)
{
if (n == 0)
return 0;
@@ -251,7 +251,7 @@ uint get_bits(inout GetBitContext gb, uint n)
return val;
}
-uint show_bits(inout GetBitContext gb, uint n)
+uint show_bits(inout GetBitContext gb, int n)
{
if (n > gb.bits_valid)
RELOAD32()
@@ -259,7 +259,7 @@ uint show_bits(inout GetBitContext gb, uint n)
return uint(gb.bits >> (64 - n));
}
-void skip_bits(inout GetBitContext gb, uint n)
+void skip_bits(inout GetBitContext gb, int n)
{
if (n > gb.bits_valid)
RELOAD32()
@@ -268,12 +268,12 @@ void skip_bits(inout GetBitContext gb, uint n)
gb.bits_valid -= n;
}
-uint tell_bits(in GetBitContext gb)
+int tell_bits(in GetBitContext gb)
{
- return uint(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
+ return int(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
}
-uint left_bits(in GetBitContext gb)
+int left_bits(in GetBitContext gb)
{
- return gb.size_in_bits - uint(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
+ return gb.size_in_bits - int(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
}
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 5/7] lavc: add codec ID and profiles for ProRes RAW
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (2 preceding siblings ...)
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 4/7] lavc/vulkan/common: sign-ify lengths Lynne
@ 2025-07-10 15:13 ` Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 6/7] lavc: add a ProRes RAW decoder Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 7/7] lavc: add a ProRes RAW Vulkan hwaccel Lynne
5 siblings, 0 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/codec_desc.c | 8 ++++++++
libavcodec/codec_id.h | 1 +
libavcodec/defs.h | 3 +++
libavcodec/profiles.c | 6 ++++++
libavcodec/profiles.h | 1 +
libavformat/isom_tags.c | 4 ++++
6 files changed, 23 insertions(+)
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index dae2296689..36cbaf288e 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1992,6 +1992,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("Advanced Professional Video"),
.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
},
+ {
+ .id = AV_CODEC_ID_PRORES_RAW,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "prores_raw",
+ .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes RAW"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_prores_raw_profiles),
+ },
/* various PCM "codecs" */
{
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index d00d3fe121..adf263f6b0 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -330,6 +330,7 @@ enum AVCodecID {
AV_CODEC_ID_RV60,
AV_CODEC_ID_JPEGXL_ANIM,
AV_CODEC_ID_APV,
+ AV_CODEC_ID_PRORES_RAW,
/* various PCM "codecs" */
AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs
diff --git a/libavcodec/defs.h b/libavcodec/defs.h
index 8ce5d424c9..b13e983b13 100644
--- a/libavcodec/defs.h
+++ b/libavcodec/defs.h
@@ -185,6 +185,9 @@
#define AV_PROFILE_PRORES_4444 4
#define AV_PROFILE_PRORES_XQ 5
+#define AV_PROFILE_PRORES_RAW 0
+#define AV_PROFILE_PRORES_RAW_HQ 1
+
#define AV_PROFILE_ARIB_PROFILE_A 0
#define AV_PROFILE_ARIB_PROFILE_C 1
diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
index 991f24135d..2cf733b0a2 100644
--- a/libavcodec/profiles.c
+++ b/libavcodec/profiles.c
@@ -182,6 +182,12 @@ const AVProfile ff_prores_profiles[] = {
{ AV_PROFILE_UNKNOWN }
};
+/* Profile names for ProRes RAW; the list ends with the AV_PROFILE_UNKNOWN entry. */
+const AVProfile ff_prores_raw_profiles[] = {
+ { AV_PROFILE_PRORES_RAW, "RAW" },
+ { AV_PROFILE_PRORES_RAW_HQ, "HQ" },
+ { AV_PROFILE_UNKNOWN }
+};
+
const AVProfile ff_mjpeg_profiles[] = {
{ AV_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT, "Baseline" },
{ AV_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT, "Sequential" },
diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
index 4892388149..6f4011ff0c 100644
--- a/libavcodec/profiles.h
+++ b/libavcodec/profiles.h
@@ -74,6 +74,7 @@ extern const AVProfile ff_vp9_profiles[];
extern const AVProfile ff_av1_profiles[];
extern const AVProfile ff_sbc_profiles[];
extern const AVProfile ff_prores_profiles[];
+extern const AVProfile ff_prores_raw_profiles[];
extern const AVProfile ff_mjpeg_profiles[];
extern const AVProfile ff_arib_caption_profiles[];
extern const AVProfile ff_evc_profiles[];
diff --git a/libavformat/isom_tags.c b/libavformat/isom_tags.c
index 69174b4a3f..151c42e9e6 100644
--- a/libavformat/isom_tags.c
+++ b/libavformat/isom_tags.c
@@ -240,6 +240,10 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
{ AV_CODEC_ID_PRORES, MKTAG('a', 'p', 'c', 'o') }, /* Apple ProRes 422 Proxy */
{ AV_CODEC_ID_PRORES, MKTAG('a', 'p', '4', 'h') }, /* Apple ProRes 4444 */
{ AV_CODEC_ID_PRORES, MKTAG('a', 'p', '4', 'x') }, /* Apple ProRes 4444 XQ */
+
+ { AV_CODEC_ID_PRORES_RAW, MKTAG('a', 'p', 'r', 'n') }, /* Apple ProRes RAW */
+ { AV_CODEC_ID_PRORES_RAW, MKTAG('a', 'p', 'r', 'h') }, /* Apple ProRes RAW HQ */
+
{ AV_CODEC_ID_FLIC, MKTAG('f', 'l', 'i', 'c') },
{ AV_CODEC_ID_AIC, MKTAG('i', 'c', 'o', 'd') },
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 6/7] lavc: add a ProRes RAW decoder
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (3 preceding siblings ...)
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 5/7] lavc: add codec ID and profiles for ProRes RAW Lynne
@ 2025-07-10 15:13 ` Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 7/7] lavc: add a ProRes RAW Vulkan hwaccel Lynne
5 siblings, 0 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
configure | 1 +
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/proresdec_raw.c | 519 +++++++++++++++++++++++++++++++++++++
libavcodec/proresdec_raw.h | 60 +++++
5 files changed, 582 insertions(+)
create mode 100644 libavcodec/proresdec_raw.c
create mode 100644 libavcodec/proresdec_raw.h
diff --git a/configure b/configure
index 2ccafe7c20..e0503337cd 100755
--- a/configure
+++ b/configure
@@ -3087,6 +3087,7 @@ prores_decoder_select="blockdsp idctdsp"
prores_encoder_select="fdctdsp"
prores_aw_encoder_select="fdctdsp"
prores_ks_encoder_select="fdctdsp"
+prores_raw_decoder_select="blockdsp idctdsp"
qcelp_decoder_select="lsp"
qdm2_decoder_select="mpegaudiodsp"
ra_144_decoder_select="audiodsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 215577f7c9..47d16f3312 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -629,6 +629,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o proresdsp.o proresdata.o
OBJS-$(CONFIG_PRORES_ENCODER) += proresenc_anatoliy.o proresdata.o
OBJS-$(CONFIG_PRORES_AW_ENCODER) += proresenc_anatoliy.o proresdata.o
OBJS-$(CONFIG_PRORES_KS_ENCODER) += proresenc_kostya.o proresdata.o
+OBJS-$(CONFIG_PRORES_RAW_DECODER) += proresdec_raw.o
OBJS-$(CONFIG_PRORES_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o
OBJS-$(CONFIG_PROSUMER_DECODER) += prosumer.o
OBJS-$(CONFIG_PSD_DECODER) += psd.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 7b01453ca2..dcf399e810 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -269,6 +269,7 @@ extern const FFCodec ff_prores_encoder;
extern const FFCodec ff_prores_decoder;
extern const FFCodec ff_prores_aw_encoder;
extern const FFCodec ff_prores_ks_encoder;
+extern const FFCodec ff_prores_raw_decoder;
extern const FFCodec ff_prosumer_decoder;
extern const FFCodec ff_psd_decoder;
extern const FFCodec ff_ptx_decoder;
diff --git a/libavcodec/proresdec_raw.c b/libavcodec/proresdec_raw.c
new file mode 100644
index 0000000000..caebed9e96
--- /dev/null
+++ b/libavcodec/proresdec_raw.c
@@ -0,0 +1,519 @@
+/*
+ * ProRes RAW decoder
+ * Copyright (c) 2023-2025 Paul B Mahol
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/mem.h"
+
+#define CACHED_BITSTREAM_READER !ARCH_X86_32
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "get_bits.h"
+#include "idctdsp.h"
+#include "proresdata.h"
+#include "thread.h"
+
+#include "proresdec_raw.h"
+
+/**
+ * One-time decoder setup: sets the reported sample depth and transfer
+ * characteristic, initialises the DSP helpers and builds the coefficient
+ * scan table for the IDCT permutation in use.
+ */
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    uint8_t perm[64];
+    ProResRAWContext *const ctx = avctx->priv_data;
+
+    /* No format negotiated yet; decode_frame() compares against this. */
+    ctx->pix_fmt = AV_PIX_FMT_NONE;
+
+    avctx->color_trc           = AVCOL_TRC_LINEAR;
+    avctx->bits_per_raw_sample = 12;
+
+    ff_blockdsp_init(&ctx->bdsp);
+    ff_proresdsp_init(&ctx->prodsp, avctx->bits_per_raw_sample);
+
+    /* Permute the scan order to match the IDCT implementation. */
+    ff_init_scantable_permutation(perm, ctx->prodsp.idct_permutation_type);
+    ff_permute_scantable(ctx->scan, ff_prores_interlaced_scan, perm);
+
+    return 0;
+}
+
+/**
+ * Read one variable-length code from gb.
+ *
+ * The codebook descriptor packs three fields: bits 0-3 are the Rice order,
+ * bits 4-7 the exponent order and the bits from 8 upwards the "switch"
+ * threshold on the number of leading zero bits.
+ *
+ * @return the decoded value, truncated to int16_t; 0 (without consuming any
+ *         bits) when the next 32 bits are all zero
+ */
+static int16_t get_value(GetBitContext *gb, int16_t codebook)
+{
+ const int16_t switch_bits = codebook >> 8;
+ const int16_t rice_order = codebook & 0xf;
+ const int16_t exp_order = (codebook >> 4) & 0xf;
+ int16_t q, bits;
+
+ /* Peek at the next 32 bits; nothing is consumed until a branch below. */
+ uint32_t b = show_bits_long(gb, 32);
+ if (!b)
+ return 0;
+ /* q = number of leading zero bits in the peeked word */
+ q = ff_clz(b);
+
+ /* Leading bit set: the value is the rice_order bits that follow it. */
+ if (b & 0x80000000) {
+ skip_bits_long(gb, 1 + rice_order);
+ return (b & 0x7FFFFFFF) >> (31 - rice_order);
+ }
+
+ /* Short zero run: q zeros, a one bit, then rice_order remainder bits. */
+ if (q <= switch_bits) {
+ skip_bits_long(gb, 1 + rice_order + q);
+ return (q << rice_order) +
+ (((b << (q + 1)) >> 1) >> (31 - rice_order));
+ }
+
+ /* Long zero run: the code length grows with q; the top 'bits' bits are
+ * taken as a whole and rebased past the range of the Rice codes.
+ * NOTE(review): 'bits' is not range-checked here, a value above 32 would
+ * make the shift below undefined - confirm the codebooks rule that out. */
+ bits = exp_order + (q << 1) - switch_bits;
+ skip_bits_long(gb, bits);
+ return (b >> (32 - bits)) +
+ ((switch_bits + 1) << rice_order) -
+ (1 << exp_order);
+}
+
+/* Maps a DC code to its codebook index. '+' binds tighter than '>>', so this
+ * evaluates as (((x) & 1) + (x)) >> 1, i.e. x / 2 rounded up for x >= 0. */
+#define TODCCODEBOOK(x) (((x) & 1) + (x) >> 1)
+
+/* Entry i is the number of set bits in the 4-bit value i; decode_frame()
+ * uses it when deriving the number of tiles per row. */
+static const uint8_t align_tile_w[16] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+};
+
+/* Codebook descriptors (in the format taken by get_value()) for the next DC
+ * code, indexed by TODCCODEBOOK() of the previous one, clamped to the last entry. */
+const int16_t ff_prores_raw_dc_cb[13] = {
+ 16, 33, 50, 51, 51, 51, 68, 68, 68, 68, 68, 68, 118,
+};
+
+/* Codebook descriptors for the next AC level, indexed by the previous AC
+ * code, clamped to the last entry. */
+const int16_t ff_prores_raw_ac_cb[95] = {
+ 0, 529, 273, 273, 546, 546, 546, 290, 290, 290, 563, 563,
+ 563, 563, 563, 563, 563, 563, 307, 307, 580, 580, 580, 580,
+ 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580,
+ 580, 580, 580, 580, 580, 580, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+ 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 358
+};
+
+/* Codebook descriptors for the next zero-run code, indexed by the previous
+ * run code, clamped to the last entry. */
+const int16_t ff_prores_raw_rn_cb[28] = {
+ 512, 256, 0, 0, 529, 529, 273, 273, 17, 17, 33, 33, 546,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 50, 50, 68,
+};
+
+/* Codebook descriptors for the next literal-run length, indexed by the last
+ * AC code read after a zero run, clamped to the last entry. */
+const int16_t ff_prores_raw_ln_cb[15] = {
+ 256, 273, 546, 546, 290, 290, 1075, 1075, 563, 563, 563, 563, 563, 563, 51
+};
+
+/**
+ * Decode one component of a tile and write it into the frame.
+ *
+ * Samples of a component are written to every second column of every second
+ * line: component & 1 selects the odd column, component > 1 the odd line.
+ *
+ * @param data,size coded bytes of this component
+ * @param component position selector, 0..3 as passed by decode_tile()
+ * @param qmat      quantisation matrix handed to the IDCT
+ * @return 0, or a negative AVERROR code if the bit reader cannot be set up
+ */
+static int decode_comp(AVCodecContext *avctx, TileContext *tile,
+ AVFrame *frame, const uint8_t *data, int size,
+ int component, int16_t *qmat)
+{
+ int ret;
+ ProResRAWContext *s = avctx->priv_data;
+ /* line stride in 16-bit samples rather than bytes */
+ const ptrdiff_t linesize = frame->linesize[0] >> 1;
+ uint16_t *dst = (uint16_t *)(frame->data[0] + tile->y*frame->linesize[0] + 2*tile->x);
+
+ int idx;
+ /* Samples per line of this component: half the tile width, clipped to the picture. */
+ const int w = FFMIN(s->tw, avctx->width - tile->x) / 2;
+ const int nb_blocks = w / 8;
+ /* NOTE(review): the two values below are only exact when nb_blocks is a
+ * non-zero power of two; confirm that clipped tiles at the right picture
+ * edge cannot produce other counts. */
+ const int log2_nb_blocks = 31 - ff_clz(nb_blocks);
+ const int block_mask = (1 << log2_nb_blocks) - 1;
+ const int nb_codes = 64 * nb_blocks;
+
+ /* Coefficient storage for up to 16 blocks of 64 coefficients. */
+ LOCAL_ALIGNED_32(int16_t, block, [64*16]);
+ LOCAL_ALIGNED_32(int16_t, out, [64]);
+
+ int16_t dc;
+ int16_t prev_dc = 0;
+ int16_t sign = 0;
+ int16_t dc_add = 0;
+ int16_t dc_codebook;
+
+ /* Initial codebook descriptors for the AC pass. */
+ int16_t ac, rn, ln;
+ int16_t ac_codebook = 49;
+ int16_t rn_codebook = 0;
+ int16_t ln_codebook = 66;
+
+ const uint8_t *scan = s->scan;
+ GetBitContext gb;
+
+ /* Move to this component's position inside the 2x2 pattern. */
+ if (component > 1)
+ dst += linesize;
+ dst += component & 1;
+
+ if ((ret = init_get_bits8(&gb, data, size)) < 0)
+ return ret;
+
+ for (int n = 0; n < nb_blocks; n++)
+ s->bdsp.clear_block(block + n*64);
+
+ /* Special handling for first block */
+ dc = get_value(&gb, 700);
+ prev_dc = ((dc & 1) + (dc >> 1) ^ -(int)(dc & 1)) + (dc & 1);
+ block[0] = prev_dc;
+
+ /* Remaining DC coefficients are coded as differences to the previous block. */
+ for (int n = 1; n < nb_blocks; n++) {
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ /* A fixed codebook is used at n = 1, 17, ...; otherwise it adapts to the last code. */
+ if ((n & 15) == 1)
+ dc_codebook = 100;
+ else
+ dc_codebook = ff_prores_raw_dc_cb[FFMIN(TODCCODEBOOK(dc),
+ FF_ARRAY_ELEMS(ff_prores_raw_dc_cb) - 1)];
+
+ dc = get_value(&gb, dc_codebook);
+
+ sign = sign ^ dc & 1;
+ dc_add = (-sign ^ TODCCODEBOOK(dc)) + sign;
+ sign = dc_add < 0;
+ prev_dc += dc_add;
+
+ block[n*64] = prev_dc;
+ }
+
+ /* AC pass. n counts coefficients interleaved across the blocks: the low
+ * log2_nb_blocks bits select the block, the remaining bits the scan
+ * position. Each round reads a run of ln coded levels, then a run of rn
+ * zeros followed by one more coded level. */
+ for (int n = nb_blocks; n <= nb_codes;) {
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ ln = get_value(&gb, ln_codebook);
+
+ for (int i = 0; i < ln; i++) {
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ if ((n + i) >= nb_codes)
+ break;
+
+ ac = get_value(&gb, ac_codebook);
+ ac_codebook = ff_prores_raw_ac_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ac_cb) - 1)];
+ /* sign becomes 0 or -1; (v ^ sign) - sign negates v when it is -1 */
+ sign = -get_bits1(&gb);
+
+ idx = scan[(n + i) >> log2_nb_blocks] + (((n + i) & block_mask) << 6);
+ block[idx] = ((ac + 1) ^ sign) - sign;
+ }
+
+ n += ln;
+ if (n >= nb_codes)
+ break;
+
+ rn = get_value(&gb, rn_codebook);
+ rn_codebook = ff_prores_raw_rn_cb[FFMIN(rn, FF_ARRAY_ELEMS(ff_prores_raw_rn_cb) - 1)];
+
+ /* Skip rn + 1 positions; they stay zero from clear_block(). */
+ n += rn + 1;
+ if (n >= nb_codes)
+ break;
+
+ if (get_bits_left(&gb) <= 0)
+ break;
+
+ ac = get_value(&gb, ac_codebook);
+ sign = -get_bits1(&gb);
+
+ idx = scan[n >> log2_nb_blocks] + ((n & block_mask) << 6);
+ block[idx] = ((ac + 1) ^ sign) - sign;
+
+ ac_codebook = ff_prores_raw_ac_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ac_cb) - 1)];
+ ln_codebook = ff_prores_raw_ln_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ln_cb) - 1)];
+
+ n++;
+ }
+
+ /* Inverse transform each block into 'out' and scatter it into the frame. */
+ for (int n = 0; n < nb_blocks; n++) {
+ uint16_t *ptr = dst + n*16;
+
+ s->prodsp.idct_put(out, 16, block + n*64, qmat);
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++)
+ ptr[j * 2] = out[8*i+j] << 4; // 4 bits of LSB padding
+ ptr += 2 * linesize;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Decode one tile: parse its 8-byte header, then decode the four component
+ * payloads stored back to back after it.
+ *
+ * @return 0 on success or when the tile starts beyond the picture width,
+ *         a negative AVERROR code otherwise
+ */
+static int decode_tile(AVCodecContext *avctx, TileContext *tile,
+                       AVFrame *frame)
+{
+    /* Component decoded from each payload, in bitstream order. */
+    static const uint8_t payload_component[4] = { 2, 1, 3, 0 };
+
+    GetByteContext *const gb = &tile->gb;
+    LOCAL_ALIGNED_32(int16_t, qmat, [64]);
+    const uint8_t *payload;
+    uint16_t qscale;
+    int size[4];
+
+    if (tile->x >= avctx->width)
+        return 0;
+
+    /* Tile header */
+    qscale = bytestream2_get_be16(gb);
+    for (int i = 0; i < 3; i++)
+        size[i] = bytestream2_get_be16(gb);
+
+    /* The last payload takes whatever remains after header and payloads 0-2. */
+    size[3] = bytestream2_size(gb) - size[0] - size[1] - size[2] - 8;
+    if (size[3] < 0)
+        return AVERROR_INVALIDDATA;
+
+    /* Every coefficient shares the same quantiser. */
+    for (int i = 0; i < 64; i++)
+        qmat[i] = (qscale - 16384) >> 1;
+
+    payload = gb->buffer;
+    for (int i = 0; i < 4; i++) {
+        int ret = decode_comp(avctx, tile, frame, payload, size[i],
+                              payload_component[i], qmat);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "tile %d/%d decoding error\n", tile->x, tile->y);
+            return ret;
+        }
+        payload += size[i];
+    }
+
+    return 0;
+}
+
+/* execute2() worker: decodes tile number n into the frame passed as arg. */
+static int decode_tiles(AVCodecContext *avctx, void *arg,
+                        int n, int thread_nb)
+{
+    ProResRAWContext *const ctx = avctx->priv_data;
+
+    return decode_tile(avctx, &ctx->tiles[n], (AVFrame *)arg);
+}
+
+/* Run format negotiation with pix_fmt as the only candidate. */
+static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
+                                           enum AVPixelFormat pix_fmt)
+{
+    enum AVPixelFormat candidates[2];
+
+    candidates[0] = pix_fmt;
+    candidates[1] = AV_PIX_FMT_NONE; /* list terminator */
+
+    return ff_get_format(avctx, candidates);
+}
+
+/**
+ * Decode one packet holding a single ProRes RAW frame.
+ *
+ * Parses the frame header, builds the per-tile byte readers from the tile
+ * size table and then decodes all tiles, either through the active hwaccel
+ * or in software via execute2().
+ *
+ * @param got_frame_ptr set to 1 once a frame has been produced
+ * @return the number of bytes consumed (the packet size) on success,
+ *         a negative AVERROR code on failure
+ */
+static int decode_frame(AVCodecContext *avctx,
+                        AVFrame *frame, int *got_frame_ptr,
+                        AVPacket *avpkt)
+{
+    enum AVPixelFormat pix_fmt;
+    int header_size, ret, w, h, aa, flags;
+    ProResRAWContext *s = avctx->priv_data;
+    GetByteContext gb;
+    uint32_t offset;
+
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
+    /* The frame starts with its own size, which must match the packet. */
+    if (bytestream2_get_be32(&gb) != avpkt->size)
+        return AVERROR_INVALIDDATA;
+
+    /* ProRes RAW frame */
+    if (bytestream2_get_le32(&gb) != MKTAG('p','r','r','f'))
+        return AVERROR_INVALIDDATA;
+
+    header_size = bytestream2_get_be16(&gb) + 8;
+    s->version = bytestream2_get_be16(&gb);
+    if (s->version > 1) {
+        avpriv_request_sample(avctx, "Version %d", s->version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* Minimum header size equals the number of bytes parsed below. */
+    if (header_size < (s->version == 0 ? 144 : 96))
+        return AVERROR_INVALIDDATA;
+
+    /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atomos) */
+    bytestream2_skip(&gb, 4);
+
+    w = bytestream2_get_be16(&gb);
+    h = bytestream2_get_be16(&gb);
+
+    if (w != avctx->width || h != avctx->height) {
+        av_log(avctx, AV_LOG_WARNING, "picture resolution change: %dx%d -> %dx%d\n",
+               avctx->width, avctx->height, w, h);
+        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
+            return ret;
+    }
+
+    /* Tiles are 16 lines high, so the coded size is rounded up to 16.
+     * ff_set_dimensions() also assigns the coded size, hence the aligned
+     * values are stored only after it has run. */
+    avctx->coded_width  = FFALIGN(w, 16);
+    avctx->coded_height = FFALIGN(h, 16);
+
+    pix_fmt = AV_PIX_FMT_BAYER_RGGB16;
+    if (pix_fmt != s->pix_fmt) {
+        s->pix_fmt = pix_fmt;
+
+        ret = get_pixel_format(avctx, pix_fmt);
+        if (ret < 0)
+            return ret;
+
+        avctx->pix_fmt = ret;
+    }
+
+    /* The skipped header fields are not interpreted by this decoder. */
+    if (s->version == 0) {
+        bytestream2_skip(&gb, 1 * 4);
+        bytestream2_skip(&gb, 2);
+        bytestream2_skip(&gb, 2);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 3 * 3 * 4);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 2);
+
+        flags = bytestream2_get_be16(&gb);
+        aa = (flags >> 1) & 7;
+        bytestream2_skip(&gb, 64);
+    } else {
+        bytestream2_skip(&gb, 10);
+        bytestream2_skip(&gb, 48);
+
+        flags = bytestream2_get_be16(&gb);
+        aa = (flags >> 1) & 7;
+        bytestream2_skip(&gb, 16);
+    }
+
+    /* aa selects how many low bits of the tile count index align_tile_w[];
+     * the table only covers 4-bit indices, so larger values are refused
+     * rather than read past its end. */
+    if ((1 << aa) > FF_ARRAY_ELEMS(align_tile_w)) {
+        avpriv_request_sample(avctx, "Tile alignment %d", aa);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* Skip whatever is left of the header. */
+    bytestream2_skip(&gb, header_size - bytestream2_tell(&gb));
+
+    s->nb_tw = (w + 15) >> 4;
+    s->nb_th = (h + 15) >> 4;
+    s->nb_tw = (s->nb_tw >> aa) + align_tile_w[~(-1 * (1 << aa)) & s->nb_tw];
+    s->nb_tiles = s->nb_tw * s->nb_th;
+    av_log(avctx, AV_LOG_DEBUG, "%dx%d | nb_tiles: %d\n", s->nb_tw, s->nb_th, s->nb_tiles);
+
+    s->tw = s->version == 0 ? 128 : 256;
+    s->th = 16;
+    av_log(avctx, AV_LOG_DEBUG, "tile_size: %dx%d\n", s->tw, s->th);
+
+    av_fast_mallocz(&s->tiles, &s->tiles_size, s->nb_tiles * sizeof(*s->tiles));
+    if (!s->tiles)
+        return AVERROR(ENOMEM);
+
+    /* The size table holds one 16-bit entry per tile. */
+    if (bytestream2_get_bytes_left(&gb) < s->nb_tiles * 2)
+        return AVERROR_INVALIDDATA;
+
+    /* Read tile data offsets */
+    offset = bytestream2_tell(&gb) + s->nb_tiles * 2;
+    for (int n = 0; n < s->nb_tiles; n++) {
+        TileContext *tile = &s->tiles[n];
+
+        int size = bytestream2_get_be16(&gb);
+        if (offset >= avpkt->size)
+            return AVERROR_INVALIDDATA;
+        if (size >= avpkt->size)
+            return AVERROR_INVALIDDATA;
+        if (offset > avpkt->size - size)
+            return AVERROR_INVALIDDATA;
+
+        bytestream2_init(&tile->gb, avpkt->data + offset, size);
+
+        /* Tiles are stored row by row. */
+        tile->y = (n / s->nb_tw) * s->th;
+        tile->x = (n % s->nb_tw) * s->tw;
+
+        offset += size;
+    }
+
+    ret = ff_thread_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    s->frame = frame;
+
+    /* Start */
+    if (avctx->hwaccel) {
+        const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
+
+        ret = ff_hwaccel_frame_priv_alloc(avctx, &s->hwaccel_picture_private);
+        if (ret < 0)
+            return ret;
+
+        ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
+
+        for (int n = 0; ret >= 0 && n < s->nb_tiles; n++) {
+            TileContext *tile = &s->tiles[n];
+            ret = hwaccel->decode_slice(avctx, tile->gb.buffer,
+                                        tile->gb.buffer_end - tile->gb.buffer);
+        }
+
+        if (ret >= 0)
+            ret = hwaccel->end_frame(avctx);
+
+        /* Released on every path so that a failed frame does not leave a
+         * stale reference behind for the next call. */
+        av_refstruct_unref(&s->hwaccel_picture_private);
+        if (ret < 0)
+            return ret;
+    } else {
+        /* NOTE(review): per-tile errors are not propagated here; the frame
+         * is output regardless - confirm this is intended. */
+        avctx->execute2(avctx, decode_tiles, frame, NULL, s->nb_tiles);
+    }
+
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->flags |= AV_FRAME_FLAG_KEY;
+
+    *got_frame_ptr = 1;
+
+    return avpkt->size;
+}
+
+/* Release everything the decoder context owns; always returns 0. */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    ProResRAWContext *const ctx = avctx->priv_data;
+
+    av_freep(&ctx->tiles);
+    av_refstruct_unref(&ctx->hwaccel_picture_private);
+
+    return 0;
+}
+
+#if HAVE_THREADS
+/* Frame threading: copy the already negotiated pixel format from the source
+ * context to the destination context. */
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    const ProResRAWContext *from = src->priv_data;
+    ProResRAWContext *to = dst->priv_data;
+
+    to->pix_fmt = from->pix_fmt;
+
+    return 0;
+}
+#endif
+
+/* Codec registration entry for the ProRes RAW decoder. */
+const FFCodec ff_prores_raw_decoder = {
+ .p.name = "prores_raw",
+ CODEC_LONG_NAME("Apple ProRes RAW"),
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_PRORES_RAW,
+ .priv_data_size = sizeof(ProResRAWContext),
+ .init = decode_init,
+ .close = decode_end,
+ FF_CODEC_DECODE_CB(decode_frame),
+ UPDATE_THREAD_CONTEXT(update_thread_context),
+ .p.capabilities = AV_CODEC_CAP_DR1 |
+ AV_CODEC_CAP_FRAME_THREADS |
+ AV_CODEC_CAP_SLICE_THREADS,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
+ /* Only the NULL terminator: no hardware configurations are listed here. */
+ .hw_configs = (const AVCodecHWConfigInternal *const []) {
+ NULL
+ },
+};
diff --git a/libavcodec/proresdec_raw.h b/libavcodec/proresdec_raw.h
new file mode 100644
index 0000000000..4b788b123f
--- /dev/null
+++ b/libavcodec/proresdec_raw.h
@@ -0,0 +1,60 @@
+/*
+ * ProRes RAW decoder
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PRORESDEC_RAW_H
+#define AVCODEC_PRORESDEC_RAW_H
+
+#include "libavutil/mem_internal.h"
+
+#include "bytestream.h"
+#include "blockdsp.h"
+#include "proresdsp.h"
+
+/* Describes one tile of a frame. */
+typedef struct TileContext {
+ /* Byte reader over the tile's data; its buffer/buffer_end range is what
+ * gets handed to the hwaccel as one slice */
+ GetByteContext gb;
+ /* Tile position; presumably in pixels relative to the frame origin
+ * (TODO confirm against the code that fills it in) */
+ unsigned x, y;
+} TileContext;
+
+/* Private decoder state shared between the software decoder and hwaccels. */
+typedef struct ProResRAWContext {
+ ProresDSPContext prodsp;
+ BlockDSPContext bdsp;
+
+ /* Heap-allocated tile table (freed on close) and its entry count */
+ TileContext *tiles;
+ /* NOTE(review): presumably the allocated size of `tiles` - confirm */
+ unsigned int tiles_size;
+ int nb_tiles;
+ /* Tile width/height */
+ int tw, th;
+ /* Number of tiles horizontally/vertically */
+ int nb_tw, nb_th;
+
+ /* Output pixel format; propagated between frame threads */
+ enum AVPixelFormat pix_fmt;
+ /* Frame currently being decoded */
+ AVFrame *frame;
+ /* Per-frame hwaccel data, reference counted via av_refstruct */
+ void *hwaccel_picture_private;
+
+ /* Bitstream version of the current frame */
+ int version;
+
+ /* Coefficient scan order table */
+ uint8_t scan[64];
+} ProResRAWContext;
+
+extern const int16_t ff_prores_raw_dc_cb[13];
+extern const int16_t ff_prores_raw_ac_cb[95];
+extern const int16_t ff_prores_raw_rn_cb[28];
+extern const int16_t ff_prores_raw_ln_cb[15];
+
+#endif /* AVCODEC_PRORESDEC_RAW_H */
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH 7/7] lavc: add a ProRes RAW Vulkan hwaccel
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
` (4 preceding siblings ...)
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 6/7] lavc: add a ProRes RAW decoder Lynne
@ 2025-07-10 15:13 ` Lynne
5 siblings, 0 replies; 7+ messages in thread
From: Lynne @ 2025-07-10 15:13 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This commit adds a ProRes RAW hardware implementation written in Vulkan.
Both version 0 and version 1 streams are supported.
The implementation is highly parallelized, with 512 invocations dispatched
for every tile; a 5.8k stream generally has around 4k tiles.
Thanks to unlord for the 8-point iDCT.
Benchmark for a generic 5.8k RAW HQ file:
6900XT: 63fps
7900XTX: 84fps
6000 Ada: 120fps
Intel: 9fps
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/hwaccels.h | 1 +
libavcodec/proresdec_raw.c | 10 +
libavcodec/vulkan/Makefile | 3 +
libavcodec/vulkan/prores_raw.comp | 348 +++++++++++++++++++++
libavcodec/vulkan_decode.c | 9 +-
libavcodec/vulkan_prores_raw.c | 498 ++++++++++++++++++++++++++++++
8 files changed, 871 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan/prores_raw.comp
create mode 100644 libavcodec/vulkan_prores_raw.c
diff --git a/configure b/configure
index e0503337cd..8c34dc7cf1 100755
--- a/configure
+++ b/configure
@@ -3290,6 +3290,8 @@ mpeg4_videotoolbox_hwaccel_deps="videotoolbox"
mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder"
prores_videotoolbox_hwaccel_deps="videotoolbox"
prores_videotoolbox_hwaccel_select="prores_decoder"
+prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
+prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
vc1_d3d11va_hwaccel_deps="d3d11va"
vc1_d3d11va_hwaccel_select="vc1_decoder"
vc1_d3d11va2_hwaccel_deps="d3d11va"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 47d16f3312..23721de65f 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1080,6 +1080,7 @@ OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
+OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o reverse.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 0b2c725247..fb9b850233 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -67,6 +67,7 @@ extern const struct FFHWAccel ff_mpeg4_vaapi_hwaccel;
extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
+extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;
diff --git a/libavcodec/proresdec_raw.c b/libavcodec/proresdec_raw.c
index caebed9e96..6a0773c8a5 100644
--- a/libavcodec/proresdec_raw.c
+++ b/libavcodec/proresdec_raw.c
@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "config_components.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"
#include "libavutil/mem.h"
@@ -30,10 +31,13 @@
#include "bytestream.h"
#include "codec_internal.h"
#include "decode.h"
+#include "hwconfig.h"
#include "get_bits.h"
#include "idctdsp.h"
#include "proresdata.h"
#include "thread.h"
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
#include "proresdec_raw.h"
@@ -312,6 +316,9 @@ static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
enum AVPixelFormat pix_fmt)
{
enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+ AV_PIX_FMT_VULKAN,
+#endif
pix_fmt,
AV_PIX_FMT_NONE,
};
@@ -514,6 +521,9 @@ const FFCodec ff_prores_raw_decoder = {
AV_CODEC_CAP_SLICE_THREADS,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
.hw_configs = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+ HWACCEL_VULKAN(prores_raw),
+#endif
NULL
},
};
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 729cb4f15c..d8e1471fa6 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -14,6 +14,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
+OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
+ vulkan/prores_raw.o
+
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/prores_raw.comp b/libavcodec/vulkan/prores_raw.comp
new file mode 100644
index 0000000000..fe0606e0b8
--- /dev/null
+++ b/libavcodec/vulkan/prores_raw.comp
@@ -0,0 +1,348 @@
+/*
+ * ProRes RAW decoder
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Shorthand for constructing a 16-bit signed integer */
+#define I16(x) (int16_t(x))
+
+/* Position of this invocation inside the workgroup: z selects the component,
+ * y the 8x8 block and x the row handled by the transform */
+#define COMP_ID (gl_LocalInvocationID.z)
+#define BLOCK_ID (gl_LocalInvocationID.y)
+#define ROW_ID (gl_LocalInvocationID.x)
+
+/* Bitstream reader state used by the coefficient parsing functions */
+GetBitContext gb;
+/* Coefficients, indexed [component][block][coefficient]; null-initialized */
+shared float block[gl_WorkGroupSize.z][16][64] = { };
+/* Output of the first transform pass, same indexing as `block` */
+shared float transposed[gl_WorkGroupSize.z][16][64];
+
+/*
+ * First pass of the 8x8 inverse transform: applies an 8-point inverse DCT to
+ * the 8 consecutive values starting at 8*row_id of the current
+ * component/block in `block`, and stores the result transposed (element k
+ * goes to k*8 + row_id) into `transposed`.
+ */
+void idct8_horiz(const uint row_id)
+{
+ float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+ float u0, u1, u2, u3, u4, u5, u6, u7;
+
+ /* Input */
+ /* Even-indexed inputs go to t0..t3, odd-indexed inputs to u4..u7 */
+ t0 = block[COMP_ID][BLOCK_ID][8*row_id + 0];
+ u4 = block[COMP_ID][BLOCK_ID][8*row_id + 1];
+ t2 = block[COMP_ID][BLOCK_ID][8*row_id + 2];
+ u6 = block[COMP_ID][BLOCK_ID][8*row_id + 3];
+ t1 = block[COMP_ID][BLOCK_ID][8*row_id + 4];
+ u5 = block[COMP_ID][BLOCK_ID][8*row_id + 5];
+ t3 = block[COMP_ID][BLOCK_ID][8*row_id + 6];
+ u7 = block[COMP_ID][BLOCK_ID][8*row_id + 7];
+
+ /* Embedded scaled inverse 4-point Type-II DCT */
+ u0 = t0 + t1;
+ u1 = t0 - t1;
+ u3 = t2 + t3;
+ u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+ t0 = u0 + u3;
+ t3 = u0 - u3;
+ t1 = u1 + u2;
+ t2 = u1 - u2;
+
+ /* Embedded scaled inverse 4-point Type-IV DST */
+ t5 = u5 + u6;
+ t6 = u5 - u6;
+ t7 = u4 + u7;
+ t4 = u4 - u7;
+ u7 = t7 + t5;
+ u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+ u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+ u4 = u8 - t4*(1.0823922002923939687994464107328f);
+ u6 = u8 - t6*(2.6131259297527530557132863468544f);
+ t7 = u7;
+ t6 = t7 - u6;
+ t5 = t6 + u5;
+ t4 = t5 - u4;
+
+ /* Butterflies */
+ u0 = t0 + t7;
+ u7 = t0 - t7;
+ u6 = t1 + t6;
+ u1 = t1 - t6;
+ u2 = t2 + t5;
+ u5 = t2 - t5;
+ u4 = t3 + t4;
+ u3 = t3 - t4;
+
+ /* Output */
+ /* Written transposed so the second pass can again read 8 contiguous values */
+ transposed[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;
+ transposed[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;
+ transposed[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;
+ transposed[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;
+ transposed[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;
+ transposed[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;
+ transposed[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;
+ transposed[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;
+}
+
+/*
+ * Second pass of the 8x8 inverse transform: applies the same 8-point inverse
+ * DCT to the 8 consecutive values starting at 8*row_id in `transposed`, and
+ * stores the result transposed back (element k goes to k*8 + row_id) into
+ * `block`.
+ */
+void idct8_vert(const uint row_id)
+{
+ float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+ float u0, u1, u2, u3, u4, u5, u6, u7;
+
+ /* Input */
+ /* Unlike the first pass, 0.5 is added to the first input here.
+ * NOTE(review): presumably an output bias/rounding term - confirm */
+ t0 = transposed[COMP_ID][BLOCK_ID][8*row_id + 0] + 0.5f; // NOTE
+ u4 = transposed[COMP_ID][BLOCK_ID][8*row_id + 1];
+ t2 = transposed[COMP_ID][BLOCK_ID][8*row_id + 2];
+ u6 = transposed[COMP_ID][BLOCK_ID][8*row_id + 3];
+ t1 = transposed[COMP_ID][BLOCK_ID][8*row_id + 4];
+ u5 = transposed[COMP_ID][BLOCK_ID][8*row_id + 5];
+ t3 = transposed[COMP_ID][BLOCK_ID][8*row_id + 6];
+ u7 = transposed[COMP_ID][BLOCK_ID][8*row_id + 7];
+
+ /* Embedded scaled inverse 4-point Type-II DCT */
+ u0 = t0 + t1;
+ u1 = t0 - t1;
+ u3 = t2 + t3;
+ u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+ t0 = u0 + u3;
+ t3 = u0 - u3;
+ t1 = u1 + u2;
+ t2 = u1 - u2;
+
+ /* Embedded scaled inverse 4-point Type-IV DST */
+ t5 = u5 + u6;
+ t6 = u5 - u6;
+ t7 = u4 + u7;
+ t4 = u4 - u7;
+ u7 = t7 + t5;
+ u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+ u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+ u4 = u8 - t4*(1.0823922002923939687994464107328f);
+ u6 = u8 - t6*(2.6131259297527530557132863468544f);
+ t7 = u7;
+ t6 = t7 - u6;
+ t5 = t6 + u5;
+ t4 = t5 - u4;
+
+ /* Butterflies */
+ u0 = t0 + t7;
+ u7 = t0 - t7;
+ u6 = t1 + t6;
+ u1 = t1 - t6;
+ u2 = t2 + t5;
+ u5 = t2 - t5;
+ u4 = t3 + t4;
+ u3 = t3 - t4;
+
+ /* Output */
+ /* Transposing again restores the original orientation of the block */
+ block[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;
+ block[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;
+ block[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;
+ block[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;
+ block[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;
+ block[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;
+ block[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;
+ block[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;
+}
+
+/* Maps a decoded DC code to a value used both as a DC codebook index and as
+ * the magnitude of the DC delta. Note that '+' binds tighter than '>>', so
+ * this evaluates as (((x) & 1) + (x)) >> 1. */
+#define TODCCODEBOOK(x) (((x) & 1) + (x) >> 1)
+
+/*
+ * Reads one variable-length code from the global bit reader `gb`.
+ *
+ * `codebook` packs the code parameters: bits 0-3 hold the Rice order, bits
+ * 4-7 the exponential order and the bits from 8 upwards the switch threshold.
+ * Returns the decoded value; if the next 32 bits are all zero, 0 is returned
+ * and no bits are consumed.
+ */
+int16_t get_value(int16_t codebook)
+{
+ const int16_t switch_bits = codebook >> 8;
+ const int16_t rice_order = codebook & I16(0xf);
+ const int16_t exp_order = (codebook >> 4) & I16(0xf);
+
+ uint b = show_bits(gb, 32);
+ if (expectEXT(b == 0, false))
+ return I16(0);
+ /* Number of leading zero bits in the 32-bit window */
+ int16_t q = I16(31) - I16(findMSB(b));
+
+ /* Top bit set: the value is just the rice_order bits that follow it */
+ if ((b & 0x80000000) > 0) {
+ skip_bits(gb, 1 + rice_order);
+ return I16((b & 0x7FFFFFFF) >> (31 - rice_order));
+ }
+
+ /* Short prefix: q forms the high part, followed by rice_order low bits */
+ if (q <= switch_bits) {
+ skip_bits(gb, q + rice_order + 1);
+ return I16((q << rice_order) +
+ (((b << (q + 1)) >> 1) >> (31 - rice_order)));
+ }
+
+ /* Long prefix: the whole code spans `bits` bits and is offset so that it
+ * continues after the range covered by the short-prefix codes */
+ int16_t bits = exp_order + (q << 1) - switch_bits;
+ skip_bits(gb, bits);
+ return I16((b >> (32 - bits)) +
+ ((switch_bits + 1) << rice_order) -
+ (1 << exp_order));
+}
+
+/*
+ * Parses the DC coefficient of each of the `nb_blocks` blocks of the current
+ * component and stores it at coefficient index 0 of the block. The first DC
+ * is coded directly; every following one is coded as a delta added to the
+ * previous DC. Parsing stops early once the bit reader has no bits left.
+ */
+void read_dc_vals(const uint nb_blocks)
+{
+ int16_t dc;
+ int16_t prev_dc = I16(0), dc_add = I16(0), sign = I16(0);
+
+ /* Special handling for first block */
+ dc = get_value(I16(700));
+ /* The lowest bit of the code selects the sign of the first DC value */
+ prev_dc = int16_t(((dc & 1) + (dc >> 1) ^ -int((dc & 1))) + (dc & 1));
+ block[COMP_ID][0][0] = prev_dc;
+
+ for (uint n = 1; n < nb_blocks; n++) {
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ /* A fixed codebook is used for n = 1, 17, 33, ...; otherwise the
+ * codebook is picked from the previously decoded code */
+ int16_t dc_codebook;
+ if ((n & 15) == 1)
+ dc_codebook = I16(100);
+ else
+ dc_codebook = dc_cb[min(TODCCODEBOOK(dc), 13 - 1)];
+
+ dc = get_value(dc_codebook);
+
+ /* '&' binds tighter than '^': this is sign ^ (dc & 1). The sign of the
+ * delta is predicted from the sign of the previous delta. */
+ sign = sign ^ dc & int16_t(1);
+ dc_add = (-sign ^ I16(TODCCODEBOOK(dc))) + sign;
+ sign = I16(dc_add < 0);
+ prev_dc += dc_add;
+
+ block[COMP_ID][n][0] = prev_dc;
+ }
+}
+
+/*
+ * Parses the AC coefficients of the current component. Coefficients are
+ * addressed through a single running position `n` covering all blocks: the
+ * low log2(nb_blocks) bits select the block and the remaining high bits index
+ * the `scan` table, which gives the coefficient index inside the block.
+ * NOTE(review): the block/scan split assumes nb_blocks is a power of two -
+ * confirm that callers guarantee this.
+ *
+ * Each iteration reads a run of `ln` consecutive coefficients, then a run
+ * length `rn` of positions to skip, then one more coefficient. Parsing stops
+ * when the position reaches the end or the bit reader runs out of bits.
+ */
+void read_ac_vals(const uint nb_blocks)
+{
+ uint8_t idx;
+ const uint nb_codes = nb_blocks << 6;
+ const uint log2_nb_blocks = findMSB(nb_blocks);
+ const uint block_mask = (1 << log2_nb_blocks) - 1;
+
+ int16_t sign;
+ int16_t ac, rn, ln;
+ /* Initial codebooks; updated adaptively from decoded values below */
+ int16_t ac_codebook = I16(49);
+ int16_t rn_codebook = I16( 0);
+ int16_t ln_codebook = I16(66);
+ int16_t val;
+
+ /* Positions below nb_blocks are the DC coefficients, so start after them */
+ for (uint n = nb_blocks; n <= nb_codes;) {
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ /* Run of `ln` consecutive coefficients */
+ ln = get_value(ln_codebook);
+ for (uint i = 0; i < ln; i++) {
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ if (expectEXT((n + i) >= nb_codes, false))
+ break;
+
+ ac = get_value(ac_codebook);
+ ac_codebook = ac_cb[min(ac, 95 - 1)];
+ /* sign is 0 or -1; (x ^ sign) - sign negates x when sign is -1 */
+ sign = -int16_t(get_bits(gb, 1));
+
+ idx = scan[(n + i) >> log2_nb_blocks];
+ val = int16_t(((ac + I16(1)) ^ sign) - sign);
+ block[COMP_ID][(n + i) & block_mask][idx] = val;
+ }
+
+ n += ln;
+ if (expectEXT(n >= nb_codes, false))
+ break;
+
+ /* Number of positions to skip before the next coefficient */
+ rn = get_value(rn_codebook);
+ rn_codebook = rn_cb[min(rn, 28 - 1)];
+
+ n += rn + 1;
+ if (expectEXT(n >= nb_codes, false))
+ break;
+
+ if (expectEXT(left_bits(gb) <= 0, false))
+ break;
+
+ /* Single coefficient following the skipped positions */
+ ac = get_value(ac_codebook);
+ sign = -int16_t(get_bits(gb, 1));
+
+ idx = scan[n >> log2_nb_blocks];
+ val = int16_t(((ac + 1) ^ sign) - sign);
+ block[COMP_ID][n & block_mask][idx] = val;
+
+ /* Both the next AC codebook and the next run-length codebook are
+ * selected by this coefficient's code */
+ ac_codebook = ac_cb[min(ac, 95 - 1)];
+ ln_codebook = ln_cb[min(ac, 15 - 1)];
+
+ n++;
+ }
+}
+
+/*
+ * Shader entry point. One workgroup decodes one tile: a single invocation per
+ * component parses that component's coefficients into shared memory, then all
+ * invocations scale the coefficients, run the two transform passes and store
+ * the result into the destination image.
+ */
+void main(void)
+{
+ const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+ TileData td = tile_data[tile_idx];
+
+ /* Nothing to do for tiles starting beyond the frame's right edge */
+ if (expectEXT(td.pos.x >= frame_size.x, false))
+ return;
+
+ /* The tile starts with an 8-byte header made of four 16-bit fields; the
+ * byte pair of each field is swapped (.yx) before being packed */
+ uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
+ u8vec2buf hdr_data = u8vec2buf(pkt_offset);
+ int qscale = int(pack16(hdr_data[0].v.yx));
+
+ /* size[1..3] come from the header; size[0] is what remains of the tile
+ * after the header and the three other components */
+ ivec4 size = ivec4(td.size,
+ pack16(hdr_data[2].v.yx),
+ pack16(hdr_data[1].v.yx),
+ pack16(hdr_data[3].v.yx));
+ size[0] = size[0] - size[1] - size[2] - size[3] - 8;
+ if (expectEXT(size[0] < 0, false))
+ return;
+
+ /* Each component occupies one position of a 2x2 pixel group */
+ const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
+ const float n = float(qscale - 16384) / (2.0f*4096.0f);
+ /* Per-component width is half of the (possibly cropped) tile width */
+ const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
+ const uint nb_blocks = w / 8;
+
+ /* Byte offset of each component's data after the header: the data is
+ * stored in the order component 2, 1, 3, 0 */
+ const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],
+ size[2],
+ 0,
+ size[2] + size[1]);
+
+ /* Entropy decoding is serial: one invocation per component does it */
+ if (BLOCK_ID == 0 && ROW_ID == 0) {
+ init_get_bits(gb, u8buf(pkt_offset + 8 + comp_offset[COMP_ID]),
+ size[COMP_ID]);
+ read_dc_vals(nb_blocks);
+ read_ac_vals(nb_blocks);
+ }
+
+ barrier();
+
+ /* Scale every coefficient by the tile's scale and the per-coefficient
+ * iDCT scale factor */
+ [[unroll]]
+ for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)
+ block[COMP_ID][BLOCK_ID][i] *= n * idct_8x8_scales[i];
+
+ barrier();
+
+ /* With PARALLEL_ROWS each invocation transforms one row; otherwise a
+ * single invocation per block loops over all 8 rows */
+#ifdef PARALLEL_ROWS
+ idct8_horiz(ROW_ID);
+
+ barrier();
+
+ idct8_vert(ROW_ID);
+#else
+ for (uint j = 0; j < 8; j++)
+ idct8_horiz(j);
+
+ barrier();
+
+ for (uint j = 0; j < 8; j++)
+ idct8_vert(j);
+#endif
+
+ barrier();
+
+ /* Samples of one component are two pixels apart in both directions */
+ [[unroll]]
+ for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)
+ imageStore(dst,
+ offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),
+ vec4(block[COMP_ID][BLOCK_ID][i]));
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 7310ba1547..857f16bc0a 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -25,7 +25,8 @@
#include "libavutil/vulkan_loader.h"
#define DECODER_IS_SDR(codec_id) \
- ((codec_id) == AV_CODEC_ID_FFV1)
+ (((codec_id) == AV_CODEC_ID_FFV1) || \
+ ((codec_id) == AV_CODEC_ID_PRORES_RAW))
#if CONFIG_H264_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@@ -39,6 +40,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
#if CONFIG_FFV1_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
#endif
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
+#endif
static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_H264_VULKAN_HWACCEL
@@ -53,6 +57,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_FFV1_VULKAN_HWACCEL
&ff_vk_dec_ffv1_desc,
#endif
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+ &ff_vk_dec_prores_raw_desc,
+#endif
};
static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c
new file mode 100644
index 0000000000..31320ffa18
--- /dev/null
+++ b/libavcodec/vulkan_prores_raw.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "proresdec_raw.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_prores_raw_comp;
+
+/* Vulkan decode descriptor for ProRes RAW: requests a compute queue and the
+ * push descriptor extension. */
+const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc = {
+ .codec_id = AV_CODEC_ID_PRORES_RAW,
+ .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+ .queue_flags = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* Per-frame hwaccel state. */
+typedef struct ProResRAWVulkanDecodePicture {
+ FFVulkanDecodePicture vp;
+
+ /* Pooled buffer holding one TileData entry per tile */
+ AVBufferRef *tile_data;
+ /* Number of tiles added to the frame so far */
+ uint32_t nb_tiles;
+} ProResRAWVulkanDecodePicture;
+
+/* Decoder-wide hwaccel state, stored in the shared decode context. */
+typedef struct ProResRAWVulkanDecodeContext {
+ /* Decode shaders, indexed by bitstream version (0 or 1) */
+ FFVulkanShader decode[2];
+
+ /* Pool from which the per-frame tile data buffers are allocated */
+ AVBufferPool *tile_data_pool;
+
+ /* Constant tables used by the shaders: iDCT scales, scan order, codebooks */
+ FFVkBuffer uniform_buf;
+} ProResRAWVulkanDecodeContext;
+
+/* Push constants of the decode shader. The field order and sizes mirror the
+ * pushConstants block declared in the generated shader source. */
+typedef struct DecodePushData {
+ /* Device address of the TileData array */
+ VkDeviceAddress tile_data;
+ /* Device address of the buffer holding the tile bitstreams */
+ VkDeviceAddress pkt_data;
+ /* Frame width and height */
+ uint32_t frame_size[2];
+ /* Tile width and height */
+ uint32_t tile_size[2];
+} DecodePushData;
+
+/* Per-tile record read by the shader. The layout mirrors the TileData
+ * buffer reference declared in the generated shader source. */
+typedef struct TileData {
+ /* Tile position */
+ int32_t pos[2];
+ /* Byte offset of the tile's data from the packet buffer address */
+ uint32_t offset;
+ /* Size of the tile's data in bytes */
+ uint32_t size;
+} TileData;
+
+/*
+ * hwaccel start_frame callback.
+ *
+ * Tries to host-map the packet buffer when the external host memory
+ * extension is available (the result is deliberately not checked), obtains a
+ * pooled buffer with one TileData entry per tile, and prepares the output
+ * frame.
+ *
+ * Returns 0 on success or a negative AVERROR code.
+ */
+static int vk_prores_raw_start_frame(AVCodecContext *avctx,
+                                     const AVBufferRef *buffer_ref,
+                                     av_unused const uint8_t *buffer,
+                                     av_unused uint32_t size)
+{
+    ProResRAWContext *prr = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx;
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+    int ret;
+
+    /* Map the packet directly if possible, avoiding a copy of each tile */
+    if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
+                              buffer_ref,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+
+    /* Table of per-tile records consumed by the shader */
+    ret = ff_vk_get_pooled_buffer(&ctx->s, &prv->tile_data_pool,
+                                  &pp->tile_data,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, prr->nb_tiles*sizeof(TileData),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (ret < 0)
+        return ret;
+
+    /* Output frame setup */
+    ret = ff_vk_decode_prepare_frame_sdr(dec, prr->frame, vp, 1,
+                                         FF_VK_REP_FLOAT, 0);
+
+    return ret < 0 ? ret : 0;
+}
+
+/*
+ * hwaccel decode_slice callback, called once per tile.
+ *
+ * Fills in the next TileData record (position, size and offset of the tile's
+ * data). When the packet is host-mapped the offset is taken relative to the
+ * mapping; otherwise the tile is appended to the slices buffer.
+ *
+ * Returns 0 on success or a negative AVERROR code.
+ */
+static int vk_prores_raw_decode_slice(AVCodecContext *avctx,
+                                      const uint8_t *data,
+                                      uint32_t size)
+{
+    int err;
+    ProResRAWContext *prr = avctx->priv_data;
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    FFVkBuffer *tile_buf = (FFVkBuffer *)pp->tile_data->data;
+    FFVkBuffer *pkt_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
+
+    /* Source tile and the record describing it to the shader */
+    const TileContext *tile = &prr->tiles[pp->nb_tiles];
+    TileData *entry = (TileData *)tile_buf->mapped_mem + pp->nb_tiles;
+
+    entry->pos[0] = tile->x;
+    entry->pos[1] = tile->y;
+    entry->size = size;
+
+    /* Host-mapped packet: no copy, only the offset needs recording */
+    if (vp->slices_buf && pkt_buf->host_ref) {
+        entry->offset = data - pkt_buf->mapped_mem;
+        pp->nb_tiles++;
+        return 0;
+    }
+
+    /* Otherwise the tile lands at the current end of the slices buffer */
+    entry->offset = vp->slices_size;
+    err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                 &pp->nb_tiles, NULL);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/*
+ * hwaccel end_frame callback: records and submits the compute dispatch that
+ * decodes all tiles of the current frame.
+ *
+ * Ownership of the tile data and slices buffers is handed to the execution
+ * context, which keeps them alive until the submission has completed.
+ *
+ * Returns 0 on success or a negative AVERROR code. Failures while preparing
+ * the submission are propagated to the caller, and any dependencies already
+ * attached to the execution context are dropped.
+ */
+static int vk_prores_raw_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    ProResRAWContext *prr = avctx->priv_data;
+    ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx;
+
+    DecodePushData pd_decode;
+    FFVulkanShader *decode_shader;
+
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* Grab the raw buffers now: the references are moved into the execution
+     * context below and the local pointers are cleared */
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *tile_data = (FFVkBuffer *)pp->tile_data->data;
+
+    VkImageMemoryBarrier2 img_bar[8];
+    int nb_img_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, prr->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->tile_data, 1, 0));
+    pp->tile_data = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+
+    /* Move the output image to the GENERAL layout for the shader's stores */
+    ff_vk_frame_barrier(&ctx->s, exec, prr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+    });
+    nb_img_bar = 0;
+
+    /* One shader per bitstream version */
+    decode_shader = &prv->decode[prr->version];
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  prr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    pd_decode = (DecodePushData) {
+        .tile_data = tile_data->address,
+        .pkt_data = slices_buf->address,
+        .frame_size[0] = avctx->width,
+        .frame_size[1] = avctx->height,
+        .tile_size[0] = prr->tw,
+        .tile_size[1] = prr->th,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_decode), &pd_decode);
+
+    /* One workgroup per tile */
+    vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1);
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+    return 0;
+
+fail:
+    /* Nothing was submitted: release whatever was attached to the execution
+     * context and report the failure instead of claiming success */
+    ff_vk_exec_discard_deps(&ctx->s, exec);
+    return err;
+}
+
+/*
+ * Builds, compiles and registers the decode compute shader for one bitstream
+ * version into `shd`.
+ *
+ * The shader source is assembled from the common header, the push constant
+ * and descriptor declarations generated here, and the ProRes RAW shader
+ * body. `prr` is not used by this function.
+ *
+ * Returns 0 on success or a negative AVERROR code.
+ */
+static int init_decode_shader(ProResRAWContext *prr, FFVulkanContext *s,
+ FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+ FFVulkanShader *shd, int version)
+{
+ int err;
+ FFVulkanDescriptorSetBinding *desc_set;
+ int parallel_rows = 1;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+
+ /* Fall back to one invocation per block when the device cannot run 512
+ * invocations per workgroup, or is an integrated GPU */
+ if (s->props.properties.limits.maxComputeWorkGroupInvocations < 512 ||
+ s->props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)
+ parallel_rows = 0;
+
+ RET(ff_vk_shader_init(s, shd, "prores_raw",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2",
+ "GL_EXT_null_initializer",
+ "GL_AMD_gpu_shader_half_float" }, 4,
+ parallel_rows ? 8 : 1 /* 8x8 transforms, 8-point width */,
+ version == 0 ? 8 : 16 /* Horizontal blocks */,
+ 4 /* Components */,
+ 0));
+
+ if (parallel_rows)
+ GLSLC(0, #define PARALLEL_ROWS );
+
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+
+ /* Declarations mirroring the C-side TileData and DecodePushData structs */
+ GLSLC(0, layout(buffer_reference, buffer_reference_align = 16) buffer TileData { );
+ GLSLC(1, ivec2 pos; );
+ GLSLC(1, uint offset; );
+ GLSLC(1, uint size; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, TileData tile_data; );
+ GLSLC(1, u8buf pkt_data; );
+ GLSLC(1, uvec2 frame_size; );
+ GLSLC(1, uvec2 tile_size; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ /* First descriptor set: the output image */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "dst",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "r16",
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
+
+ /* Second descriptor set: constant tables (iDCT scales, scan order and the
+ * four codebooks), each bound as its own uniform buffer */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "dct_scale_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "float idct_8x8_scales[64];",
+ },
+ {
+ .name = "scan_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t scan[64];",
+ },
+ {
+ .name = "dc_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t dc_cb[13];",
+ },
+ {
+ .name = "ac_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t ac_cb[95];",
+ },
+ {
+ .name = "rn_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t rn_cb[28];",
+ },
+ {
+ .name = "ln_cb_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t ln_cb[15];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 6, 1, 0));
+
+ /* Shader body */
+ GLSLD(ff_source_prores_raw_comp);
+
+ RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+ /* Reached on success as well: the compiled SPIR-V is no longer needed */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/*
+ * Destructor for the decoder-wide hwaccel state: frees both decode shaders,
+ * the uniform buffer, the tile data pool and finally the state itself.
+ */
+static void vk_decode_prores_raw_uninit(FFVulkanDecodeShared *ctx)
+{
+    ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx;
+
+    /* One shader per bitstream version */
+    for (int i = 0; i < 2; i++)
+        ff_vk_shader_free(&ctx->s, &prv->decode[i]);
+
+    ff_vk_free_buf(&ctx->s, &prv->uniform_buf);
+    av_buffer_pool_uninit(&prv->tile_data_pool);
+
+    av_freep(&prv);
+}
+
+/*
+ * hwaccel init callback.
+ *
+ * Initializes the generic Vulkan decode state, builds the decode shaders for
+ * both bitstream versions, and creates the uniform buffer holding the iDCT
+ * scale table, the scan order and the four codebooks, which is then bound to
+ * both shaders.
+ *
+ * Returns 0 on success or a negative AVERROR code. The SPIR-V compiler is
+ * released on every path once it has been created.
+ */
+static int vk_decode_prores_raw_init(AVCodecContext *avctx)
+{
+    int err;
+    ProResRAWContext *prr = avctx->priv_data;
+
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    size_t ua;
+
+    ProResRAWVulkanDecodeContext *prv;
+    FFVkSPIRVCompiler *spv;
+
+    uint8_t *uniform_buf;
+    float *dct_scale_buf;
+    /* 1D scale factors; the 2D table is the outer product of this with itself */
+    double idct_8_scales[8] = {
+        cos(4.0*M_PI/16.0)/2.0,
+        cos(1.0*M_PI/16.0)/2.0,
+        cos(2.0*M_PI/16.0)/2.0,
+        cos(3.0*M_PI/16.0)/2.0,
+        cos(4.0*M_PI/16.0)/2.0,
+        cos(5.0*M_PI/16.0)/2.0,
+        cos(6.0*M_PI/16.0)/2.0,
+        cos(7.0*M_PI/16.0)/2.0,
+    };
+    uint8_t *scan_buf;
+    /* Sizes of the DC, AC, run and level codebooks, in that order */
+    size_t cb_size[5] = {
+        13*sizeof(int16_t),
+        95*sizeof(int16_t),
+        28*sizeof(int16_t),
+        15*sizeof(int16_t),
+    };
+    size_t cb_offset[5];
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on every exit goes through `fail` so that the compiler is
+     * always released */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+    ctx = dec->shared_ctx;
+
+    prv = ctx->sd_ctx = av_mallocz(sizeof(*prv));
+    if (!prv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_prores_raw_uninit;
+
+    /* Uniform buffer layout: scale table at 0, scan table right after it,
+     * then the codebooks. Every range bound as a uniform buffer must start at
+     * a multiple of minUniformBufferOffsetAlignment, so the first codebook
+     * offset is aligned as well, not just the distances between codebooks. */
+    ua = ctx->s.props.properties.limits.minUniformBufferOffsetAlignment;
+    cb_offset[0] = FFALIGN(64*sizeof(float) + 64*sizeof(uint8_t), ua);
+    cb_offset[1] = cb_offset[0] + FFALIGN(cb_size[0], ua);
+    cb_offset[2] = cb_offset[1] + FFALIGN(cb_size[1], ua);
+    cb_offset[3] = cb_offset[2] + FFALIGN(cb_size[2], ua);
+    cb_offset[4] = cb_offset[3] + FFALIGN(cb_size[3], ua);
+
+    /* Setup decode shader */
+    RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[0], 0));
+    RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[1], 1));
+
+    RET(ff_vk_create_buf(&ctx->s, &prv->uniform_buf,
+                         64*sizeof(float) + 64*sizeof(uint8_t) + cb_offset[4] + 256,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    RET(ff_vk_map_buffer(&ctx->s, &prv->uniform_buf, &uniform_buf, 0));
+
+    /* 8x8 scale table */
+    dct_scale_buf = (float *)uniform_buf;
+    for (int i = 0; i < 64; i++)
+        dct_scale_buf[i] = (float)(idct_8_scales[i >> 3] * idct_8_scales[i & 7]);
+
+    /* Scan order */
+    scan_buf = uniform_buf + 64*sizeof(float);
+    for (int i = 0; i < 64; i++)
+        scan_buf[i] = prr->scan[i];
+
+    /* Codebooks */
+    memcpy(uniform_buf + cb_offset[0], ff_prores_raw_dc_cb,
+           sizeof(ff_prores_raw_dc_cb));
+    memcpy(uniform_buf + cb_offset[1], ff_prores_raw_ac_cb,
+           sizeof(ff_prores_raw_ac_cb));
+    memcpy(uniform_buf + cb_offset[2], ff_prores_raw_rn_cb,
+           sizeof(ff_prores_raw_rn_cb));
+    memcpy(uniform_buf + cb_offset[3], ff_prores_raw_ln_cb,
+           sizeof(ff_prores_raw_ln_cb));
+
+    RET(ff_vk_unmap_buffer(&ctx->s, &prv->uniform_buf, 1));
+
+    /* Bind the tables to descriptor set 1 of both shaders */
+    for (int i = 0; i < 2; i++) {
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                            &prv->decode[i], 1, 0, 0,
+                                            &prv->uniform_buf,
+                                            0, 64*sizeof(float),
+                                            VK_FORMAT_UNDEFINED));
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                            &prv->decode[i], 1, 1, 0,
+                                            &prv->uniform_buf,
+                                            64*sizeof(float), 64*sizeof(uint8_t),
+                                            VK_FORMAT_UNDEFINED));
+        for (int j = 0; j < 4; j++)
+            RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                &prv->decode[i], 1, 2 + j, 0,
+                                                &prv->uniform_buf,
+                                                cb_offset[j], cb_size[j],
+                                                VK_FORMAT_UNDEFINED));
+    }
+
+fail:
+    /* The compiler is only needed during initialization */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/*
+ * Destructor for the per-frame hwaccel state.
+ *
+ * Besides the generic Vulkan picture data, this releases the tile data
+ * buffer. end_frame normally hands that reference to the execution context
+ * and clears the pointer, but if the frame is dropped before that point the
+ * reference is still held here and would otherwise never be released.
+ */
+static void vk_prores_raw_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *dev_ctx = _hwctx.nc;
+
+    ProResRAWVulkanDecodePicture *pp = data;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    ff_vk_decode_free_frame(dev_ctx, vp);
+
+    /* No-op when the reference was already moved out (pointer is NULL) */
+    av_buffer_unref(&pp->tile_data);
+}
+
+/* Registration entry for the ProRes RAW Vulkan hwaccel. Frame-level
+ * callbacks are specific to this file; the remaining ones are the generic
+ * ff_vk_* Vulkan decode helpers. */
+const FFHWAccel ff_prores_raw_vulkan_hwaccel = {
+ .p.name = "prores_raw_vulkan",
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_PRORES_RAW,
+ .p.pix_fmt = AV_PIX_FMT_VULKAN,
+ .start_frame = &vk_prores_raw_start_frame,
+ .decode_slice = &vk_prores_raw_decode_slice,
+ .end_frame = &vk_prores_raw_end_frame,
+ .free_frame_priv = &vk_prores_raw_free_frame_priv,
+ /* Size of the per-frame state allocated for each picture */
+ .frame_priv_data_size = sizeof(ProResRAWVulkanDecodePicture),
+ .init = &vk_decode_prores_raw_init,
+ .update_thread_context = &ff_vk_update_thread_context,
+ .decode_params = &ff_vk_params_invalidate,
+ .flush = &ff_vk_decode_flush,
+ .uninit = &ff_vk_decode_uninit,
+ .frame_params = &ff_vk_frame_params,
+ .priv_data_size = sizeof(FFVulkanDecodeContext),
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-07-10 15:15 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-10 15:13 [FFmpeg-devel] [PATCH 1/7] vf_libplacebo: add support for specifying a LUT for the input Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 2/7] hwcontext_vulkan: temporarily disable host_image_copy Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 3/7] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 4/7] lavc/vulkan/common: sign-ify lengths Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 5/7] lavc: add codec ID and profiles for ProRes RAW Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 6/7] lavc: add a ProRes RAW decoder Lynne
2025-07-10 15:13 ` [FFmpeg-devel] [PATCH 7/7] lavc: add a ProRes RAW Vulkan hwaccel Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git