Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input
@ 2025-07-12 18:51 Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy Lynne
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This makes it possible to apply Adobe .cube files to inputs.
---
 doc/filters.texi            | 30 ++++++++++++++++++++++++++++++
 libavfilter/vf_libplacebo.c | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/doc/filters.texi b/doc/filters.texi
index ed2956fe75..13add0ff01 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -16321,6 +16321,36 @@ Render frames with rounded corners. The value, given as a float ranging from
 square to fully circular. In other words, it gives the radius divided by half
 the smaller side length. Defaults to @code{0.0}.
 
+@item lut
+Specifies a custom LUT (in Adobe .cube format) to apply to the colors
+as part of color conversion. The exact interpretation depends on the value
+of @option{lut_type}.
+
+@item lut_type
+Controls the interpretation of color values fed to and from the LUT
+specified as @option{lut}. Valid values are:
+
+@table @samp
+@item auto
+Chooses the interpretation of the LUT automatically from tagged
+metadata, and otherwise falls back to @samp{native}. (Default)
+
+@item native
+Applied to raw image contents in its native RGB colorspace (non-linear
+light), before conversion to the output color space.
+
+@item normalized
+Applied to the normalized RGB image contents, in linear light, before
+conversion to the output color space.
+
+@item conversion
+Fully replaces the conversion from the image color space to the output
+color space. If such a LUT is present, it has the highest priority, and
+overrides any ICC profiles, as well as options related to tone mapping
+and output colorimetry (@option{color_primaries}, @option{color_trc}).
+
+@end table
+
 @item extra_opts
 Pass extra libplacebo internal configuration options. These can be specified
 as a list of @var{key}=@var{value} pairs separated by ':'. The following example
diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c
index 475030c80d..cbdbe3e665 100644
--- a/libavfilter/vf_libplacebo.c
+++ b/libavfilter/vf_libplacebo.c
@@ -159,6 +159,7 @@ typedef struct LibplaceboContext {
     pl_vulkan vulkan;
     pl_gpu gpu;
     pl_tex tex[4];
+    struct pl_custom_lut *lut;
 
     /* input state */
     LibplaceboInput *inputs;
@@ -184,6 +185,8 @@ typedef struct LibplaceboContext {
     AVExpr *pos_x_pexpr, *pos_y_pexpr, *pos_w_pexpr, *pos_h_pexpr;
     float pad_crop_ratio;
     float corner_rounding;
+    char *lut_filename;
+    enum pl_lut_type lut_type;
     int force_original_aspect_ratio;
     int force_divisible_by;
     int reset_sar;
@@ -371,6 +374,26 @@ static int find_scaler(AVFilterContext *avctx,
     return AVERROR(EINVAL);
 }
 
+/* Load the .cube file named by s->lut_filename and parse it into s->lut.
+ * Returns 0, the av_file_map() error, or AVERROR(EINVAL) if parsing fails. */
+static int parse_custom_lut(LibplaceboContext *s)
+{
+    uint8_t *lutbuf;
+    size_t lutbuf_size;
+    int ret = av_file_map(s->lut_filename, &lutbuf, &lutbuf_size, 0, s);
+
+    if (ret < 0) {
+        av_log(s, AV_LOG_ERROR, "The LUT file '%s' could not be read: %s\n",
+               s->lut_filename, av_err2str(ret));
+        return ret;
+    }
+
+    /* The mapping holds raw bytes; libplacebo's parser expects char data. */
+    s->lut = pl_lut_parse_cube(s->log, (const char *)lutbuf, lutbuf_size);
+    av_file_unmap(lutbuf, lutbuf_size);
+    return s->lut ? 0 : AVERROR(EINVAL);
+}
+
 static int update_settings(AVFilterContext *ctx)
 {
     int err = 0;
@@ -729,6 +752,9 @@ static int init_vulkan(AVFilterContext *avctx, const AVVulkanDeviceContext *hwct
         RET(parse_shader(avctx, buf, buf_len));
     }
 
+    if (s->lut_filename)
+        RET(parse_custom_lut(s));
+
     /* Initialize inputs */
     s->inputs = av_calloc(s->nb_inputs, sizeof(*s->inputs));
     if (!s->inputs)
@@ -757,6 +783,7 @@ static void libplacebo_uninit(AVFilterContext *avctx)
         av_freep(&s->inputs);
     }
 
+    pl_lut_free(&s->lut);
 #if PL_API_VER >= 351
     pl_cache_destroy(&s->cache);
 #endif
@@ -1005,6 +1032,8 @@ static bool map_frame(pl_gpu gpu, pl_tex *tex,
         .tex        = tex,
         .map_dovi   = s->apply_dovi,
     ));
+    out->lut = s->lut;
+    out->lut_type = s->lut_type;
 
     if (!s->apply_filmgrain)
         out->film_grain.type = PL_FILM_GRAIN_NONE;
@@ -1406,6 +1435,13 @@ static const AVOption libplacebo_options[] = {
     { "pad_crop_ratio", "ratio between padding and cropping when normalizing SAR (0=pad, 1=crop)", OFFSET(pad_crop_ratio), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, 1.0, DYNAMIC },
     { "fillcolor", "Background fill color", OFFSET(fillcolor), AV_OPT_TYPE_COLOR, {.str = "black@0"}, .flags = DYNAMIC },
     { "corner_rounding", "Corner rounding radius", OFFSET(corner_rounding), AV_OPT_TYPE_FLOAT, {.dbl = 0.0}, 0.0, 1.0, .flags = DYNAMIC },
+    { "lut", "Path to custom LUT file to apply", OFFSET(lut_filename), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = STATIC },
+    { "lut_type", "Application mode of the custom LUT", OFFSET(lut_type), AV_OPT_TYPE_INT, { .i64 = PL_LUT_UNKNOWN }, 0, PL_LUT_CONVERSION, STATIC, .unit = "lut_type" },
+        { "auto",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_UNKNOWN }, 0, 0, STATIC, .unit = "lut_type" },
+        { "native", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_NATIVE }, 0, 0, STATIC, .unit = "lut_type" },
+        { "normalized", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_NORMALIZED }, 0, 0, STATIC, .unit = "lut_type" },
+        { "conversion", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = PL_LUT_CONVERSION }, 0, 0, STATIC, .unit = "lut_type" },
+
     { "extra_opts", "Pass extra libplacebo-specific options using a :-separated list of key=value pairs", OFFSET(extra_opts), AV_OPT_TYPE_DICT, .flags = DYNAMIC },
 #if PL_API_VER >= 351
     { "shader_cache",  "Set shader cache path", OFFSET(shader_cache), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = STATIC },
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout Lynne
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

NVIDIA's support for it is a disaster.
Of no benefit to other vendors.

NVIDIA are working on fixing it, but it may take time.
---
 libavutil/hwcontext_vulkan.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 894bc3dae1..5a9b995eb9 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -613,7 +613,6 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,               FF_VK_EXT_COOP_MATRIX            },
     { VK_EXT_SHADER_OBJECT_EXTENSION_NAME,                    FF_VK_EXT_SHADER_OBJECT          },
     { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME,           FF_VK_EXT_SUBGROUP_ROTATE        },
-    { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME,                  FF_VK_EXT_HOST_IMAGE_COPY        },
 #ifdef VK_KHR_shader_expect_assume
     { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME,             FF_VK_EXT_EXPECT_ASSUME          },
 #endif
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavutil/hwcontext_vulkan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5a9b995eb9..c20ebde36d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -280,6 +280,7 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
     COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics);
     COPY_VAL(vulkan_1_2.vulkanMemoryModel);
     COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);
+    COPY_VAL(vulkan_1_2.uniformBufferStandardLayout);
 
     COPY_VAL(vulkan_1_3.dynamicRendering);
     COPY_VAL(vulkan_1_3.maintenance4);
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths Lynne
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavutil/hwcontext_vulkan.c | 3 +++
 libavutil/vulkan.c           | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c20ebde36d..e818099fdb 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -376,6 +376,9 @@ static const struct FFVkFormatEntry {
     { VK_FORMAT_R32_UINT,   AV_PIX_FMT_GBRAP32,  VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT,   VK_FORMAT_R32_UINT,   VK_FORMAT_R32_UINT,   VK_FORMAT_R32_UINT   } },
     { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
 
+    /* Bayer */
+    { VK_FORMAT_R16_UNORM, AV_PIX_FMT_BAYER_RGGB16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
+
     /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
     { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,                  AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
     { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f36f2bb7cf..5cfb634a62 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1542,7 +1542,7 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
         pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 ||
         pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 ||
         pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96 ||
-        pix_fmt == AV_PIX_FMT_GBRP)
+        pix_fmt == AV_PIX_FMT_GBRP || pix_fmt == AV_PIX_FMT_BAYER_RGGB16)
         return 1;
     return 0;
 }
@@ -1699,7 +1699,8 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
     case AV_PIX_FMT_YUVA422P16:
     case AV_PIX_FMT_YUVA444P10:
     case AV_PIX_FMT_YUVA444P12:
-    case AV_PIX_FMT_YUVA444P16: {
+    case AV_PIX_FMT_YUVA444P16:
+    case AV_PIX_FMT_BAYER_RGGB16: {
         const char *rep_tab[] = {
             [FF_VK_REP_NATIVE] = "r16ui",
             [FF_VK_REP_FLOAT] = "r16f",
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (2 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW Lynne
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This makes left_bits return useful data rather than overflowing, and
also saves some 64-bit integer operations, which is still always a plus sadly.
---
 libavcodec/vulkan/common.comp         | 22 +++++++++++-----------
 libavcodec/vulkan/ffv1_dec_setup.comp |  2 +-
 libavcodec/vulkan/ffv1_vlc.comp       |  4 ++--
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index 10af9c0623..6825693fa3 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -193,8 +193,8 @@ struct GetBitContext {
     uint64_t buf_end;
 
     uint64_t bits;
-    uint bits_valid;
-    uint size_in_bits;
+    int bits_valid;
+    int size_in_bits;
 };
 
 #define LOAD64()                                       \
@@ -216,11 +216,11 @@ struct GetBitContext {
         gb.bits_valid += 32;                                      \
     }
 
-void init_get_bits(inout GetBitContext gb, u8buf data, uint64_t len)
+void init_get_bits(inout GetBitContext gb, u8buf data, int len)
 {
     gb.buf = gb.buf_start = uint64_t(data);
     gb.buf_end = uint64_t(data) + len;
-    gb.size_in_bits = uint(len) * 8;
+    gb.size_in_bits = len * 8;
 
     /* Preload */
     LOAD64()
@@ -237,7 +237,7 @@ bool get_bit(inout GetBitContext gb)
     return val;
 }
 
-uint get_bits(inout GetBitContext gb, uint n)
+uint get_bits(inout GetBitContext gb, int n)
 {
     if (n == 0)
         return 0;
@@ -251,7 +251,7 @@ uint get_bits(inout GetBitContext gb, uint n)
     return val;
 }
 
-uint show_bits(inout GetBitContext gb, uint n)
+uint show_bits(inout GetBitContext gb, int n)
 {
     if (n > gb.bits_valid)
         RELOAD32()
@@ -259,7 +259,7 @@ uint show_bits(inout GetBitContext gb, uint n)
     return uint(gb.bits >> (64 - n));
 }
 
-void skip_bits(inout GetBitContext gb, uint n)
+void skip_bits(inout GetBitContext gb, int n)
 {
     if (n > gb.bits_valid)
         RELOAD32()
@@ -268,12 +268,12 @@ void skip_bits(inout GetBitContext gb, uint n)
     gb.bits_valid -= n;
 }
 
-uint tell_bits(in GetBitContext gb)
+int tell_bits(in GetBitContext gb)
 {
-    return uint(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
+    return int(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
 }
 
-uint left_bits(in GetBitContext gb)
+int left_bits(in GetBitContext gb)
 {
-    return gb.size_in_bits - uint(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
+    return gb.size_in_bits - int(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
 }
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
index 671f28e7e7..5da09df21c 100644
--- a/libavcodec/vulkan/ffv1_dec_setup.comp
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -107,7 +107,7 @@ void golomb_init(inout SliceContext sc)
 
     uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
     init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
-                  sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count);
+                  int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));
 }
 
 void main(void)
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
index d374e5a069..32a6ca9f37 100644
--- a/libavcodec/vulkan/ffv1_vlc.comp
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -121,7 +121,7 @@ Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
     return set_sr_golomb(code, k, 12, bits);
 }
 
-uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+uint get_ur_golomb(inout GetBitContext gb, int k, int limit, int esc_len)
 {
     for (uint i = 0; i < 12; i++)
         if (get_bit(gb))
@@ -130,7 +130,7 @@ uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
     return get_bits(gb, esc_len) + 11;
 }
 
-int get_sr_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+int get_sr_golomb(inout GetBitContext gb, int k, int limit, int esc_len)
 {
     int v = int(get_ur_golomb(gb, k, limit, esc_len));
     return (v >> 1) ^ -(v & 1);
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (3 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser Lynne
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavcodec/codec_desc.c | 8 ++++++++
 libavcodec/codec_id.h   | 1 +
 libavcodec/defs.h       | 3 +++
 libavcodec/profiles.c   | 6 ++++++
 libavcodec/profiles.h   | 1 +
 libavformat/isom_tags.c | 4 ++++
 6 files changed, 23 insertions(+)

diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index dae2296689..36cbaf288e 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1992,6 +1992,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Advanced Professional Video"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_PRORES_RAW,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "prores_raw",
+        .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes RAW"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_prores_raw_profiles),
+    },
 
     /* various PCM "codecs" */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index d00d3fe121..adf263f6b0 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -330,6 +330,7 @@ enum AVCodecID {
     AV_CODEC_ID_RV60,
     AV_CODEC_ID_JPEGXL_ANIM,
     AV_CODEC_ID_APV,
+    AV_CODEC_ID_PRORES_RAW,
 
     /* various PCM "codecs" */
     AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
diff --git a/libavcodec/defs.h b/libavcodec/defs.h
index 8ce5d424c9..b13e983b13 100644
--- a/libavcodec/defs.h
+++ b/libavcodec/defs.h
@@ -185,6 +185,9 @@
 #define AV_PROFILE_PRORES_4444      4
 #define AV_PROFILE_PRORES_XQ        5
 
+#define AV_PROFILE_PRORES_RAW       0
+#define AV_PROFILE_PRORES_RAW_HQ    1
+
 #define AV_PROFILE_ARIB_PROFILE_A 0
 #define AV_PROFILE_ARIB_PROFILE_C 1
 
diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
index 991f24135d..2cf733b0a2 100644
--- a/libavcodec/profiles.c
+++ b/libavcodec/profiles.c
@@ -182,6 +182,12 @@ const AVProfile ff_prores_profiles[] = {
     { AV_PROFILE_UNKNOWN }
 };
 
+const AVProfile ff_prores_raw_profiles[] = {
+    { AV_PROFILE_PRORES_RAW,    "RAW" },
+    { AV_PROFILE_PRORES_RAW_HQ, "HQ"  },
+    { AV_PROFILE_UNKNOWN } /* end-of-list marker, same as ff_prores_profiles */
+};
+
 const AVProfile ff_mjpeg_profiles[] = {
     { AV_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,            "Baseline"    },
     { AV_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT, "Sequential"  },
diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
index 4892388149..6f4011ff0c 100644
--- a/libavcodec/profiles.h
+++ b/libavcodec/profiles.h
@@ -74,6 +74,7 @@ extern const AVProfile ff_vp9_profiles[];
 extern const AVProfile ff_av1_profiles[];
 extern const AVProfile ff_sbc_profiles[];
 extern const AVProfile ff_prores_profiles[];
+extern const AVProfile ff_prores_raw_profiles[];
 extern const AVProfile ff_mjpeg_profiles[];
 extern const AVProfile ff_arib_caption_profiles[];
 extern const AVProfile ff_evc_profiles[];
diff --git a/libavformat/isom_tags.c b/libavformat/isom_tags.c
index 69174b4a3f..151c42e9e6 100644
--- a/libavformat/isom_tags.c
+++ b/libavformat/isom_tags.c
@@ -240,6 +240,10 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
     { AV_CODEC_ID_PRORES, MKTAG('a', 'p', 'c', 'o') }, /* Apple ProRes 422 Proxy */
     { AV_CODEC_ID_PRORES, MKTAG('a', 'p', '4', 'h') }, /* Apple ProRes 4444 */
     { AV_CODEC_ID_PRORES, MKTAG('a', 'p', '4', 'x') }, /* Apple ProRes 4444 XQ */
+
+    { AV_CODEC_ID_PRORES_RAW, MKTAG('a', 'p', 'r', 'n') }, /* Apple ProRes RAW */
+    { AV_CODEC_ID_PRORES_RAW, MKTAG('a', 'p', 'r', 'h') }, /* Apple ProRes RAW HQ */
+
     { AV_CODEC_ID_FLIC,   MKTAG('f', 'l', 'i', 'c') },
 
     { AV_CODEC_ID_AIC, MKTAG('i', 'c', 'o', 'd') },
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (4 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder Lynne
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

Simple parser that only parses frame information.
This helps avoid requiring the software decoder on init to decode a
single frame, since the decoder can be quite slow.
---
 libavcodec/Makefile            |  1 +
 libavcodec/parsers.c           |  1 +
 libavcodec/prores_raw_parser.c | 72 ++++++++++++++++++++++++++++++++++
 libavformat/mov.c              |  1 +
 4 files changed, 75 insertions(+)
 create mode 100644 libavcodec/prores_raw_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 215577f7c9..78e099ce5d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1250,6 +1250,7 @@ OBJS-$(CONFIG_MPEGVIDEO_PARSER)        += mpegvideo_parser.o    \
 OBJS-$(CONFIG_OPUS_PARSER)             += vorbis_data.o
 OBJS-$(CONFIG_PNG_PARSER)              += png_parser.o
 OBJS-$(CONFIG_PNM_PARSER)              += pnm_parser.o pnm.o
+OBJS-$(CONFIG_PRORES_RAW_PARSER)       += prores_raw_parser.o
 OBJS-$(CONFIG_QOI_PARSER)              += qoi_parser.o
 OBJS-$(CONFIG_RV34_PARSER)             += rv34_parser.o
 OBJS-$(CONFIG_SBC_PARSER)              += sbc_parser.o
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index 21164f3751..b12c48f79f 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -68,6 +68,7 @@ extern const AVCodecParser ff_mpegvideo_parser;
 extern const AVCodecParser ff_opus_parser;
 extern const AVCodecParser ff_png_parser;
 extern const AVCodecParser ff_pnm_parser;
+extern const AVCodecParser ff_prores_raw_parser;
 extern const AVCodecParser ff_qoi_parser;
 extern const AVCodecParser ff_rv34_parser;
 extern const AVCodecParser ff_sbc_parser;
diff --git a/libavcodec/prores_raw_parser.c b/libavcodec/prores_raw_parser.c
new file mode 100644
index 0000000000..a286d674b2
--- /dev/null
+++ b/libavcodec/prores_raw_parser.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "parser.h"
+#include "bytestream.h"
+
+/* Fill in stream parameters from a ProRes RAW frame header (analysis only). */
+static int prores_raw_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                            const uint8_t **poutbuf, int *poutbuf_size,
+                            const uint8_t *buf, int buf_size)
+{
+    GetByteContext gb;
+    uint32_t header_size;
+    int version;
+
+    /* This parser only performs analysis: hand the packet through before any
+     * early return, so callers never see unset outputs or a dropped packet. */
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    bytestream2_init(&gb, buf, buf_size);
+    if (bytestream2_get_be32(&gb) != buf_size ||             /* Packet size */
+        bytestream2_get_le32(&gb) != MKTAG('p','r','r','f')) /* Frame header */
+        return buf_size;
+
+    header_size = bytestream2_get_be16(&gb) + 8;
+    version     = bytestream2_get_be16(&gb);
+    if (version > 1) {
+        avpriv_request_sample(avctx, "Version %d", version);
+        return buf_size;
+    }
+
+    if (header_size < (version == 0 ? 144 : 96))
+        return buf_size;
+
+    /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atomos) */
+    bytestream2_skip(&gb, 4);
+
+    s->width = bytestream2_get_be16(&gb);
+    s->height = bytestream2_get_be16(&gb);
+    s->coded_width  = FFALIGN(s->width, 16);
+    s->coded_height = FFALIGN(s->height, 16);
+    s->format = AV_PIX_FMT_BAYER_RGGB16;
+    s->key_frame = 1;
+    s->pict_type = AV_PICTURE_TYPE_I;
+    s->field_order = AV_FIELD_PROGRESSIVE;
+    s->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
+
+    return buf_size;
+}
+
+const AVCodecParser ff_prores_raw_parser = {
+    .codec_ids      = { AV_CODEC_ID_PRORES_RAW },
+    .parser_parse   = prores_raw_parse, /* header analysis only */
+};
diff --git a/libavformat/mov.c b/libavformat/mov.c
index c935bbf0bf..39c1d6c286 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2987,6 +2987,7 @@ static int mov_finalize_stsd_codec(MOVContext *c, AVIOContext *pb,
     case AV_CODEC_ID_VP9:
         sti->need_parsing = AVSTREAM_PARSE_FULL;
         break;
+    case AV_CODEC_ID_PRORES_RAW:
     case AV_CODEC_ID_APV:
     case AV_CODEC_ID_EVC:
     case AV_CODEC_ID_AV1:
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (5 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel Lynne
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 configure               |   1 +
 libavcodec/Makefile     |   1 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/prores_raw.c | 525 ++++++++++++++++++++++++++++++++++++++++
 libavcodec/prores_raw.h |  60 +++++
 5 files changed, 588 insertions(+)
 create mode 100644 libavcodec/prores_raw.c
 create mode 100644 libavcodec/prores_raw.h

diff --git a/configure b/configure
index 6df8fa4deb..66e76cd47c 100755
--- a/configure
+++ b/configure
@@ -3087,6 +3087,7 @@ prores_decoder_select="blockdsp idctdsp"
 prores_encoder_select="fdctdsp"
 prores_aw_encoder_select="fdctdsp"
 prores_ks_encoder_select="fdctdsp"
+prores_raw_decoder_select="blockdsp idctdsp"
 qcelp_decoder_select="lsp"
 qdm2_decoder_select="mpegaudiodsp"
 ra_144_decoder_select="audiodsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 78e099ce5d..b825d19e9d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -629,6 +629,7 @@ OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o proresdata.o
 OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc_anatoliy.o proresdata.o
 OBJS-$(CONFIG_PRORES_AW_ENCODER)       += proresenc_anatoliy.o proresdata.o
 OBJS-$(CONFIG_PRORES_KS_ENCODER)       += proresenc_kostya.o proresdata.o
+OBJS-$(CONFIG_PRORES_RAW_DECODER)      += prores_raw.o
 OBJS-$(CONFIG_PRORES_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o
 OBJS-$(CONFIG_PROSUMER_DECODER)        += prosumer.o
 OBJS-$(CONFIG_PSD_DECODER)             += psd.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 7b01453ca2..dcf399e810 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -269,6 +269,7 @@ extern const FFCodec ff_prores_encoder;
 extern const FFCodec ff_prores_decoder;
 extern const FFCodec ff_prores_aw_encoder;
 extern const FFCodec ff_prores_ks_encoder;
+extern const FFCodec ff_prores_raw_decoder;
 extern const FFCodec ff_prosumer_decoder;
 extern const FFCodec ff_psd_decoder;
 extern const FFCodec ff_ptx_decoder;
diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c
new file mode 100644
index 0000000000..b4382ef573
--- /dev/null
+++ b/libavcodec/prores_raw.c
@@ -0,0 +1,525 @@
+/*
+ * ProRes RAW decoder
+ * Copyright (c) 2023-2025 Paul B Mahol
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/mem.h"
+
+#define CACHED_BITSTREAM_READER !ARCH_X86_32
+
+#include "config_components.h"
+#include "avcodec.h"
+#include "bytestream.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "get_bits.h"
+#include "idctdsp.h"
+#include "proresdata.h"
+#include "thread.h"
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+
+#include "prores_raw.h"
+
+/*
+ * One-time decoder setup: fixed stream properties, DSP contexts and the
+ * coefficient scan order used by decode_comp().
+ */
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    uint8_t perm[64];
+    ProResRAWContext *ctx = avctx->priv_data;
+
+    /* No output format has been negotiated yet; decode_frame() does that. */
+    ctx->pix_fmt = AV_PIX_FMT_NONE;
+
+    avctx->colorspace          = AVCOL_SPC_UNSPECIFIED;
+    avctx->color_trc           = AVCOL_TRC_UNSPECIFIED;
+    avctx->color_primaries     = AVCOL_PRI_UNSPECIFIED;
+    avctx->bits_per_raw_sample = 12;
+
+    ff_proresdsp_init(&ctx->prodsp, avctx->bits_per_raw_sample);
+    ff_blockdsp_init(&ctx->bdsp);
+
+    /* Permute the scan table to match the layout the selected IDCT expects. */
+    ff_init_scantable_permutation(perm, ctx->prodsp.idct_permutation_type);
+    ff_permute_scantable(ctx->scan, ff_prores_interlaced_scan, perm);
+
+    return 0;
+}
+
+/**
+ * Read one variable-length code from the bitstream.
+ *
+ * The codebook descriptor packs three parameters:
+ *   bits 0-3: Rice order,
+ *   bits 4-7: exp-Golomb order,
+ *   bits 8+ : number of leading zeros up to which the Rice form is used.
+ *
+ * @param gb       bit reader positioned at the start of the code
+ * @param codebook packed codebook descriptor as described above
+ * @return the decoded value; 0 (without consuming any bits) if the next
+ *         32 bits are all zero or the code would be longer than 32 bits
+ */
+static int16_t get_value(GetBitContext *gb, int16_t codebook)
+{
+    const int16_t switch_bits = codebook >> 8;
+    const int16_t rice_order  = codebook & 0xf;
+    const int16_t exp_order   = (codebook >> 4) & 0xf;
+    int16_t q, bits;
+
+    uint32_t b = show_bits_long(gb, 32);
+    if (!b)
+        return 0;
+    q = ff_clz(b);
+
+    /* No leading zero: plain rice_order-bit value after the marker bit. */
+    if (b & 0x80000000) {
+        skip_bits_long(gb, 1 + rice_order);
+        return (b & 0x7FFFFFFF) >> (31 - rice_order);
+    }
+
+    /* Rice code: q zeros, a marker bit, then rice_order remainder bits. */
+    if (q <= switch_bits) {
+        skip_bits_long(gb, 1 + rice_order + q);
+        return (q << rice_order) +
+                (((b << (q + 1)) >> 1) >> (31 - rice_order));
+    }
+
+    /* Exp-Golomb escape. */
+    bits = exp_order + (q << 1) - switch_bits;
+    /* A code longer than the 32 bits peeked above cannot be decoded, and
+     * "32 - bits" would become a negative shift count. Return 0 rather than
+     * an error code so callers never see an out-of-range value. */
+    if (bits > 32)
+        return 0;
+    skip_bits_long(gb, bits);
+    return (b >> (32 - bits)) +
+           ((switch_bits + 1) << rice_order) -
+           (1 << exp_order);
+}
+
+/* Maps a decoded DC code to its magnitude / DC codebook index: (x + 1) / 2. */
+#define TODCCODEBOOK(x) (((x) + 1) >> 1)
+
+/* align_tile_w[i] is the number of set bits in i, for i in 0..15. */
+static const uint8_t align_tile_w[16] = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+};
+
+/*
+ * Adaptive codebook tables. Every entry is a packed descriptor understood by
+ * get_value(): bits 0-3 Rice order, bits 4-7 exp-Golomb order, bits 8+ switch
+ * bits. Lookups clamp the index to the last entry.
+ */
+
+/* DC codebooks, indexed by TODCCODEBOOK(previous DC code). */
+const uint8_t ff_prores_raw_dc_cb[13] = {
+    16, 33, 50, 51, 51, 51, 68, 68, 68, 68, 68, 68, 118,
+};
+
+/* AC level codebooks, indexed by the previously decoded AC code. */
+const int16_t ff_prores_raw_ac_cb[95] = {
+      0, 529, 273, 273, 546, 546, 546, 290, 290, 290, 563, 563,
+    563, 563, 563, 563, 563, 563, 307, 307, 580, 580, 580, 580,
+    580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580, 580,
+    580, 580, 580, 580, 580, 580, 853, 853, 853, 853, 853, 853,
+    853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+    853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+    853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 853,
+    853, 853, 853, 853, 853, 853, 853, 853, 853, 853, 358
+};
+
+/* Zero-run codebooks, indexed by the previously decoded run code. */
+const int16_t ff_prores_raw_rn_cb[28] = {
+    512, 256, 0, 0, 529, 529, 273, 273, 17, 17, 33, 33, 546,
+    34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 50, 50, 68,
+};
+
+/* Codebooks for the length of a run of coded coefficients, indexed by the
+ * AC code that followed the last zero run. */
+const int16_t ff_prores_raw_ln_cb[15] = {
+    256, 273, 546, 546, 290, 290, 1075, 1075, 563, 563, 563, 563, 563, 563, 51
+};
+
+/**
+ * Decode one colour component of a tile and store it into the Bayer frame.
+ *
+ * @param avctx     codec context; priv_data is a ProResRAWContext
+ * @param tile      tile being decoded, supplies the top-left output position
+ * @param frame     destination frame, only plane 0 is written
+ * @param data      start of this component's entropy-coded data
+ * @param size      length of data in bytes
+ * @param component 0..3; bit 0 selects the column within each 2x2 cell and
+ *                  components above 1 start one line lower
+ * @param qmat      64-entry dequantisation matrix handed to idct_put()
+ * @return 0 on success, a negative AVERROR code if the bit reader cannot be
+ *         initialised
+ */
+static int decode_comp(AVCodecContext *avctx, TileContext *tile,
+                       AVFrame *frame, const uint8_t *data, int size,
+                       int component, int16_t *qmat)
+{
+    int ret;
+    ProResRAWContext *s = avctx->priv_data;
+    const ptrdiff_t linesize = frame->linesize[0] >> 1;
+    uint16_t *dst = (uint16_t *)(frame->data[0] + tile->y*frame->linesize[0] + 2*tile->x);
+
+    int idx;
+    const int w = FFMIN(s->tw, avctx->width - tile->x) / 2;
+    const int nb_blocks = w / 8;
+    /* ff_clz(0) is undefined; a tile too narrow for a block is skipped below. */
+    const int log2_nb_blocks = nb_blocks > 0 ? 31 - ff_clz(nb_blocks) : 0;
+    const int block_mask = (1 << log2_nb_blocks) - 1;
+    /* Coefficients are interleaved over 1 << log2_nb_blocks blocks. This
+     * equals 64 * nb_blocks when nb_blocks is a power of two and keeps the
+     * scan[] index below 64 when it is not.
+     * NOTE(review): how the format lays out tiles whose block count is not a
+     * power of two is not visible here - confirm against reference streams. */
+    const int nb_codes = 64 << log2_nb_blocks;
+
+    LOCAL_ALIGNED_32(int16_t, block, [64*16]);
+    LOCAL_ALIGNED_32(int16_t, out, [64]);
+
+    int16_t dc;
+    int16_t prev_dc = 0;
+    int16_t sign = 0;
+    int16_t dc_add = 0;
+    int16_t dc_codebook;
+
+    int16_t ac, rn, ln;
+    int16_t ac_codebook = 49;
+    int16_t rn_codebook = 0;
+    int16_t ln_codebook = 66;
+
+    const uint8_t *scan = s->scan;
+    GetBitContext gb;
+
+    if (component > 1)
+        dst += linesize;
+    dst += component & 1;
+
+    if ((ret = init_get_bits8(&gb, data, size)) < 0)
+        return ret;
+
+    /* Fewer than 16 pixels left in this row: nothing to decode or store. */
+    if (nb_blocks <= 0)
+        return 0;
+
+    for (int n = 0; n < nb_blocks; n++)
+        s->bdsp.clear_block(block + n*64);
+
+    /* Special handling for first block */
+    dc = get_value(&gb, 700);
+    prev_dc = (dc >> 1) ^ -(dc & 1);
+    block[0] = prev_dc;
+
+    /* Remaining DC coefficients are coded as differences to the previous one. */
+    for (int n = 1; n < nb_blocks; n++) {
+        if (get_bits_left(&gb) <= 0)
+            break;
+
+        if ((n & 15) == 1)
+            dc_codebook = 100;
+        else
+            dc_codebook = ff_prores_raw_dc_cb[FFMIN(TODCCODEBOOK(dc),
+                                                    FF_ARRAY_ELEMS(ff_prores_raw_dc_cb) - 1)];
+
+        dc = get_value(&gb, dc_codebook);
+
+        /* The low bit of the code flips the sign relative to the last delta. */
+        sign = sign ^ (dc & 1);
+        dc_add = (-sign ^ TODCCODEBOOK(dc)) + sign;
+        sign = dc_add < 0;
+        prev_dc += dc_add;
+
+        block[n*64] = prev_dc;
+    }
+
+    /* AC coefficients: alternating runs of coded values and runs of zeros.
+     * Code number n belongs to block (n & block_mask), scan position
+     * (n >> log2_nb_blocks). */
+    for (int n = nb_blocks; n <= nb_codes;) {
+        if (get_bits_left(&gb) <= 0)
+            break;
+
+        ln = get_value(&gb, ln_codebook);
+
+        for (int i = 0; i < ln; i++) {
+            if (get_bits_left(&gb) <= 0)
+                break;
+
+            if ((n + i) >= nb_codes)
+                break;
+
+            ac = get_value(&gb, ac_codebook);
+            ac_codebook = ff_prores_raw_ac_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ac_cb) - 1)];
+            sign = -get_bits1(&gb);
+
+            idx = scan[(n + i) >> log2_nb_blocks] + (((n + i) & block_mask) << 6);
+            block[idx] = ((ac + 1) ^ sign) - sign;
+        }
+
+        n += ln;
+        if (n >= nb_codes)
+            break;
+
+        rn = get_value(&gb, rn_codebook);
+        rn_codebook = ff_prores_raw_rn_cb[FFMIN(rn, FF_ARRAY_ELEMS(ff_prores_raw_rn_cb) - 1)];
+
+        n += rn + 1;
+        if (n >= nb_codes)
+            break;
+
+        if (get_bits_left(&gb) <= 0)
+            break;
+
+        ac = get_value(&gb, ac_codebook);
+        sign = -get_bits1(&gb);
+
+        idx = scan[n >> log2_nb_blocks] + ((n & block_mask) << 6);
+        block[idx] = ((ac + 1) ^ sign) - sign;
+
+        ac_codebook = ff_prores_raw_ac_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ac_cb) - 1)];
+        ln_codebook = ff_prores_raw_ln_cb[FFMIN(ac, FF_ARRAY_ELEMS(ff_prores_raw_ln_cb) - 1)];
+
+        n++;
+    }
+
+    /* IDCT each block and scatter it into every second column and row. */
+    for (int n = 0; n < nb_blocks; n++) {
+        uint16_t *ptr = dst + n*16;
+
+        s->prodsp.idct_put(out, 16, block + n*64, qmat);
+        for (int i = 0; i < 8; i++) {
+            for (int j = 0; j < 8; j++)
+                ptr[j * 2] = out[8*i+j] << 4; // 4 bits of LSB padding
+            ptr += 2 * linesize;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Decode all four components of one tile.
+ *
+ * The tile starts with an 8-byte header: a 16-bit quantiser scale followed by
+ * the 16-bit sizes of the first three component streams. The fourth stream
+ * takes whatever is left of the tile.
+ *
+ * @param avctx codec context
+ * @param tile  tile to decode; tile->gb spans the tile's bytes
+ * @param frame destination frame
+ * @return 0 on success or if the tile lies entirely right of the picture,
+ *         AVERROR_INVALIDDATA if the component sizes exceed the tile,
+ *         otherwise the error returned by decode_comp()
+ */
+static int decode_tile(AVCodecContext *avctx, TileContext *tile,
+                       AVFrame *frame)
+{
+    /* Component handled by each of the four streams, in bitstream order. */
+    static const uint8_t comp_order[4] = { 2, 1, 3, 0 };
+    int ret;
+    GetByteContext *gb = &tile->gb;
+    const uint8_t *data;
+
+    uint16_t qscale;
+    LOCAL_ALIGNED_32(int16_t, qmat, [64]);
+    int size[4];
+
+    if (tile->x >= avctx->width)
+        return 0;
+
+    /* Tile header */
+    qscale  = bytestream2_get_be16(gb);
+    size[0] = bytestream2_get_be16(gb);
+    size[1] = bytestream2_get_be16(gb);
+    size[2] = bytestream2_get_be16(gb);
+    size[3] = bytestream2_size(gb) - size[0] - size[1] - size[2] - 8;
+    if (size[3] < 0)
+        return AVERROR_INVALIDDATA;
+
+    /* Flat quantisation matrix derived from the tile's quantiser scale. */
+    for (int i = 0; i < 64; i++)
+        qmat[i] = (qscale - 16384) >> 1;
+
+    /* The component streams follow the header back to back. */
+    data = gb->buffer;
+    for (int i = 0; i < 4; i++) {
+        ret = decode_comp(avctx, tile, frame, data, size[i],
+                          comp_order[i], qmat);
+        if (ret < 0) {
+            /* x and y are unsigned, hence %u */
+            av_log(avctx, AV_LOG_ERROR, "tile %u/%u decoding error\n",
+                   tile->x, tile->y);
+            return ret;
+        }
+        data += size[i];
+    }
+
+    return 0;
+}
+
+/* execute2() job: decode tile number n into the frame passed as arg. */
+static int decode_tiles(AVCodecContext *avctx, void *arg,
+                        int n, int thread_nb)
+{
+    ProResRAWContext *ctx = avctx->priv_data;
+    AVFrame *out = arg;
+
+    return decode_tile(avctx, ctx->tiles + n, out);
+}
+
+/*
+ * Negotiate the output pixel format: offers a NONE-terminated list holding
+ * only pix_fmt to ff_get_format() and returns its result unchanged.
+ */
+static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
+                                           enum AVPixelFormat pix_fmt)
+{
+    enum AVPixelFormat pix_fmts[] = {
+        pix_fmt,
+        AV_PIX_FMT_NONE,
+    };
+
+    return ff_get_format(avctx, pix_fmts);
+}
+
+/**
+ * Decode one ProRes RAW packet into a frame.
+ *
+ * Parses the frame header, builds the per-tile table from the tile size list
+ * and then either hands the tiles to the active hwaccel or decodes them in
+ * software through execute2().
+ *
+ * @param avctx         codec context
+ * @param frame         output frame
+ * @param got_frame_ptr set to 1 once a frame has been produced
+ * @param avpkt         input packet holding exactly one frame
+ * @return avpkt->size on success, a negative AVERROR code on failure
+ */
+static int decode_frame(AVCodecContext *avctx,
+                        AVFrame *frame, int *got_frame_ptr,
+                        AVPacket *avpkt)
+{
+    enum AVPixelFormat pix_fmt;
+    int header_size, ret, w, h, aa, flags;
+    ProResRAWContext *s = avctx->priv_data;
+    GetByteContext gb;
+    uint32_t offset;
+
+    /* The packet starts with its own size. */
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
+    if (bytestream2_get_be32(&gb) != avpkt->size)
+        return AVERROR_INVALIDDATA;
+
+    /* ProRes RAW frame */
+    if (bytestream2_get_le32(&gb) != MKTAG('p','r','r','f'))
+        return AVERROR_INVALIDDATA;
+
+    header_size = bytestream2_get_be16(&gb) + 8;
+    s->version  = bytestream2_get_be16(&gb);
+    if (s->version > 1) {
+        avpriv_request_sample(avctx, "Version %d", s->version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* Minimum header size covering every field read or skipped below. */
+    if (header_size < (s->version == 0 ? 144 : 96))
+        return AVERROR_INVALIDDATA;
+
+    /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atomos) */
+    bytestream2_skip(&gb, 4);
+
+    w = bytestream2_get_be16(&gb);
+    h = bytestream2_get_be16(&gb);
+
+    avctx->coded_width  = FFALIGN(w, 16);
+    avctx->coded_height = FFALIGN(h, 16);
+
+    if (w != avctx->width || h != avctx->height) {
+        av_log(avctx, AV_LOG_WARNING, "picture resolution change: %dx%d -> %dx%d\n",
+               avctx->width, avctx->height, w, h);
+        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
+            return ret;
+    }
+
+    /* Negotiate the output format only when it differs from the cached one. */
+    pix_fmt = AV_PIX_FMT_BAYER_RGGB16;
+    if (pix_fmt != s->pix_fmt) {
+        s->pix_fmt = pix_fmt;
+
+        ret = get_pixel_format(avctx, pix_fmt);
+        if (ret < 0)
+            return ret;
+
+        avctx->pix_fmt = ret;
+    }
+
+    /* Only the flags word is used from the rest of the header. */
+    if (s->version == 0) {
+        bytestream2_skip(&gb, 1 * 4);
+        bytestream2_skip(&gb, 2);
+        bytestream2_skip(&gb, 2);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 3 * 3 * 4);
+        bytestream2_skip(&gb, 4);
+        bytestream2_skip(&gb, 2);
+
+        flags = bytestream2_get_be16(&gb);
+        aa = (flags >> 1) & 7;
+        bytestream2_skip(&gb, 64);
+    } else {
+        bytestream2_skip(&gb, 10);
+        bytestream2_skip(&gb, 48);
+
+        flags = bytestream2_get_be16(&gb);
+        aa = (flags >> 1) & 7;
+        bytestream2_skip(&gb, 16);
+    }
+
+    /* align_tile_w[] has 16 entries, i.e. it covers at most 4 remainder
+     * bits; larger values would index past the end of the table. */
+    if (aa > 4) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported tile alignment %d\n", aa);
+        return AVERROR_INVALIDDATA;
+    }
+
+    bytestream2_skip(&gb, header_size - bytestream2_tell(&gb));
+
+    /* Tile grid: count 16-pixel units, then fold aa of them per tile column,
+     * adding one column per set bit in the remainder. */
+    s->nb_tw = (w + 15) >> 4;
+    s->nb_th = (h + 15) >> 4;
+    s->nb_tw = (s->nb_tw >> aa) + align_tile_w[((1 << aa) - 1) & s->nb_tw];
+    s->nb_tiles = s->nb_tw * s->nb_th;
+    av_log(avctx, AV_LOG_DEBUG, "%dx%d | nb_tiles: %d\n", s->nb_tw, s->nb_th, s->nb_tiles);
+
+    s->tw = s->version == 0 ? 128 : 256;
+    s->th = 16;
+    av_log(avctx, AV_LOG_DEBUG, "tile_size: %dx%d\n", s->tw, s->th);
+
+    /* Make sure the packet can hold the tile size table before allocating
+     * per-tile state sized from header fields. */
+    if (bytestream2_get_bytes_left(&gb) < s->nb_tiles * 2)
+        return AVERROR_INVALIDDATA;
+
+    av_fast_mallocz(&s->tiles, &s->tiles_size, s->nb_tiles * sizeof(*s->tiles));
+    if (!s->tiles)
+        return AVERROR(ENOMEM);
+
+    /* Read tile data offsets */
+    offset = bytestream2_tell(&gb) + s->nb_tiles * 2;
+    for (int n = 0; n < s->nb_tiles; n++) {
+        TileContext *tile = &s->tiles[n];
+
+        int size = bytestream2_get_be16(&gb);
+        if (offset >= avpkt->size)
+            return AVERROR_INVALIDDATA;
+        if (size >= avpkt->size)
+            return AVERROR_INVALIDDATA;
+        if (offset > avpkt->size - size)
+            return AVERROR_INVALIDDATA;
+
+        bytestream2_init(&tile->gb, avpkt->data + offset, size);
+
+        tile->y = (n / s->nb_tw) * s->th;
+        tile->x = (n % s->nb_tw) * s->tw;
+
+        offset += size;
+    }
+
+    ret = ff_thread_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    s->frame = frame;
+
+    /* Start */
+    if (avctx->hwaccel) {
+        const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
+
+        ret = ff_hwaccel_frame_priv_alloc(avctx, &s->hwaccel_picture_private);
+        if (ret < 0)
+            return ret;
+
+        ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto hwaccel_fail;
+
+        for (int n = 0; n < s->nb_tiles; n++) {
+            TileContext *tile = &s->tiles[n];
+            ret = hwaccel->decode_slice(avctx, tile->gb.buffer,
+                                        tile->gb.buffer_end - tile->gb.buffer);
+            if (ret < 0)
+                goto hwaccel_fail;
+        }
+
+        ret = hwaccel->end_frame(avctx);
+        if (ret < 0)
+            goto hwaccel_fail;
+
+        av_refstruct_unref(&s->hwaccel_picture_private);
+    } else {
+        /* The return value is not checked: a failing tile is reported by
+         * decode_tile() but does not fail the whole frame. */
+        avctx->execute2(avctx, decode_tiles, frame, NULL, s->nb_tiles);
+    }
+
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->flags    |= AV_FRAME_FLAG_KEY;
+
+    *got_frame_ptr = 1;
+
+    return avpkt->size;
+
+hwaccel_fail:
+    /* Release the per-frame hwaccel data so a failed frame does not leave a
+     * stale pointer behind for the next call. */
+    av_refstruct_unref(&s->hwaccel_picture_private);
+    return ret;
+}
+
+/* Close callback: release the tile table and any leftover hwaccel frame data. */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    ProResRAWContext *ctx = avctx->priv_data;
+
+    av_freep(&ctx->tiles);
+    av_refstruct_unref(&ctx->hwaccel_picture_private);
+
+    return 0;
+}
+
+#if HAVE_THREADS
+/* Thread-context update callback: copies the cached pixel format from src to dst. */
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    const ProResRAWContext *from = src->priv_data;
+    ProResRAWContext *to = dst->priv_data;
+
+    to->pix_fmt = from->pix_fmt;
+    return 0;
+}
+#endif
+
+/*
+ * Codec registration entry for the ProRes RAW decoder. It wires up the
+ * init/close/decode callbacks above and advertises direct rendering plus
+ * frame and slice threading. The hw_configs list holds only its NULL
+ * terminator here.
+ */
+const FFCodec ff_prores_raw_decoder = {
+    .p.name           = "prores_raw",
+    CODEC_LONG_NAME("Apple ProRes RAW"),
+    .p.type           = AVMEDIA_TYPE_VIDEO,
+    .p.id             = AV_CODEC_ID_PRORES_RAW,
+    .priv_data_size   = sizeof(ProResRAWContext),
+    .init             = decode_init,
+    .close            = decode_end,
+    FF_CODEC_DECODE_CB(decode_frame),
+    UPDATE_THREAD_CONTEXT(update_thread_context),
+    .p.capabilities   = AV_CODEC_CAP_DR1 |
+                        AV_CODEC_CAP_FRAME_THREADS |
+                        AV_CODEC_CAP_SLICE_THREADS,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
+                      FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
+    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+        NULL
+    },
+};
diff --git a/libavcodec/prores_raw.h b/libavcodec/prores_raw.h
new file mode 100644
index 0000000000..f8a54b89db
--- /dev/null
+++ b/libavcodec/prores_raw.h
@@ -0,0 +1,60 @@
+/*
+ * ProRes RAW decoder
+ * Copyright (c) 2025 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PRORES_RAW_H
+#define AVCODEC_PRORES_RAW_H
+
+#include "libavutil/mem_internal.h"
+
+#include "bytestream.h"
+#include "blockdsp.h"
+#include "proresdsp.h"
+
+typedef struct TileContext {
+    GetByteContext gb; ///< byte reader spanning this tile's data in the packet
+    unsigned x, y;     ///< top-left position of the tile in the frame
+} TileContext;
+
+typedef struct ProResRAWContext {
+    ProresDSPContext prodsp; ///< ProRes DSP context (provides idct_put)
+    BlockDSPContext  bdsp;   ///< block DSP context (provides clear_block)
+
+    TileContext *tiles;      ///< per-tile state, rebuilt for every frame
+    unsigned int tiles_size; ///< allocated size of tiles, in bytes
+    int nb_tiles;            ///< nb_tw * nb_th
+    int tw, th;              ///< tile width and height
+    int nb_tw, nb_th;        ///< number of tile columns and rows
+
+    enum AVPixelFormat pix_fmt;    ///< last negotiated software pixel format
+    AVFrame *frame;                ///< frame currently being decoded
+    void *hwaccel_picture_private; ///< per-frame hwaccel data, if any
+
+    int version;             ///< bitstream version read from the frame header
+
+    uint8_t scan[64];        ///< coefficient scan order, permuted for the IDCT
+} ProResRAWContext;
+
+extern const uint8_t ff_prores_raw_dc_cb[13]; ///< DC codebook descriptors
+extern const int16_t ff_prores_raw_ac_cb[95]; ///< AC level codebook descriptors
+extern const int16_t ff_prores_raw_rn_cb[28]; ///< zero-run codebook descriptors
+extern const int16_t ff_prores_raw_ln_cb[15]; ///< coded-run length codebook descriptors
+
+#endif /* AVCODEC_PRORES_RAW_H */
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (6 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization Lynne
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This commit adds a ProRes RAW hardware implementation written in Vulkan.
Both version 0 and version 1 streams are supported.
The implementation is highly parallelized, with 512 invocations dispatched
for every tile; a 5.8k stream generally has around 4k tiles.

Thanks to unlord for the 8-point iDCT.

Benchmark for a generic 5.8k RAW HQ file:
6900XT: 63fps
7900XTX: 84fps
6000 Ada: 120fps
Intel: 9fps
---
 configure                         |   2 +
 libavcodec/Makefile               |   1 +
 libavcodec/hwaccels.h             |   1 +
 libavcodec/prores_raw.c           |   6 +
 libavcodec/vulkan/Makefile        |   3 +
 libavcodec/vulkan/prores_raw.comp | 347 +++++++++++++++++++++
 libavcodec/vulkan_decode.c        |   9 +-
 libavcodec/vulkan_prores_raw.c    | 503 ++++++++++++++++++++++++++++++
 8 files changed, 871 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan/prores_raw.comp
 create mode 100644 libavcodec/vulkan_prores_raw.c

diff --git a/configure b/configure
index 66e76cd47c..eeb81d7aa3 100755
--- a/configure
+++ b/configure
@@ -3290,6 +3290,8 @@ mpeg4_videotoolbox_hwaccel_deps="videotoolbox"
 mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder"
 prores_videotoolbox_hwaccel_deps="videotoolbox"
 prores_videotoolbox_hwaccel_select="prores_decoder"
+prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
+prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
 vc1_d3d11va_hwaccel_deps="d3d11va"
 vc1_d3d11va_hwaccel_select="vc1_decoder"
 vc1_d3d11va2_hwaccel_deps="d3d11va"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b825d19e9d..19fdaa9ad3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1080,6 +1080,7 @@ OBJS-$(CONFIG_VP9_VDPAU_HWACCEL)          += vdpau_vp9.o
 OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)   += videotoolbox_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec.o
 OBJS-$(CONFIG_VVC_VAAPI_HWACCEL)          += vaapi_vvc.o
+OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL)  += vulkan_decode.o vulkan_prores_raw.o
 
 # Objects duplicated from other libraries for shared builds
 SHLIBOBJS                              += log2_tab.o reverse.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 0b2c725247..fb9b850233 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -67,6 +67,7 @@ extern const struct FFHWAccel ff_mpeg4_vaapi_hwaccel;
 extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
 extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
+extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;
diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c
index b4382ef573..cc237c44a7 100644
--- a/libavcodec/prores_raw.c
+++ b/libavcodec/prores_raw.c
@@ -317,6 +317,9 @@ static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
                                            enum AVPixelFormat pix_fmt)
 {
     enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+        AV_PIX_FMT_VULKAN,
+#endif
         pix_fmt,
         AV_PIX_FMT_NONE,
     };
@@ -520,6 +523,9 @@ const FFCodec ff_prores_raw_decoder = {
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
     .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+        HWACCEL_VULKAN(prores_raw),
+#endif
         NULL
     },
 };
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 729cb4f15c..d8e1471fa6 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -14,6 +14,9 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)  +=  vulkan/common.o \
 					vulkan/ffv1_common.o vulkan/ffv1_reset.o \
 					vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
 
+OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
+                                            vulkan/prores_raw.o
+
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
 libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/prores_raw.comp b/libavcodec/vulkan/prores_raw.comp
new file mode 100644
index 0000000000..f5dee35e35
--- /dev/null
+++ b/libavcodec/vulkan/prores_raw.comp
@@ -0,0 +1,347 @@
+/*
+ * ProRes RAW decoder
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Shorthand for an explicit conversion to int16_t. */
+#define I16(x) (int16_t(x))
+
+/* Roles of the three local invocation dimensions within a workgroup. */
+#define COMP_ID (gl_LocalInvocationID.z)
+#define BLOCK_ID (gl_LocalInvocationID.y)
+#define ROW_ID (gl_LocalInvocationID.x)
+
+/* Bit reader state used by the entropy decoding functions below. */
+GetBitContext gb;
+/* Coefficient storage, indexed [component][block][coefficient]. btemp holds
+ * the decoded coefficients and the intermediate IDCT result, block holds the
+ * dequantised input and the final IDCT output. */
+shared float btemp[gl_WorkGroupSize.z][16][64] = { };
+shared float block[gl_WorkGroupSize.z][16][64];
+
+/*
+ * First 1-D 8-point IDCT pass. Reads the 8 consecutive values of row row_id
+ * from block[COMP_ID][BLOCK_ID] and writes the result transposed into
+ * btemp[COMP_ID][BLOCK_ID] (output k goes to index k*8 + row_id).
+ */
+void idct8_horiz(const uint row_id)
+{
+    float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+    float u0, u1, u2, u3, u4, u5, u6, u7;
+
+    /* Input */
+    t0 = block[COMP_ID][BLOCK_ID][8*row_id + 0];
+    u4 = block[COMP_ID][BLOCK_ID][8*row_id + 1];
+    t2 = block[COMP_ID][BLOCK_ID][8*row_id + 2];
+    u6 = block[COMP_ID][BLOCK_ID][8*row_id + 3];
+    t1 = block[COMP_ID][BLOCK_ID][8*row_id + 4];
+    u5 = block[COMP_ID][BLOCK_ID][8*row_id + 5];
+    t3 = block[COMP_ID][BLOCK_ID][8*row_id + 6];
+    u7 = block[COMP_ID][BLOCK_ID][8*row_id + 7];
+
+    /* Embedded scaled inverse 4-point Type-II DCT */
+    u0 = t0 + t1;
+    u1 = t0 - t1;
+    u3 = t2 + t3;
+    u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+    t0 = u0 + u3;
+    t3 = u0 - u3;
+    t1 = u1 + u2;
+    t2 = u1 - u2;
+
+    /* Embedded scaled inverse 4-point Type-IV DST */
+    t5 = u5 + u6;
+    t6 = u5 - u6;
+    t7 = u4 + u7;
+    t4 = u4 - u7;
+    u7 = t7 + t5;
+    u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+    u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+    u4 = u8 - t4*(1.0823922002923939687994464107328f);
+    u6 = u8 - t6*(2.6131259297527530557132863468544f);
+    t7 = u7;
+    t6 = t7 - u6;
+    t5 = t6 + u5;
+    t4 = t5 - u4;
+
+    /* Butterflies */
+    u0 = t0 + t7;
+    u7 = t0 - t7;
+    u6 = t1 + t6;
+    u1 = t1 - t6;
+    u2 = t2 + t5;
+    u5 = t2 - t5;
+    u4 = t3 + t4;
+    u3 = t3 - t4;
+
+    /* Output */
+    btemp[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;
+    btemp[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;
+    btemp[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;
+    btemp[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;
+    btemp[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;
+    btemp[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;
+    btemp[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;
+    btemp[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;
+}
+
+/*
+ * Second 1-D 8-point IDCT pass. Same transform as idct8_horiz(), but reading
+ * row row_id of btemp[COMP_ID][BLOCK_ID] and writing the transposed result
+ * back into block[COMP_ID][BLOCK_ID]. A bias of 0.5 is added to the first
+ * input (presumably for rounding of the final value - TODO confirm).
+ */
+void idct8_vert(const uint row_id)
+{
+    float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+    float u0, u1, u2, u3, u4, u5, u6, u7;
+
+    /* Input */
+    t0 = btemp[COMP_ID][BLOCK_ID][8*row_id + 0] + 0.5f; // NOTE
+    u4 = btemp[COMP_ID][BLOCK_ID][8*row_id + 1];
+    t2 = btemp[COMP_ID][BLOCK_ID][8*row_id + 2];
+    u6 = btemp[COMP_ID][BLOCK_ID][8*row_id + 3];
+    t1 = btemp[COMP_ID][BLOCK_ID][8*row_id + 4];
+    u5 = btemp[COMP_ID][BLOCK_ID][8*row_id + 5];
+    t3 = btemp[COMP_ID][BLOCK_ID][8*row_id + 6];
+    u7 = btemp[COMP_ID][BLOCK_ID][8*row_id + 7];
+
+    /* Embedded scaled inverse 4-point Type-II DCT */
+    u0 = t0 + t1;
+    u1 = t0 - t1;
+    u3 = t2 + t3;
+    u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+    t0 = u0 + u3;
+    t3 = u0 - u3;
+    t1 = u1 + u2;
+    t2 = u1 - u2;
+
+    /* Embedded scaled inverse 4-point Type-IV DST */
+    t5 = u5 + u6;
+    t6 = u5 - u6;
+    t7 = u4 + u7;
+    t4 = u4 - u7;
+    u7 = t7 + t5;
+    u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+    u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+    u4 = u8 - t4*(1.0823922002923939687994464107328f);
+    u6 = u8 - t6*(2.6131259297527530557132863468544f);
+    t7 = u7;
+    t6 = t7 - u6;
+    t5 = t6 + u5;
+    t4 = t5 - u4;
+
+    /* Butterflies */
+    u0 = t0 + t7;
+    u7 = t0 - t7;
+    u6 = t1 + t6;
+    u1 = t1 - t6;
+    u2 = t2 + t5;
+    u5 = t2 - t5;
+    u4 = t3 + t4;
+    u3 = t3 - t4;
+
+    /* Output */
+    block[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;
+    block[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;
+    block[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;
+    block[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;
+    block[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;
+    block[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;
+    block[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;
+    block[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;
+}
+
+/*
+ * Read one variable-length code from the global bit reader gb.
+ *
+ * codebook packs the Rice order in bits 0-3, the exp-Golomb order in
+ * bits 4-7 and the Rice/exp-Golomb switch threshold in bits 8 and up.
+ * Returns 0 without consuming bits if the next 32 bits are all zero or the
+ * code would be longer than 32 bits.
+ */
+int16_t get_value(int16_t codebook)
+{
+    const int16_t switch_bits = codebook >> 8;
+    const int16_t rice_order  = codebook & I16(0xf);
+    const int16_t exp_order   = (codebook >> 4) & I16(0xf);
+
+    uint32_t b = show_bits(gb, 32);
+    if (expectEXT(b == 0, false))
+        return I16(0);
+    /* Number of leading zero bits in b. */
+    int16_t q = I16(31) - I16(findMSB(b));
+
+    /* No leading zero: plain rice_order-bit value after the marker bit. */
+    if ((b & 0x80000000) != 0) {
+        skip_bits(gb, 1 + rice_order);
+        return I16((b & 0x7FFFFFFF) >> (31 - rice_order));
+    }
+
+    /* Rice code: q zeros, a marker bit, then rice_order remainder bits. */
+    if (q <= switch_bits) {
+        skip_bits(gb, q + rice_order + 1);
+        return I16((q << rice_order) +
+                   (((b << (q + 1)) >> 1) >> (31 - rice_order)));
+    }
+
+    /* Exp-Golomb escape. */
+    int16_t bits = exp_order + (q << 1) - switch_bits;
+    /* A code longer than the 32 bits peeked above cannot be decoded, and
+     * "32 - bits" would become a negative shift count. */
+    if (expectEXT(bits > 32, false))
+        return I16(0);
+    skip_bits(gb, bits);
+    return I16((b >> (32 - bits)) +
+               ((switch_bits + 1) << rice_order) -
+               (1 << exp_order));
+}
+
+/* Maps a decoded DC code to its magnitude / DC codebook index: (x + 1) / 2. */
+#define TODCCODEBOOK(x) ((x + 1) >> 1)
+
+/*
+ * Decode the DC coefficient of each of the nb_blocks blocks of the current
+ * component into btemp[COMP_ID][n][0]. The first value is coded directly,
+ * the following ones as differences to the previous DC.
+ */
+void read_dc_vals(const uint nb_blocks)
+{
+    int16_t dc, dc_add;
+    int16_t prev_dc = I16(0), sign = I16(0);
+
+    /* Special handling for first block */
+    dc = get_value(I16(700));
+    prev_dc = (dc >> 1) ^ -(dc & I16(1));
+    btemp[COMP_ID][0][0] = prev_dc;
+
+    for (uint n = 1; n < nb_blocks; n++) {
+        if (expectEXT(left_bits(gb) <= 0, false))
+            break;
+
+        /* Fixed codebook when (n & 15) == 1, otherwise adapt to the last code. */
+        uint8_t dc_codebook;
+        if ((n & 15) == 1)
+            dc_codebook = uint8_t(100);
+        else
+            dc_codebook = dc_cb[min(TODCCODEBOOK(dc), 13 - 1)];
+
+        dc = get_value(dc_codebook);
+
+        /* The low bit of the code flips the sign relative to the last delta. */
+        sign = sign ^ dc & int16_t(1);
+        dc_add = (-sign ^ I16(TODCCODEBOOK(dc))) + sign;
+        sign = I16(dc_add < 0);
+        prev_dc += dc_add;
+
+        btemp[COMP_ID][n][0] = prev_dc;
+    }
+}
+
+/*
+ * Decode the AC coefficients of the current component into btemp[COMP_ID].
+ * The stream alternates between runs of coded values and runs of zeros.
+ * Code number n is stored in block (n & block_mask) at position
+ * (n >> log2_nb_blocks), i.e. still in coded order; main() applies the scan.
+ */
+void read_ac_vals(const uint nb_blocks)
+{
+    const uint nb_codes = nb_blocks << 6;
+    const uint log2_nb_blocks = findMSB(nb_blocks);
+    const uint block_mask = (1 << log2_nb_blocks) - 1;
+
+    int16_t ac, rn, ln;
+    int16_t ac_codebook = I16(49);
+    int16_t rn_codebook = I16( 0);
+    int16_t ln_codebook = I16(66);
+    int16_t sign;
+    int16_t val;
+
+    /* Codes 0..nb_blocks-1 are the DC values read by read_dc_vals(). */
+    for (uint n = nb_blocks; n <= nb_codes;) {
+        if (expectEXT(left_bits(gb) <= 0, false))
+            break;
+
+        /* Run of ln coded coefficients. */
+        ln = get_value(ln_codebook);
+        for (uint i = 0; i < ln; i++) {
+            if (expectEXT(left_bits(gb) <= 0, false))
+                break;
+
+            if (expectEXT(n >= nb_codes, false))
+                break;
+
+            ac = get_value(ac_codebook);
+            ac_codebook = ac_cb[min(ac, 95 - 1)];
+            sign = -int16_t(get_bit(gb));
+
+            val = ((ac + I16(1)) ^ sign) - sign;
+            btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;
+
+            n++;
+        }
+
+        if (expectEXT(n >= nb_codes, false))
+            break;
+
+        /* Run of rn + 1 zero coefficients. */
+        rn = get_value(rn_codebook);
+        rn_codebook = rn_cb[min(rn, 28 - 1)];
+
+        n += rn + 1;
+        if (expectEXT(n >= nb_codes, false))
+            break;
+
+        if (expectEXT(left_bits(gb) <= 0, false))
+            break;
+
+        /* The coefficient ending the zero run also selects the next codebooks. */
+        ac = get_value(ac_codebook);
+        sign = -int16_t(get_bit(gb));
+
+        val = ((ac + I16(1)) ^ sign) - sign;
+        btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;
+
+        ac_codebook = ac_cb[min(ac, 95 - 1)];
+        ln_codebook = ln_cb[min(ac, 15 - 1)];
+
+        n++;
+    }
+}
+
+/*
+ * Shader entry point: one workgroup decodes one tile. For each component,
+ * the invocation with BLOCK_ID == 0 and ROW_ID == 0 runs the entropy
+ * decoding; afterwards all invocations dequantise, run the two IDCT passes
+ * and store the result to the output image.
+ */
+void main(void)
+{
+    const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    TileData td = tile_data[tile_idx];
+
+    /* Tile lies entirely right of the picture. */
+    if (expectEXT(td.pos.x >= frame_size.x, false))
+        return;
+
+    /* Tile header: 16-bit big-endian quantiser scale and three sizes. */
+    uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
+    u8vec2buf hdr_data = u8vec2buf(pkt_offset);
+    int qscale = int(pack16(hdr_data[0].v.yx));
+
+    /* size[] is indexed by component; the streams are stored in the order
+     * component 2, 1, 3, 0 and component 0 takes the remaining bytes. */
+    ivec4 size = ivec4(td.size,
+                       pack16(hdr_data[2].v.yx),
+                       pack16(hdr_data[1].v.yx),
+                       pack16(hdr_data[3].v.yx));
+    size[0] = size[0] - size[1] - size[2] - size[3] - 8;
+    if (expectEXT(size[0] < 0, false))
+        return;
+
+    /* Position of this component inside each 2x2 cell. */
+    const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
+    /* Dequantisation factor derived from the tile's quantiser scale. */
+    const float n = float(qscale - 16384) / (2.0f*4096.0f);
+    const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
+    const uint nb_blocks = w / 8;
+
+    /* Byte offset of each component's stream after the 8-byte header. */
+    const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],
+                                    size[2],
+                                    0,
+                                    size[2] + size[1]);
+
+    if (BLOCK_ID == 0 && ROW_ID == 0) {
+        init_get_bits(gb, u8buf(pkt_offset + 8 + comp_offset[COMP_ID]),
+                      size[COMP_ID]);
+        read_dc_vals(nb_blocks);
+        read_ac_vals(nb_blocks);
+    }
+
+    barrier();
+
+    /* Reorder through scan[] and scale by the quantiser and IDCT factors. */
+    [[unroll]]
+    for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)
+        block[COMP_ID][BLOCK_ID][i] = btemp[COMP_ID][BLOCK_ID][scan[i]] *
+                                      n * idct_8x8_scales[i];
+
+    barrier();
+
+#ifdef PARALLEL_ROWS
+    idct8_horiz(ROW_ID);
+
+    barrier();
+
+    idct8_vert(ROW_ID);
+#else
+    for (uint j = 0; j < 8; j++)
+        idct8_horiz(j);
+
+    barrier();
+
+    for (uint j = 0; j < 8; j++)
+        idct8_vert(j);
+#endif
+
+    barrier();
+
+    /* Store to every second column and row of the output image. */
+    [[unroll]]
+    for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)
+         imageStore(dst,
+                    offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),
+                    vec4(block[COMP_ID][BLOCK_ID][i]));
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 7310ba1547..857f16bc0a 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -25,7 +25,8 @@
 #include "libavutil/vulkan_loader.h"
 
 #define DECODER_IS_SDR(codec_id) \
-    ((codec_id) == AV_CODEC_ID_FFV1)
+    (((codec_id) == AV_CODEC_ID_FFV1) || \
+     ((codec_id) == AV_CODEC_ID_PRORES_RAW))
 
 #if CONFIG_H264_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@@ -39,6 +40,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
 #if CONFIG_FFV1_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
 #endif
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
+#endif
 
 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -53,6 +57,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_FFV1_VULKAN_HWACCEL
     &ff_vk_dec_ffv1_desc,
 #endif
+#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
+    &ff_vk_dec_prores_raw_desc,
+#endif
 };
 
 static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c
new file mode 100644
index 0000000000..12571c0e2c
--- /dev/null
+++ b/libavcodec/vulkan_prores_raw.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "prores_raw.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_prores_raw_comp;
+
+const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc = { /* listed in dec_descs[] in vulkan_decode.c */
+    .codec_id         = AV_CODEC_ID_PRORES_RAW,
+    .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+    .queue_flags      = VK_QUEUE_COMPUTE_BIT, /* decoding runs as a compute shader, see init_decode_shader() */
+};
+
+typedef struct ProResRAWVulkanDecodePicture { /* per-frame private data, see .frame_priv_data_size below */
+    FFVulkanDecodePicture vp;
+    /* Pooled buffer with one TileData entry per tile; allocated in start_frame, queued in end_frame */
+    AVBufferRef *tile_data;
+    uint32_t nb_tiles; /* number of TileData entries filled so far by decode_slice */
+} ProResRAWVulkanDecodePicture;
+
+typedef struct ProResRAWVulkanDecodeContext { /* decoder-wide state, stored in FFVulkanDecodeShared.sd_ctx */
+    FFVulkanShader decode[2]; /* built for version 0 and 1; selected with prr->version in end_frame */
+    /* Pool backing ProResRAWVulkanDecodePicture.tile_data */
+    AVBufferPool *tile_data_pool;
+    /* IDCT scales, scan table and the four codebooks; bound as uniform buffers to both shaders */
+    FFVkBuffer uniform_buf;
+} ProResRAWVulkanDecodeContext;
+
+typedef struct DecodePushData { /* mirrors the GLSL pushConstants block emitted in init_decode_shader() */
+    VkDeviceAddress tile_data; /* device address of the TileData array */
+    VkDeviceAddress pkt_data;  /* device address of the slices (packet) buffer */
+    uint32_t frame_size[2];    /* avctx->width, avctx->height */
+    uint32_t tile_size[2];     /* prr->tw, prr->th */
+} DecodePushData;
+
+typedef struct TileData { /* mirrors the GLSL TileData buffer reference emitted in init_decode_shader() */
+    int32_t pos[2];  /* copied from prr->tiles[i].x and prr->tiles[i].y */
+    uint32_t offset; /* offset of the tile's data within the slices buffer */
+    uint32_t size;   /* size of the tile's data as passed to decode_slice */
+} TileData;
+/* Per-frame setup: tries to host-map the packet, allocates the tile table and prepares the output frame. */
+static int vk_prores_raw_start_frame(AVCodecContext          *avctx,
+                                     const AVBufferRef       *buffer_ref,
+                                     av_unused const uint8_t *buffer,
+                                     av_unused uint32_t       size)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx;
+    ProResRAWContext *prr = avctx->priv_data;
+
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* Host map the input tile data if supported; the result is not checked here, decode_slice has a copying path */
+    if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
+                              buffer_ref,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+
+    /* Allocate tile data: one TileData entry per tile, host-visible because decode_slice writes it via mapped_mem */
+    err = ff_vk_get_pooled_buffer(&ctx->s, &prv->tile_data_pool,
+                                  &pp->tile_data,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, prr->nb_tiles*sizeof(TileData),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        return err;
+
+    /* Prepare frame to be used */
+    err = ff_vk_decode_prepare_frame_sdr(dec, prr->frame, vp, 1,
+                                         FF_VK_REP_FLOAT, 0);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+/* Fills the next TileData entry (position, size and data offset) for one tile of the current frame. */
+static int vk_prores_raw_decode_slice(AVCodecContext *avctx,
+                                      const uint8_t  *data,
+                                      uint32_t        size)
+{
+    ProResRAWContext *prr = avctx->priv_data;
+
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    FFVkBuffer *tile_data_buf = (FFVkBuffer *)pp->tile_data->data;
+    TileData *td = (TileData *)tile_data_buf->mapped_mem;
+    FFVkBuffer *slices_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
+    /* pp->nb_tiles is the index of the tile being added; it is advanced below */
+    td[pp->nb_tiles].pos[0] = prr->tiles[pp->nb_tiles].x;
+    td[pp->nb_tiles].pos[1] = prr->tiles[pp->nb_tiles].y;
+    td[pp->nb_tiles].size = size;
+    /* With a host_ref (presumably set when start_frame host-mapped the packet) only the offset is recorded */
+    if (vp->slices_buf && slices_buf->host_ref) {
+        td[pp->nb_tiles].offset = data - slices_buf->mapped_mem;
+        pp->nb_tiles++;
+    } else {
+        int err;
+        td[pp->nb_tiles].offset = vp->slices_size; /* read before the call below, which presumably updates it */
+        err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                     &pp->nb_tiles, NULL); /* presumably copies data and increments nb_tiles; confirm */
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+static int vk_prores_raw_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    ProResRAWContext *prr = avctx->priv_data;
+    ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx;
+
+    DecodePushData pd_decode;
+    FFVulkanShader *decode_shader;
+
+    ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *tile_data = (FFVkBuffer *)pp->tile_data->data;
+
+    VkImageMemoryBarrier2 img_bar[8];
+    int nb_img_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, prr->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                      prr->frame);
+    if (err < 0)
+        return err;
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->tile_data, 1, 0));
+    pp->tile_data = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+
+    ff_vk_frame_barrier(&ctx->s, exec, prr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+    });
+    nb_img_bar = 0;
+
+    decode_shader = &prv->decode[prr->version];
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  prr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    pd_decode = (DecodePushData) {
+        .tile_data = tile_data->address,
+        .pkt_data = slices_buf->address,
+        .frame_size[0] = avctx->width,
+        .frame_size[1] = avctx->height,
+        .tile_size[0] = prr->tw,
+        .tile_size[1] = prr->th,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_decode), &pd_decode);
+
+    vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1);
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+fail:
+    return 0;
+}
+/* Generates, compiles, links and registers the decode compute shader for one `version` (0 or 1). */
+static int init_decode_shader(ProResRAWContext *prr, FFVulkanContext *s,
+                              FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                              FFVulkanShader *shd, int version)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+    int parallel_rows = 1;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+    /* With PARALLEL_ROWS the workgroup is 8 x (8 or 16) x 4, i.e. up to 512 invocations */
+    if (s->props.properties.limits.maxComputeWorkGroupInvocations < 512 ||
+        s->props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)
+        parallel_rows = 0;
+
+    RET(ff_vk_shader_init(s, shd, "prores_raw",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2",
+                                             "GL_EXT_null_initializer" }, 3,
+                          parallel_rows ? 8 : 1 /* 8x8 transforms, 8-point width */,
+                          version == 0 ? 8 : 16 /* Horizontal blocks */,
+                          4 /* Components */,
+                          0));
+
+    if (parallel_rows)
+        GLSLC(0, #define PARALLEL_ROWS                                               );
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    GLSLC(0, layout(buffer_reference, buffer_reference_align = 16) buffer TileData { );
+    GLSLC(1,    ivec2 pos;                                                           );
+    GLSLC(1,    uint offset;                                                         );
+    GLSLC(1,    uint size;                                                           );
+    GLSLC(0, };                                                                      );
+    GLSLC(0,                                                                         );
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {                   );
+    GLSLC(1,    TileData tile_data;                                                  );
+    GLSLC(1,    u8buf pkt_data;                                                      );
+    GLSLC(1,    uvec2 frame_size;                                                    );
+    GLSLC(1,    uvec2 tile_size;                                                     );
+    GLSLC(0, };                                                                      );
+    GLSLC(0,                                                                         );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+    /* Set 0: the output image the shader writes to */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = "r16",
+            .mem_quali  = "writeonly",
+            .dimensions = 2,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
+    /* Set 1: constant tables; vk_decode_prores_raw_init() binds ranges of uniform_buf to bindings 0..5 */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "dct_scale_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "float idct_8x8_scales[64];",
+        },
+        {
+            .name        = "scan_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t scan[64];",
+        },
+        {
+            .name        = "dc_cb_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t dc_cb[13];",
+        },
+        {
+            .name        = "ac_cb_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t ac_cb[95];",
+        },
+        {
+            .name        = "rn_cb_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t rn_cb[28];",
+        },
+        {
+            .name        = "ln_cb_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t ln_cb[15];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 6, 1, 0));
+    /* The decoder's GLSL source goes last, after the declarations it relies on */
+    GLSLD(ff_source_prores_raw_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+/* Releases what vk_decode_prores_raw_init() created; installed there as ctx->sd_ctx_free. */
+static void vk_decode_prores_raw_uninit(FFVulkanDecodeShared *ctx)
+{
+    ProResRAWVulkanDecodeContext *fv = ctx->sd_ctx;
+
+    ff_vk_shader_free(&ctx->s, &fv->decode[0]);
+    ff_vk_shader_free(&ctx->s, &fv->decode[1]);
+
+    ff_vk_free_buf(&ctx->s, &fv->uniform_buf);
+
+    av_buffer_pool_uninit(&fv->tile_data_pool);
+    /* Frees the context itself; only the local pointer is cleared, ctx->sd_ctx is not reset here */
+    av_freep(&fv);
+}
+/* Hwaccel init: builds both decode shaders and fills/binds the shared uniform buffer of constant tables. */
+static int vk_decode_prores_raw_init(AVCodecContext *avctx)
+{
+    int err;
+    ProResRAWContext *prr = avctx->priv_data;
+
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    size_t ua;
+
+    ProResRAWVulkanDecodeContext *prv;
+    FFVkSPIRVCompiler *spv;
+
+    uint8_t *uniform_buf;
+    float *dct_scale_buf;
+    double idct_8_scales[8] = {
+        cos(4.0*M_PI/16.0)/2.0,
+        cos(1.0*M_PI/16.0)/2.0,
+        cos(2.0*M_PI/16.0)/2.0,
+        cos(3.0*M_PI/16.0)/2.0,
+        cos(4.0*M_PI/16.0)/2.0,
+        cos(5.0*M_PI/16.0)/2.0,
+        cos(6.0*M_PI/16.0)/2.0,
+        cos(7.0*M_PI/16.0)/2.0,
+    };
+    uint8_t *scan_buf;
+    size_t cb_size[5] = { /* sizes of dc_cb, ac_cb, rn_cb, ln_cb as declared in the shader; fifth entry unused */
+        13*sizeof(uint8_t),
+        95*sizeof(int16_t),
+        28*sizeof(int16_t),
+        15*sizeof(int16_t),
+    };
+    size_t cb_offset[5];
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+    /* From here on every exit goes through fail, so the SPIR-V compiler is always released */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+    ctx = dec->shared_ctx;
+
+    prv = ctx->sd_ctx = av_mallocz(sizeof(*prv));
+    if (!prv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_prores_raw_uninit;
+    /* Layout: [64 floats: IDCT scales][64 bytes: scan][codebooks]; descriptor offsets must be multiples of ua */
+    ua = ctx->s.props.properties.limits.minUniformBufferOffsetAlignment;
+    cb_offset[0] = FFALIGN(64*sizeof(float) + 64*sizeof(uint8_t), ua);
+    cb_offset[1] = cb_offset[0] + FFALIGN(cb_size[0], ua);
+    cb_offset[2] = cb_offset[1] + FFALIGN(cb_size[1], ua);
+    cb_offset[3] = cb_offset[2] + FFALIGN(cb_size[2], ua);
+    cb_offset[4] = cb_offset[3] + FFALIGN(cb_size[3], ua);
+
+    /* Setup decode shader */
+    RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[0], 0));
+    RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[1], 1));
+
+    RET(ff_vk_create_buf(&ctx->s, &prv->uniform_buf,
+                         64*sizeof(float) + 64*sizeof(uint8_t) + cb_offset[4] + 256,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    RET(ff_vk_map_buffer(&ctx->s, &prv->uniform_buf, &uniform_buf, 0));
+    /* idct_8x8_scales[i] is the product of the 1-D scale factors of coefficient i's row and column */
+    dct_scale_buf = (float *)uniform_buf;
+    for (int i = 0; i < 64; i++)
+        dct_scale_buf[i] = (float)(idct_8_scales[i >> 3] *
+                                   idct_8_scales[i  & 7]);
+    /* The shader gets the inverse of prr->scan */
+    scan_buf = uniform_buf + 64*sizeof(float);
+    for (int i = 0; i < 64; i++)
+        scan_buf[prr->scan[i]] = i;
+
+    memcpy(uniform_buf + cb_offset[0], ff_prores_raw_dc_cb,
+           sizeof(ff_prores_raw_dc_cb));
+    memcpy(uniform_buf + cb_offset[1], ff_prores_raw_ac_cb,
+           sizeof(ff_prores_raw_ac_cb));
+    memcpy(uniform_buf + cb_offset[2], ff_prores_raw_rn_cb,
+           sizeof(ff_prores_raw_rn_cb));
+    memcpy(uniform_buf + cb_offset[3], ff_prores_raw_ln_cb,
+           sizeof(ff_prores_raw_ln_cb));
+
+    RET(ff_vk_unmap_buffer(&ctx->s, &prv->uniform_buf, 1));
+    /* Bind sub-ranges of uniform_buf to descriptor set 1, bindings 0..5, of both shaders */
+    for (int i = 0; i < 2; i++) {
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                            &prv->decode[i], 1, 0, 0,
+                                            &prv->uniform_buf,
+                                            0, 64*sizeof(float),
+                                            VK_FORMAT_UNDEFINED));
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                            &prv->decode[i], 1, 1, 0,
+                                            &prv->uniform_buf,
+                                            64*sizeof(float), 64*sizeof(uint8_t),
+                                            VK_FORMAT_UNDEFINED));
+        for (int j = 0; j < 4; j++)
+            RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                &prv->decode[i], 1, 2 + j, 0,
+                                                &prv->uniform_buf,
+                                                cb_offset[j], cb_size[j],
+                                                VK_FORMAT_UNDEFINED));
+    }
+
+fail:
+    spv->uninit(&spv);
+
+    return err;
+}
+/* Frees a frame's ProResRAWVulkanDecodePicture contents; installed as .free_frame_priv below. */
+static void vk_prores_raw_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *dev_ctx = _hwctx.nc;
+    ProResRAWVulkanDecodePicture *pp = data;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    ff_vk_decode_free_frame(dev_ctx, vp);
+    av_buffer_unref(&pp->tile_data); /* non-NULL only if end_frame never queued it; no-op otherwise */
+}
+
+const FFHWAccel ff_prores_raw_vulkan_hwaccel = { /* codec-specific callbacks are defined above; the ff_vk_* ones are shared */
+    .p.name                = "prores_raw_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_PRORES_RAW,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_prores_raw_start_frame,
+    .decode_slice          = &vk_prores_raw_decode_slice, /* called with one tile's data per call */
+    .end_frame             = &vk_prores_raw_end_frame,
+    .free_frame_priv       = &vk_prores_raw_free_frame_priv,
+    .frame_priv_data_size  = sizeof(ProResRAWVulkanDecodePicture),
+    .init                  = &vk_decode_prores_raw_init, /* builds both shaders and the uniform buffer */
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (7 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering Lynne
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavfilter/vf_scale_vulkan.c | 160 ++++++++++++++++++----------------
 1 file changed, 85 insertions(+), 75 deletions(-)

diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 1d6492e213..46b1476933 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -112,6 +112,78 @@ static const char write_444[] = {
     C(0, }                                                                      )
 };
 
+static int init_scale_shader(ScaleVulkanContext *s, FFVulkanShader *shd,
+                             FFVulkanDescriptorSetBinding *desc, AVFrame *in)
+{ /* Emits the scaler's GLSL (helpers + main) and, on format change, fills s->opts.yuv_matrix */
+    GLSLD(   scale_bilinear                                                  );
+    /* rgb2yuv is only needed when the output format differs from the input format */
+    if (s->vkctx.output_format != s->vkctx.input_format) {
+        GLSLD(   rgb2yuv                                                     );
+    }
+    /* Only the writer matching the output format is emitted */
+    switch (s->vkctx.output_format) {
+    case AV_PIX_FMT_NV12:    GLSLD(write_nv12); break;
+    case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
+    case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
+    default: break;
+    }
+
+    GLSLC(0, void main()                                                     );
+    GLSLC(0, {                                                               );
+    GLSLC(1,     ivec2 size;                                                 );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                );
+    GLSLF(1,     vec2 in_d = vec2(%i, %i);             ,in->width, in->height);
+    GLSLC(1,     vec2 c_r = vec2(crop_w, crop_h) / in_d;                     );
+    GLSLC(1,     vec2 c_o = vec2(crop_x, crop_y) / in_d;                     );
+    GLSLC(0,                                                                 );
+    /* Same format: scale every output image on its own; otherwise scale image 0, convert and write */
+    if (s->vkctx.output_format == s->vkctx.input_format) {
+        for (int i = 0; i < desc[1].elems; i++) {
+            GLSLF(1,  size = imageSize(output_img[%i]);                    ,i);
+            GLSLC(1,  if (IS_WITHIN(pos, size)) {                            );
+            switch (s->scaler) {
+            case F_NEAREST:
+            case F_BILINEAR:
+                GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o);     ,i);
+                GLSLF(2, imageStore(output_img[%i], pos, res);             ,i);
+                break;
+            };
+            GLSLC(1, }                                                       );
+        }
+    } else {
+        GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o);                );
+        GLSLF(1, res = rgb2yuv(res, %i);    ,s->out_range == AVCOL_RANGE_JPEG);
+        switch (s->vkctx.output_format) {
+        case AV_PIX_FMT_NV12:    GLSLC(1, write_nv12(res, pos); ); break;
+        case AV_PIX_FMT_YUV420P: GLSLC(1,  write_420(res, pos); ); break;
+        case AV_PIX_FMT_YUV444P: GLSLC(1,  write_444(res, pos); ); break;
+        default: return AVERROR(EINVAL);
+        }
+    }
+
+    GLSLC(0, }                                                               );
+    /* The conversion matrix is derived from the input frame's colorspace */
+    if (s->vkctx.output_format != s->vkctx.input_format) {
+        const AVLumaCoefficients *lcoeffs;
+        double tmp_mat[3][3];
+
+        lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
+        if (!lcoeffs) {
+            av_log(s, AV_LOG_ERROR, "Unsupported colorspace\n");
+            return AVERROR(EINVAL);
+        }
+
+        ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
+        /* Copy the 3x3 matrix into the 4x4 push-constant matrix and set its last diagonal element */
+        for (int y = 0; y < 3; y++)
+            for (int x = 0; x < 3; x++)
+                s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
+        s->opts.yuv_matrix[3][3] = 1.0;
+    }
+
+    return 0;
+}
+
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
@@ -157,18 +229,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
                           32, 32, 1,
                           0));
 
-    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
-    GLSLC(1,    mat4 yuv_matrix;                                          );
-    GLSLC(1,    int crop_x;                                               );
-    GLSLC(1,    int crop_y;                                               );
-    GLSLC(1,    int crop_w;                                               );
-    GLSLC(1,    int crop_h;                                               );
-    GLSLC(0, };                                                           );
-    GLSLC(0,                                                              );
-
-    ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
-                                VK_SHADER_STAGE_COMPUTE_BIT);
-
     desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "input_img",
@@ -191,71 +251,21 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 
     RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
 
-    GLSLD(   scale_bilinear                                                  );
-
-    if (s->vkctx.output_format != s->vkctx.input_format) {
-        GLSLD(   rgb2yuv                                                     );
-    }
-
-    switch (s->vkctx.output_format) {
-    case AV_PIX_FMT_NV12:    GLSLD(write_nv12); break;
-    case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
-    case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
-    default: break;
-    }
-
-    GLSLC(0, void main()                                                     );
-    GLSLC(0, {                                                               );
-    GLSLC(1,     ivec2 size;                                                 );
-    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                );
-    GLSLF(1,     vec2 in_d = vec2(%i, %i);             ,in->width, in->height);
-    GLSLC(1,     vec2 c_r = vec2(crop_w, crop_h) / in_d;                     );
-    GLSLC(1,     vec2 c_o = vec2(crop_x, crop_y) / in_d;                     );
-    GLSLC(0,                                                                 );
-
-    if (s->vkctx.output_format == s->vkctx.input_format) {
-        for (int i = 0; i < desc[1].elems; i++) {
-            GLSLF(1,  size = imageSize(output_img[%i]);                    ,i);
-            GLSLC(1,  if (IS_WITHIN(pos, size)) {                            );
-            switch (s->scaler) {
-            case F_NEAREST:
-            case F_BILINEAR:
-                GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o);     ,i);
-                GLSLF(2, imageStore(output_img[%i], pos, res);             ,i);
-                break;
-            };
-            GLSLC(1, }                                                       );
-        }
-    } else {
-        GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o);                );
-        GLSLF(1, res = rgb2yuv(res, %i);    ,s->out_range == AVCOL_RANGE_JPEG);
-        switch (s->vkctx.output_format) {
-        case AV_PIX_FMT_NV12:    GLSLC(1, write_nv12(res, pos); ); break;
-        case AV_PIX_FMT_YUV420P: GLSLC(1,  write_420(res, pos); ); break;
-        case AV_PIX_FMT_YUV444P: GLSLC(1,  write_444(res, pos); ); break;
-        default: return AVERROR(EINVAL);
-        }
-    }
-
-    GLSLC(0, }                                                               );
-
-    if (s->vkctx.output_format != s->vkctx.input_format) {
-        const AVLumaCoefficients *lcoeffs;
-        double tmp_mat[3][3];
-
-        lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
-        if (!lcoeffs) {
-            av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
-            return AVERROR(EINVAL);
-        }
+    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
+    GLSLC(1,    mat4 yuv_matrix;                                          );
+    GLSLC(1,    int crop_x;                                               );
+    GLSLC(1,    int crop_y;                                               );
+    GLSLC(1,    int crop_w;                                               );
+    GLSLC(1,    int crop_h;                                               );
+    GLSLC(0, };                                                           );
+    GLSLC(0,                                                              );
 
-        ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
+    ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
 
-        for (int y = 0; y < 3; y++)
-            for (int x = 0; x < 3; x++)
-                s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
-        s->opts.yuv_matrix[3][3] = 1.0;
-    }
+    err = init_scale_shader(s, shd, desc, in);
+    if (err < 0)
+        goto fail;
 
     RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main",
                             &spv_opaque));
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (8 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel Lynne
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavfilter/vf_scale_vulkan.c   |  60 +++++++++++++++++--
 libavfilter/vulkan/Makefile     |   1 +
 libavfilter/vulkan/debayer.comp | 102 ++++++++++++++++++++++++++++++++
 3 files changed, 159 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/vulkan/debayer.comp

diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 46b1476933..5cb09ac385 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -27,6 +27,8 @@
 #include "colorspace.h"
 #include "video.h"
 
+extern const char *ff_source_debayer_comp;
+
 enum ScalerFunc {
     F_BILINEAR = 0,
     F_NEAREST,
@@ -34,6 +36,13 @@ enum ScalerFunc {
     F_NB,
 };
 
+enum DebayerFunc {
+    DB_BILINEAR = 0, /* generated main() calls debayer_bilinear() */
+    DB_BILINEAR_HQ,  /* generated main() calls debayer_bilinear_hq() */
+    /* presumably the number of debayer functions, like F_NB for ScalerFunc; confirm */
+    DB_NB,
+};
+
 typedef struct ScaleVulkanContext {
     FFVulkanContext vkctx;
 
@@ -58,6 +67,7 @@ typedef struct ScaleVulkanContext {
 
     enum ScalerFunc scaler;
     enum AVColorRange out_range;
+    enum DebayerFunc debayer;
 } ScaleVulkanContext;
 
 static const char scale_bilinear[] = {
@@ -184,6 +194,25 @@ static int init_scale_shader(ScaleVulkanContext *s, FFVulkanShader *shd,
     return 0;
 }
 
+static int init_debayer_shader(ScaleVulkanContext *s, FFVulkanShader *shd,
+                               FFVulkanDescriptorSetBinding *desc, AVFrame *in)
+{
+    GLSLD(ff_source_debayer_comp);
+
+    GLSLC(0, void main(void));
+    GLSLC(0, {              );
+    if (s->debayer == DB_BILINEAR)
+        GLSLC(1, debayer_bilinear(););
+    else if (s->debayer == DB_BILINEAR_HQ)
+        GLSLC(1, debayer_bilinear_hq(););
+    GLSLC(0, }              );
+
+    shd->lg_size[0] <<= 1;
+    shd->lg_size[1] <<= 1;
+
+    return 0;
+}
+
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
@@ -197,6 +226,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     FFVkSPIRVCompiler *spv;
     FFVulkanDescriptorSetBinding *desc;
 
+    int debayer = s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16;
     int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
 
     switch (s->scaler) {
@@ -222,7 +252,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
-    RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+
+    if (!debayer)
+        RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+
     RET(ff_vk_shader_init(vkctx, &s->shd, "scale",
                           VK_SHADER_STAGE_COMPUTE_BIT,
                           NULL, 0,
@@ -232,7 +265,13 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "input_img",
-            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .type       = debayer ?
+                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE :
+                          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .mem_layout = debayer ?
+                          ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT) :
+                          NULL,
+            .mem_quali  = "readonly",
             .dimensions = 2,
             .elems      = in_planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -263,7 +302,10 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
                                 VK_SHADER_STAGE_COMPUTE_BIT);
 
-    err = init_scale_shader(s, shd, desc, in);
+    if (debayer)
+        err = init_debayer_shader(s, shd, desc, in);
+    else
+        err = init_scale_shader(s, shd, desc, in);
     if (err < 0)
         goto fail;
 
@@ -361,7 +403,14 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
         s->vkctx.output_format = s->vkctx.input_format;
     }
 
-    if (s->vkctx.output_format != s->vkctx.input_format) {
+    if (s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16) {
+        if (s->vkctx.output_format == s->vkctx.input_format) {
+            s->vkctx.output_format = AV_PIX_FMT_RGBA64;
+        } else if (!ff_vk_mt_is_np_rgb(s->vkctx.output_format)) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported output format for debayer\n");
+            return AVERROR(EINVAL);
+        }
+    } else if (s->vkctx.output_format != s->vkctx.input_format) {
         if (!ff_vk_mt_is_np_rgb(s->vkctx.input_format)) {
             av_log(avctx, AV_LOG_ERROR, "Unsupported input format for conversion\n");
             return AVERROR(EINVAL);
@@ -406,6 +455,9 @@ static const AVOption scale_vulkan_options[] = {
     { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, .unit = "scaler" },
         { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "scaler" },
         { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, .unit = "scaler" },
+    { "debayer", "Debayer algorithm to use", OFFSET(debayer), AV_OPT_TYPE_INT, {.i64 = DB_BILINEAR_HQ}, 0, DB_NB, .flags = FLAGS, .unit = "debayer" },
+        { "bilinear", "Bilinear debayering (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "debayer" },
+        { "bilinear_hq", "Bilinear debayering (high quality)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR_HQ}, 0, 0, .flags = FLAGS, .unit = "debayer" },
     { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
     { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" },
         { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" },
diff --git a/libavfilter/vulkan/Makefile b/libavfilter/vulkan/Makefile
index 573eee32c7..c77aaf4f6b 100644
--- a/libavfilter/vulkan/Makefile
+++ b/libavfilter/vulkan/Makefile
@@ -4,6 +4,7 @@ clean::
 	$(RM) $(GEN_CLEANSUFFIXES:%=libavfilter/vulkan/%)
 
 OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.o
+OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.o
 
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
diff --git a/libavfilter/vulkan/debayer.comp b/libavfilter/vulkan/debayer.comp
new file mode 100644
index 0000000000..c86c2f5eec
--- /dev/null
+++ b/libavfilter/vulkan/debayer.comp
@@ -0,0 +1,102 @@
+/*
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define LD(xo, yo) \
+    (imageLoad(input_img[0], pos + ivec2((xo), (yo))).r)
+
+void debayer_bilinear(void)
+{
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1;
+
+    /* R basis */
+    vec4 tl = vec4(LD(0, 0),
+                   (LD(1, 0) + LD(-1, 0) + LD(0, 1) + LD(0, -1)) / 4.0f,
+                   (LD(-1, -1) + LD(1, 1) + LD(-1, 1) + LD(1, -1)) / 4.0f,
+                   1.0f);
+    imageStore(output_img[0], pos, tl);
+
+    /* G1 basis */
+    vec4 tr = vec4((LD(2, 0) + LD(0, 0)) / 2.0f,
+                   LD(1, 0),
+                   (LD(1, 1) + LD(1, -1)) / 2.0f,
+                   1.0f);
+    imageStore(output_img[0], pos + ivec2(1, 0), tr);
+
+    /* G2 basis */
+    vec4 bl = vec4((LD(0, 2) + LD(0, 0)) / 2.0f,
+                   LD(0, 1),
+                   (LD(1, 1) + LD(-1, 1)) / 2.0f,
+                   1.0f);
+    imageStore(output_img[0], pos + ivec2(0, 1), bl);
+
+    /* B basis */
+    vec4 br = vec4((LD(0, 0) + LD(2, 2) + LD(0, 2) + LD(2, 0)) / 4.0f,
+                   (LD(2, 1) + LD(0, 1) + LD(1, 2) + LD(1, 0)) / 4.0f,
+                   LD(1, 1),
+                   1.0f);
+    imageStore(output_img[0], pos + ivec2(1, 1), br);
+}
+
+void debayer_bilinear_hq(void)
+{
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1;
+
+    /* R basis */
+    vec4 tl = vec4(LD(0, 0),
+                   (4.0f*LD(0, 0) + 2.0f*(LD(0, -1) + LD(0, 1) + LD(-1, 0) + LD(1, 0)) -
+                    (LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 8.0f,
+                   (12.0f*LD(0, 0) + 4.0f*(LD(-1, -1) + LD(-1, 1) + LD(1, -1) + LD(1, 1)) -
+                    3.0f*(LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 16.0f,
+                   1.0f);
+    imageStore(output_img[0], pos, tl);
+
+    /* G1 basis */
+    vec4 tr = vec4((10.0f*LD(1, 0) + 8.0f*(LD(0, 0) + LD(2, 0)) -
+                    2.0f*(LD(0, -1) + LD(2, 1) + LD(0, 1) + LD(2, -1) + LD(-1, 0) + LD(3, 0)) +
+                    LD(1, -2) + LD(1, 2)) / 16.0f,
+                   LD(1, 0),
+                   (10.0f*LD(1, 0) + 8.0f*(LD(1, -1) + LD(1, 1)) -
+                    2.0f*(LD(0, -1) + LD(0, 1) + LD(2, -1) + LD(2, 1) + LD(1, -2) + LD(1, 2)) +
+                    LD(-1, 0) + LD(3, 0)) / 16.0f,
+                   1.0f);
+    imageStore(output_img[0], pos + ivec2(1, 0), tr);
+
+
+    /* G2 basis */
+    vec4 bl = vec4((10.0f*LD(0, 1) + 8.0f*(LD(0, 0) + LD(0, 2)) -
+                   2.0f*(LD(-1, 0) + LD(-1, 2) + LD(1, 0) + LD(1, 2) + LD(0, -1) + LD(0, 3)) +
+                   LD(-2, 1) + LD(2, 1)) / 16.0f,
+                   LD(0, 1),
+                   (10.0f*LD(0, 1) + 8.0f*(LD(-1, 1) + LD(1, 1)) -
+                   2.0f*(LD(-1, 0) + LD(1, 2) + LD(-1, 2) + LD(1, 0) + LD(-2, 1) + LD(2, 1)) +
+                    LD(0, -1) + LD(0, 3)) / 16.0f,
+                   1.0f);
+    imageStore(output_img[0], pos + ivec2(0, 1), bl);
+
+    /* B basis */
+    vec4 br = vec4((12.0f*LD(1, 1) + 4.0f*(LD(0, 0) + LD(0, 2) + LD(2, 0) + LD(2, 2)) -
+                    3.0f*(LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 16.0f,
+                   (4.0f*LD(1, 1) + 2.0f*(LD(1, 0) + LD(1, 2) + LD(0, 1) + LD(2, 1)) -
+                    (LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 8.0f,
+                   LD(1, 1),
+                   1.0f);
+    imageStore(output_img[0], pos + ivec2(1, 1), br);
+}
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (9 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering Lynne
@ 2025-07-12 18:51 ` Lynne
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel Lynne
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 configure              |  2 +-
 libavcodec/vp9.c       | 40 ++++++++++++++++++++++++++++++++++++++++
 libavcodec/vp9dec.h    |  6 ++++++
 libavcodec/vp9shared.h |  4 ++++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index eeb81d7aa3..92ee54c7a6 100755
--- a/configure
+++ b/configure
@@ -3153,7 +3153,7 @@ vp6a_decoder_select="vp6_decoder"
 vp6f_decoder_select="vp6_decoder"
 vp7_decoder_select="h264pred videodsp vp8dsp"
 vp8_decoder_select="h264pred videodsp vp8dsp"
-vp9_decoder_select="videodsp vp9_parser vp9_superframe_split_bsf"
+vp9_decoder_select="videodsp vp9_parser cbs_vp9 vp9_superframe_split_bsf"
 vvc_decoder_select="cabac cbs_h266 golomb videodsp vvc_sei"
 wcmv_decoder_select="inflate_wrapper"
 webp_decoder_select="vp8_decoder exif"
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 141f0941b4..a385956f4f 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -97,6 +97,7 @@ static void vp9_tile_data_free(VP9TileData *td)
 static void vp9_frame_unref(VP9Frame *f)
 {
     ff_progress_frame_unref(&f->tf);
+    av_refstruct_unref(&f->header_ref);
     av_refstruct_unref(&f->extradata);
     av_refstruct_unref(&f->hwaccel_picture_private);
     f->segmentation_map = NULL;
@@ -145,6 +146,9 @@ fail:
 
 static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
 {
+    av_refstruct_replace(&dst->header_ref, src->header_ref);
+    dst->frame_header = src->frame_header;
+
     ff_progress_frame_replace(&dst->tf, &src->tf);
 
     av_refstruct_replace(&dst->extradata, src->extradata);
@@ -1255,6 +1259,11 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
     av_freep(&s->entries);
     ff_pthread_free(s, vp9_context_offsets);
 #endif
+
+    av_refstruct_unref(&s->header_ref);
+    ff_cbs_fragment_free(&s->current_frag);
+    ff_cbs_close(&s->cbc);
+
     av_freep(&s->td);
     return 0;
 }
@@ -1557,11 +1566,27 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
     int size = pkt->size;
     VP9Context *s = avctx->priv_data;
     int ret, i, j, ref;
+    CodedBitstreamUnit *unit;
+    VP9RawFrame *rf;
+
     int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                             (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
     const VP9Frame *src;
     AVFrame *f;
 
+    ret = ff_cbs_read_packet(s->cbc, &s->current_frag, pkt);
+    if (ret < 0) {
+        ff_cbs_fragment_reset(&s->current_frag);
+        av_log(avctx, AV_LOG_ERROR, "Failed to read frame header.\n");
+        return ret;
+    }
+
+    unit = &s->current_frag.units[0];
+    rf = unit->content;
+
+    av_refstruct_replace(&s->header_ref, unit->content_ref);
+    s->frame_header = &rf->header;
+
     if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
         return ret;
     } else if (ret == 0) {
@@ -1592,6 +1617,10 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
     vp9_frame_unref(&s->s.frames[CUR_FRAME]);
     if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
         return ret;
+
+    s->s.frames[CUR_FRAME].header_ref = av_refstruct_ref(s->header_ref);
+    s->s.frames[CUR_FRAME].frame_header = s->frame_header;
+
     f = s->s.frames[CUR_FRAME].tf.f;
     if (s->s.h.keyframe)
         f->flags |= AV_FRAME_FLAG_KEY;
@@ -1779,6 +1808,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
     for (i = 0; i < 8; i++)
         ff_progress_frame_unref(&s->s.refs[i]);
 
+    ff_cbs_fragment_reset(&s->current_frag);
+    ff_cbs_flush(s->cbc);
+
     if (FF_HW_HAS_CB(avctx, flush))
         FF_HW_SIMPLE_CALL(avctx, flush);
 }
@@ -1791,6 +1823,10 @@ static av_cold int vp9_decode_init(AVCodecContext *avctx)
     s->last_bpp = 0;
     s->s.h.filter.sharpness = -1;
 
+    ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_VP9, avctx);
+    if (ret < 0)
+        return ret;
+
 #if HAVE_THREADS
     if (avctx->active_thread_type & FF_THREAD_SLICE) {
         ret = ff_pthread_init(s, vp9_context_offsets);
@@ -1814,6 +1850,10 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
     av_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
     s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
 
+    av_refstruct_replace(&s->header_ref, ssrc->header_ref);
+    s->frame_header = ssrc->frame_header;
+    memcpy(s->cbc->priv_data, ssrc->cbc->priv_data, sizeof(CodedBitstreamVP9Context));
+
     s->s.h.invisible = ssrc->s.h.invisible;
     s->s.h.keyframe = ssrc->s.h.keyframe;
     s->s.h.intraonly = ssrc->s.h.intraonly;
diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h
index e41f47a82a..c3ad2bbcdb 100644
--- a/libavcodec/vp9dec.h
+++ b/libavcodec/vp9dec.h
@@ -38,6 +38,7 @@
 #include "vp9dsp.h"
 #include "vp9shared.h"
 #include "vpx_rac.h"
+#include "cbs_vp9.h"
 
 #define REF_INVALID_SCALE 0xFFFF
 
@@ -97,6 +98,11 @@ typedef struct VP9Context {
     VP9SharedContext s;
     VP9TileData *td;
 
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment current_frag;
+    VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
+    VP9RawFrameHeader *frame_header;
+
     VP9DSPContext dsp;
     VideoDSPContext vdsp;
     GetBitContext gb;
diff --git a/libavcodec/vp9shared.h b/libavcodec/vp9shared.h
index 8a450c26a6..d2226e0072 100644
--- a/libavcodec/vp9shared.h
+++ b/libavcodec/vp9shared.h
@@ -30,6 +30,7 @@
 #include "libavutil/mem_internal.h"
 
 #include "progressframe.h"
+#include "cbs_vp9.h"
 #include "vp9.h"
 
 enum BlockPartition {
@@ -63,6 +64,9 @@ typedef struct VP9mvrefPair {
 } VP9mvrefPair;
 
 typedef struct VP9Frame {
+    VP9RawFrame *header_ref; ///< RefStruct reference backing frame_header
+    VP9RawFrameHeader *frame_header;
+
     ProgressFrame tf;
     void *extradata;               ///< RefStruct reference
     uint8_t *segmentation_map;
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel
  2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
                   ` (10 preceding siblings ...)
  2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header Lynne
@ 2025-07-12 18:51 ` Lynne
  11 siblings, 0 replies; 13+ messages in thread
From: Lynne @ 2025-07-12 18:51 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 configure                    |   2 +
 libavcodec/Makefile          |   1 +
 libavcodec/hwaccels.h        |   1 +
 libavcodec/vp9.c             |  18 +-
 libavcodec/vulkan_decode.c   |  24 +++
 libavcodec/vulkan_decode.h   |   1 +
 libavcodec/vulkan_vp9.c      | 366 +++++++++++++++++++++++++++++++++++
 libavutil/hwcontext_vulkan.c |  15 ++
 libavutil/vulkan_functions.h |   3 +-
 libavutil/vulkan_loader.h    |   3 +
 10 files changed, 432 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/vulkan_vp9.c

diff --git a/configure b/configure
index 92ee54c7a6..d1c54c0b8e 100755
--- a/configure
+++ b/configure
@@ -3326,6 +3326,8 @@ vp9_vdpau_hwaccel_deps="vdpau VdpPictureInfoVP9"
 vp9_vdpau_hwaccel_select="vp9_decoder"
 vp9_videotoolbox_hwaccel_deps="videotoolbox"
 vp9_videotoolbox_hwaccel_select="vp9_decoder"
+vp9_vulkan_hwaccel_deps="vulkan"
+vp9_vulkan_hwaccel_select="vp9_decoder"
 vvc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVVC"
 vvc_vaapi_hwaccel_select="vvc_decoder"
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 19fdaa9ad3..fcd1ae2a9e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1078,6 +1078,7 @@ OBJS-$(CONFIG_VP9_NVDEC_HWACCEL)          += nvdec_vp9.o
 OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
 OBJS-$(CONFIG_VP9_VDPAU_HWACCEL)          += vdpau_vp9.o
 OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)   += videotoolbox_vp9.o
+OBJS-$(CONFIG_VP9_VULKAN_HWACCEL)         += vulkan_decode.o vulkan_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec.o
 OBJS-$(CONFIG_VVC_VAAPI_HWACCEL)          += vaapi_vvc.o
 OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL)  += vulkan_decode.o vulkan_prores_raw.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index fb9b850233..4b205d386e 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -85,6 +85,7 @@ extern const struct FFHWAccel ff_vp9_nvdec_hwaccel;
 extern const struct FFHWAccel ff_vp9_vaapi_hwaccel;
 extern const struct FFHWAccel ff_vp9_vdpau_hwaccel;
 extern const struct FFHWAccel ff_vp9_videotoolbox_hwaccel;
+extern const struct FFHWAccel ff_vp9_vulkan_hwaccel;
 extern const struct FFHWAccel ff_vvc_vaapi_hwaccel;
 extern const struct FFHWAccel ff_wmv3_d3d11va_hwaccel;
 extern const struct FFHWAccel ff_wmv3_d3d11va2_hwaccel;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index a385956f4f..31792962b4 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -169,7 +169,8 @@ static int update_size(AVCodecContext *avctx, int w, int h)
                      CONFIG_VP9_NVDEC_HWACCEL + \
                      CONFIG_VP9_VAAPI_HWACCEL + \
                      CONFIG_VP9_VDPAU_HWACCEL + \
-                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
+                     CONFIG_VP9_VIDEOTOOLBOX_HWACCEL + \
+                     CONFIG_VP9_VULKAN_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
     VP9Context *s = avctx->priv_data;
     uint8_t *p;
@@ -206,6 +207,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
 #endif
 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
             *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VULKAN;
 #endif
             break;
         case AV_PIX_FMT_YUV420P12:
@@ -217,6 +221,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
 #endif
 #if CONFIG_VP9_VDPAU_HWACCEL
             *fmtp++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VULKAN;
 #endif
             break;
         case AV_PIX_FMT_YUV444P:
@@ -224,6 +231,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
         case AV_PIX_FMT_YUV444P12:
 #if CONFIG_VP9_VAAPI_HWACCEL
             *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VULKAN;
 #endif
             break;
         case AV_PIX_FMT_GBRP:
@@ -231,6 +241,9 @@ static int update_size(AVCodecContext *avctx, int w, int h)
         case AV_PIX_FMT_GBRP12:
 #if CONFIG_VP9_VAAPI_HWACCEL
             *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VULKAN;
 #endif
             break;
         }
@@ -1919,6 +1932,9 @@ const FFCodec ff_vp9_decoder = {
 #endif
 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
                                HWACCEL_VIDEOTOOLBOX(vp9),
+#endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+                               HWACCEL_VULKAN(vp9),
 #endif
                                NULL
                            },
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 857f16bc0a..dea25d93aa 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -34,6 +34,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
 #if CONFIG_HEVC_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc;
 #endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc;
+#endif
 #if CONFIG_AV1_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
 #endif
@@ -51,6 +54,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_HEVC_VULKAN_HWACCEL
     &ff_vk_dec_hevc_desc,
 #endif
+#if CONFIG_VP9_VULKAN_HWACCEL
+    &ff_vk_dec_vp9_desc,
+#endif
 #if CONFIG_AV1_VULKAN_HWACCEL
     &ff_vk_dec_av1_desc,
 #endif
@@ -78,6 +84,7 @@ static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx,
     VkStructureType profile_struct_type =
         codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR :
         codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR :
+        codec_id == AV_CODEC_ID_VP9  ? VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR :
         codec_id == AV_CODEC_ID_AV1  ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR :
                                        VK_STRUCTURE_TYPE_MAX_ENUM;
     if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM)
@@ -688,6 +695,7 @@ static VkResult vulkan_setup_profile(AVCodecContext *avctx,
                                      const FFVulkanDecodeDescriptor *vk_desc,
                                      VkVideoDecodeH264CapabilitiesKHR *h264_caps,
                                      VkVideoDecodeH265CapabilitiesKHR *h265_caps,
+                                     VkVideoDecodeVP9CapabilitiesKHR *vp9_caps,
                                      VkVideoDecodeAV1CapabilitiesKHR *av1_caps,
                                      VkVideoCapabilitiesKHR *caps,
                                      VkVideoDecodeCapabilitiesKHR *dec_caps,
@@ -699,6 +707,7 @@ static VkResult vulkan_setup_profile(AVCodecContext *avctx,
 
     VkVideoDecodeH264ProfileInfoKHR *h264_profile = &prof->h264_profile;
     VkVideoDecodeH265ProfileInfoKHR *h265_profile = &prof->h265_profile;
+    VkVideoDecodeVP9ProfileInfoKHR *vp9_profile  = &prof->vp9_profile;
     VkVideoDecodeAV1ProfileInfoKHR *av1_profile  = &prof->av1_profile;
 
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -724,6 +733,11 @@ static VkResult vulkan_setup_profile(AVCodecContext *avctx,
         usage->pNext = h265_profile;
         h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
         h265_profile->stdProfileIdc = cur_profile;
+    } else if (avctx->codec_id == AV_CODEC_ID_VP9) {
+        dec_caps->pNext = vp9_caps;
+        usage->pNext = vp9_profile;
+        vp9_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR;
+        vp9_profile->stdProfile = cur_profile;
     } else if (avctx->codec_id == AV_CODEC_ID_AV1) {
         dec_caps->pNext = av1_caps;
         usage->pNext = av1_profile;
@@ -784,6 +798,9 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
     VkVideoDecodeH265CapabilitiesKHR h265_caps = {
         .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
     };
+    VkVideoDecodeVP9CapabilitiesKHR vp9_caps = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR,
+    };
     VkVideoDecodeAV1CapabilitiesKHR av1_caps = {
         .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR,
     };
@@ -804,12 +821,14 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
     cur_profile = avctx->profile;
     base_profile = avctx->codec_id == AV_CODEC_ID_H264 ? AV_PROFILE_H264_CONSTRAINED_BASELINE :
                    avctx->codec_id == AV_CODEC_ID_H265 ? AV_PROFILE_HEVC_MAIN :
+                   avctx->codec_id == AV_CODEC_ID_VP9  ? STD_VIDEO_VP9_PROFILE_0 :
                    avctx->codec_id == AV_CODEC_ID_AV1  ? STD_VIDEO_AV1_PROFILE_MAIN :
                    0;
 
     ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc,
                                &h264_caps,
                                &h265_caps,
+                               &vp9_caps,
                                &av1_caps,
                                caps,
                                dec_caps,
@@ -826,6 +845,7 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
         ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc,
                                    &h264_caps,
                                    &h265_caps,
+                                   &vp9_caps,
                                    &av1_caps,
                                    caps,
                                    dec_caps,
@@ -852,6 +872,7 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
 
     max_level = avctx->codec_id == AV_CODEC_ID_H264 ? ff_vk_h264_level_to_av(h264_caps.maxLevelIdc) :
                 avctx->codec_id == AV_CODEC_ID_H265 ? ff_vk_h265_level_to_av(h265_caps.maxLevelIdc) :
+                avctx->codec_id == AV_CODEC_ID_VP9  ? vp9_caps.maxLevel :
                 avctx->codec_id == AV_CODEC_ID_AV1  ? av1_caps.maxLevel :
                 0;
 
@@ -1175,6 +1196,9 @@ static int create_empty_session_parameters(AVCodecContext *avctx,
         .videoSession = ctx->common.session,
     };
 
+    if (avctx->codec_id == AV_CODEC_ID_VP9)
+        return 0;
+
     ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
                                               s->hwctx->alloc, &ctx->empty_session_params);
     if (ret != VK_SUCCESS) {
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index bf6506f280..bf51d5a170 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -38,6 +38,7 @@ typedef struct FFVulkanDecodeDescriptor {
 typedef struct FFVulkanDecodeProfileData {
     VkVideoDecodeH264ProfileInfoKHR h264_profile;
     VkVideoDecodeH265ProfileInfoKHR h265_profile;
+    VkVideoDecodeVP9ProfileInfoKHR vp9_profile;
     VkVideoDecodeAV1ProfileInfoKHR av1_profile;
     VkVideoDecodeUsageInfoKHR usage;
     VkVideoProfileInfoKHR profile;
diff --git a/libavcodec/vulkan_vp9.c b/libavcodec/vulkan_vp9.c
new file mode 100644
index 0000000000..6713ab2218
--- /dev/null
+++ b/libavcodec/vulkan_vp9.c
@@ -0,0 +1,366 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp9shared.h"
+
+#include "vulkan_decode.h"
+
+/* Descriptor pairing AV_CODEC_ID_VP9 with the Vulkan VP9 decode operation,
+ * the FFmpeg extension flag, the queue type and the video std extension. */
+const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc = {
+    .decode_op               = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR,
+    .decode_extension        = FF_VK_EXT_VIDEO_DECODE_VP9,
+    .queue_flags             = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
+    .codec_id                = AV_CODEC_ID_VP9,
+    .ext_props.extensionName = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME,
+    .ext_props.specVersion   = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION,
+};
+
+typedef struct VP9VulkanDecodePicture { /* Per-frame hwaccel private data (see frame_priv_data_size) */
+    FFVulkanDecodePicture           vp; /* Generic picture state handed to the ff_vk_decode_* helpers */
+
+    /* TODO: investigate if this can be removed to make decoding completely
+     * independent. */
+    FFVulkanDecodeContext          *dec; /* Set in start_frame; read when freeing to release frame_id */
+
+    /* Current picture */
+    StdVideoVP9ColorConfig color_config;         /* Pointed to by std_pic_info.pColorConfig */
+    StdVideoVP9Segmentation segmentation;        /* Pointed to by std_pic_info.pSegmentation */
+    StdVideoVP9LoopFilter loop_filter;           /* Pointed to by std_pic_info.pLoopFilter */
+    StdVideoDecodeVP9PictureInfo std_pic_info;   /* Pointed to by vp9_pic_info.pStdPictureInfo */
+    VkVideoDecodeVP9PictureInfoKHR vp9_pic_info; /* Chained as pNext of vp.decode_info */
+
+    const VP9Frame *ref_src[8]; /* Source frame of each reference slot filled in start_frame */
+
+    uint8_t frame_id_set; /* Nonzero once frame_id has been allocated */
+    uint8_t frame_id;     /* Bit index claimed in dec->frame_id_alloc_mask; used as slotIndex */
+    uint8_t ref_frame_sign_bias_mask; /* NOTE(review): not referenced in this file — confirm it is needed */
+} VP9VulkanDecodePicture;
+
+/* Prepare one frame via the common Vulkan decode layer, then describe it as a
+ * picture resource (*ref) wrapped in a slot (*ref_slot). Returns 0 or <0. */
+static int vk_vp9_fill_pict(AVCodecContext *avctx, const VP9Frame **ref_src,
+                            VkVideoReferenceSlotInfoKHR *ref_slot,      /* Main structure */
+                            VkVideoPictureResourceInfoKHR *ref,         /* Goes in ^ */
+                            const VP9Frame *pic, int is_current)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *shared = dec->shared_ctx;
+    VP9VulkanDecodePicture *vp9pic = pic->hwaccel_picture_private;
+    int ret;
+
+    ret = ff_vk_decode_prepare_frame(dec, pic->tf.f, &vp9pic->vp, is_current,
+                                     dec->dedicated_dpb);
+    if (ret < 0)
+        return ret;
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height },
+        .baseArrayLayer = (!dec->dedicated_dpb || !shared->common.layered_dpb) ?
+                          0 : vp9pic->frame_id, /* layer index only for a layered dedicated DPB */
+        .imageViewBinding = vp9pic->vp.view.ref[0],
+    };
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .slotIndex = vp9pic->frame_id,
+        .pPictureResource = ref,
+    };
+
+    if (ref_src) /* optional: remember which frame backs this slot */
+        *ref_src = pic;
+
+    return 0;
+}
+
+/* Translate the frame header's filter selection into the Vulkan std enum. */
+static enum StdVideoVP9InterpolationFilter remap_interp(uint8_t is_filter_switchable,
+                                                        uint8_t raw_interpolation_filter_type)
+{
+    static const enum StdVideoVP9InterpolationFilter raw_to_std[] = {
+        [0] = STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH,
+        [1] = STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP,
+        [2] = STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP,
+        [3] = STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR,
+    };
+    return is_filter_switchable ? STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE :
+                                  raw_to_std[raw_interpolation_filter_type];
+}
+
+/* Fill in all Vulkan/StdVideo structures needed to decode the current frame:
+ * assign it a frame ID, describe its reference slots and translate the parsed
+ * frame header. Returns 0 on success, or a negative error code. */
+static int vk_vp9_start_frame(AVCodecContext          *avctx,
+                              av_unused const AVBufferRef *buffer_ref,
+                              av_unused const uint8_t *buffer,
+                              av_unused uint32_t       size)
+{
+    int err;
+    int ref_count = 0;
+    const VP9SharedContext *s = avctx->priv_data;
+
+    const VP9Frame *pic = &s->frames[CUR_FRAME];
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    uint8_t profile = (pic->frame_header->profile_high_bit << 1) | pic->frame_header->profile_low_bit;
+
+    VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &ap->vp;
+
+    /* Claim the lowest clear bit of the mask; unsigned shifts keep bit 31 well-defined */
+    if (!ap->frame_id_set) {
+        unsigned slot_idx = 0; /* stays 0 if all 32 bits are already taken */
+        for (unsigned i = 0; i < 32; i++) {
+            if (!(dec->frame_id_alloc_mask & (1U << i))) {
+                slot_idx = i;
+                break;
+            }
+        }
+        ap->frame_id = slot_idx;
+        ap->frame_id_set = 1;
+        dec->frame_id_alloc_mask |= (1U << slot_idx);
+    }
+
+    for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+        const int idx = pic->frame_header->ref_frame_idx[i];
+        const VP9Frame *ref_frame = &s->frames[idx];
+        VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private;
+        int found = 0;
+
+        if (!ref_frame->tf.f)
+            continue;
+
+        for (int j = 0; j < ref_count; j++) { /* skip frames already given a slot */
+            if (vp->ref_slots[j].slotIndex == hp->frame_id) {
+                found = 1;
+                break;
+            }
+        }
+        if (found)
+            continue;
+
+        err = vk_vp9_fill_pict(avctx, &ap->ref_src[ref_count], &vp->ref_slots[ref_count],
+                               &vp->refs[ref_count], ref_frame, 0);
+        if (err < 0)
+            return err;
+
+        ref_count++;
+    }
+
+    err = vk_vp9_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, pic, 1);
+    if (err < 0)
+        return err;
+
+    ap->loop_filter = (StdVideoVP9LoopFilter) {
+        .flags = (StdVideoVP9LoopFilterFlags) {
+            .loop_filter_delta_enabled = pic->frame_header->loop_filter_delta_enabled,
+            .loop_filter_delta_update = pic->frame_header->loop_filter_delta_update,
+        },
+        .loop_filter_level = pic->frame_header->loop_filter_level,
+        .loop_filter_sharpness = pic->frame_header->loop_filter_sharpness,
+        .update_ref_delta = 0x0,
+        .update_mode_delta = 0x0,
+    };
+
+    for (int i = 0; i < 2; i++) /* NOTE(review): flags are OR-ed without "<< i" — confirm the expected bitmask layout */
+        ap->loop_filter.update_mode_delta |= pic->frame_header->update_mode_delta[i];
+
+    for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) {
+        ap->loop_filter.loop_filter_ref_deltas[i] = pic->frame_header->loop_filter_ref_deltas[i];
+        ap->loop_filter.update_ref_delta |= pic->frame_header->update_ref_delta[i];
+    }
+    for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++)
+        ap->loop_filter.loop_filter_mode_deltas[i] = pic->frame_header->loop_filter_mode_deltas[i];
+
+    ap->segmentation = (StdVideoVP9Segmentation) {
+        .flags = (StdVideoVP9SegmentationFlags) {
+            .segmentation_update_map = pic->frame_header->segmentation_update_map,
+            .segmentation_temporal_update = pic->frame_header->segmentation_temporal_update,
+            .segmentation_update_data = pic->frame_header->segmentation_update_data,
+            .segmentation_abs_or_delta_update = pic->frame_header->segmentation_abs_or_delta_update,
+        },
+    };
+
+    for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++)
+        ap->segmentation.segmentation_tree_probs[i] = pic->frame_header->segmentation_tree_probs[i];
+    for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++)
+        ap->segmentation.segmentation_pred_prob[i] = pic->frame_header->segmentation_pred_prob[i];
+    for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) {
+        ap->segmentation.FeatureEnabled[i] = 0x0;
+        for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) {
+            ap->segmentation.FeatureEnabled[i] |= pic->frame_header->feature_enabled[i][j]; /* NOTE(review): no "<< j" — confirm */
+            ap->segmentation.FeatureData[i][j] = pic->frame_header->feature_sign[i][j] ?
+                                                 -pic->frame_header->feature_value[i][j] :
+                                                 +pic->frame_header->feature_value[i][j];
+        }
+    }
+
+    ap->color_config = (StdVideoVP9ColorConfig) {
+        .flags = (StdVideoVP9ColorConfigFlags) {
+            .color_range = pic->frame_header->color_range,
+        },
+        .BitDepth = profile < 2 ? 8 : pic->frame_header->ten_or_twelve_bit ? 12 : 10,
+        .subsampling_x = pic->frame_header->subsampling_x,
+        .subsampling_y = pic->frame_header->subsampling_y,
+        .color_space = pic->frame_header->color_space,
+    };
+
+    ap->std_pic_info = (StdVideoDecodeVP9PictureInfo) {
+        .flags = (StdVideoDecodeVP9PictureInfoFlags) {
+           .error_resilient_mode = pic->frame_header->error_resilient_mode,
+           .intra_only = pic->frame_header->intra_only,
+           .allow_high_precision_mv = pic->frame_header->allow_high_precision_mv,
+           .refresh_frame_context = pic->frame_header->refresh_frame_context,
+           .frame_parallel_decoding_mode = pic->frame_header->frame_parallel_decoding_mode,
+           .segmentation_enabled = pic->frame_header->segmentation_enabled,
+           .show_frame = pic->frame_header->show_frame,
+           .UsePrevFrameMvs = s->h.use_last_frame_mvs,
+        },
+        .profile = profile,
+        .frame_type = pic->frame_header->frame_type,
+        .frame_context_idx = pic->frame_header->frame_context_idx,
+        .reset_frame_context = pic->frame_header->reset_frame_context,
+        .refresh_frame_flags = pic->frame_header->refresh_frame_flags,
+        .ref_frame_sign_bias_mask = 0x0,
+        .interpolation_filter = remap_interp(pic->frame_header->is_filter_switchable,
+                                             pic->frame_header->raw_interpolation_filter_type),
+        .base_q_idx = pic->frame_header->base_q_idx,
+        .delta_q_y_dc = pic->frame_header->delta_q_y_dc,
+        .delta_q_uv_dc = pic->frame_header->delta_q_uv_dc,
+        .delta_q_uv_ac = pic->frame_header->delta_q_uv_ac,
+        .tile_cols_log2 = pic->frame_header->tile_cols_log2,
+        .tile_rows_log2 = pic->frame_header->tile_rows_log2,
+        /* Reserved */
+        .pColorConfig = &ap->color_config,
+        .pLoopFilter = &ap->loop_filter,
+        .pSegmentation = &ap->segmentation,
+    };
+
+    for (int i = 0; i < 3; i++) /* NOTE(review): sign bias may be indexed LAST..ALTREF (1..3), not 0..2 — confirm */
+        ap->std_pic_info.ref_frame_sign_bias_mask |= pic->frame_header->ref_frame_sign_bias[i] << i;
+
+    ap->vp9_pic_info = (VkVideoDecodeVP9PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &ap->std_pic_info,
+        .uncompressedHeaderOffset = 0,
+        .compressedHeaderOffset = s->h.uncompressed_header_size,
+        .tilesOffset = s->h.uncompressed_header_size + s->h.compressed_header_size,
+    };
+
+    for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) {
+        const int idx = pic->frame_header->ref_frame_idx[i];
+        const VP9Frame *ref_frame = &s->frames[idx];
+        VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private;
+
+        if (!ref_frame->tf.f) /* no frame behind this reference: mark the slot as absent */
+            ap->vp9_pic_info.referenceNameSlotIndices[i] = -1;
+        else
+            ap->vp9_pic_info.referenceNameSlotIndices[i] = hp->frame_id;
+    }
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &ap->vp9_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = ref_count,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->view.out[0],
+        },
+    };
+
+    ap->dec = dec; /* needed later to release the frame ID when the picture is freed */
+
+    return 0;
+}
+
+/* Pass the frame's bitstream data on to the common Vulkan decode layer. */
+static int vk_vp9_decode_slice(AVCodecContext *avctx,
+                               const uint8_t  *data,
+                               uint32_t        size)
+{
+    const VP9SharedContext *shared = avctx->priv_data;
+    const VP9Frame *cur = &shared->frames[CUR_FRAME];
+    VP9VulkanDecodePicture *vp9pic = cur->hwaccel_picture_private;
+    int ret;
+
+    ret = ff_vk_decode_add_slice(avctx, &vp9pic->vp, data, size, 0, NULL, NULL);
+
+    /* Any non-negative result is reported as plain success */
+    return ret < 0 ? ret : 0;
+}
+
+/* Gather the frame and Vulkan picture behind every reference slot recorded in
+ * decode_info, then submit the current picture for decoding. */
+static int vk_vp9_end_frame(AVCodecContext *avctx)
+{
+    const VP9SharedContext *shared = avctx->priv_data;
+    const VP9Frame *cur = &shared->frames[CUR_FRAME];
+    VP9VulkanDecodePicture *cur_priv = cur->hwaccel_picture_private;
+    FFVulkanDecodePicture *ref_pics[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 };
+    AVFrame *ref_frames[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 };
+    const unsigned nb_refs = cur_priv->vp.decode_info.referenceSlotCount;
+
+    for (unsigned i = 0; i < nb_refs; i++) {
+        const VP9Frame *src = cur_priv->ref_src[i];
+        VP9VulkanDecodePicture *src_priv = src->hwaccel_picture_private;
+        ref_frames[i] = src->tf.f;
+        ref_pics[i]   = &src_priv->vp;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %"SIZE_SPECIFIER" bytes\n",
+           cur_priv->vp.slices_size);
+
+    return ff_vk_decode_frame(avctx, cur->tf.f, &cur_priv->vp, ref_frames, ref_pics);
+}
+
+/* Free a frame's hwaccel private data and release its frame ID bit. */
+static void vk_vp9_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *hwctx = _hwctx.nc;
+    VP9VulkanDecodePicture *ap = data;
+
+    /* Workaround for a spec issue. Unsigned shift: frame_id may be 31. */
+    if (ap->frame_id_set)
+        ap->dec->frame_id_alloc_mask &= ~(1U << ap->frame_id);
+    /* Free frame resources, this also destroys the session parameters. */
+    ff_vk_decode_free_frame(hwctx, &ap->vp);
+}
+
+const FFHWAccel ff_vp9_vulkan_hwaccel = { /* VP9 frame callbacks plus the shared ff_vk_* lifecycle callbacks */
+    .p.name                = "vp9_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_VP9,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_vp9_start_frame,
+    .decode_slice          = &vk_vp9_decode_slice,
+    .end_frame             = &vk_vp9_end_frame,
+    .free_frame_priv       = &vk_vp9_free_frame_priv,
+    .frame_priv_data_size  = sizeof(VP9VulkanDecodePicture),
+    .init                  = &ff_vk_decode_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE,
+};
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index e818099fdb..2c1c38ba66 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -90,6 +90,9 @@ typedef struct VulkanDeviceFeatures {
 #ifdef VK_KHR_video_maintenance2
     VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2;
 #endif
+#ifdef VK_KHR_video_decode_vp9
+    VkPhysicalDeviceVideoDecodeVP9FeaturesKHR vp9_decode;
+#endif
 
     VkPhysicalDeviceShaderObjectFeaturesEXT shader_object;
     VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix;
@@ -227,6 +230,10 @@ static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *f
     FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_2, FF_VK_EXT_VIDEO_MAINTENANCE_2,
                      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR);
 #endif
+#ifdef VK_KHR_video_decode_vp9
+    FF_VK_STRUCT_EXT(s, &feats->device, &feats->vp9_decode, FF_VK_EXT_VIDEO_DECODE_VP9,
+                     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR);
+#endif
 
     FF_VK_STRUCT_EXT(s, &feats->device, &feats->shader_object, FF_VK_EXT_SHADER_OBJECT,
                      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT);
@@ -299,6 +306,10 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
     COPY_VAL(video_maintenance_2.videoMaintenance2);
 #endif
 
+#ifdef VK_KHR_video_decode_vp9
+    COPY_VAL(vp9_decode.videoDecodeVP9);
+#endif
+
     COPY_VAL(shader_object.shaderObject);
 
     COPY_VAL(cooperative_matrix.cooperativeMatrix);
@@ -644,6 +655,9 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,                FF_VK_EXT_VIDEO_DECODE_H264      },
     { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME,                FF_VK_EXT_VIDEO_ENCODE_H265      },
     { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,                FF_VK_EXT_VIDEO_DECODE_H265      },
+#ifdef VK_KHR_video_decode_vp9
+    { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME,                 FF_VK_EXT_VIDEO_DECODE_VP9       },
+#endif
     { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME,                 FF_VK_EXT_VIDEO_DECODE_AV1       },
 };
 
@@ -1548,6 +1562,7 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
     PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
     PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
 
+    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR);
     PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
 
     av_free(qf);
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 68fa7b802d..9fd646fa4e 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -59,7 +59,8 @@ typedef uint64_t FFVulkanExtensions;
 #define FF_VK_EXT_VIDEO_DECODE_QUEUE     (1ULL << 40) /* VK_KHR_video_decode_queue */
 #define FF_VK_EXT_VIDEO_DECODE_H264      (1ULL << 41) /* VK_KHR_video_decode_h264 */
 #define FF_VK_EXT_VIDEO_DECODE_H265      (1ULL << 42) /* VK_KHR_video_decode_h265 */
-#define FF_VK_EXT_VIDEO_DECODE_AV1       (1ULL << 43) /* VK_KHR_video_decode_av1 */
+#define FF_VK_EXT_VIDEO_DECODE_VP9       (1ULL << 43) /* VK_KHR_video_decode_vp9 */
+#define FF_VK_EXT_VIDEO_DECODE_AV1       (1ULL << 44) /* VK_KHR_video_decode_av1 */
 
 #define FF_VK_EXT_VIDEO_ENCODE_QUEUE     (1ULL << 50) /* VK_KHR_video_encode_queue */
 #define FF_VK_EXT_VIDEO_ENCODE_H264      (1ULL << 51) /* VK_KHR_video_encode_h264 */
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 7e805fdd4c..37a3731feb 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -76,6 +76,9 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
         { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H264      },
         { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_ENCODE_H265      },
         { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H265      },
+#ifdef VK_KHR_video_decode_vp9
+        { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME,          FF_VK_EXT_VIDEO_DECODE_VP9       },
+#endif
         { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME,          FF_VK_EXT_VIDEO_DECODE_AV1       },
         { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,           FF_VK_EXT_PUSH_DESCRIPTOR        },
 #ifdef VK_KHR_shader_expect_assume
-- 
2.50.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2025-07-12 18:54 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-12 18:51 [FFmpeg-devel] [PATCH v2 01/13] vf_libplacebo: add support for specifying a LUT for the input Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 02/13] hwcontext_vulkan: temporarily disable host_image_copy Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 03/13] hwcontext_vulkan: enable uniformBufferStandardLayout Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 04/13] vulkan: add support for 16-bit RGGB Bayer pixfmt Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 05/13] lavc/vulkan/common: sign-ify lengths Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 06/13] lavc: add codec ID and profiles for ProRes RAW Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 07/13] lavc: add a ProRes RAW parser Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 08/13] lavc: add a ProRes RAW decoder Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 09/13] lavc: add a ProRes RAW Vulkan hwaccel Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 10/13] scale_vulkan: refactor shader initialization Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 11/13] scale_vulkan: add support for basic Debayering Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 12/13] lavc/vp9dec: use cbs_vp9 to parse the frame header Lynne
2025-07-12 18:51 ` [FFmpeg-devel] [PATCH v2 13/13] WIP vp9: add Vulkan VP9 hwaccel Lynne

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git