* [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview @ 2025-01-19 10:36 Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne ` (9 more replies) 0 siblings, 10 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavutil/vulkan.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ libavutil/vulkan.h | 7 +++++++ 2 files changed, 58 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 7315af928f..c86b77c3ee 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1586,6 +1586,57 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt) return VK_FORMAT_UNDEFINED; } +int ff_vk_create_imageview(FFVulkanContext *s, + VkImageView *img_view, VkImageAspectFlags *aspect, + AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + + VkImageViewUsageCreateInfo view_usage_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = vkfc->usage & + (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)), + }; + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &view_usage_info, + .image = vkf->img[FFMIN(plane, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = map_fmt_to_rep(rep_fmts[plane], rep_fmt), + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = ff_vk_aspect_flag(f, plane), + .levelCount = 1, + .layerCount = 1, + }, + }; + if (view_create_info.format == VK_FORMAT_UNDEFINED) { + av_log(s, AV_LOG_ERROR, 
"Unable to find a compatible representation " + "of format %i and mode %i\n", + rep_fmts[plane], rep_fmt); + return AVERROR(EINVAL); + } + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, img_view); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + *aspect = view_create_info.subresourceRange.aspectMask; + + return 0; +} + int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 41f71df376..4bc5b57a1b 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -456,6 +456,13 @@ int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f); void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); +/** + * Create a single imageview for a given plane. + */ +int ff_vk_create_imageview(FFVulkanContext *s, + VkImageView *img_view, VkImageAspectFlags *aspect, + AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt); + /** * Create an imageview and add it as a dependency to an execution. */ -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-20 3:13 ` Michael Niedermayer 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 03/12] ffv1dec: move slice start finding into a function Lynne ` (8 subsequent siblings) 9 siblings, 1 reply; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne Adding support for hwaccels means that avctx->pix_fmt will indicate hardware formats. --- libavcodec/ffv1.h | 1 + libavcodec/ffv1dec.c | 145 +++++++++++++++++++++++-------------------- 2 files changed, 79 insertions(+), 67 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index ca03fd2b10..6b4ffca3f9 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -122,6 +122,7 @@ typedef struct FFV1Context { int key_frame; ProgressFrame picture, last_picture; uint32_t crcref; + enum AVPixelFormat pix_fmt; const AVFrame *cur_enc_frame; int plane_count; diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 7845815873..b7f235c47f 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -40,6 +40,7 @@ #include "progressframe.h" #include "libavutil/refstruct.h" #include "thread.h" +#include "decode.h" static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state, int is_signed) @@ -268,7 +269,7 @@ static int decode_slice(AVCodecContext *c, void *arg) FFV1Context *f = c->priv_data; FFV1SliceContext *sc = arg; int width, height, x, y, ret; - const int ps = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step; + const int ps = av_pix_fmt_desc_get(f->pix_fmt)->comp[0].step; AVFrame * const p = f->picture.f; const int si = sc - f->slices; GetBitContext gb; @@ -537,6 +538,16 @@ static int read_extra_header(FFV1Context *f) return 0; } +static enum AVPixelFormat get_pixel_format(FFV1Context *f) +{ + enum AVPixelFormat pix_fmts[] = { + f->pix_fmt, + 
AV_PIX_FMT_NONE, + }; + + return ff_get_format(f->avctx, pix_fmts); +} + static int read_header(FFV1Context *f) { uint8_t state[CONTEXT_SIZE]; @@ -606,109 +617,109 @@ static int read_header(FFV1Context *f) if (f->colorspace == 0) { if (!f->transparency && !f->chroma_planes) { if (f->avctx->bits_per_raw_sample <= 8) - f->avctx->pix_fmt = AV_PIX_FMT_GRAY8; + f->pix_fmt = AV_PIX_FMT_GRAY8; else if (f->avctx->bits_per_raw_sample == 9) { f->packed_at_lsb = 1; - f->avctx->pix_fmt = AV_PIX_FMT_GRAY9; + f->pix_fmt = AV_PIX_FMT_GRAY9; } else if (f->avctx->bits_per_raw_sample == 10) { f->packed_at_lsb = 1; - f->avctx->pix_fmt = AV_PIX_FMT_GRAY10; + f->pix_fmt = AV_PIX_FMT_GRAY10; } else if (f->avctx->bits_per_raw_sample == 12) { f->packed_at_lsb = 1; - f->avctx->pix_fmt = AV_PIX_FMT_GRAY12; + f->pix_fmt = AV_PIX_FMT_GRAY12; } else if (f->avctx->bits_per_raw_sample == 14) { f->packed_at_lsb = 1; - f->avctx->pix_fmt = AV_PIX_FMT_GRAY14; + f->pix_fmt = AV_PIX_FMT_GRAY14; } else if (f->avctx->bits_per_raw_sample == 16) { f->packed_at_lsb = 1; - f->avctx->pix_fmt = AV_PIX_FMT_GRAY16; + f->pix_fmt = AV_PIX_FMT_GRAY16; } else if (f->avctx->bits_per_raw_sample < 16) { - f->avctx->pix_fmt = AV_PIX_FMT_GRAY16; + f->pix_fmt = AV_PIX_FMT_GRAY16; } else return AVERROR(ENOSYS); } else if (f->transparency && !f->chroma_planes) { if (f->avctx->bits_per_raw_sample <= 8) - f->avctx->pix_fmt = AV_PIX_FMT_YA8; + f->pix_fmt = AV_PIX_FMT_YA8; else return AVERROR(ENOSYS); } else if (f->avctx->bits_per_raw_sample<=8 && !f->transparency) { switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P; break; - case 0x01: f->avctx->pix_fmt = AV_PIX_FMT_YUV440P; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P; break; - case 0x20: f->avctx->pix_fmt = AV_PIX_FMT_YUV411P; break; - case 0x22: f->avctx->pix_fmt = AV_PIX_FMT_YUV410P; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUV444P; break; + 
case 0x01: f->pix_fmt = AV_PIX_FMT_YUV440P; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUV422P; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUV420P; break; + case 0x20: f->pix_fmt = AV_PIX_FMT_YUV411P; break; + case 0x22: f->pix_fmt = AV_PIX_FMT_YUV410P; break; } } else if (f->avctx->bits_per_raw_sample <= 8 && f->transparency) { switch(16*f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUVA444P; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUVA422P; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUVA420P; break; } } else if (f->avctx->bits_per_raw_sample == 9 && !f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P9; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P9; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P9; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUV444P9; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUV422P9; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUV420P9; break; } } else if (f->avctx->bits_per_raw_sample == 9 && f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P9; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P9; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P9; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUVA444P9; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUVA422P9; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUVA420P9; break; } } else if (f->avctx->bits_per_raw_sample == 10 && !f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; break; - case 0x01: f->avctx->pix_fmt = AV_PIX_FMT_YUV440P10; break; - case 0x10: 
f->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P10; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUV444P10; break; + case 0x01: f->pix_fmt = AV_PIX_FMT_YUV440P10; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUV422P10; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUV420P10; break; } } else if (f->avctx->bits_per_raw_sample == 10 && f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P10; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P10; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P10; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUVA444P10; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUVA422P10; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUVA420P10; break; } } else if (f->avctx->bits_per_raw_sample == 12 && !f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P12; break; - case 0x01: f->avctx->pix_fmt = AV_PIX_FMT_YUV440P12; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P12; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P12; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUV444P12; break; + case 0x01: f->pix_fmt = AV_PIX_FMT_YUV440P12; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUV422P12; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUV420P12; break; } } else if (f->avctx->bits_per_raw_sample == 12 && f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P12; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P12; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUVA444P12; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUVA422P12; break; } } else if (f->avctx->bits_per_raw_sample == 14 && !f->transparency) { f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: 
f->avctx->pix_fmt = AV_PIX_FMT_YUV444P14; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P14; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P14; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUV444P14; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUV422P14; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUV420P14; break; } } else if (f->avctx->bits_per_raw_sample == 16 && !f->transparency){ f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P16; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P16; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P16; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUV444P16; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUV422P16; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUV420P16; break; } } else if (f->avctx->bits_per_raw_sample == 16 && f->transparency){ f->packed_at_lsb = 1; switch(16 * f->chroma_h_shift + f->chroma_v_shift) { - case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P16; break; - case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P16; break; - case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P16; break; + case 0x00: f->pix_fmt = AV_PIX_FMT_YUVA444P16; break; + case 0x10: f->pix_fmt = AV_PIX_FMT_YUVA422P16; break; + case 0x11: f->pix_fmt = AV_PIX_FMT_YUVA420P16; break; } } } else if (f->colorspace == 1) { @@ -718,42 +729,42 @@ static int read_header(FFV1Context *f) return AVERROR(ENOSYS); } if ( f->avctx->bits_per_raw_sample <= 8 && !f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_0RGB32; + f->pix_fmt = AV_PIX_FMT_0RGB32; else if (f->avctx->bits_per_raw_sample <= 8 && f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_RGB32; + f->pix_fmt = AV_PIX_FMT_RGB32; else if (f->avctx->bits_per_raw_sample == 9 && !f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRP9; + f->pix_fmt = AV_PIX_FMT_GBRP9; else if (f->avctx->bits_per_raw_sample == 10 && !f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRP10; + f->pix_fmt = 
AV_PIX_FMT_GBRP10; else if (f->avctx->bits_per_raw_sample == 10 && f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRAP10; + f->pix_fmt = AV_PIX_FMT_GBRAP10; else if (f->avctx->bits_per_raw_sample == 12 && !f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRP12; + f->pix_fmt = AV_PIX_FMT_GBRP12; else if (f->avctx->bits_per_raw_sample == 12 && f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRAP12; + f->pix_fmt = AV_PIX_FMT_GBRAP12; else if (f->avctx->bits_per_raw_sample == 14 && !f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRP14; + f->pix_fmt = AV_PIX_FMT_GBRP14; else if (f->avctx->bits_per_raw_sample == 14 && f->transparency) - f->avctx->pix_fmt = AV_PIX_FMT_GBRAP14; - else if (f->avctx->bits_per_raw_sample == 16 && !f->transparency) { - f->avctx->pix_fmt = AV_PIX_FMT_GBRP16; - f->use32bit = 1; - } - else if (f->avctx->bits_per_raw_sample == 16 && f->transparency) { - f->avctx->pix_fmt = AV_PIX_FMT_GBRAP16; - f->use32bit = 1; - } + f->pix_fmt = AV_PIX_FMT_GBRAP14; + else if (f->avctx->bits_per_raw_sample == 16 && !f->transparency) + f->pix_fmt = AV_PIX_FMT_GBRP16; + else if (f->avctx->bits_per_raw_sample == 16 && f->transparency) + f->pix_fmt = AV_PIX_FMT_GBRAP16; } else { av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n"); return AVERROR(ENOSYS); } - if (f->avctx->pix_fmt == AV_PIX_FMT_NONE) { + if (f->pix_fmt == AV_PIX_FMT_NONE) { av_log(f->avctx, AV_LOG_ERROR, "format not supported\n"); return AVERROR(ENOSYS); } + f->avctx->pix_fmt = get_pixel_format(f); + if (f->avctx->pix_fmt < 0) + return AVERROR(EINVAL); + ff_dlog(f->avctx, "%d %d %d\n", - f->chroma_h_shift, f->chroma_v_shift, f->avctx->pix_fmt); + f->chroma_h_shift, f->chroma_v_shift, f->pix_fmt); if (f->version < 2) { context_count = read_quant_tables(c, f->quant_tables[0]); if (context_count < 0) { @@ -986,7 +997,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, for (int i = f->slice_count - 1; i >= 0; i--) { FFV1SliceContext *sc = &f->slices[i]; if 
(sc->slice_damaged && f->last_picture.f) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(f->pix_fmt); const uint8_t *src[4]; uint8_t *dst[4]; ff_progress_frame_await(&f->last_picture, INT_MAX); @@ -1003,7 +1014,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, av_image_copy(dst, p->linesize, src, f->last_picture.f->linesize, - avctx->pix_fmt, + f->pix_fmt, sc->slice_width, sc->slice_height); -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne @ 2025-01-20 3:13 ` Michael Niedermayer 0 siblings, 0 replies; 13+ messages in thread From: Michael Niedermayer @ 2025-01-20 3:13 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 1378 bytes --] On Sun, Jan 19, 2025 at 07:36:20PM +0900, Lynne wrote: > Adding support for hwaccels means that avctx->pix_fmt will indicate > hardware formats. > --- > libavcodec/ffv1.h | 1 + > libavcodec/ffv1dec.c | 145 +++++++++++++++++++++++-------------------- > 2 files changed, 79 insertions(+), 67 deletions(-) fails on my old ubuntu --- ./tests/ref/vsynth/vsynth1-ffv1-v3-rgb48 2025-01-05 20:53:25.659847546 +0100 +++ tests/data/fate/vsynth1-ffv1-v3-rgb48 2025-01-20 04:12:49.127448635 +0100 @@ -1,4 +1,4 @@ f457dba7d58f0c28aedcfef518758d23 *tests/data/fate/vsynth1-ffv1-v3-rgb48.avi 16858304 tests/data/fate/vsynth1-ffv1-v3-rgb48.avi -e2542b84193de462b04da530ab941f31 *tests/data/fate/vsynth1-ffv1-v3-rgb48.out.rawvideo -stddev: 3.52 PSNR: 37.19 MAXDIFF: 74 bytes: 7603200/ 7603200 +2dc8a935cdcaa45b1d1968115ebd702e *tests/data/fate/vsynth1-ffv1-v3-rgb48.out.rawvideo +stddev: 64.67 PSNR: 11.92 MAXDIFF: 225 bytes: 7603200/ 7603200 Test vsynth1-ffv1-v3-rgb48 failed. Look at tests/data/fate/vsynth1-ffv1-v3-rgb48.err for details. make: *** [tests/Makefile:311: fate-vsynth1-ffv1-v3-rgb48] Error 1 thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB No human being will ever know the Truth, for even if they happen to say it by chance, they would not even known they had done so. 
-- Xenophanes [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 03/12] ffv1dec: move slice start finding into a function 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 04/12] ffv1dec: move header parsing into a separate function Lynne ` (7 subsequent siblings) 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This also cleans up and gives the code some much needed comments. --- libavcodec/ffv1dec.c | 66 +++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index b7f235c47f..aa988571bc 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -866,6 +866,38 @@ static av_cold int decode_init(AVCodecContext *avctx) return 0; } +static int find_next_slice(AVCodecContext *avctx, + uint8_t *buf, uint8_t *buf_end, int idx, + uint8_t **pos, uint32_t *len) +{ + FFV1Context *f = avctx->priv_data; + + /* Length field */ + uint32_t v = buf_end - buf; + if (idx || f->version > 2) { + /* Three bytes of length, plus flush bit + CRC */ + uint32_t trailer = 3 + 5*!!f->ec; + if (trailer > buf_end - buf) + v = INT_MAX; + else + v = AV_RB24(buf_end - trailer) + trailer; + } + + if (buf_end - buf < v) { + av_log(avctx, AV_LOG_ERROR, "Slice pointer chain broken\n"); + ff_progress_frame_report(&f->picture, INT_MAX); + return AVERROR_INVALIDDATA; + } + + *len = v; + if (idx) + *pos = buf_end - v; + else + *pos = buf; + + return 0; +} + static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame, AVPacket *avpkt) { @@ -875,7 +907,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, RangeCoder *const c = &f->slices[0].c; int ret, key_frame; uint8_t keystate = 128; - uint8_t *buf_p; + uint8_t 
*buf_end; AVFrame *p; ff_progress_frame_unref(&f->last_picture); @@ -941,27 +973,23 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, ff_thread_finish_setup(avctx); - buf_p = buf + buf_size; + buf_end = buf + buf_size; for (int i = f->slice_count - 1; i >= 0; i--) { FFV1SliceContext *sc = &f->slices[i]; - int trailer = 3 + 5*!!f->ec; - int v; - sc->slice_damaged = 0; + uint8_t *pos; + uint32_t len; + int err = find_next_slice(avctx, buf, buf_end, i, + &pos, &len); + if (err < 0) + return err; - if (i || f->version > 2) { - if (trailer > buf_p - buf) v = INT_MAX; - else v = AV_RB24(buf_p-trailer) + trailer; - } else v = buf_p - c->bytestream_start; - if (buf_p - c->bytestream_start < v) { - av_log(avctx, AV_LOG_ERROR, "Slice pointer chain broken\n"); - ff_progress_frame_report(&f->picture, INT_MAX); - return AVERROR_INVALIDDATA; - } - buf_p -= v; + buf_end -= len; + + sc->slice_damaged = 0; if (f->ec) { - unsigned crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), f->crcref, buf_p, v); + unsigned crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), f->crcref, pos, len); if (crc != f->crcref) { int64_t ts = avpkt->pts != AV_NOPTS_VALUE ? 
avpkt->pts : avpkt->dts; av_log(f->avctx, AV_LOG_ERROR, "slice CRC mismatch %X!", crc); @@ -975,15 +1003,15 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, slice_set_damaged(f, sc); } if (avctx->debug & FF_DEBUG_PICT_INFO) { - av_log(avctx, AV_LOG_DEBUG, "slice %d, CRC: 0x%08"PRIX32"\n", i, AV_RB32(buf_p + v - 4)); + av_log(avctx, AV_LOG_DEBUG, "slice %d, CRC: 0x%08"PRIX32"\n", i, AV_RB32(pos + len - 4)); } } if (i) { - ff_init_range_decoder(&sc->c, buf_p, v); + ff_init_range_decoder(&sc->c, pos, len); ff_build_rac_states(&sc->c, 0.05 * (1LL << 32), 256 - 8); } else - sc->c.bytestream_end = buf_p + v; + sc->c.bytestream_end = pos + len; } -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 04/12] ffv1dec: move header parsing into a separate function 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 03/12] ffv1dec: move slice start finding into a function Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 05/12] ffv1dec: move slice decoding " Lynne ` (6 subsequent siblings) 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavcodec/ffv1dec.c | 64 +++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index aa988571bc..68a233932d 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -548,11 +548,10 @@ static enum AVPixelFormat get_pixel_format(FFV1Context *f) return ff_get_format(f->avctx, pix_fmts); } -static int read_header(FFV1Context *f) +static int read_header(FFV1Context *f, RangeCoder *c) { uint8_t state[CONTEXT_SIZE]; int context_count = -1; //-1 to avoid warning - RangeCoder *const c = &f->slices[0].c; memset(state, 128, sizeof(state)); @@ -898,31 +897,20 @@ static int find_next_slice(AVCodecContext *avctx, return 0; } -static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, - int *got_frame, AVPacket *avpkt) +static int decode_header(AVCodecContext *avctx, RangeCoder *c, + uint8_t *buf, size_t buf_size) { - uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - FFV1Context *f = avctx->priv_data; - RangeCoder *const c = &f->slices[0].c; - int ret, key_frame; - uint8_t keystate = 128; - uint8_t *buf_end; - AVFrame *p; - - ff_progress_frame_unref(&f->last_picture); - FFSWAP(ProgressFrame, f->picture, f->last_picture); - + int ret; + FFV1Context *f = avctx->priv_data; - f->avctx = avctx; - f->frame_damaged = 0; 
+ uint8_t keystate = 128; ff_init_range_decoder(c, buf, buf_size); ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8); if (get_rac(c, &keystate)) { - key_frame = AV_FRAME_FLAG_KEY; + f->key_frame = AV_FRAME_FLAG_KEY; f->key_frame_ok = 0; - if ((ret = read_header(f)) < 0) + if ((ret = read_header(f, c)) < 0) return ret; f->key_frame_ok = 1; } else { @@ -931,7 +919,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, "Cannot decode non-keyframe without valid keyframe\n"); return AVERROR_INVALIDDATA; } - key_frame = 0; + f->key_frame = 0; } if (f->ac != AC_GOLOMB_RICE) { @@ -950,6 +938,33 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, return AVERROR_INVALIDDATA; } + return 0; +} + +static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, + int *got_frame, AVPacket *avpkt) +{ + uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + FFV1Context *f = avctx->priv_data; + int ret; + uint8_t *buf_end; + AVFrame *p; + + /* This is copied onto the first slice's range coder context */ + RangeCoder c; + + ff_progress_frame_unref(&f->last_picture); + FFSWAP(ProgressFrame, f->picture, f->last_picture); + + + f->avctx = avctx; + f->frame_damaged = 0; + + ret = decode_header(avctx, &c, avpkt->data, avpkt->size); + if (ret < 0) + return ret; + ret = ff_progress_frame_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF); if (ret < 0) @@ -958,7 +973,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, p = f->picture.f; p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. 
P - p->flags = (p->flags & ~AV_FRAME_FLAG_KEY) | key_frame; + p->flags = (p->flags & ~AV_FRAME_FLAG_KEY) | f->key_frame; if (f->version < 3 && avctx->field_order > AV_FIELD_PROGRESSIVE) { /* we have interlaced material flagged in container */ @@ -1010,9 +1025,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, if (i) { ff_init_range_decoder(&sc->c, pos, len); ff_build_rac_states(&sc->c, 0.05 * (1LL << 32), 256 - 8); - } else + } else { + sc->c = c; sc->c.bytestream_end = pos + len; - + } } avctx->execute(avctx, -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 05/12] ffv1dec: move slice decoding into a separate function 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (2 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 04/12] ffv1dec: move header parsing into a separate function Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 06/12] ffv1dec: set f->state_transition for default range coder table Lynne ` (5 subsequent siblings) 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This simply moves all slice decoding code from decode_frame to decode_slices; nothing more. --- libavcodec/ffv1dec.c | 106 ++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 46 deletions(-) diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 68a233932d..83c5975550 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -941,54 +941,16 @@ static int decode_header(AVCodecContext *avctx, RangeCoder *c, return 0; } -static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, - int *got_frame, AVPacket *avpkt) +static int decode_slices(AVCodecContext *avctx, RangeCoder c, + AVPacket *avpkt) { - uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - FFV1Context *f = avctx->priv_data; - int ret; - uint8_t *buf_end; - AVFrame *p; - - /* This is copied onto the first slice's range coder context */ - RangeCoder c; - - ff_progress_frame_unref(&f->last_picture); - FFSWAP(ProgressFrame, f->picture, f->last_picture); - - - f->avctx = avctx; - f->frame_damaged = 0; - - ret = decode_header(avctx, &c, avpkt->data, avpkt->size); - if (ret < 0) - return ret; - - ret = ff_progress_frame_get_buffer(avctx, &f->picture, - AV_GET_BUFFER_FLAG_REF); - if (ret < 0) - return ret; - - p = f->picture.f; - - p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. 
P - p->flags = (p->flags & ~AV_FRAME_FLAG_KEY) | f->key_frame; - - if (f->version < 3 && avctx->field_order > AV_FIELD_PROGRESSIVE) { - /* we have interlaced material flagged in container */ - p->flags |= AV_FRAME_FLAG_INTERLACED; - if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB) - p->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; - } - - if (avctx->debug & FF_DEBUG_PICT_INFO) - av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n", - f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample); + FFV1Context *f = avctx->priv_data; + AVFrame *p = f->picture.f; - ff_thread_finish_setup(avctx); + uint8_t *buf = avpkt->data; + size_t buf_size = avpkt->size; + uint8_t *buf_end = buf + buf_size; - buf_end = buf + buf_size; for (int i = f->slice_count - 1; i >= 0; i--) { FFV1SliceContext *sc = &f->slices[i]; @@ -1065,6 +1027,58 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, f->slice_damaged[i] = 1; } } + + return 0; +} + +static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, + int *got_frame, AVPacket *avpkt) +{ + FFV1Context *f = avctx->priv_data; + int ret; + AVFrame *p; + + /* This is copied onto the first slice's range coder context */ + RangeCoder c; + + ff_progress_frame_unref(&f->last_picture); + FFSWAP(ProgressFrame, f->picture, f->last_picture); + + + f->avctx = avctx; + f->frame_damaged = 0; + + ret = decode_header(avctx, &c, avpkt->data, avpkt->size); + if (ret < 0) + return ret; + + ret = ff_progress_frame_get_buffer(avctx, &f->picture, + AV_GET_BUFFER_FLAG_REF); + if (ret < 0) + return ret; + + p = f->picture.f; + + p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. 
P + p->flags = (p->flags & ~AV_FRAME_FLAG_KEY) | f->key_frame; + + if (f->version < 3 && avctx->field_order > AV_FIELD_PROGRESSIVE) { + /* we have interlaced material flagged in container */ + p->flags |= AV_FRAME_FLAG_INTERLACED; + if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB) + p->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; + } + + if (avctx->debug & FF_DEBUG_PICT_INFO) + av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n", + f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample); + + ff_thread_finish_setup(avctx); + + ret = decode_slices(avctx, c, avpkt); + if (ret < 0) + return ret; + ff_progress_frame_report(&f->picture, INT_MAX); ff_progress_frame_unref(&f->last_picture); @@ -1073,7 +1087,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, *got_frame = 1; - return buf_size; + return avpkt->size; } #if HAVE_THREADS -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 06/12] ffv1dec: set f->state_transition for default range coder table 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (3 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 05/12] ffv1dec: move slice decoding " Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 07/12] ffv1dec: add support for hwaccels Lynne ` (4 subsequent siblings) 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne The table is only set when f->ac is set to CUSTOM. Setting it for default range coder tables simplifies hardware accelerator code. --- libavcodec/ffv1dec.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 83c5975550..fdd3d1c15d 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -444,6 +444,11 @@ static int read_extra_header(FFV1Context *f) if (f->ac == AC_RANGE_CUSTOM_TAB) { for (int i = 1; i < 256; i++) f->state_transition[i] = get_symbol(&c, state, 1) + c.one_state[i]; + } else { + RangeCoder rc; + ff_build_rac_states(&rc, 0.05 * (1LL << 32), 256 - 8); + for (int i = 1; i < 256; i++) + f->state_transition[i] = rc.one_state[i]; } f->colorspace = get_symbol(&c, state, 0); //YUV cs type @@ -574,6 +579,11 @@ static int read_header(FFV1Context *f, RangeCoder *c) } f->state_transition[i] = st; } + } else { + RangeCoder rc; + ff_build_rac_states(&rc, 0.05 * (1LL << 32), 256 - 8); + for (int i = 1; i < 256; i++) + f->state_transition[i] = rc.one_state[i]; } colorspace = get_symbol(c, state, 0); //YUV cs type -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 07/12] ffv1dec: add support for hwaccels 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (4 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 06/12] ffv1dec: set f->state_transition for default range coder table Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne ` (3 subsequent siblings) 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This commit adds support for hardware accelerated decoding to the decoder. The previous commits already refactored the decoder, this commit simply adds calls to hooks to decode. --- libavcodec/ffv1.h | 1 + libavcodec/ffv1dec.c | 62 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 6b4ffca3f9..93174bd45e 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -121,6 +121,7 @@ typedef struct FFV1Context { int64_t picture_number; int key_frame; ProgressFrame picture, last_picture; + void *hwaccel_picture_private, *hwaccel_last_picture_private; uint32_t crcref; enum AVPixelFormat pix_fmt; diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index fdd3d1c15d..2dac905cf5 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -41,6 +41,9 @@ #include "libavutil/refstruct.h" #include "thread.h" #include "decode.h" +#include "hwconfig.h" +#include "hwaccel_internal.h" +#include "config_components.h" static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state, int is_signed) @@ -1047,6 +1050,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, FFV1Context *f = avctx->priv_data; int ret; AVFrame *p; + const FFHWAccel *hwaccel = NULL; /* This is copied onto the first slice's range coder context */ RangeCoder c; @@ -1054,7 +1058,6 @@ static int 
decode_frame(AVCodecContext *avctx, AVFrame *rframe, ff_progress_frame_unref(&f->last_picture); FFSWAP(ProgressFrame, f->picture, f->last_picture); - f->avctx = avctx; f->frame_damaged = 0; @@ -1062,11 +1065,18 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, if (ret < 0) return ret; + if (avctx->hwaccel) + hwaccel = ffhwaccel(avctx->hwaccel); + ret = ff_progress_frame_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF); if (ret < 0) return ret; + ret = ff_hwaccel_frame_priv_alloc(avctx, &f->hwaccel_picture_private); + if (ret < 0) + return ret; + p = f->picture.f; p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P @@ -1083,15 +1093,53 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n", f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample); + /* Start */ + if (hwaccel) { + ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size); + if (ret < 0) + return ret; + } + ff_thread_finish_setup(avctx); - ret = decode_slices(avctx, c, avpkt); - if (ret < 0) - return ret; + /* Decode slices */ + if (hwaccel) { + uint8_t *buf_end = avpkt->data + avpkt->size; + + if (!(p->flags & AV_FRAME_FLAG_KEY) && f->last_picture.f) + ff_progress_frame_await(&f->last_picture, f->slice_count - 1); + + for (int i = f->slice_count - 1; i >= 0; i--) { + uint8_t *pos; + uint32_t len; + ret = find_next_slice(avctx, avpkt->data, buf_end, i, + &pos, &len); + if (ret < 0) + return ret; + + buf_end -= len; + + ret = hwaccel->decode_slice(avctx, pos, len); + if (ret < 0) + return ret; + } + } else { + ret = decode_slices(avctx, c, avpkt); + if (ret < 0) + return ret; + } + + /* Finalize */ + if (hwaccel) { + ret = hwaccel->end_frame(avctx); + if (ret < 0) + return ret; + } ff_progress_frame_report(&f->picture, INT_MAX); ff_progress_frame_unref(&f->last_picture); + av_refstruct_unref(&f->hwaccel_last_picture_private); if ((ret = 
av_frame_ref(rframe, f->picture.f)) < 0) return ret; @@ -1163,7 +1211,10 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx) FFV1Context *const s = avctx->priv_data; ff_progress_frame_unref(&s->picture); + av_refstruct_unref(&s->hwaccel_picture_private); + ff_progress_frame_unref(&s->last_picture); + av_refstruct_unref(&s->hwaccel_last_picture_private); return ff_ffv1_close(avctx); } @@ -1182,4 +1233,7 @@ const FFCodec ff_ffv1_decoder = { AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_USES_PROGRESSFRAMES, + .hw_configs = (const AVCodecHWConfigInternal *const []) { + NULL + }, }; -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (5 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 07/12] ffv1dec: add support for hwaccels Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 09/12] ffv1_vulkan: move global buffer creation to a shared file Lynne ` (2 subsequent siblings) 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne The shaders were written to support sharing, but needed slight tweaking. --- libavcodec/ffv1enc_vulkan.c | 75 +++++++++++++++------------ libavcodec/vulkan/ffv1_common.comp | 24 +++++++-- libavcodec/vulkan/ffv1_enc_setup.comp | 18 +------ libavcodec/vulkan/ffv1_reset.comp | 3 +- libavcodec/vulkan/rangecoder.comp | 27 +++++----- 5 files changed, 82 insertions(+), 65 deletions(-) diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 243f472568..53d648bcec 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -136,14 +136,15 @@ typedef struct FFv1VkResetParameters { uint32_t context_count; uint8_t codec_planes; uint8_t key_frame; - uint8_t padding[3]; + uint8_t version; + uint8_t micro_version; + uint8_t padding[1]; } FFv1VkResetParameters; typedef struct FFv1VkParameters { VkDeviceAddress slice_state; VkDeviceAddress scratch_data; VkDeviceAddress out_data; - uint64_t slice_size_max; int32_t sar[2]; uint32_t chroma_shift[2]; @@ -151,6 +152,7 @@ typedef struct FFv1VkParameters { uint32_t plane_state_size; uint32_t context_count; uint32_t crcref; + uint32_t slice_size_max; uint8_t bits_per_raw_sample; uint8_t context_model; @@ -175,7 +177,6 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, u8buf slice_state; ); GLSLC(1, u8buf scratch_data; ); GLSLC(1, u8buf out_data; ); - GLSLC(1, uint64_t slice_size_max; ); GLSLC(0, ); GLSLC(1, ivec2 
sar; ); GLSLC(1, uvec2 chroma_shift; ); @@ -183,6 +184,7 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, uint plane_state_size; ); GLSLC(1, uint context_count; ); GLSLC(1, uint32_t crcref; ); + GLSLC(1, uint32_t slice_size_max; ); GLSLC(0, ); GLSLC(1, uint8_t bits_per_raw_sample; ); GLSLC(1, uint8_t context_model; ); @@ -492,7 +494,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .slice_state = slice_data_buf->address + f->slice_count*256, .scratch_data = tmp_data_buf->address, .out_data = out_data_buf->address, - .slice_size_max = out_data_buf->size / f->slice_count, .bits_per_raw_sample = f->bits_per_raw_sample, .sar[0] = pict->sample_aspect_ratio.num, .sar[1] = pict->sample_aspect_ratio.den, @@ -501,6 +502,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .plane_state_size = plane_state_size, .context_count = context_count, .crcref = f->crcref, + .slice_size_max = out_data_buf->size / f->slice_count, .context_model = fv->ctx.context_model, .version = f->version, .micro_version = f->micro_version, @@ -966,7 +968,6 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) GLSLF(0, #define TYPE int%i_t ,smp_bits); GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); - GLSLD(ff_source_common_comp); GLSLD(ff_source_rangecoder_comp); if (f->ac == AC_GOLOMB_RICE) @@ -993,6 +994,10 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) 1, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + add_push_data(shd); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1038,8 +1043,6 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - 
add_push_data(shd); - GLSLD(ff_source_ffv1_enc_setup_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1074,6 +1077,22 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) wg_dim, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint context_count; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1110,17 +1129,6 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0)); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint context_count; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_ffv1_reset_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1164,6 +1172,18 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) wg_count, wg_count, 1, 0)); + define_shared_code(avctx, shd); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, int offset; ); + GLSLC(1, uint8_t bits; ); + GLSLC(1, uint8_t planar_rgb; ); + 
GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1187,8 +1207,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); - define_shared_code(avctx, shd); - desc_set = (FFVulkanDescriptorSetBinding []) { { .name = "slice_data_buf", @@ -1220,16 +1238,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, int offset; ); - GLSLC(1, uint8_t bits; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_ffv1_enc_rct_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1268,6 +1276,11 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) 1, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1328,8 +1341,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - add_push_data(shd); - /* Assemble the shader body */ 
GLSLD(ff_source_ffv1_enc_common_comp); diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp index 5b4a882367..604d03b2de 100644 --- a/libavcodec/vulkan/ffv1_common.comp +++ b/libavcodec/vulkan/ffv1_common.comp @@ -22,17 +22,18 @@ struct SliceContext { RangeCoder c; - -#ifdef GOLOMB PutBitContext pb; /* 8*8 bytes */ -#endif ivec2 slice_dim; ivec2 slice_pos; ivec2 slice_rct_coef; + u8vec4 quant_table_idx; + uint context_count; uint hdr_len; // only used for golomb - int slice_coding_mode; + + uint slice_coding_mode; + bool slice_reset_contexts; }; /* -1, { -1, 0 } */ @@ -72,3 +73,18 @@ const uint32_t log2_run[41] = { 16, 17, 18, 19, 20, 21, 22, 23, 24, }; + +uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) +{ + uint mpw = 1 << chroma_shift; + uint awidth = align(width, mpw); + + if ((version < 4) || ((version == 4) && (micro_version < 3))) + return width * sx / num_h_slices; + + sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; + if (sx == awidth) + sx = width; + + return sx; +} diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp index b861e25f74..23f09b2af6 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp +++ b/libavcodec/vulkan/ffv1_enc_setup.comp @@ -20,21 +20,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) -{ - uint mpw = 1 << chroma_shift; - uint awidth = align(width, mpw); - - if ((version < 4) || ((version == 4) && (micro_version < 3))) - return width * sx / num_h_slices; - - sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; - if (sx == awidth) - sx = width; - - return sx; -} - void init_slice(out SliceContext sc, const uint slice_idx) { /* Set coordinates */ @@ -52,6 +37,7 @@ void init_slice(out SliceContext sc, const uint slice_idx) sc.slice_dim = ivec2(sxe - sxs, sye - sys); 
sc.slice_rct_coef = ivec2(1, 1); sc.slice_coding_mode = int(force_pcm == 1); + sc.slice_reset_contexts = sc.slice_coding_mode == 1; rac_init(sc.c, OFFBUF(u8buf, out_data, slice_idx * slice_size_max), @@ -105,7 +91,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state) put_symbol_unsigned(sc.c, state, sar.y); if (version >= 4) { - put_rac_full(sc.c, state, sc.slice_coding_mode == 1); + put_rac_full(sc.c, state, sc.slice_reset_contexts); put_symbol_unsigned(sc.c, state, sc.slice_coding_mode); if (sc.slice_coding_mode != 1 && colorspace == 1) { put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y); diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp index c7c7962850..1b87ca754e 100644 --- a/libavcodec/vulkan/ffv1_reset.comp +++ b/libavcodec/vulkan/ffv1_reset.comp @@ -24,7 +24,8 @@ void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0) + if (key_frame == 0 && + slice_ctx[slice_idx].slice_reset_contexts == false) return; uint64_t slice_state_off = uint64_t(slice_state) + diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 848a056fb1..6e3b9c1238 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -21,8 +21,9 @@ */ struct RangeCoder { - u8buf bytestream_start; - u8buf bytestream; + uint64_t bytestream_start; + uint64_t bytestream; + uint64_t bytestream_end; uint low; uint16_t range; @@ -34,28 +35,29 @@ struct RangeCoder { void renorm_encoder_full(inout RangeCoder c) { int bs_cnt = 0; + u8buf bytestream = u8buf(c.bytestream); if (c.outstanding_byte == 0xFF) { c.outstanding_byte = uint8_t(c.low >> 8); } else if (c.low <= 0xFF00) { - c.bytestream[bs_cnt++].v = c.outstanding_byte; + bytestream[bs_cnt++].v = c.outstanding_byte; uint16_t cnt = c.outstanding_count; for (; cnt > 0; cnt--) - c.bytestream[bs_cnt++].v = uint8_t(0xFF); + 
bytestream[bs_cnt++].v = uint8_t(0xFF); c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(c.low >> 8); } else if (c.low >= 0x10000) { - c.bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); + bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); uint16_t cnt = c.outstanding_count; for (; cnt > 0; cnt--) - c.bytestream[bs_cnt++].v = uint8_t(0x00); + bytestream[bs_cnt++].v = uint8_t(0x00); c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8)); } else { c.outstanding_count++; } - c.bytestream = OFFBUF(u8buf, c.bytestream, bs_cnt); + c.bytestream += bs_cnt; c.range <<= 8; c.low = bitfieldInsert(0, c.low, 8, 8); } @@ -74,10 +76,10 @@ void renorm_encoder(inout RangeCoder c) return; } - u8buf bs = c.bytestream; + u8buf bs = u8buf(c.bytestream); uint8_t outstanding_byte = c.outstanding_byte; - c.bytestream = OFFBUF(u8buf, bs, oc); + c.bytestream = uint64_t(bs) + oc; c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(low >> 8); @@ -179,10 +181,11 @@ uint32_t rac_terminate(inout RangeCoder c) return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start)); } -void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size) +void rac_init(out RangeCoder r, u8buf data, uint buf_size) { - r.bytestream_start = data; - r.bytestream = data; + r.bytestream_start = uint64_t(data); + r.bytestream = uint64_t(data); + r.bytestream_end = uint64_t(data) + buf_size; r.low = 0; r.range = uint16_t(0xFF00); r.outstanding_count = uint16_t(0); -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 09/12] ffv1_vulkan: move global buffer creation to a shared file 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (6 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 10/12] ffv1enc_vulkan: support default range coder tables Lynne 2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders Lynne 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavcodec/Makefile | 2 +- libavcodec/ffv1_vulkan.c | 123 ++++++++++++++++++++++++++++++ libavcodec/ffv1_vulkan.h | 39 ++++++++++ libavcodec/ffv1enc_vulkan.c | 145 +++++++++--------------------------- 4 files changed, 199 insertions(+), 110 deletions(-) create mode 100644 libavcodec/ffv1_vulkan.c create mode 100644 libavcodec/ffv1_vulkan.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index a3ef11a258..6ed0fbc705 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -370,7 +370,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o -OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1enc_vulkan.o +OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1_vulkan.o ffv1enc_vulkan.o OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o OBJS-$(CONFIG_FIC_DECODER) += fic.o OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c new file mode 100644 index 0000000000..6f49e2ebb1 --- /dev/null +++ b/libavcodec/ffv1_vulkan.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "ffv1_vulkan.h" +#include "libavutil/crc.h" + +int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + uint8_t *buf_mapped; + + RET(ff_vk_map_buffer(s, vkb, &buf_mapped, 0)); + + for (int i = 1; i < 256; i++) { + buf_mapped[256 + i] = f->state_transition[i]; + buf_mapped[256 - i] = 256 - (int)f->state_transition[i]; + } + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} + +static int init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f, + int (*write_data)(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f)) +{ + int err; + size_t buf_len = 512*sizeof(uint8_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + + write_data(s, vkb, f); + +fail: + return err; +} + +int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + return init_state_transition_data(s, vkb, f, + ff_ffv1_vk_update_state_transition_data); +} + +int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + 
int err; + + int16_t *buf_mapped; + size_t buf_len = MAX_QUANT_TABLES* + MAX_CONTEXT_INPUTS* + MAX_QUANT_TABLE_SIZE*sizeof(int16_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); + + memcpy(buf_mapped, f->quant_tables, + sizeof(f->quant_tables)); + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} + +int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + + uint32_t *buf_mapped; + size_t buf_len = 256*sizeof(int32_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); + + memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len); + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h new file mode 100644 index 0000000000..061e4ccc4c --- /dev/null +++ b/libavcodec/ffv1_vulkan.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FFV1_VULKAN_H +#define AVCODEC_FFV1_VULKAN_H + +#include "libavutil/vulkan.h" +#include "ffv1.h" + +int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +#endif /* AVCODEC_FFV1_VULKAN_H */ diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 53d648bcec..baeadf2b12 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -18,7 +18,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/crc.h" #include "libavutil/mem.h" #include "libavutil/vulkan.h" #include "libavutil/vulkan_spirv.h" @@ -32,6 +31,7 @@ #include "ffv1.h" #include "ffv1enc.h" +#include "ffv1_vulkan.h" /* Parallel Golomb alignment */ #define LG_ALIGN_W 32 @@ -1367,110 +1367,6 @@ fail: return err; } -static int init_state_transition_data(AVCodecContext *avctx) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - uint8_t *buf_mapped; - size_t buf_len = 512*sizeof(uint8_t); - - RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf, - &buf_mapped, 0)); - - for (int i = 1; i < 256; i++) { - buf_mapped[256 + i] = fv->ctx.state_transition[i]; - buf_mapped[256 - i] = 256 - 
(int)fv->ctx.state_transition[i]; - } - - RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1)); - - /* Update descriptors */ - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->setup, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - -fail: - return err; -} - -static int init_quant_table_data(AVCodecContext *avctx) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - int16_t *buf_mapped; - size_t buf_len = MAX_QUANT_TABLES* - MAX_CONTEXT_INPUTS* - MAX_QUANT_TABLE_SIZE*sizeof(int16_t); - - RET(ff_vk_create_buf(&fv->s, &fv->quant_buf, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&buf_mapped, 0)); - - memcpy(buf_mapped, fv->ctx.quant_tables, - sizeof(fv->ctx.quant_tables)); - - RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 1, 0, - &fv->quant_buf, - 0, fv->quant_buf.size, - VK_FORMAT_UNDEFINED)); - -fail: - return err; -} - -static int init_crc_table_data(AVCodecContext *avctx) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - uint32_t *buf_mapped; - size_t buf_len = 256*sizeof(int32_t); - - RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&buf_mapped, 0)); - - memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len); - - 
RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 2, 0, - &fv->crc_tab_buf, - 0, fv->crc_tab_buf.size, - VK_FORMAT_UNDEFINED)); - -fail: - return err; -} - static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) { int err; @@ -1719,20 +1615,50 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) spv->uninit(&spv); /* Range coder data */ - err = init_state_transition_data(avctx); + err = ff_ffv1_vk_init_state_transition_data(&fv->s, + &fv->rangecoder_static_buf, + f); if (err < 0) return err; /* Quantization table data */ - err = init_quant_table_data(avctx); + err = ff_ffv1_vk_init_quant_table_data(&fv->s, + &fv->quant_buf, + f); if (err < 0) return err; /* CRC table buffer */ - err = init_crc_table_data(avctx); + err = ff_ffv1_vk_init_crc_table_data(&fv->s, + &fv->crc_tab_buf, + f); if (err < 0) return err; + /* Update setup global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->setup, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + + /* Update encode global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 1, 0, + &fv->quant_buf, + 0, fv->quant_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 2, 0, + &fv->crc_tab_buf, + 0, fv->crc_tab_buf.size, + VK_FORMAT_UNDEFINED)); + /* Temporary frame */ fv->frame = av_frame_alloc(); if (!fv->frame) @@ -1751,7 +1677,8 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) if (!fv->buf_regions) return AVERROR(ENOMEM); - return 0; +fail: + return err; } static av_cold int 
vulkan_encode_ffv1_close(AVCodecContext *avctx) -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 10/12] ffv1enc_vulkan: support default range coder tables 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (7 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 09/12] ffv1_vulkan: move global buffer creation to a shared file Lynne @ 2025-01-19 10:36 ` Lynne 2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders Lynne 9 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:36 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This adds support for default range coder tables, rather than only custom ones. It's two lines, as the same code can be used for both thanks to ffv1enc.c setting f->state_transition properly. --- libavcodec/ffv1enc_vulkan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index baeadf2b12..a3438e17e9 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -1488,7 +1488,7 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) if (f->version < 4) { av_log(avctx, AV_LOG_ERROR, "PCM coding only supported by version 4 (-level 4)\n"); return AVERROR_INVALIDDATA; - } else if (f->ac != AC_RANGE_CUSTOM_TAB) { + } else if (f->ac == AC_GOLOMB_RICE) { av_log(avctx, AV_LOG_ERROR, "PCM coding requires range coding\n"); return AVERROR_INVALIDDATA; } @@ -1736,6 +1736,8 @@ static const AVOption vulkan_encode_ffv1_options[] = { { .i64 = AC_RANGE_CUSTOM_TAB }, -2, 2, VE, .unit = "coder" }, { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST, { .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, .unit = "coder" }, + { "range_def", "Range with default table", 0, AV_OPT_TYPE_CONST, + { .i64 = AC_RANGE_DEFAULT_TAB_FORCE }, INT_MIN, INT_MAX, VE, .unit = "coder" }, { "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST, { .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, .unit = "coder" }, { "qtable", "Quantization table", 
OFFSET(ctx.qtable), AV_OPT_TYPE_INT, -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne ` (8 preceding siblings ...) 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 10/12] ffv1enc_vulkan: support default range coder tables Lynne @ 2025-01-19 10:38 ` Lynne 2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 12/12] [RFC] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation Lynne 9 siblings, 1 reply; 13+ messages in thread From: Lynne @ 2025-01-19 10:38 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavcodec/vulkan_decode.c | 192 +++++++++++++++++++++++++++---------- libavcodec/vulkan_decode.h | 10 ++ 2 files changed, 152 insertions(+), 50 deletions(-) diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index c57998108c..87132651e2 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -24,6 +24,9 @@ #include "libavutil/mem.h" #include "libavutil/vulkan_loader.h" +#define DECODER_IS_SDR(codec_id) \ + ((codec_id) == AV_CODEC_ID_FFV1) + #if CONFIG_H264_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc; #endif @@ -63,7 +66,9 @@ static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx, codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR : codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR : codec_id == AV_CODEC_ID_AV1 ? 
VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR : - 0; + VK_STRUCTURE_TYPE_MAX_ENUM; + if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM) + return NULL; profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); @@ -119,6 +124,20 @@ static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx) return avf; } +static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic) +{ + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + vkpic->dpb_frame = NULL; + vkpic->img_view_ref = VK_NULL_HANDLE; + vkpic->img_view_out = VK_NULL_HANDLE; + vkpic->img_view_dest = VK_NULL_HANDLE; + + vkpic->destroy_image_view = vk->DestroyImageView; + vkpic->wait_semaphores = vk->WaitSemaphores; +} + int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, FFVulkanDecodePicture *vkpic, int is_current, int alloc_dpb) @@ -134,13 +153,7 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, if (vkpic->img_view_ref) return 0; - vkpic->dpb_frame = NULL; - vkpic->img_view_ref = VK_NULL_HANDLE; - vkpic->img_view_out = VK_NULL_HANDLE; - vkpic->img_view_dest = VK_NULL_HANDLE; - - vkpic->destroy_image_view = vk->DestroyImageView; - vkpic->wait_semaphores = vk->WaitSemaphores; + init_frame(dec, vkpic); if (ctx->common.layered_dpb && alloc_dpb) { vkpic->img_view_ref = ctx->common.layered_view; @@ -183,6 +196,59 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, return 0; } +int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + enum FFVkShaderRepFormat rep_fmt, int alloc_dpb) +{ + int err; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + vkpic->slices_size = 0; + + if (vkpic->img_view_ref) + return 0; + + init_frame(dec, vkpic); + + if (ctx->common.layered_dpb && alloc_dpb) { + vkpic->img_view_ref = ctx->common.layered_view; + vkpic->img_aspect_ref = 
ctx->common.layered_aspect; + } else if (alloc_dpb) { + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data; + AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx; + + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + err = ff_vk_create_imageview(&ctx->s, + &vkpic->img_view_ref, &vkpic->img_aspect_ref, + vkpic->dpb_frame, 0, rep_fmt); + if (err < 0) + return err; + + vkpic->img_view_dest = vkpic->img_view_ref; + } + + if (!alloc_dpb || is_current) { + AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames->hwctx; + + err = ff_vk_create_imageview(&ctx->s, + &vkpic->img_view_out, &vkpic->img_aspect, + pic, 0, rep_fmt); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->img_view_ref = vkpic->img_view_out; + vkpic->img_aspect_ref = vkpic->img_aspect; + } + } + + return 0; +} + int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, const uint8_t *data, size_t size, int add_startcode, uint32_t *nb_slices, const uint32_t **offsets) @@ -223,9 +289,14 @@ int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, buf_size = 2 << av_log2(buf_size); err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref, + DECODER_IS_SDR(avctx->codec_id) ? + (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, ctx->s.hwfc->create_pnext, buf_size, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (DECODER_IS_SDR(avctx->codec_id) ? 
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0)); if (err < 0) return err; @@ -276,6 +347,10 @@ void ff_vk_decode_flush(AVCodecContext *avctx) VkCommandBuffer cmd_buf; FFVkExecContext *exec; + /* Non-video queues do not need to be reset */ + if (!(get_codecdesc(avctx->codec_id)->decode_op)) + return; + exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); ff_vk_exec_start(&ctx->s, exec); cmd_buf = exec->buf; @@ -551,6 +626,7 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re { int err; FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id); AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; AVVulkanDeviceContext *hwctx = device->hwctx; @@ -569,11 +645,13 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re ctx->s.extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); - if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", - VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); - av_refstruct_unref(&dec->shared_ctx); - return AVERROR(ENOSYS); + if (vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) { + if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); + av_refstruct_unref(&dec->shared_ctx); + return AVERROR(ENOSYS); + } } err = ff_vk_load_functions(device, &ctx->s.vkfn, ctx->s.extensions, 1, 1); @@ -927,53 +1005,61 @@ static void free_profile_data(AVHWFramesContext *hwfc) int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) { - VkFormat vkfmt; + VkFormat vkfmt = VK_FORMAT_UNDEFINED; int err, dedicated_dpb; AVHWFramesContext *frames_ctx = 
(AVHWFramesContext*)hw_frames_ctx->data; AVVulkanFramesContext *hwfc = frames_ctx->hwctx; FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeProfileData *prof; - FFVulkanDecodeShared *ctx; - - frames_ctx->sw_format = AV_PIX_FMT_NONE; + FFVulkanDecodeProfileData *prof = NULL; err = vulkan_decode_bootstrap(avctx, hw_frames_ctx); if (err < 0) return err; - prof = av_mallocz(sizeof(FFVulkanDecodeProfileData)); - if (!prof) - return AVERROR(ENOMEM); + frames_ctx->sw_format = avctx->sw_pix_fmt; - err = vulkan_decode_get_profile(avctx, hw_frames_ctx, - &frames_ctx->sw_format, &vkfmt, - prof, &dedicated_dpb); - if (err < 0) { - av_free(prof); - return err; - } + if (!DECODER_IS_SDR(avctx->codec_id)) { + prof = av_mallocz(sizeof(FFVulkanDecodeProfileData)); + if (!prof) + return AVERROR(ENOMEM); + + err = vulkan_decode_get_profile(avctx, hw_frames_ctx, + &frames_ctx->sw_format, &vkfmt, + prof, &dedicated_dpb); + if (err < 0) { + av_free(prof); + return err; + } - frames_ctx->user_opaque = prof; - frames_ctx->free = free_profile_data; + frames_ctx->user_opaque = prof; + frames_ctx->free = free_profile_data; + + hwfc->create_pnext = &prof->profile_list; + } frames_ctx->width = avctx->coded_width; frames_ctx->height = avctx->coded_height; frames_ctx->format = AV_PIX_FMT_VULKAN; hwfc->format[0] = vkfmt; - hwfc->create_pnext = &prof->profile_list; hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + VK_IMAGE_USAGE_SAMPLED_BIT; - if (!dec->dedicated_dpb) - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + if (prof) { + FFVulkanDecodeShared *ctx; - ctx = dec->shared_ctx; - if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1)) - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + if (!dec->dedicated_dpb) + hwfc->usage |= 
VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + + ctx = dec->shared_ctx; + if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1)) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } else if (DECODER_IS_SDR(avctx->codec_id)) { + hwfc->usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } return err; } @@ -1075,8 +1161,10 @@ int ff_vk_decode_init(AVCodecContext *avctx) if (err < 0) return err; + vk_desc = get_codecdesc(avctx->codec_id); + profile = get_video_profile(ctx, avctx->codec_id); - if (!profile) { + if ((vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) && !profile) { av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!"); return AVERROR(EINVAL); } @@ -1109,9 +1197,11 @@ int ff_vk_decode_init(AVCodecContext *avctx) if (err < 0) goto fail; - err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); - if (err < 0) - goto fail; + if (profile) { + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + goto fail; + } /* If doing an out-of-place decoding, create a DPB pool */ if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) { @@ -1163,12 +1253,14 @@ int ff_vk_decode_init(AVCodecContext *avctx) } session_params_create.videoSession = ctx->common.session; - ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, - s->hwctx->alloc, &ctx->empty_session_params); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + if (profile) { + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->empty_session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } } driver_props = &dec->shared_ctx->s.driver_props; diff --git 
a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h index 1d89db323f..9a11a80f95 100644 --- a/libavcodec/vulkan_decode.h +++ b/libavcodec/vulkan_decode.h @@ -56,6 +56,9 @@ typedef struct FFVulkanDecodeShared { VkVideoDecodeCapabilitiesKHR dec_caps; VkVideoSessionParametersKHR empty_session_params; + + /* Software-defined decoder context */ + void *sd_ctx; } FFVulkanDecodeShared; typedef struct FFVulkanDecodeContext { @@ -141,6 +144,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, FFVulkanDecodePicture *vkpic, int is_current, int alloc_dpb); +/** + * Software-defined decoder version of ff_vk_decode_prepare_frame. + */ +int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + enum FFVkShaderRepFormat rep_fmt, int alloc_dpb); + /** * Add slice data to frame. */ -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 13+ messages in thread
* [FFmpeg-devel] [PATCH 12/12] [RFC] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation 2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders Lynne @ 2025-01-19 10:38 ` Lynne 0 siblings, 0 replies; 13+ messages in thread From: Lynne @ 2025-01-19 10:38 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This commit adds a Vulkan-based accelerated decoding of FFv1. Currently, only monochrome, arithmetic coding, version 3 and 4 are supported. Posting this more as an RFC. --- configure | 2 + libavcodec/Makefile | 1 + libavcodec/ffv1dec.c | 6 + libavcodec/hwaccels.h | 1 + libavcodec/vulkan/Makefile | 5 + libavcodec/vulkan/ffv1_dec.comp | 174 +++++ libavcodec/vulkan/ffv1_dec_setup.comp | 114 +++ libavcodec/vulkan/rangecoder.comp | 75 ++ libavcodec/vulkan_decode.c | 6 + libavcodec/vulkan_ffv1.c | 969 ++++++++++++++++++++++++++ 10 files changed, 1353 insertions(+) create mode 100644 libavcodec/vulkan/ffv1_dec.comp create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp create mode 100644 libavcodec/vulkan_ffv1.c diff --git a/configure b/configure index 3a1e72e1c6..94c14ee583 100755 --- a/configure +++ b/configure @@ -3192,6 +3192,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox" av1_videotoolbox_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" +ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler" +ffv1_vulkan_hwaccel_select="ffv1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" h263_videotoolbox_hwaccel_deps="videotoolbox" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 6ed0fbc705..20fc4ca0cf 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1014,6 +1014,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o 
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 2dac905cf5..f09e7ea163 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -549,6 +549,9 @@ static int read_extra_header(FFV1Context *f) static enum AVPixelFormat get_pixel_format(FFV1Context *f) { enum AVPixelFormat pix_fmts[] = { +#if CONFIG_FFV1_VULKAN_HWACCEL + AV_PIX_FMT_VULKAN, +#endif f->pix_fmt, AV_PIX_FMT_NONE, }; @@ -1234,6 +1237,9 @@ const FFCodec ff_ffv1_decoder = { .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_USES_PROGRESSFRAMES, .hw_configs = (const AVCodecHWConfigInternal *const []) { +#if CONFIG_FFV1_VULKAN_HWACCEL + HWACCEL_VULKAN(ffv1), +#endif NULL }, }; diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 910a024032..0b2c725247 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel; extern const struct FFHWAccel ff_av1_vdpau_hwaccel; extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel; extern const struct FFHWAccel ff_av1_vulkan_hwaccel; +extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel; extern const struct FFHWAccel ff_h263_vaapi_hwaccel; extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel; extern const struct FFHWAccel ff_h264_d3d11va_hwaccel; diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 351332ee44..feb5d2ea51 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -11,6 +11,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \ vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ + vulkan/rangecoder.o vulkan/ffv1_vlc.o \ + vulkan/ffv1_common.o vulkan/ffv1_reset.o \ + 
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o + VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) libavcodec/vulkan/%.c: TAG = VULKAN diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp new file mode 100644 index 0000000000..dffa0e0d2b --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec.comp @@ -0,0 +1,174 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef GOLOMB +int get_isymbol(inout RangeCoder c, uint64_t state) +{ + if (get_rac(c, state + 0)) + return 0; + + int e = 0; + while (get_rac(c, state + 1 + min(e, 9))) { // 1..10 + e++; + if (e > 31) { + corrupt = true; + return 0; + } + } + + int a = 1; + for (int i = e - 1; i >= 0; i--) + a += a + int(get_rac(c, state + 22 + min(i, 9))); // 22..31 + + e = -int(get_rac(c, state + 11 + min(e, 10))); // 11..21 sign + return (a ^ e) - e; +} + +void decode_line_pcm(inout SliceContext sc, int y, int p, int comp, + int bits) +{ + ivec2 sp = sc.slice_pos; + int w = sc.slice_dim.x; + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } + + for (int x = 0; x < w; x++) { + uint v = 0; + for (int i = (bits - 1); i >= 0; i--) + v |= uint(get_rac_equi(sc.c)) << i; + imageStore(dst[p], (sp + ivec2(x, y)), uvec4(v)); + } +} + +ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw, + uint8_t context_model) +{ + const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); + const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); + + TYPE top2 = TYPE(0); + if (off.y > 1) + top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]); + + VTYPE3 top = VTYPE3(TYPE(0), + TYPE(0), + TYPE(0)); + if (off.y > 0 && off != ivec2(0, 1)) + top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]); + if (off.y > 0) { + top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]); + top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); + } + + VTYPE2 cur = VTYPE2(TYPE(0), + TYPE(0)); + if (off.x > 0 && off != ivec2(1, 0)) + cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2, 0) + yoff_border2)[comp]); + if (off != ivec2(0, 0)) + cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1, 0) + yoff_border1)[comp]); + + int ctx = get_context(cur, top, top2, context_model); + int pred = predict(cur[1], VTYPE2(top)); + int csum = top.x ^ top.y ^ top.z ^ cur.x ^ cur.y; + + /* context, prediction */ + return ivec2(ctx, + pred); +} + +void decode_line(inout SliceContext sc, uint64_t state, + int y, int p, int comp, int bits, const int run_index) +{ + ivec2 sp = sc.slice_pos; + + int w = sc.slice_dim.x; + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } + + for (int x = 0; x < w; x++) { + ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, comp, w, + sc.quant_table_idx[p]); + + bool neg = pr[0] < 0; + if (neg) + pr[0] = -pr[0]; + + int range = sc.c.range; + int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*pr[0]); + + if (neg) + diff = -diff; + + int pix = zero_extend(pr[0] + diff, bits); + + imageStore(dst[p], (sp + ivec2(x, y)), uvec4(pix)); + } +} +#endif + +void decode_slice(inout SliceContext sc, const uint slice_idx) +{ + int bits = bits_per_raw_sample; + +#ifndef GOLOMB + if (sc.slice_coding_mode == 1) { + for (int p = 0; p < planes; p++) { + + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h >>= chroma_shift.y; + + for (int y = 0; y < h; y++) + decode_line_pcm(sc, y, p, 0, bits); + } + } else +#endif + { + uint64_t 
slice_state_off = uint64_t(slice_state) + + slice_idx*plane_state_size*codec_planes; + + for (int p = 0; p < planes; p++) { + int run_index = 0; + + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h >>= chroma_shift.y; + + for (int y = 0; y < h; y++) + decode_line(sc, slice_state_off, y, p, 0, bits, run_index); + + /* For the second chroma plane, reuse the first plane's state */ + if (p != 1) + slice_state_off += plane_state_size; + } + } +} + +void main(void) +{ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + decode_slice(slice_ctx[slice_idx], slice_idx); +} diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp new file mode 100644 index 0000000000..6b88274829 --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec_setup.comp @@ -0,0 +1,114 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +uint get_usymbol(inout RangeCoder c, uint64_t state) +{ + if (get_rac(c, state + 0)) + return 0; + + int e = 0; + while (get_rac(c, state + 1 + min(e, 9))) { // 1..10 + e++; + if (e > 31) { + corrupt = true; + return 0; + } + } + + uint a = 1; + for (int i = e - 1; i >= 0; i--) + a += a + uint(get_rac(c, state + 22 + min(i, 9))); // 22..31 + + // e = -int(get_rac(c, state + 11 + min(e, 10))); // 11..21 sign + return (a ^ e) - e; +} + +bool decode_slice_header(inout SliceContext sc, uint64_t state) +{ + u8buf sb = u8buf(state); + + [[unroll]] + for (int i = 0; i < CONTEXT_SIZE; i++) + sb[i].v = uint8_t(128); + + uint sx = get_usymbol(sc.c, state); + uint sy = get_usymbol(sc.c, state); + uint sw = get_usymbol(sc.c, state) + 1; + uint sh = get_usymbol(sc.c, state) + 1; + + if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 || + sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) || + corrupt) { + return true; + } + + /* Set coordinates */ + uint sxs = slice_coord(img_size.x, sx , gl_NumWorkGroups.x, chroma_shift.x); + uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x); + uint sys = slice_coord(img_size.y, sy , gl_NumWorkGroups.y, chroma_shift.y); + uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y); + + sc.slice_pos = ivec2(sxs, sys); + sc.slice_dim = ivec2(sxe - sxs, sye - sys); + sc.slice_rct_coef = ivec2(1, 1); + sc.slice_coding_mode = int(0); + + for (uint i = 0; i < codec_planes; i++) { + uint idx = get_usymbol(sc.c, state); + if (idx >= quant_table_count) + return true; + sc.quant_table_idx[i] = uint8_t(idx); + sc.context_count = context_count[idx]; + } + + get_usymbol(sc.c, state); + get_usymbol(sc.c, state); + get_usymbol(sc.c, state); + + if (version >= 4) { + 
sc.slice_reset_contexts = get_rac(sc.c, state); + sc.slice_coding_mode = get_usymbol(sc.c, state); + if (sc.slice_coding_mode != 1 && colorspace == 1) { + sc.slice_rct_coef.x = int(get_usymbol(sc.c, state)); + sc.slice_rct_coef.y = int(get_usymbol(sc.c, state)); + if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) + return true; + } + } + + return false; +} + +void main(void) +{ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; + + rac_init_dec(slice_ctx[slice_idx].c, + u8buf(slice_data + slice_offsets[slice_idx]), + slice_offsets[slice_idx + 1] - slice_offsets[slice_idx]); + + if (slice_idx == 0) + get_rac_equi(slice_ctx[slice_idx].c); + + decode_slice_header(slice_ctx[slice_idx], scratch_state); +} diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 6e3b9c1238..ff59cbd289 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -191,3 +191,78 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size) r.outstanding_count = uint16_t(0); r.outstanding_byte = uint8_t(0xFF); } + +/* Decoder */ +uint overread; +bool corrupt; + +void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size) +{ + overread = 0; + corrupt = false; + + /* Skip priming bytes */ + rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2); + + uint16_t prime = u16buf(data).v; + /* Switch endianess of the priming bytes */ + prime = pack16(unpack8(prime).yx); + + r.low = prime; + if (prime >= 0xFF00) { + r.low = 0xFF00; + r.bytestream_end = uint64_t(data) + 2; + } +} + +void refill(inout RangeCoder c) +{ + c.range <<= 8; + c.low <<= 8; + if (c.bytestream < c.bytestream_end) { + c.low += u8buf(c.bytestream).v; + c.bytestream++; + } else { + overread++; + } +} + +bool get_rac(inout RangeCoder c, uint64_t state) +{ + u8buf sb = u8buf(state); + uint val = uint(sb.v); + uint16_t range1 = uint16_t((uint(c.range) * val) >> 8); + + 
c.range -= range1; + + bool bit = c.low >= c.range; + sb.v = zero_one_state[(uint(bit) << 8) + val]; + + if (bit) { + c.low -= c.range; + c.range = range1; + } + + if (c.range < 0x100) + refill(c); + + return bit; +} + +bool get_rac_equi(inout RangeCoder c) +{ + uint16_t range1 = c.range >> 1; + + c.range -= range1; + + bool bit = c.low >= c.range; + if (bit) { + c.low -= c.range; + c.range = range1; + } + + if (c.range < 0x100) + refill(c); + + return bit; +} diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index 87132651e2..010e1fadb4 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc; #if CONFIG_AV1_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc; #endif +#if CONFIG_FFV1_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc; +#endif static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_H264_VULKAN_HWACCEL @@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_AV1_VULKAN_HWACCEL &ff_vk_dec_av1_desc, #endif +#if CONFIG_FFV1_VULKAN_HWACCEL + &ff_vk_dec_ffv1_desc, +#endif }; static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id) diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c new file mode 100644 index 0000000000..477d2b38b0 --- /dev/null +++ b/libavcodec/vulkan_ffv1.c @@ -0,0 +1,969 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "ffv1.h"
+#include "ffv1_vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+/* GLSL sources, defined in other translation units */
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_dec_setup_comp;
+extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_dec_comp;
+extern const char *ff_source_ffv1_dec_rct_comp;
+
+/* Decoder descriptor picked up by dec_descs[] in vulkan_decode.c */
+const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
+    .codec_id         = AV_CODEC_ID_FFV1,
+    .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+    .queue_flags      = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* Per-picture hwaccel state (sized via .frame_priv_data_size) */
+typedef struct FFv1VulkanDecodePicture {
+    FFVulkanDecodePicture vp;
+
+    VkImageView img_view_rct;
+    AVFrame *rct;
+
+    AVBufferRef *tmp_data;
+
+    AVBufferRef *slice_state;
+    uint32_t plane_state_size;  /* bytes of context state per plane */
+    uint32_t slice_state_size;  /* bytes of state per slice, incl. slice_data_size */
+    uint32_t slice_data_size;   /* bytes reserved per slice for SliceContext */
+    uint32_t max_context_count; /* largest f->context_count[] entry */
+
+    AVBufferRef *slice_offset_buf;
+    uint32_t *slice_offset;
+    int slice_num;
+} FFv1VulkanDecodePicture;
+
+/* Decoder-wide state, stored in FFVulkanDecodeShared.sd_ctx */
+typedef struct FFv1VulkanDecodeContext {
+    AVBufferRef *intermediate_frames_ref;
+
+    FFVulkanShader setup;
+    FFVulkanShader reset[2]; /* AC/Golomb */
+    FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
+    FFVulkanShader rct[2]; /* 16/32 bit */
+
+    FFVkBuffer rangecoder_static_buf;
+    FFVkBuffer quant_buf;
+    FFVkBuffer crc_tab_buf;
+
+    AVBufferPool *slice_state_pool;
+    AVBufferPool *tmp_data_pool;
+    AVBufferPool *slice_offset_pool;
+} FFv1VulkanDecodeContext;
+
+/* Push constants of the reset shader; must match the GLSL block emitted in
+ * init_reset_shader() field for field. */
+typedef struct FFv1VkResetParameters {
+    VkDeviceAddress slice_state;
+    uint32_t plane_state_size;
+    uint32_t context_count;
+    uint8_t codec_planes;
+    uint8_t key_frame;
+    uint8_t version;
+    uint8_t micro_version;
+    uint8_t padding[1];
+} FFv1VkResetParameters;
+
+/* Push constants of the setup and decode shaders; must match the GLSL block
+ * emitted by add_push_data() field for field. */
+typedef struct FFv1VkParameters {
+    uint32_t context_count[MAX_QUANT_TABLES];
+
+    VkDeviceAddress slice_data;
+    VkDeviceAddress slice_state;
+    VkDeviceAddress scratch_data;
+
+    uint32_t img_size[2];
+    uint32_t chroma_shift[2];
+
+    uint32_t plane_state_size;
+    uint32_t crcref;
+
+    uint8_t bits_per_raw_sample;
+    uint8_t quant_table_count;
+    uint8_t version;
+    uint8_t micro_version;
+    uint8_t key_frame;
+    uint8_t planes;
+    uint8_t codec_planes;
+    uint8_t transparency;
+    uint8_t colorspace;
+    uint8_t ec;
+    uint8_t padding[2];
+} FFv1VkParameters;
+
+/**
+ * Append the GLSL push-constant block mirroring FFv1VkParameters to the
+ * shader source and register a push-constant range of that size.
+ */
+static void add_push_data(FFVulkanShader *shd)
+{
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {        );
+    GLSLF(1,     uint context_count[%i];                                  ,MAX_QUANT_TABLES);
+    GLSLC(0,                                                              );
+    GLSLC(1,     u8buf slice_data;                                        );
+    GLSLC(1,     u8buf slice_state;                                       );
+    GLSLC(1,     u8buf scratch_data;                                      );
+    GLSLC(0,                                                              );
+    GLSLC(1,     uvec2 img_size;                                          );
+    GLSLC(1,     uvec2 chroma_shift;                                      );
+    GLSLC(0,                                                              );
+    GLSLC(1,     uint plane_state_size;                                   );
+    GLSLC(1,     uint32_t crcref;                                         );
+    GLSLC(0,                                                              );
+    GLSLC(1,     uint8_t bits_per_raw_sample;                             );
+    GLSLC(1,     uint8_t quant_table_count;                               );
+    GLSLC(1,     uint8_t version;                                         );
+    GLSLC(1,     uint8_t micro_version;                                   );
+    GLSLC(1,     uint8_t key_frame;                                       );
+    GLSLC(1,     uint8_t planes;                                          );
+    GLSLC(1,     uint8_t codec_planes;                                    );
+    GLSLC(1,     uint8_t transparency;                                    );
+    GLSLC(1,     uint8_t colorspace;                                      );
+    GLSLC(1,     uint8_t ec;                                              );
+    GLSLC(1,     uint8_t padding[2];                                      );
+    GLSLC(0, };                                                           );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/**
+ * hwaccel start_frame callback: size and acquire the per-picture buffers
+ * (slice state, temporary data, slice offsets) and prepare the output frame.
+ *
+ * @return 0 on success, a negative AVERROR code on failure. Buffers acquired
+ *         before a failure stay in the picture and are released by
+ *         vk_ffv1_free_frame_priv().
+ */
+static int vk_ffv1_start_frame(AVCodecContext *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t size)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFV1Context *f = avctx->priv_data;
+    size_t tmp_data_size;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    for (int i = 0; i < f->quant_table_count; i++)
+        fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
+
+    /* Allocate slice buffer data */
+    if (f->ac == AC_GOLOMB_RICE)
+        fp->plane_state_size = 8;
+    else
+        fp->plane_state_size = CONTEXT_SIZE;
+
+    fp->plane_state_size *= fp->max_context_count;
+    fp->slice_state_size = fp->plane_state_size*f->plane_count;
+
+    fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
+    fp->slice_state_size += fp->slice_data_size;
+    fp->slice_state_size = FFALIGN(fp->slice_state_size, 8);
+
+    /* Allocate slice state data */
+    if (f->key_frame) {
+        err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
+                                      &fp->slice_state,
+                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                      VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                      NULL, fp->slice_state_size*f->slice_count,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        if (err < 0)
+            return err;
+    } else {
+        /* Non-keyframes continue from the previous picture's state buffer.
+         * Without one there is nothing to reference, and av_buffer_ref()
+         * must not be handed a NULL buffer. */
+        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
+        if (!fpl || !fpl->slice_state)
+            return AVERROR_INVALIDDATA;
+        fp->slice_state = av_buffer_ref(fpl->slice_state);
+        if (!fp->slice_state)
+            return AVERROR(ENOMEM);
+    }
+
+    /* Allocate temporary data buffer */
+    tmp_data_size = f->slice_count*CONTEXT_SIZE;
+    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
+                                  &fp->tmp_data,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, tmp_data_size,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (err < 0)
+        return err;
+
+    /* Allocate slice offsets buffer */
+    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
+                                  &fp->slice_offset_buf,
+                                  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, (f->max_slice_count + 8)*sizeof(uint32_t),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        return err;
+
+    /* Prepare frame to be used */
+    err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
+                                         FF_VK_REP_NATIVE, 0);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * hwaccel decode_slice callback: append the slice bitstream to the picture's
+ * slice buffer and mirror its start and end offsets into the host-mapped
+ * slice offsets buffer.
+ */
+static int vk_ffv1_decode_slice(AVCodecContext *avctx,
+                                const uint8_t *data,
+                                uint32_t size)
+{
+    FFV1Context *f = avctx->priv_data;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+    int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                     &fp->slice_num,
+                                     (const uint32_t **)&fp->slice_offset);
+    if (err < 0)
+        return err;
+
+    /* NOTE(review): the indexing below assumes ff_vk_decode_add_slice()
+     * leaves fp->slice_num pointing at the entry it just wrote; if it
+     * post-increments the counter these reads are one past the last valid
+     * offset - confirm against vulkan_decode.c. */
+    AV_WN32(slice_offset->mapped_mem + fp->slice_num*sizeof(uint32_t),
+            fp->slice_offset[fp->slice_num]);
+    AV_WN32(slice_offset->mapped_mem + (fp->slice_num + 1)*sizeof(uint32_t),
+            fp->slice_offset[fp->slice_num] + size);
+
+    return 0;
+}
+
+/**
+ * hwaccel end_frame callback: record and submit the setup, reset and decode
+ * compute dispatches for the current picture.
+ *
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+static int vk_ffv1_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    FFV1Context *f = avctx->priv_data;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFv1VkParameters pd;
+    FFv1VkResetParameters pd_reset;
+
+    int is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) &&
+                 !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8);
+
+    FFVulkanShader *decode_shader = &fv->decode[f->use32bit]
+                                               [f->ac == AC_GOLOMB_RICE]
+                                               [is_rgb];
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+    FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    VkBufferMemoryBarrier2 buf_bar[8];
+    int nb_buf_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    /* slice_state is added with its own reference; the remaining buffers
+     * are handed over to the execution, so the picture drops its pointers. */
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
+    fp->slice_offset_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
+    fp->tmp_data = NULL;
+
+    /* Input frame barrier */
+    ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    /* Entry barrier */
+    if (!f->key_frame) {
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_state->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_state->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_state->buf,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    }
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    nb_img_bar = 0;
+    if (nb_buf_bar) {
+        /* At most one buffer barrier was queued above, at index 0 */
+        slice_state->stage = buf_bar[0].dstStageMask;
+        slice_state->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Update descriptors */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 1, 0,
+                                    slice_offset,
+                                    0, (fp->slice_num + 8)*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
+    pd = (FFv1VkParameters) {
+        /* context_count */
+
+        .slice_data = slices_buf->address,
+        .slice_state = slice_state->address,
+        .scratch_data = tmp_data->address,
+
+        .img_size[0] = f->picture.f->width,
+        .img_size[1] = f->picture.f->height,
+        .chroma_shift[0] = f->chroma_h_shift,
+        .chroma_shift[1] = f->chroma_v_shift,
+
+        .plane_state_size = fp->plane_state_size,
+        .crcref = f->crcref,
+
+        .bits_per_raw_sample = avctx->bits_per_raw_sample,
+        .quant_table_count = f->quant_table_count,
+        .version = f->version,
+        .micro_version = f->micro_version,
+        .key_frame = f->key_frame,
+        .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
+        .codec_planes = f->plane_count,
+        .transparency = f->transparency,
+        .colorspace = f->colorspace,
+        .ec = f->ec,
+    };
+    for (int i = 0; i < MAX_QUANT_TABLES; i++)
+        pd.context_count[i] = f->context_count[i];
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* Reset shader */
+    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->reset[f->ac == AC_GOLOMB_RICE]);
+
+    pd_reset = (FFv1VkResetParameters) {
+        /* The state area starts after the per-slice SliceContext area; use
+         * the same slice_data_size as the barriers and descriptors below. */
+        .slice_state = slice_state->address +
+                       (VkDeviceAddress)f->slice_count*fp->slice_data_size,
+        .plane_state_size = fp->plane_state_size,
+        .context_count = fp->max_context_count,
+        .codec_planes = f->plane_count,
+        .key_frame = f->key_frame,
+        .version = f->version,
+        .micro_version = f->micro_version,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec,
+                                   &fv->reset[f->ac == AC_GOLOMB_RICE],
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_reset), &pd_reset);
+
+    /* Sync between setup and reset shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = 0,
+        .size = fp->slice_data_size*f->slice_count,
+    };
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
+                    f->plane_count);
+
+    /* Decode */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  f->picture.f, &vp->img_view_out,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* Sync between reset and decode shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = fp->slice_data_size*f->slice_count,
+        .size = slice_state->size - fp->slice_data_size*f->slice_count,
+    };
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+    return 0;
+
+fail:
+    /* A RET() above failed before submission: drop what was attached to the
+     * execution and report the error instead of claiming success. */
+    ff_vk_exec_discard_deps(&ctx->s, exec);
+    return err;
+}
+
+/**
+ * Emit the defines and GLSL sources shared by all FFv1 shaders.
+ *
+ * @param use32bit selects 32-bit instead of 16-bit sample types
+ */
+static void define_shared_code(FFVulkanShader *shd, int use32bit)
+{
+    int smp_bits = use32bit ? 32 : 16;
+
+    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n"                ,CONTEXT_SIZE);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n"      ,MAX_QUANT_TABLE_MASK);
+
+    GLSLF(0, #define TYPE int%i_t                                    ,smp_bits);
+    GLSLF(0, #define VTYPE2 i%ivec2                                  ,smp_bits);
+    GLSLF(0, #define VTYPE3 i%ivec3                                  ,smp_bits);
+    GLSLD(ff_source_rangecoder_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/**
+ * Build, compile, link and register the slice setup shader.
+ *
+ * @return 0 or positive on success, a negative AVERROR code on failure.
+ */
+static int init_setup_shader(FFVulkanContext *s, FFVkExecPool *pool,
+                             FFVkSPIRVCompiler *spv, FFVulkanShader *shd)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+    };
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx[1024];",
+        },
+        {
+            .name        = "slice_offsets_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "readonly",
+            .buf_content = "uint32_t slice_offsets[1025];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_setup_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/**
+ * Build, compile, link and register a state reset shader.
+ *
+ * @param ac coder type; AC_GOLOMB_RICE selects the Golomb variant
+ * @return 0 or positive on success, a negative AVERROR code on failure.
+ */
+static int init_reset_shader(FFVulkanContext *s, FFVkExecPool *pool,
+                             FFVkSPIRVCompiler *spv, FFVulkanShader *shd,
+                             int ac)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+    int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_dim, 1, 1,
+                          0));
+
+    if (ac == AC_GOLOMB_RICE) {
+        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    }
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Must mirror FFv1VkResetParameters */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {        );
+    GLSLC(1,     u8buf slice_state;                                       );
+    GLSLC(1,     uint plane_state_size;                                   );
+    GLSLC(1,     uint context_count;                                      );
+    GLSLC(1,     uint8_t codec_planes;                                    );
+    GLSLC(1,     uint8_t key_frame;                                       );
+    GLSLC(1,     uint8_t version;                                         );
+    GLSLC(1,     uint8_t micro_version;                                   );
+    GLSLC(1,     uint8_t padding[1];                                      );
+    GLSLC(0, };                                                           );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    /* NOTE(review): three bindings are listed but only the first one
+     * (slice_data_buf) is registered by the call below; the other two look
+     * like leftovers - confirm before removing. */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx[1024];",
+        },
+        {
+            .name        = "slice_addr_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "uint64_t slice_addr[1024];",
+        },
+        {
+            .name        = "slice_size_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "uint64_t slice_size[1024];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
+
+    GLSLD(ff_source_ffv1_reset_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/**
+ * Build, compile, link and register one decode shader variant.
+ *
+ * @param frames_ctx frames context whose sw_format defines the "dst" images
+ * @param use32bit   selects 32-bit instead of 16-bit sample types
+ * @param ac         coder type; AC_GOLOMB_RICE selects the Golomb variant
+ * @param rgb        currently unused by this function
+ * @return 0 or positive on success, a negative AVERROR code on failure.
+ */
+static int init_decode_shader(FFVulkanContext *s, FFVkExecPool *pool,
+                              FFVkSPIRVCompiler *spv, FFVulkanShader *shd,
+                              AVHWFramesContext *frames_ctx,
+                              int use32bit, int ac, int rgb)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    if (ac == AC_GOLOMB_RICE) {
+        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    }
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+    };
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx[1024];",
+        },
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .elems      = av_pix_fmt_count_planes(frames_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/**
+ * hwaccel init callback: set up the shared Vulkan decode context, build the
+ * shaders, upload the static tables and bind the global descriptors.
+ *
+ * The SPIR-V compiler is only needed while the shaders are built and is
+ * released on every exit path.
+ *
+ * @return 0 or positive on success, a negative AVERROR code on failure.
+ */
+static int vk_decode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    FFv1VulkanDecodeContext *fv;
+    FFVkSPIRVCompiler *spv;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+    ctx = dec->shared_ctx;
+
+    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
+    if (!fv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Setup shader */
+    err = init_setup_shader(&ctx->s, &ctx->exec_pool, spv, &fv->setup);
+    if (err < 0)
+        goto fail;
+
+    /* Reset shaders */
+    for (int i = 0; i < 2; i++) {
+        err = init_reset_shader(&ctx->s, &ctx->exec_pool,
+                                spv, &fv->reset[i], !i ? AC_RANGE_CUSTOM_TAB : 0);
+        if (err < 0)
+            goto fail;
+    }
+
+    /* Decode shaders */
+    /* NOTE(review): only decode[0][0][0] is built here, while
+     * vk_ffv1_end_frame() indexes all [2][2][2] variants; the k == 1 case
+     * would also need fv->intermediate_frames_ref, which nothing in this
+     * file allocates. Left as in the RFC. */
+    for (int i = 0; i < 1; i++) {
+        for (int j = 0; j < 1; j++) {
+            for (int k = 0; k < 1; k++) {
+                AVHWFramesContext *frames_ctx;
+                frames_ctx = k ?(AVHWFramesContext *)fv->intermediate_frames_ref->data :
+                                (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+                err = init_decode_shader(&ctx->s, &ctx->exec_pool,
+                                         spv, &fv->decode[i][j][k],
+                                         frames_ctx,
+                                         i,
+                                         !j ? AC_RANGE_CUSTOM_TAB : 0,
+                                         k);
+                if (err < 0)
+                    goto fail;
+            }
+        }
+    }
+
+    /* Range coder data */
+    err = ff_ffv1_vk_init_state_transition_data(&ctx->s,
+                                                &fv->rangecoder_static_buf,
+                                                f);
+    if (err < 0)
+        goto fail;
+
+    /* Quantization table data */
+    err = ff_ffv1_vk_init_quant_table_data(&ctx->s,
+                                           &fv->quant_buf,
+                                           f);
+    if (err < 0)
+        goto fail;
+
+    /* CRC table buffer */
+    err = ff_ffv1_vk_init_crc_table_data(&ctx->s,
+                                         &fv->crc_tab_buf,
+                                         f);
+    if (err < 0)
+        goto fail;
+
+    /* Update setup global descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+    /* Update decode global descriptors */
+    for (int i = 0; i < 1; i++) {
+        for (int j = 0; j < 1; j++) {
+            for (int k = 0; k < 1; k++) {
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 0, 0,
+                                                    &fv->rangecoder_static_buf,
+                                                    0, fv->rangecoder_static_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 1, 0,
+                                                    &fv->quant_buf,
+                                                    0, fv->quant_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 2, 0,
+                                                    &fv->crc_tab_buf,
+                                                    0, fv->crc_tab_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+            }
+        }
+    }
+
+fail:
+    /* Reached on success as well: the compiler is no longer needed */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/**
+ * Release everything a FFv1VulkanDecodePicture still owns.
+ */
+static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *hwctx = _hwctx.nc;
+
+    FFv1VulkanDecodePicture *fp = data;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    ff_vk_decode_free_frame(hwctx, vp);
+
+    av_buffer_unref(&vp->slices_buf);
+    av_buffer_unref(&fp->slice_state);
+    av_buffer_unref(&fp->slice_offset_buf);
+    av_buffer_unref(&fp->tmp_data);
+
+    /* TODO: destroy fp->img_view_rct once the RCT path creates it:
+     *     FFVulkanFunctions *vk = &ctx->s.vkfn;
+     *     vk->DestroyImageView(hwctx->act_dev, fp->img_view_rct, hwctx->alloc);
+     */
+
+    av_frame_free(&fp->rct);
+}
+
+/**
+ * hwaccel uninit callback: free shaders, buffer pools and static buffers.
+ *
+ * NOTE(review): ctx is still dereferenced after ff_vk_decode_uninit(); if
+ * that call can drop the last reference to the shared context this is a
+ * use-after-free, and fv itself (av_mallocz'd in init) is never freed here -
+ * confirm the ownership rules in vulkan_decode.c.
+ */
+static int vk_decode_ffv1_uninit(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+
+    ff_vk_decode_uninit(avctx);
+
+    ff_vk_shader_free(&ctx->s, &fv->setup);
+
+    for (int i = 0; i < 2; i++)
+        ff_vk_shader_free(&ctx->s, &fv->reset[i]);
+
+    for (int i = 0; i < 2; i++)
+        for (int j = 0; j < 2; j++)
+            for (int k = 0; k < 2; k++)
+                ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]);
+
+    for (int i = 0; i < 2; i++)
+        ff_vk_shader_free(&ctx->s, &fv->rct[i]);
+
+    av_buffer_pool_uninit(&fv->tmp_data_pool);
+    av_buffer_pool_uninit(&fv->slice_state_pool);
+    av_buffer_pool_uninit(&fv->slice_offset_pool);
+
+    ff_vk_free_buf(&ctx->s, &fv->quant_buf);
+    ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
+    ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
+
+    return 0;
+}
+
+const FFHWAccel ff_ffv1_vulkan_hwaccel = {
+    .p.name                = "ffv1_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_FFV1,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_ffv1_start_frame,
+    .decode_slice          = &vk_ffv1_decode_slice,
+    .end_frame             = &vk_ffv1_end_frame,
+    .free_frame_priv       = &vk_ffv1_free_frame_priv,
+    .frame_priv_data_size  = sizeof(FFv1VulkanDecodePicture),
+    .init                  = &vk_decode_ffv1_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &vk_decode_ffv1_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.47.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 13+ messages in thread
end of thread, other threads:[~2025-01-20  3:13 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne
2025-01-20  3:13   ` Michael Niedermayer
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 03/12] ffv1dec: move slice start finding into a function Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 04/12] ffv1dec: move header parsing into a separate function Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 05/12] ffv1dec: move slice decoding " Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 06/12] ffv1dec: set f->state_transition for default range coder table Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 07/12] ffv1dec: add support for hwaccels Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 09/12] ffv1_vulkan: move global buffer creation to a shared file Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 10/12] ffv1enc_vulkan: support default range coder tables Lynne
2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders Lynne
2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 12/12] [RFC] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git