From: averne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: averne <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] [GSoC 25] lavc: add a shader-based Prores hwaccel (PR #20381) Message-ID: <175666001298.25.14063903422932327910@463a07221176> (raw) PR #20381 opened by averne URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20381 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20381.patch The first few commits prepare the ground for the addition of the actual hwaccel, in the last commit. Provisional benchmarks: - AMD Radeon 6700XT: 178 fps - Intel i7 Tiger Lake: 37 fps - NVidia Orin Nano: 70 fps Please don't pay too much attention to these numbers, I'm planning on optimizing it and submitting a follow-up series later. >From b7435fe8a44b27155a5ad3757a49c4a2cb4b0e69 Mon Sep 17 00:00:00 2001 From: averne <averne381@gmail.com> Date: Tue, 22 Jul 2025 19:06:55 +0200 Subject: [PATCH 1/5] avcodec/prores: add parser Introduce a basic parser for ProRes frame headers. This avoids having to decode an entire frame to extract codec information. 
--- libavcodec/Makefile | 1 + libavcodec/parsers.c | 1 + libavcodec/prores_parser.c | 132 +++++++++++++++++++++++++++++++++++++ libavcodec/proresdec.c | 1 + libavformat/mov.c | 1 + 5 files changed, 136 insertions(+) create mode 100644 libavcodec/prores_parser.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3d036de4b6..51cd3db30b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1265,6 +1265,7 @@ OBJS-$(CONFIG_PNG_PARSER) += png_parser.o OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o OBJS-$(CONFIG_PRORES_RAW_PARSER) += prores_raw_parser.o OBJS-$(CONFIG_QOI_PARSER) += qoi_parser.o +OBJS-$(CONFIG_PRORES_PARSER) += prores_parser.o OBJS-$(CONFIG_RV34_PARSER) += rv34_parser.o OBJS-$(CONFIG_SBC_PARSER) += sbc_parser.o OBJS-$(CONFIG_SIPR_PARSER) += sipr_parser.o diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c index b12c48f79f..c922b65ce5 100644 --- a/libavcodec/parsers.c +++ b/libavcodec/parsers.c @@ -66,6 +66,7 @@ extern const AVCodecParser ff_mpeg4video_parser; extern const AVCodecParser ff_mpegaudio_parser; extern const AVCodecParser ff_mpegvideo_parser; extern const AVCodecParser ff_opus_parser; +extern const AVCodecParser ff_prores_parser; extern const AVCodecParser ff_png_parser; extern const AVCodecParser ff_pnm_parser; extern const AVCodecParser ff_prores_raw_parser; diff --git a/libavcodec/prores_parser.c b/libavcodec/prores_parser.c new file mode 100644 index 0000000000..0dd0c2bc3a --- /dev/null +++ b/libavcodec/prores_parser.c @@ -0,0 +1,132 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/intreadwrite.h" +#include "bytestream.h" + +#include "avcodec.h" + +static int parse(AVCodecParserContext *s, + AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + GetByteContext gb; + uint8_t flags, depth, chroma_format, alpha_channel_type; + + /* Frame fields + frame header size */ + if (buf_size < 28) + return buf_size; + + bytestream2_init(&gb, buf, buf_size); + + /* Frame size */ + if (bytestream2_get_be32(&gb) != buf_size) + return buf_size; + + /* Frame identifier */ + if (bytestream2_get_le32(&gb) != MKTAG('i','c','p','f')) + return buf_size; + + /* Frame header size */ + if (bytestream2_get_be16(&gb) < 20) + return buf_size; + + bytestream2_skip(&gb, 6); /* Bitstream version, encoder identifier */ + + switch (avctx->codec_tag) { + case MKTAG('a','p','c','o'): + case MKTAG('a','p','c','s'): + case MKTAG('a','p','c','n'): + case MKTAG('a','p','c','h'): + depth = 10; + break; + case MKTAG('a','p','4','h'): + case MKTAG('a','p','4','x'): + depth = 12; + break; + default: + return buf_size; + } + + s->key_frame = 1; + s->pict_type = AV_PICTURE_TYPE_I; + + s->width = bytestream2_get_be16(&gb); + s->height = bytestream2_get_be16(&gb); + s->coded_width = FFALIGN(s->width, 16); + s->coded_height = FFALIGN(s->height, 16); + + flags = bytestream2_get_byte(&gb); + + chroma_format = flags >> 6 & 3; + if (chroma_format < 2) + return buf_size; + + /* Interlace mode */ + switch (flags >> 2 & 3) { + case 0: + s->field_order = AV_FIELD_PROGRESSIVE; + s->picture_structure = AV_PICTURE_STRUCTURE_FRAME; + break; + case 1: + s->field_order = AV_FIELD_TT; + s->picture_structure = AV_PICTURE_STRUCTURE_TOP_FIELD; + 
break; + case 2: + s->field_order = AV_FIELD_BB; + s->picture_structure = AV_PICTURE_STRUCTURE_BOTTOM_FIELD; + break; + default: + return buf_size; + } + + bytestream2_skip(&gb, 4); /* Aspect ratio information, frame rate code, color primaries, transfer characteristic, matrix coefficients */ + + alpha_channel_type = bytestream2_get_byte(&gb) & 0xf; + + /* Pixel format */ + switch (depth | (chroma_format << 4) | (alpha_channel_type << 8)) { + case 10 | (2 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV422P10; break; + case 10 | (2 << 4) | (1 << 8): + case 10 | (2 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA422P10; break; + case 10 | (3 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV444P10; break; + case 10 | (3 << 4) | (1 << 8): + case 10 | (3 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA444P10; break; + case 12 | (2 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV422P12; break; + case 12 | (2 << 4) | (1 << 8): + case 12 | (2 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA422P12; break; + case 12 | (3 << 4) | (0 << 8): s->format = AV_PIX_FMT_YUV444P12; break; + case 12 | (3 << 4) | (1 << 8): + case 12 | (3 << 4) | (2 << 8): s->format = AV_PIX_FMT_YUVA444P12; break; + default: + return buf_size; + } + + avctx->pix_fmt = s->format; + + *poutbuf = buf; + *poutbuf_size = buf_size; + + return buf_size; +} + +const AVCodecParser ff_prores_parser = { + .codec_ids = { AV_CODEC_ID_PRORES }, + .parser_parse = parse, +}; diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index deaf84bda0..1437037897 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -851,6 +851,7 @@ const FFCodec ff_prores_decoder = { FF_CODEC_DECODE_CB(decode_frame), UPDATE_THREAD_CONTEXT(update_thread_context), .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS, + .caps_internal = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM, .p.profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles), #if HWACCEL_MAX .hw_configs = (const AVCodecHWConfigInternal *const []) { diff 
--git a/libavformat/mov.c b/libavformat/mov.c index ac0c01a67f..59881d9ce1 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -2990,6 +2990,7 @@ static int mov_finalize_stsd_codec(MOVContext *c, AVIOContext *pb, sti->need_parsing = AVSTREAM_PARSE_FULL; break; case AV_CODEC_ID_PRORES_RAW: + case AV_CODEC_ID_PRORES: case AV_CODEC_ID_APV: case AV_CODEC_ID_EVC: case AV_CODEC_ID_AV1: -- 2.49.1 >From f3cc6406470862a4ef9109f49723f504afecccfc Mon Sep 17 00:00:00 2001 From: averne <averne381@gmail.com> Date: Thu, 12 Jun 2025 19:28:53 +0200 Subject: [PATCH 2/5] proresdec: allocate private memory for hwaccel pictures In preparation for the Vulkan hwaccel, which stores per-frame acceleration structures. --- libavcodec/proresdec.c | 11 +++++++++++ libavcodec/proresdec.h | 1 + 2 files changed, 12 insertions(+) diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index 1437037897..9ec23f7eea 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -777,6 +777,13 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0) return ret; + + av_refstruct_unref(&ctx->hwaccel_last_picture_private); + FFSWAP(void *, ctx->hwaccel_picture_private, ctx->hwaccel_last_picture_private); + + if ((ret = ff_hwaccel_frame_priv_alloc(avctx, &ctx->hwaccel_picture_private)) < 0) + return ret; + ff_thread_finish_setup(avctx); if (HWACCEL_MAX && avctx->hwaccel) { @@ -814,6 +821,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, } finish: + av_refstruct_unref(&ctx->hwaccel_last_picture_private); + *got_frame = 1; return avpkt->size; @@ -824,6 +833,8 @@ static av_cold int decode_close(AVCodecContext *avctx) ProresContext *ctx = avctx->priv_data; av_freep(&ctx->slices); + av_refstruct_unref(&ctx->hwaccel_picture_private); + av_refstruct_unref(&ctx->hwaccel_last_picture_private); return 0; } diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h index 230fca41f2..d15e5b2c1d 100644 --- 
a/libavcodec/proresdec.h +++ b/libavcodec/proresdec.h @@ -44,6 +44,7 @@ typedef struct { BlockDSPContext bdsp; ProresDSPContext prodsp; AVFrame *frame; + void *hwaccel_picture_private, *hwaccel_last_picture_private; int frame_type; ///< 0 = progressive, 1 = tff, 2 = bff uint8_t qmat_luma[64]; uint8_t qmat_chroma[64]; -- 2.49.1 >From 75fb2d33c2a35f854148d0360caaa92172565961 Mon Sep 17 00:00:00 2001 From: averne <averne381@gmail.com> Date: Thu, 12 Jun 2025 19:31:44 +0200 Subject: [PATCH 3/5] avcodec/prores: adapt hwaccel code for slice-based accelerators In preparation for the Vulkan hwaccel. The existing hwaccel code was designed around videotoolbox, which ingests the whole frame bitstream including picture headers. This adapts the code to accommodate lower-level, slice-based hwaccels. --- libavcodec/proresdec.c | 34 ++++++++++++++++++---------------- libavcodec/videotoolbox.c | 16 ++++++++++++---- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index 9ec23f7eea..7469c63342 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -756,6 +756,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; int frame_hdr_size, pic_size, ret; + int i; if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) { av_log(avctx, AV_LOG_ERROR, "invalid frame header\n"); @@ -786,20 +787,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, ff_thread_finish_setup(avctx); - if (HWACCEL_MAX && avctx->hwaccel) { - const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel); - ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size); - if (ret < 0) - return ret; - ret = hwaccel->decode_slice(avctx, avpkt->data, avpkt->size); - if (ret < 0) - return ret; - ret = hwaccel->end_frame(avctx); - if (ret < 0) - return ret; - goto finish; - } - decode_picture: pic_size = decode_picture_header(avctx, buf, buf_size); if (pic_size < 
0) { @@ -807,7 +794,23 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, return pic_size; } - if ((ret = decode_picture(avctx)) < 0) { + if (HWACCEL_MAX && avctx->hwaccel) { + const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel); + + ret = hwaccel->start_frame(avctx, avpkt->buf, avpkt->data, avpkt->size); + if (ret < 0) + return ret; + + for (i = 0; i < ctx->slice_count; ++i) { + ret = hwaccel->decode_slice(avctx, ctx->slices[i].data, ctx->slices[i].data_size); + if (ret < 0) + return ret; + } + + ret = hwaccel->end_frame(avctx); + if (ret < 0) + return ret; + } else if ((ret = decode_picture(avctx)) < 0) { av_log(avctx, AV_LOG_ERROR, "error decoding picture\n"); return ret; } @@ -820,7 +823,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame, goto decode_picture; } -finish: av_refstruct_unref(&ctx->hwaccel_last_picture_private); *got_frame = 1; diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c index ccba249140..2cd22cba1a 100644 --- a/libavcodec/videotoolbox.c +++ b/libavcodec/videotoolbox.c @@ -1161,16 +1161,21 @@ static int videotoolbox_prores_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) { - return 0; + VTContext *vtctx = avctx->internal->hwaccel_priv_data; + ProresContext *ctx = avctx->priv_data; + + /* Videotoolbox decodes both fields simultaneously */ + if (!ctx->first_field) + return 0; + + return ff_videotoolbox_buffer_copy(vtctx, buffer, size); } static int videotoolbox_prores_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) { - VTContext *vtctx = avctx->internal->hwaccel_priv_data; - - return ff_videotoolbox_buffer_copy(vtctx, buffer, size); + return 0; } static int videotoolbox_prores_end_frame(AVCodecContext *avctx) @@ -1178,6 +1183,9 @@ static int videotoolbox_prores_end_frame(AVCodecContext *avctx) ProresContext *ctx = avctx->priv_data; AVFrame *frame = ctx->frame; + if (!ctx->first_field) + return 0; + return ff_videotoolbox_common_end_frame(avctx, 
frame); } -- 2.49.1 >From e4674dd00df666c0152c4428938bbda24647436a Mon Sep 17 00:00:00 2001 From: averne <averne381@gmail.com> Date: Tue, 12 Aug 2025 14:31:00 +0200 Subject: [PATCH 4/5] avcodec/proresdec: save slice width parameter in codec context Save the log2_desired_slice_size_in_mb syntax element in the codec context. Required by the Vulkan hwaccel to compute slice widths and positions. --- libavcodec/proresdec.c | 9 ++++++--- libavcodec/proresdec.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index 7469c63342..ab9b65bd7f 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -335,6 +335,9 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons return AVERROR_INVALIDDATA; } + ctx->slice_mb_width = 1 << log2_slice_mb_width; + ctx->slice_mb_height = 1 << log2_slice_mb_height; + ctx->mb_width = (avctx->width + 15) >> 4; if (ctx->frame_type) ctx->mb_height = (avctx->height + 31) >> 5; @@ -344,7 +347,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons // QT ignores the written value // slice_count = AV_RB16(buf + 5); slice_count = ctx->mb_height * ((ctx->mb_width >> log2_slice_mb_width) + - av_popcount(ctx->mb_width & (1 << log2_slice_mb_width) - 1)); + av_popcount(ctx->mb_width & ctx->slice_mb_width - 1)); if (ctx->slice_count != slice_count || !ctx->slices) { av_freep(&ctx->slices); @@ -367,7 +370,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons index_ptr = buf + hdr_size; data_ptr = index_ptr + slice_count*2; - slice_mb_count = 1 << log2_slice_mb_width; + slice_mb_count = ctx->slice_mb_width; mb_x = 0; mb_y = 0; @@ -392,7 +395,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons mb_x += slice_mb_count; if (mb_x == ctx->mb_width) { - slice_mb_count = 1 << log2_slice_mb_width; + slice_mb_count = ctx->slice_mb_width; mb_x = 0; mb_y++; } diff --git 
a/libavcodec/proresdec.h b/libavcodec/proresdec.h index d15e5b2c1d..d33eab149b 100644 --- a/libavcodec/proresdec.h +++ b/libavcodec/proresdec.h @@ -52,6 +52,8 @@ typedef struct { int slice_count; ///< number of slices in the current picture unsigned mb_width; ///< width of the current picture in mb unsigned mb_height; ///< height of the current picture in mb + unsigned slice_mb_width; ///< maximum width of a slice in mb + unsigned slice_mb_height; ///< maximum height of a slice in mb uint8_t progressive_scan[64]; uint8_t interlaced_scan[64]; const uint8_t *scan; -- 2.49.1 >From ae856cac654537f81b6d189489d267291cc2248d Mon Sep 17 00:00:00 2001 From: averne <averne381@gmail.com> Date: Mon, 2 Jun 2025 21:31:59 +0200 Subject: [PATCH 5/5] lavc: add a ProRes Vulkan hwaccel Add a shader-based Apple ProRes decoder. It supports all codec features for profiles up to the 4444 XQ profile, i.e.: - 4:2:2 and 4:4:4 chroma subsampling - 10- and 12-bit component depth - Interlacing - Alpha The implementation consists of two shaders: the VLD kernel does entropy decoding for color/alpha, and the IDCT kernel performs the inverse transform on color components. 
Benchmarks for a 4k yuv422p10 sample: - AMD Radeon 6700XT: 178 fps - Intel i7 Tiger Lake: 37 fps - NVidia Orin Nano: 70 fps --- configure | 2 + libavcodec/Makefile | 3 +- libavcodec/hwaccels.h | 1 + libavcodec/proresdec.c | 8 +- libavcodec/vulkan/Makefile | 5 + libavcodec/vulkan/prores_idct.comp | 115 ++++++ libavcodec/vulkan/prores_reset.comp | 38 ++ libavcodec/vulkan/prores_vld.comp | 316 ++++++++++++++++ libavcodec/vulkan_decode.c | 9 +- libavcodec/vulkan_prores.c | 547 ++++++++++++++++++++++++++++ 10 files changed, 1041 insertions(+), 3 deletions(-) create mode 100644 libavcodec/vulkan/prores_idct.comp create mode 100644 libavcodec/vulkan/prores_reset.comp create mode 100644 libavcodec/vulkan/prores_vld.comp create mode 100644 libavcodec/vulkan_prores.c diff --git a/configure b/configure index 9fe28c5af4..b3c10a8e65 100755 --- a/configure +++ b/configure @@ -3302,6 +3302,8 @@ prores_videotoolbox_hwaccel_deps="videotoolbox" prores_videotoolbox_hwaccel_select="prores_decoder" prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler" prores_raw_vulkan_hwaccel_select="prores_raw_decoder" +prores_vulkan_hwaccel_deps="vulkan spirv_compiler" +prores_vulkan_hwaccel_select="prores_decoder" vc1_d3d11va_hwaccel_deps="d3d11va" vc1_d3d11va_hwaccel_select="vc1_decoder" vc1_d3d11va2_hwaccel_deps="d3d11va" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 51cd3db30b..8e22d79acc 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1092,6 +1092,7 @@ OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan_decode.o vulkan_vp9.o OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o +OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores.o # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o reverse.o @@ -1335,7 +1336,7 @@ SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h 
vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h vulkan_video.h \ +SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h prores_vulkan.h vulkan_video.h \ vulkan_encode.h vulkan_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 4b205d386e..0894d84a9c 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -68,6 +68,7 @@ extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel; extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel; extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel; extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel; +extern const struct FFHWAccel ff_prores_vulkan_hwaccel; extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel; extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel; extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel; diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index ab9b65bd7f..b0e9075851 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -251,7 +251,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf, } if (pix_fmt != ctx->pix_fmt) { -#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL) +#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL + CONFIG_PRORES_VULKAN_HWACCEL) #if HWACCEL_MAX enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts; int ret; @@ -260,6 +260,9 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf, #if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_PRORES_VULKAN_HWACCEL + *fmtp++ = AV_PIX_FMT_VULKAN; #endif *fmtp++ = ctx->pix_fmt; *fmtp = AV_PIX_FMT_NONE; @@ -873,6 +876,9 @@ const FFCodec ff_prores_decoder = { .hw_configs = (const AVCodecHWConfigInternal *const []) { #if 
CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(prores), +#endif +#if CONFIG_PRORES_VULKAN_HWACCEL + HWACCEL_VULKAN(prores), #endif NULL }, diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index d8e1471fa6..ec3015fee6 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -17,6 +17,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \ vulkan/prores_raw.o +OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \ + vulkan/prores_reset.o \ + vulkan/prores_vld.o \ + vulkan/prores_idct.o + VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) libavcodec/vulkan/%.c: TAG = VULKAN diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp new file mode 100644 index 0000000000..c540d43420 --- /dev/null +++ b/libavcodec/vulkan/prores_idct.comp @@ -0,0 +1,115 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Two macroblocks, padded to avoid bank conflicts */ +shared float blocks[4*2][8*(8+1)]; + +uint get_px(uint tex_idx, ivec2 pos) { +#ifndef INTERLACED + return imageLoad(dst[tex_idx], pos).x; +#else + return imageLoad(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field)).x; +#endif +} + +void put_px(uint tex_idx, ivec2 pos, uint v) { +#ifndef INTERLACED + imageStore(dst[tex_idx], pos, uvec4(v)); +#else + imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v)); +#endif +} + +/* 7.4 Inverse Transform */ +void idct(uint block, uint offset, uint stride) { + float c0 = blocks[block][0*stride + offset]; + float c1 = blocks[block][1*stride + offset]; + float c2 = blocks[block][2*stride + offset]; + float c3 = blocks[block][3*stride + offset]; + float c4 = blocks[block][4*stride + offset]; + float c5 = blocks[block][5*stride + offset]; + float c6 = blocks[block][6*stride + offset]; + float c7 = blocks[block][7*stride + offset]; + + float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6); + float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6); + + float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 0.46193981170654296875; + float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 0.46193981170654296875; + + float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 0.19134169816970825195; + float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 0.19134169816970825195; + + float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7; + float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7; + + float tmp5 = (c5 - c7) * 1.4142134189605712891 + (c5 - c7) + (c1 - c3); + float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3); + + float m1 = tmp3 * 2.6131260395050048828 + tmp5; + float m4 = tmp3 * -2.6131260395050048828 + tmp5; + + 
float m2 = tmp4 * 1.0823919773101806641 + tmp6; + float m3 = tmp4 * -1.0823919773101806641 + tmp6; + + blocks[block][0*stride + offset] = m1 * 0.49039259552955627441 + a1; + blocks[block][7*stride + offset] = m1 * -0.49039259552955627441 + a1; + blocks[block][1*stride + offset] = m2 * 0.41573479771614074707 + a2; + blocks[block][6*stride + offset] = m2 * -0.41573479771614074707 + a2; + blocks[block][2*stride + offset] = m3 * 0.27778509259223937988 + a3; + blocks[block][5*stride + offset] = m3 * -0.27778509259223937988 + a3; + blocks[block][3*stride + offset] = m4 * 0.097545139491558074951 + a4; + blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4; +} + +void main(void) +{ + uvec3 gid = gl_GlobalInvocationID, lid = gl_LocalInvocationID; + uint comp = gid.z, block = (lid.y << 2) | (lid.x >> 3), idx = lid.x & 0x7; + uint chroma_shift = comp != 0 ? log2_chroma_w : 0; + bool act = gid.x < mb_width << (4 - chroma_shift); + + /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */ + if (act) { + [[unroll]] for (uint i = 0; i < 8; ++i) { + int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16); + int w = comp == 0 ? qmat_luma[i][idx] : qmat_chroma[i][idx]; + blocks[block][i * 9 + idx] = float(v * w); + } + } + + /* Row-wise iDCT */ + barrier(); + idct(block, idx * 9, 1); + + /* Column-wise iDCT */ + barrier(); + idct(block, idx, 9); + + float fact = 1.0f / (1 << (12 - depth)), off = 1 << (depth - 1); + int maxv = (1 << depth) - 1; + + /* 7.5.1 Color Component Samples. 
Rescale, clamp and write back to global memory */ + barrier(); + if (act) { + [[unroll]] for (uint i = 0; i < 8; ++i) { + float v = blocks[block][i * 9 + idx] * fact + off; + put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, maxv)); + } + } +} diff --git a/libavcodec/vulkan/prores_reset.comp b/libavcodec/vulkan/prores_reset.comp new file mode 100644 index 0000000000..51cbc6b3d9 --- /dev/null +++ b/libavcodec/vulkan/prores_reset.comp @@ -0,0 +1,38 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +void main(void) +{ + uvec3 gid = gl_GlobalInvocationID; +#ifndef INTERLACED + ivec2 pos = ivec2(gid); +#else + ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field); +#endif + + /* Clear luma plane */ + imageStore(dst[0], pos, uvec4(0)); + + /* Clear chroma plane */ + if (gid.x < mb_width << (4 - log2_chroma_w)) { + imageStore(dst[1], pos, uvec4(0)); + imageStore(dst[2], pos, uvec4(0)); + } + + /* Alpha plane doesn't need a clear because it is not sparsely encoded */ +} diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp new file mode 100644 index 0000000000..7ee73f1ab9 --- /dev/null +++ b/libavcodec/vulkan/prores_vld.comp @@ -0,0 +1,316 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define U8(x) (uint8_t (x)) +#define U16(x) (uint16_t(x)) + +void put_px(uint tex_idx, ivec2 pos, uint v) { +#ifndef INTERLACED + imageStore(dst[tex_idx], pos, uvec4(v)); +#else + imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v)); +#endif +} + +/* 7.5.3 Pixel Arrangement */ +ivec2 pos_to_block(uint pos, uint luma) +{ + return ivec2((pos & -luma - 2) + luma >> 1, pos >> luma & 1) << 3; +} + +/* 7.1.1.2 Signed Golomb Combination Codes */ +uint to_signed(uint x) +{ + return (x >> 1) ^ -(x & 1); +} + +/* 7.1.1.1 Golomb Combination Codes */ +uint decode_codeword(inout GetBitContext gb, int codebook) +{ + int last_rice_q = bitfieldExtract(codebook, 0, 4), + krice = bitfieldExtract(codebook, 4, 4), + kexp = bitfieldExtract(codebook, 8, 4); + + int q = 31 - findMSB(show_bits(gb, 32)); + if (q <= last_rice_q) { + /* Golomb-Rice encoding */ + return (get_bits(gb, krice + q + 1) & ~(1 << krice)) + (q << krice); + } else { + /* exp-Golomb encoding */ + return get_bits(gb, (q << 1) + kexp - last_rice_q) - (1 << kexp) + ((last_rice_q + 1) << krice); + } +} + +void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale) +{ + uvec3 gid = gl_GlobalInvocationID; + uint is_luma = uint(gid.z == 0); + uint 
chroma_shift = bool(is_luma) ? 0 : log2_chroma_w; + + uint num_blocks = mb_count << (2 - chroma_shift); + ivec2 base_pos = ivec2(mb_pos.x << (4 - chroma_shift), mb_pos.y << 4); + + /* 7.1.1.3 DC Coefficients */ + { + /* First coeff */ + uint c = to_signed(decode_codeword(gb, 0x650)); + put_px(gid.z, base_pos, c * qscale & 0xffff); + + /** + * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8) + * According to the SMPTE document, abs(prev_dc_diff) should be used + * to index the table, duplicating the entries removes the abs operation. + */ + const uint16_t dc_codebook[] = { U16(0x100), + U16(0x210), U16(0x210), + U16(0x321), U16(0x321), + U16(0x430), U16(0x430), }; + + uint cw = 5, prev_dc_diff = 0; + for (int i = 1; i < num_blocks; ++i) { + cw = decode_codeword(gb, dc_codebook[min(cw, 6)]); + + int s = int(prev_dc_diff) >> 31; + c += prev_dc_diff = (to_signed(cw) ^ s) - s; + + put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff); + } + } + + /* 7.1.1.4 AC Coefficients */ + { + /* Table 10 */ + const uint16_t ac_run_codebook [] = { U16(0x102), U16(0x102), U16(0x101), U16(0x101), + U16(0x100), U16(0x211), U16(0x211), U16(0x211), + U16(0x211), U16(0x210), U16(0x210), U16(0x210), + U16(0x210), U16(0x210), U16(0x210), U16(0x320), }; + + /* Table 11 */ + const uint16_t ac_level_codebook[] = { U16(0x202), U16(0x101), U16(0x102), U16(0x100), + U16(0x210), U16(0x210), U16(0x210), U16(0x210), + U16(0x320) }; + +#ifndef INTERLACED + /* Figure 4, encoded as (x << 0) | (y << 4) */ + const uint8_t scan_tbl[] = { + U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13), + U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33), + U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16), + U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37), + U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52), + 
U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54), + U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56), + U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77), + }; +#else + /* Figure 5 */ + const uint8_t scan_tbl[] = { + U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31), + U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33), + U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61), + U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73), + U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25), + U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45), + U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65), + U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77), + }; +#endif + + uint block_mask = num_blocks - 1; + uint block_shift = findLSB(num_blocks); + + uint pos = num_blocks - 1, run = 4, level = 1, s; + while (pos < num_blocks << 6) { + int left = left_bits(gb); + if (left <= 0 || (left < 32 && show_bits(gb, left) == 0)) + break; + + run = decode_codeword(gb, ac_run_codebook [min(run, 15)]); + level = decode_codeword(gb, ac_level_codebook[min(level, 8 )]); + s = get_bits(gb, 1); + + pos += run + 1; + + uint bidx = pos & block_mask, scan = scan_tbl[pos >> block_shift]; + ivec2 spos = pos_to_block(bidx, is_luma); + ivec2 bpos = ivec2(scan & 0xf, scan >> 4); + + uint c = ((level + 1) ^ -s) + s; + put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff); + } + } +} + +/* 7.1.2 Scanned Alpha */ +void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint mb_count) +{ + uvec3 gid = gl_GlobalInvocationID; + + ivec2 base_pos = ivec2(mb_pos) << 4; + uint block_shift = findMSB(mb_count) + 4, block_mask = (1 << block_shift) - 1; + + uint mask = (1 << (4 << alpha_info)) - 1; + uint 
num_values = (mb_count << 4) * min(height - (gid.y << 4), 16); + + int num_cw_bits = alpha_info == 1 ? 5 : 8, + num_flc_bits = alpha_info == 1 ? 9 : 17; + + uint alpha_rescale_lshift = alpha_info == 1 ? depth - 8 : 16, + alpha_rescale_rshift = 16 - depth; + + uint alpha = -1; + for (uint pos = 0; pos < num_values;) { + uint diff, run; + + /* Decode run value */ + { + uint bits = show_bits(gb, num_cw_bits), q = num_cw_bits - 1 - findMSB(bits); + + /* Tables 13/14 */ + if (q != 0) { + uint m = (bits >> 1) + 1, s = bits & 1; + diff = (m ^ -s) + s; + skip_bits(gb, num_cw_bits); + } else { + diff = get_bits(gb, num_flc_bits); + } + + alpha = alpha + diff & mask; + } + + /* Decode run length */ + { + uint bits = show_bits(gb, 5), q = 4 - findMSB(bits); + + /* Table 12 */ + if (q == 0) { + run = 1; + skip_bits(gb, 1); + } else if (q <= 4) { + run = bits + 1; + skip_bits(gb, 5); + } else { + run = get_bits(gb, 16) + 1; + } + + run = min(run, num_values - pos); + } + + /** + * FFmpeg doesn't support color and alpha with different precision, + * so we need to rescale to the color range. + */ + uint val = (alpha << alpha_rescale_lshift) | (alpha >> alpha_rescale_rshift); + for (uint end = pos + run; pos < end; ++pos) + put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), val & 0xffff); + } +} + +void main(void) +{ + uvec3 gid = gl_GlobalInvocationID; + if (gid.x >= slice_width || gid.y >= slice_height) + return; + + uint slice_idx = gid.y * slice_width + gid.x; + uint slice_off = slice_offsets[slice_idx], + slice_size = slice_offsets[slice_idx + 1] - slice_off; + + u8buf bs = u8buf(slice_data + slice_off); + + /* Decode slice header */ + uint hdr_size, y_size, u_size, v_size, a_size; + hdr_size = bs[0].v >> 3; + + /* Table 15 */ + uint qidx = clamp(bs[1].v, 1, 224), + qscale = qidx > 128 ? 
(qidx - 96) << 2 : qidx; + + y_size = (uint(bs[2].v) << 8) | bs[3].v; + u_size = (uint(bs[4].v) << 8) | bs[5].v; + + /** + * The alpha_info field can be 0 even when an alpha plane is present, + * if skip_alpha is enabled, so use the header size instead. + */ + if (hdr_size > 6) + v_size = (uint(bs[6].v) << 8) | bs[7].v; + else + v_size = slice_size - hdr_size - y_size - u_size; + + a_size = slice_size - hdr_size - y_size - u_size - v_size; + + GetBitContext gb; + switch (gid.z) { + case 0: + init_get_bits(gb, u8buf(bs + hdr_size), int(y_size)); + break; + case 1: + init_get_bits(gb, u8buf(bs + hdr_size + y_size), int(u_size)); + break; + case 2: + init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size), int(v_size)); + break; + case 3: + init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size + v_size), int(a_size)); + break; + } + + /** + * Support for the grayscale "extension" in the prores_aw encoder. + * According to the spec, entropy coded data should never be empty, + * and instead contain at least the DC coefficients. + * This avoids undefined behavior. + */ + if (left_bits(gb) == 0) + return; + + /** + * 4 ProRes Frame Structure + * ProRes tiles pictures into a grid of slices, whose size is determined + * by the log2_slice_width parameter (height is always 1 MB). + * Each slice has a width of (1 << log2_slice_width) MBs, until the picture + * cannot accommodate a full one. At this point, the remaining space + * is recursively completed using the first smaller power of two that fits + * (see Figure 1). + * The maximum number of extra slices is 3, when log2_slice_width is 3, + * with sizes 4, 2 and 1 MBs. + * The mb_width parameter therefore also represents the number of full slices, + * when interpreted as a fixed-point number with log2_slice_width fractional bits. 
+ */ + uint frac = bitfieldExtract(uint(mb_width), 0, log2_slice_width), + num_extra = bitCount(frac); + + uint diff = slice_width - gid.x - 1, + off = max(int(diff - num_extra + 1) << 2, 0); + + uint log2_width = min(findLSB(frac - diff >> diff) + diff + off, log2_slice_width); + + uint mb_x = (min(gid.x, slice_width - num_extra) << log2_slice_width) + + (frac & (0xf << log2_width + 1)), + mb_y = gid.y; + uint mb_count = 1 << log2_width; + + if (gid.z < 3) { + /* Color entropy decoding, inverse scanning, first part of inverse quantization */ + decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale); + } else { + /* Alpha entropy decoding */ + decode_alpha(gb, uvec2(mb_x, mb_y), mb_count); + } +} diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index b038d456dd..cbf2ab8194 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -26,7 +26,8 @@ #define DECODER_IS_SDR(codec_id) \ (((codec_id) == AV_CODEC_ID_FFV1) || \ - ((codec_id) == AV_CODEC_ID_PRORES_RAW)) + ((codec_id) == AV_CODEC_ID_PRORES_RAW) || \ + ((codec_id) == AV_CODEC_ID_PRORES)) #if CONFIG_H264_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc; @@ -46,6 +47,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc; #if CONFIG_PRORES_RAW_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc; #endif +#if CONFIG_PRORES_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc; +#endif static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_H264_VULKAN_HWACCEL @@ -66,6 +70,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_PRORES_RAW_VULKAN_HWACCEL &ff_vk_dec_prores_raw_desc, #endif +#if CONFIG_PRORES_VULKAN_HWACCEL + &ff_vk_dec_prores_desc, +#endif }; typedef struct FFVulkanDecodeProfileData { diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c new file mode 100644 index 0000000000..cab7c6986e --- /dev/null +++ b/libavcodec/vulkan_prores.c @@ -0,0 +1,547 
@@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "proresdec.h" +#include "vulkan_decode.h" +#include "hwaccel_internal.h" +#include "libavutil/mem.h" +#include "libavutil/vulkan.h" +#include "libavutil/vulkan_loader.h" +#include "libavutil/vulkan_spirv.h" + +extern const char *ff_source_common_comp; +extern const char *ff_source_prores_reset_comp; +extern const char *ff_source_prores_vld_comp; +extern const char *ff_source_prores_idct_comp; + +const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = { + .codec_id = AV_CODEC_ID_PRORES, + .queue_flags = VK_QUEUE_COMPUTE_BIT, +}; + +typedef struct ProresVulkanDecodePicture { + FFVulkanDecodePicture vp; + + AVBufferRef *slice_offset_buf; + uint32_t slice_num; + + uint32_t bitstream_start; + uint32_t bitstream_size; +} ProresVulkanDecodePicture; + +typedef struct ProresVulkanDecodeContext { + struct ProresVulkanShaderVariants { + FFVulkanShader reset; + FFVulkanShader vld; + FFVulkanShader idct; + } shaders[2]; /* Progressive/interlaced */ + + AVBufferPool *slice_offset_pool; +} ProresVulkanDecodeContext; + +typedef struct ProresVkParameters { + VkDeviceAddress slice_data; + uint32_t bitstream_size; + + uint16_t width; + uint16_t height; + uint16_t mb_width; + uint16_t mb_height; + uint16_t 
slice_width; + uint16_t slice_height; + uint8_t log2_slice_width; + uint8_t log2_chroma_w; + uint8_t depth; + uint8_t alpha_info; + uint8_t bottom_field; + + uint8_t qmat_luma [8][8]; + uint8_t qmat_chroma[8][8]; +} ProresVkParameters; + +typedef struct { + uint16_t mb_x; + uint16_t mb_y; + uint8_t mb_count; +} ProresVkSliceContext; + +static int vk_prores_start_frame(AVCodecContext *avctx, + const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + ProresContext *pr = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + ProresVulkanDecodeContext *pv = ctx->sd_ctx; + ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &pp->vp; + + int err; + + /* Host map the input slices data if supported */ + if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) + RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data, + buffer_ref, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)); + + /* Allocate slice offsets buffer */ + RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool, + &pp->slice_offset_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, (pr->slice_count + 1) * sizeof(uint32_t), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + + /* Prepare frame to be used */ + RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1, + FF_VK_REP_NATIVE, 0)); + + pp->slice_num = 0; + pp->bitstream_start = pp->bitstream_size = 0; + +fail: + return err; +} + +static int vk_prores_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + ProresContext *pr = avctx->priv_data; + ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &pp->vp; + + FFVkBuffer *slice_offset = (FFVkBuffer *)pp->slice_offset_buf->data; + 
FFVkBuffer *slices_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL; + + /* Skip picture header */ + if (slices_buf && slices_buf->host_ref && !pp->slice_num) + pp->bitstream_size = data - slices_buf->mapped_mem; + + AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 0) * sizeof(uint32_t), + pp->bitstream_size); + AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 1) * sizeof(uint32_t), + pp->bitstream_size += size); + + if (!slices_buf || !slices_buf->host_ref) { + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &pp->slice_num, NULL); + if (err < 0) + return err; + } else { + pp->slice_num++; + } + + return 0; +} + +static int vk_prores_end_frame(AVCodecContext *avctx) +{ + ProresContext *pr = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + ProresVulkanDecodeContext *pv = ctx->sd_ctx; + ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &pp->vp; + + ProresVkParameters pd; + FFVkBuffer *slice_data, *slice_offsets; + struct ProresVulkanShaderVariants *shaders; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + VkBufferMemoryBarrier2 buf_bar[2]; + int nb_img_bar = 0, nb_buf_bar = 0, err; + const AVPixFmtDescriptor *pix_desc; + + if (!pp->slice_num) + return 0; + + pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!pix_desc) + return AVERROR(EINVAL); + + slice_data = (FFVkBuffer *)vp->slices_buf->data; + slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data; + + shaders = &pv->shaders[pr->frame_type != 0]; + + pd = (ProresVkParameters) { + .slice_data = slice_data->address, + .bitstream_size = pp->bitstream_size, + + .width = avctx->width, + .height = avctx->height, + .mb_width = pr->mb_width, + .mb_height = pr->mb_height, + .slice_width = pr->slice_count / pr->mb_height, + .slice_height = pr->mb_height, + .log2_slice_width = av_log2(pr->slice_mb_width), + 
.log2_chroma_w = pix_desc->log2_chroma_w, + .depth = avctx->bits_per_raw_sample, + .alpha_info = pr->alpha_info, + .bottom_field = pr->first_field ^ (pr->frame_type == 1), + }; + + memcpy(pd.qmat_luma, pr->qmat_luma, sizeof(pd.qmat_luma )); + memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma)); + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + RET(ff_vk_exec_start(&ctx->s, exec)); + + /* Prepare deps */ + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + pr->frame)); + + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, + (AVBufferRef *[]){ vp->slices_buf, pp->slice_offset_buf }, + 2, 0)); + + /* Transfer ownership to the exec context */ + vp->slices_buf = pp->slice_offset_buf = NULL; + + /* Input frame barrier */ + ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + nb_img_bar = nb_buf_bar = 0; + + /* Reset */ + ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->reset, + pr->frame, vp->view.out, + 0, 0, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->reset, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->reset); + + vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1); + + /* Input frame barrier after reset */ + ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, 
&nb_img_bar, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + nb_img_bar = nb_buf_bar = 0; + + /* Entropy decode */ + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld, + 0, 0, 0, + slice_offsets, + 0, (pp->slice_num + 1) * sizeof(uint32_t), + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld, + pr->frame, vp->view.out, + 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->vld, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->vld); + + vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3), AV_CEIL_RSHIFT(pr->mb_height, 3), + 3 + !!pr->alpha_info); + + /* Synchronize vld and idct shaders */ + nb_img_bar = 0; + ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + nb_img_bar = nb_buf_bar = 0; + + /* Inverse transform */ + ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct, + pr->frame, vp->view.out, + 0, 0, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->idct); + + 
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->idct, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->mb_width, 1), pr->mb_height, 3); + + RET(ff_vk_exec_submit(&ctx->s, exec)); + +fail: + return err; +} + +static int add_push_data(FFVulkanShader *shd) +{ + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_data; ); + GLSLC(1, uint bitstream_size; ); + GLSLC(0, ); + GLSLC(1, uint16_t width; ); + GLSLC(1, uint16_t height; ); + GLSLC(1, uint16_t mb_width; ); + GLSLC(1, uint16_t mb_height; ); + GLSLC(1, uint16_t slice_width; ); + GLSLC(1, uint16_t slice_height; ); + GLSLC(1, uint8_t log2_slice_width; ); + GLSLC(1, uint8_t log2_chroma_w; ); + GLSLC(1, uint8_t depth; ); + GLSLC(1, uint8_t alpha_info; ); + GLSLC(1, uint8_t bottom_field; ); + GLSLC(0, ); + GLSLC(1, uint8_t qmat_luma [8][8]; ); + GLSLC(1, uint8_t qmat_chroma[8][8]; ); + GLSLC(0, }; ); + + return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); +} + +static int init_shader(AVCodecContext *avctx, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd, const char *name, const char *entrypoint, + FFVulkanDescriptorSetBinding *descs, int num_descs, + const char *source, int local_size, int interlaced) +{ + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + int err; + + RET(ff_vk_shader_init(s, shd, name, + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + local_size >> 16 & 0xff, local_size >> 8 & 0xff, local_size >> 0 & 0xff, + 0)); + + /* Common code */ + GLSLD(ff_source_common_comp); + + /* Push constants layout */ + RET(add_push_data(shd)); + + RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0)); + + if (interlaced) + av_bprintf(&shd->src, "#define INTERLACED\n"); + + /* Main code */ + GLSLD(source); + + RET(spv->compile_shader(s, 
spv, shd, &spv_data, &spv_len, entrypoint, + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, entrypoint)); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; /* propagate the RET() failure code; a hard-coded 0 would report a failed shader compile/link as success */ +} + +static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx) +{ + ProresVulkanDecodeContext *pv = ctx->sd_ctx; + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { + ff_vk_shader_free(&ctx->s, &pv->shaders[i].reset); + ff_vk_shader_free(&ctx->s, &pv->shaders[i].vld); + ff_vk_shader_free(&ctx->s, &pv->shaders[i].idct); + } + + av_buffer_pool_uninit(&pv->slice_offset_pool); + + av_freep(&pv); +} + +static int vk_decode_prores_init(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = NULL; + + AVHWFramesContext *out_frames_ctx; + ProresVulkanDecodeContext *pv; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc_set; + int max_num_slices, i, err; + + max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4); + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + err = ff_vk_decode_init(avctx); + if (err < 0) + goto fail; /* spv is already allocated here: leave through fail so it is uninitialized instead of leaked */ + ctx = dec->shared_ctx; + + pv = ctx->sd_ctx = av_mallocz(sizeof(*pv)); + if (!pv) { + err = AVERROR(ENOMEM); + goto fail; + } + + out_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + + ctx->sd_ctx_free = vk_decode_prores_uninit; + + for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { /* Progressive/interlaced */ + struct ProresVulkanShaderVariants *shaders = &pv->shaders[i]; + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .mem_quali = "writeonly", + .elems =
av_pix_fmt_count_planes(out_frames_ctx->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->reset, + "prores_dec_reset", "main", desc_set, 1, + ff_source_prores_reset_comp, 0x080801, i)); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_offsets_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "readonly", + .buf_content = "uint32_t slice_offsets", + .buf_elems = max_num_slices + 1, + }, + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .mem_quali = "writeonly", + .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld, + "prores_dec_vld", "main", desc_set, 2, + ff_source_prores_vld_comp, 0x080801, i)); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct, + "prores_dec_idct", "main", desc_set, 1, + ff_source_prores_idct_comp, 0x200201, i)); + } + + err = 0; + +fail: + spv->uninit(&spv); + + return err; +} + +static void vk_prores_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *dev_ctx = _hwctx.nc; + ProresVulkanDecodePicture *pp = data; + + ff_vk_decode_free_frame(dev_ctx, &pp->vp); +} + +const FFHWAccel ff_prores_vulkan_hwaccel = { + .p.name = "prores_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_PRORES, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = 
&vk_prores_start_frame, + .decode_slice = &vk_prores_decode_slice, + .end_frame = &vk_prores_end_frame, + .free_frame_priv = &vk_prores_free_frame_priv, + .frame_priv_data_size = sizeof(ProresVulkanDecodePicture), + .init = &vk_decode_prores_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-08-31 17:07 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=175666001298.25.14063903422932327910@463a07221176 \ --to=ffmpeg-devel@ffmpeg.org \ --cc=code@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git