On 30/01/2025 15:09, Araz Iusubov wrote: > The Reference-Only feature in DirectX 12 is a memory optimization > technique designed for video decoding scenarios. > This feature requires that reference resources be allocated with > the D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resource flag. > Reference textures must also be separated from output textures. > This feature is not supported in the current version of ffmpeg. > Since AMD GPU uses this feature in its Direct3D 12 decoder, > ffmpeg does not support AMD GPU Direct3D 12 decoding. > To properly support the Reference-Only feature, > two parallel resource pools must be configured and managed: > General Resource Pool: > Contains resources used for output decoded frames. > Defined in AVHWFramesContext and manages the final decoded textures. > Reference-Only Resource Pool: > Intended for storing reference frame resources. > Resources created with the > D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY flag > are allocated to AVBufferPool. 
> > --- > libavcodec/d3d12va_decode.c | 58 ++++++++++++++++++++++++++++--- > libavutil/hwcontext_d3d12va.c | 65 ++++++++++++++++++++++++++++++++--- > 2 files changed, 115 insertions(+), 8 deletions(-) > > diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c > index 3b8978635e..8916f94d10 100644 > --- a/libavcodec/d3d12va_decode.c > +++ b/libavcodec/d3d12va_decode.c > @@ -51,11 +51,19 @@ unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx, > D3D12VADecodeContext *ctx, const AVFrame *frame, > int curr) > { > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + > AVD3D12VAFrame *f; > ID3D12Resource *res; > unsigned i; > > - f = (AVD3D12VAFrame *)frame->data[0]; > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + f = (AVD3D12VAFrame*)frame->data[1]; > + } else { > + f = (AVD3D12VAFrame*)frame->data[0]; > + } > + > if (!f) > goto fail; > > @@ -250,6 +258,11 @@ static int d3d12va_create_decoder(AVCodecContext *avctx) > return AVERROR_PATCHWELCOME; > } > > + if (feature.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) { > + frames_hwctx->flags |= (D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); > + av_log(avctx, AV_LOG_INFO, "Reference-Only Allocations are required for this configuration.\n"); > + } > + > desc = (D3D12_VIDEO_DECODER_DESC) { > .NodeMask = 0, > .Configuration = ctx->cfg, > @@ -440,8 +453,19 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > ID3D12Resource *buffer = NULL; > ID3D12CommandAllocator *command_allocator = NULL; > - AVD3D12VAFrame *f = (AVD3D12VAFrame *)frame->data[0]; > - ID3D12Resource *resource = (ID3D12Resource *)f->texture; > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext 
*frames_hwctx = frames_ctx->hwctx; > + AVD3D12VAFrame *f = NULL; > + AVD3D12VAFrame *output_data = NULL; > + > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + f = (AVD3D12VAFrame*)frame->data[1]; > + output_data = (AVD3D12VAFrame*)frame->data[0]; > + } else { > + f = (AVD3D12VAFrame*)frame->data[0]; > + } > + > + ID3D12Resource* resource = (ID3D12Resource*)f->texture; > > ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list; > D3D12_RESOURCE_BARRIER barriers[32] = { 0 }; > @@ -469,6 +493,14 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > .pOutputTexture2D = resource, > }; > > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + output_args.pOutputTexture2D = output_data->texture; > + > + output_args.ConversionArguments.Enable = 1; > + output_args.ConversionArguments.pReferenceTexture2D = resource; > + output_args.ConversionArguments.ReferenceSubresource = 0; > + } > + > UINT num_barrier = 1; > barriers[0] = (D3D12_RESOURCE_BARRIER) { > .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, > @@ -481,6 +513,20 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > }, > }; > > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + barriers[1] = (D3D12_RESOURCE_BARRIER) { > + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, > + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, > + .Transition = { > + .pResource = output_data->texture, > + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, > + .StateBefore = D3D12_RESOURCE_STATE_COMMON, > + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, > + }, > + }; > + num_barrier++; > + } > + > memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); > input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; > input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; > @@ -505,7 +551,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > > 
DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator)); > > - num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ); > + num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[num_barrier], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ); > > ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); > > @@ -522,6 +568,10 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > > DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f->sync_ctx.fence, ++f->sync_ctx.fence_value)); > > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, output_data->sync_ctx.fence, ++output_data->sync_ctx.fence_value)); > + } > + > DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value)); > > ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); > diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c > index 6507cf69c1..328827b040 100644 > --- a/libavutil/hwcontext_d3d12va.c > +++ b/libavutil/hwcontext_d3d12va.c > @@ -49,6 +49,24 @@ typedef struct D3D12VAFramesContext { > ID3D12GraphicsCommandList *command_list; > AVD3D12VASyncContext sync_ctx; > UINT luma_component_size; > + > + /** > + * The Reference-Only feature in DirectX 12 is a memory optimization > + * technique designed for video decoding/encoding scenarios. > + * This feature requires that reference resources must be allocated > + * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` resource flag. > + * Reference textures must also be separated from output textures. > + * To correctly support the Reference-Only feature, two parallel resource > + * pools must be configured and managed: > + * 1. 
General Resource Pool: > + * - Contains resources used for outputting decoded frames. > + * - Defined in `AVHWFramesContext` and manages the final decoded textures. > + * 2. Reference-Only Resource Pool: > + * - Dedicated to storing reference frame resources. > + * - Resources created with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` > + * flag are allocated to this pool. > + */ > + AVBufferPool *pool_reference_only; This information doesn't belong here, and this field definitely does not belong here either, as the hwcontext_d3d12va.c code doesn't even touch it. We try not to put random internal fields in public structs anymore.