From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id B8B4B4BA2C for ; Fri, 31 Jan 2025 01:22:31 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id CFB2868B49D; Fri, 31 Jan 2025 03:22:27 +0200 (EET) Received: from out-172.mta1.migadu.com (out-172.mta1.migadu.com [95.215.58.172]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 6238068B49D for ; Fri, 31 Jan 2025 03:22:21 +0200 (EET) Message-ID: <15be8639-03c8-42e0-be39-ce86efe490c0@bcheng.me> Date: Thu, 30 Jan 2025 20:22:00 -0500 MIME-Version: 1.0 To: ffmpeg-devel@ffmpeg.org References: <20250130140934.1800-1-Primeadvice@gmail.com> Content-Language: en-US X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. In-Reply-To: <20250130140934.1800-1-Primeadvice@gmail.com> X-Migadu-Flow: FLOW_OUT Subject: Re: [FFmpeg-devel] [PATCH] avcodec/amfenc: DX12 Reference-only feature support X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Benjamin Cheng via ffmpeg-devel Reply-To: FFmpeg development discussions and patches Cc: Benjamin Cheng Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: On 2025-01-30 9:09 a.m., Araz Iusubov wrote: > The Reference-Only feature in DirectX 12 is a memory optimization > technique designed for video decoding scenarios. > This feature requires that reference resources must be allocated with > the D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resource flag. > Reference textures must also be separated from output textures. 
> This feature is not supported in the current version of ffmpeg. > Since AMD GPU uses this feature in Direct 12 decoder, > ffmpeg does not support AMD GPU Direct 12 decoding. > To properly support the Reference-Only feature, > two parallel resource pools must be configured and managed: > General Resource Pool: > Contains resources used for output decoded frames. > Defined in AVHWFramesContext and manages the final decoded textures. > Reference-Only Resource Pool: > Intended for storing reference frame resources. > Resources created with the > D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY flag > are allocated to AVBufferPool. > > --- > libavcodec/d3d12va_decode.c | 58 ++++++++++++++++++++++++++++--- > libavutil/hwcontext_d3d12va.c | 65 ++++++++++++++++++++++++++++++++--- > 2 files changed, 115 insertions(+), 8 deletions(-) This patch only affects d3d12va, why is the commit message amfenc? > > diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c > index 3b8978635e..8916f94d10 100644 > --- a/libavcodec/d3d12va_decode.c > +++ b/libavcodec/d3d12va_decode.c > @@ -51,11 +51,19 @@ unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx, > D3D12VADecodeContext *ctx, const AVFrame *frame, > int curr) > { > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + > AVD3D12VAFrame *f; > ID3D12Resource *res; > unsigned i; > > - f = (AVD3D12VAFrame *)frame->data[0]; > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + f = (AVD3D12VAFrame*)frame->data[1]; > + } else { > + f = (AVD3D12VAFrame*)frame->data[0]; > + } > + > if (!f) > goto fail; > > @@ -250,6 +258,11 @@ static int d3d12va_create_decoder(AVCodecContext *avctx) > return AVERROR_PATCHWELCOME; > } > > + if (feature.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) { > + frames_hwctx->flags |= (D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | 
D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); > + av_log(avctx, AV_LOG_INFO, "Reference-Only Allocations are required for this configuration.\n"); > + } > + > desc = (D3D12_VIDEO_DECODER_DESC) { > .NodeMask = 0, > .Configuration = ctx->cfg, > @@ -440,8 +453,19 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); > ID3D12Resource *buffer = NULL; > ID3D12CommandAllocator *command_allocator = NULL; > - AVD3D12VAFrame *f = (AVD3D12VAFrame *)frame->data[0]; > - ID3D12Resource *resource = (ID3D12Resource *)f->texture; > + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); > + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + AVD3D12VAFrame *f = NULL; > + AVD3D12VAFrame *output_data = NULL; > + > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + f = (AVD3D12VAFrame*)frame->data[1]; > + output_data = (AVD3D12VAFrame*)frame->data[0]; > + } else { > + f = (AVD3D12VAFrame*)frame->data[0]; > + } > + > + ID3D12Resource* resource = (ID3D12Resource*)f->texture; > > ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list; > D3D12_RESOURCE_BARRIER barriers[32] = { 0 }; > @@ -469,6 +493,14 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > .pOutputTexture2D = resource, > }; > > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + output_args.pOutputTexture2D = output_data->texture; > + > + output_args.ConversionArguments.Enable = 1; > + output_args.ConversionArguments.pReferenceTexture2D = resource; > + output_args.ConversionArguments.ReferenceSubresource = 0; > + } > + > UINT num_barrier = 1; > barriers[0] = (D3D12_RESOURCE_BARRIER) { > .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, > @@ -481,6 +513,20 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > }, > }; > > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + barriers[1] = 
(D3D12_RESOURCE_BARRIER) { > + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, > + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, > + .Transition = { > + .pResource = output_data->texture, > + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, > + .StateBefore = D3D12_RESOURCE_STATE_COMMON, > + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, > + }, > + }; > + num_barrier++; > + } > + > memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); > input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; > input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; > @@ -505,7 +551,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > > DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator)); > > - num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ); > + num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[num_barrier], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ); > You could optimize these barriers since reference-only resources don't need to be transitioned to COMMON, and can remain in VIDEO_DECODE_{READ,WRITE}. I propose the following: - Transition all reference textures to VIDEO_DECODE_READ at creation time. - When preparing resources for input to DecodeFrame(), transition only the texture for reference output to VIDEO_DECODE_WRITE. - After DecodeFrame(), transition the reference output texture to VIDEO_DECODE_READ. This is already implicitly handled by the barrier SWAP. All in all, for the cost of an initial transition at creation time, you decrease the number of barriers in each frame to just 2. 
> ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); > > @@ -522,6 +568,10 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, > > DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f->sync_ctx.fence, ++f->sync_ctx.fence_value)); > > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, output_data->sync_ctx.fence, ++output_data->sync_ctx.fence_value)); > + } > + > DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value)); > > ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); > diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c > index 6507cf69c1..328827b040 100644 > --- a/libavutil/hwcontext_d3d12va.c > +++ b/libavutil/hwcontext_d3d12va.c > @@ -49,6 +49,24 @@ typedef struct D3D12VAFramesContext { > ID3D12GraphicsCommandList *command_list; > AVD3D12VASyncContext sync_ctx; > UINT luma_component_size; > + > + /** > + * The Reference-Only feature in DirectX 12 is a memory optimization > + * technique designed for video decoding/encoding scenarios. > + * This feature requires that reference resources must be allocated > + * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` resource flag. > + * Reference textures must also be separated from output textures. > + * To correctly support the Reference-Only feature, two parallel resource > + * pools must be configured and managed: > + * 1. General Resource Pool: > + * - Contains resources used for outputting decoded frames. > + * - Defined in `AVHWFramesContext` and manages the final decoded textures. > + * 2. Reference-Only Resource Pool: > + * - Dedicated to storing reference frame resources. > + * - Resources created with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` > + * flag are allocated to this pool. 
> + */ > + AVBufferPool *pool_reference_only; > } D3D12VAFramesContext; > > typedef struct D3D12VADevicePriv { > @@ -174,7 +192,8 @@ fail: > > static void d3d12va_frames_uninit(AVHWFramesContext *ctx) > { > - D3D12VAFramesContext *s = ctx->hwctx; > + D3D12VAFramesContext *s = ctx->hwctx; > + AVD3D12VAFramesContext *frames_hwctx = &s->p; > > D3D12_OBJECT_RELEASE(s->sync_ctx.fence); > if (s->sync_ctx.event) > @@ -185,6 +204,11 @@ static void d3d12va_frames_uninit(AVHWFramesContext *ctx) > D3D12_OBJECT_RELEASE(s->command_allocator); > D3D12_OBJECT_RELEASE(s->command_list); > D3D12_OBJECT_RELEASE(s->command_queue); > + > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + if (s->pool_reference_only) > + av_buffer_pool_uninit(&s->pool_reference_only); > + } > } > > static int d3d12va_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints) > @@ -281,6 +305,7 @@ fail: > static int d3d12va_frames_init(AVHWFramesContext *ctx) > { > AVD3D12VAFramesContext *hwctx = ctx->hwctx; > + D3D12VAFramesContext *s = ctx->hwctx; > int i; > > for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { > @@ -304,16 +329,43 @@ static int d3d12va_frames_init(AVHWFramesContext *ctx) > if (!ffhwframesctx(ctx)->pool_internal) > return AVERROR(ENOMEM); > > + s->pool_reference_only = NULL; > + > return 0; > } > > static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > { > int ret; > + D3D12VAFramesContext *s = ctx->hwctx; > + AVD3D12VAFramesContext *frames_hwctx = &s->p; > > - frame->buf[0] = av_buffer_pool_get(ctx->pool); > - if (!frame->buf[0]) > - return AVERROR(ENOMEM); > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + /* > + * for the output texture, temporarily unset D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY > + * and D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE > + */ > + D3D12_RESOURCE_FLAGS temp_flags = frames_hwctx->flags; > + frames_hwctx->flags &= 
~(D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); > + > + frame->buf[0] = av_buffer_pool_get(ctx->pool); > + if (!frame->buf[0]) > + return AVERROR(ENOMEM); > + > + if (s->pool_reference_only == NULL) { > + s->pool_reference_only = av_buffer_pool_init2(sizeof(AVD3D12VAFrame), > + ctx, d3d12va_pool_alloc, NULL); > + } > + > + frames_hwctx->flags = temp_flags; > + frame->buf[1] = av_buffer_pool_get(s->pool_reference_only); > + if (!frame->buf[1]) > + return AVERROR(ENOMEM); > + } else { > + frame->buf[0] = av_buffer_pool_get(ctx->pool); > + if (!frame->buf[0]) > + return AVERROR(ENOMEM); > + } > > ret = av_image_fill_arrays(frame->data, frame->linesize, NULL, > ctx->sw_format, ctx->width, ctx->height, > @@ -322,6 +374,11 @@ static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > return ret; > > frame->data[0] = frame->buf[0]->data; > + > + if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) { > + frame->data[1] = frame->buf[1]->data; > + } > + > frame->format = AV_PIX_FMT_D3D12; > frame->width = ctx->width; > frame->height = ctx->height; _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".