From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by master.gitmailbox.com (Postfix) with ESMTPS id B8B4B4BA2C
	for <ffmpegdev@gitmailbox.com>; Fri, 31 Jan 2025 01:22:31 +0000 (UTC)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id CFB2868B49D;
	Fri, 31 Jan 2025 03:22:27 +0200 (EET)
Received: from out-172.mta1.migadu.com (out-172.mta1.migadu.com
 [95.215.58.172])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 6238068B49D
 for <ffmpeg-devel@ffmpeg.org>; Fri, 31 Jan 2025 03:22:21 +0200 (EET)
Message-ID: <15be8639-03c8-42e0-be39-ce86efe490c0@bcheng.me>
Date: Thu, 30 Jan 2025 20:22:00 -0500
MIME-Version: 1.0
To: ffmpeg-devel@ffmpeg.org
References: <20250130140934.1800-1-Primeadvice@gmail.com>
Content-Language: en-US
X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and
 include these headers.
In-Reply-To: <20250130140934.1800-1-Primeadvice@gmail.com>
X-Migadu-Flow: FLOW_OUT
Subject: Re: [FFmpeg-devel] [PATCH] avcodec/amfenc: DX12 Reference-only
 feature support
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
From: Benjamin Cheng via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: Benjamin Cheng <ben@bcheng.me>
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Archived-At: <https://master.gitmailbox.com/ffmpegdev/15be8639-03c8-42e0-be39-ce86efe490c0@bcheng.me/>
List-Archive: <https://master.gitmailbox.com/ffmpegdev/>
List-Post: <mailto:ffmpegdev@gitmailbox.com>

On 2025-01-30 9:09 a.m., Araz Iusubov wrote:
> The Reference-Only feature in DirectX 12 is a memory optimization
> technique designed for video decoding scenarios.
> This feature requires that reference resources must be allocated with
> the D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resource flag.
> Reference textures must also be separated from output textures.
> This feature is not supported in the current version of ffmpeg.
> Since AMD GPU uses this feature in Direct 12 decoder,
> ffmpeg does not support AMD GPU Direct 12 decoding.
> To properly support the Reference-Only feature,
> two parallel resource pools must be configured and managed:
> General Resource Pool:
> Contains resources used for output decoded frames.
> Defined in AVHWFramesContext and manages the final decoded textures.
> Reference-Only Resource Pool:
> Intended for storing reference frame resources.
> Resources created with the
> D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY flag
> are allocated to AVBufferPool.
> 
> ---
>   libavcodec/d3d12va_decode.c   | 58 ++++++++++++++++++++++++++++---
>   libavutil/hwcontext_d3d12va.c | 65 ++++++++++++++++++++++++++++++++---
>   2 files changed, 115 insertions(+), 8 deletions(-)

This patch only affects d3d12va; why does the commit message say amfenc?

> 
> diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c
> index 3b8978635e..8916f94d10 100644
> --- a/libavcodec/d3d12va_decode.c
> +++ b/libavcodec/d3d12va_decode.c
> @@ -51,11 +51,19 @@ unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx,
>                                         D3D12VADecodeContext *ctx, const AVFrame *frame,
>                                         int curr)
>   {
> +    AVHWFramesContext      *frames_ctx   = D3D12VA_FRAMES_CONTEXT(avctx);
> +    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
> +
>       AVD3D12VAFrame *f;
>       ID3D12Resource *res;
>       unsigned i;
>   
> -    f = (AVD3D12VAFrame *)frame->data[0];
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        f = (AVD3D12VAFrame*)frame->data[1];
> +    } else {
> +        f = (AVD3D12VAFrame*)frame->data[0];
> +    }
> +
>       if (!f)
>           goto fail;
>   
> @@ -250,6 +258,11 @@ static int d3d12va_create_decoder(AVCodecContext *avctx)
>           return AVERROR_PATCHWELCOME;
>       }
>   
> +    if (feature.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
> +        frames_hwctx->flags |= (D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
> +        av_log(avctx, AV_LOG_INFO, "Reference-Only Allocations are required for this configuration.\n");
> +    }
> +
>       desc = (D3D12_VIDEO_DECODER_DESC) {
>           .NodeMask = 0,
>           .Configuration = ctx->cfg,
> @@ -440,8 +453,19 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>       D3D12VADecodeContext   *ctx               = D3D12VA_DECODE_CONTEXT(avctx);
>       ID3D12Resource         *buffer            = NULL;
>       ID3D12CommandAllocator *command_allocator = NULL;
> -    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame *)frame->data[0];
> -    ID3D12Resource         *resource          = (ID3D12Resource *)f->texture;
> +    AVHWFramesContext      *frames_ctx        = D3D12VA_FRAMES_CONTEXT(avctx);
> +    AVD3D12VAFramesContext *frames_hwctx      = frames_ctx->hwctx;
> +    AVD3D12VAFrame         *f                 = NULL;
> +    AVD3D12VAFrame         *output_data       = NULL;
> +
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        f           = (AVD3D12VAFrame*)frame->data[1];
> +        output_data = (AVD3D12VAFrame*)frame->data[0];
> +    } else {
> +        f           = (AVD3D12VAFrame*)frame->data[0];
> +    }
> +
> +    ID3D12Resource* resource = (ID3D12Resource*)f->texture;
>   
>       ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
>       D3D12_RESOURCE_BARRIER barriers[32] = { 0 };
> @@ -469,6 +493,14 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>           .pOutputTexture2D    = resource,
>       };
>   
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        output_args.pOutputTexture2D = output_data->texture;
> +
> +        output_args.ConversionArguments.Enable               = 1;
> +        output_args.ConversionArguments.pReferenceTexture2D  = resource;
> +        output_args.ConversionArguments.ReferenceSubresource = 0;
> +    }
> +
>       UINT num_barrier = 1;
>       barriers[0] = (D3D12_RESOURCE_BARRIER) {
>           .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
> @@ -481,6 +513,20 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>           },
>       };
>   
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        barriers[1] = (D3D12_RESOURCE_BARRIER) {
> +            .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
> +            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
> +            .Transition = {
> +                .pResource   = output_data->texture,
> +                .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
> +                .StateBefore = D3D12_RESOURCE_STATE_COMMON,
> +                .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
> +            },
> +        };
> +        num_barrier++;
> +    }
> +
>       memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
>       input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
>       input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
> @@ -505,7 +551,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>   
>       DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator));
>   
> -    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
> +    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[num_barrier], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
>   

You could optimize these barriers since reference-only resources don't 
need to be transitioned to COMMON, and can remain in 
VIDEO_DECODE_{READ,WRITE}.

I propose the following:
- Transition all reference textures to VIDEO_DECODE_READ at creation time.
- When preparing resources for input to DecodeFrame(), transition only 
the texture for reference output to VIDEO_DECODE_WRITE.
- After DecodeFrame(), transition the reference output texture to 
VIDEO_DECODE_READ. This is already implicitly handled by the barrier SWAP.

All in all, for the cost of an initial transition at creation time, you 
reduce the number of barriers in each frame to just 2.

>       ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
>   
> @@ -522,6 +568,10 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>   
>       DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f->sync_ctx.fence, ++f->sync_ctx.fence_value));
>   
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, output_data->sync_ctx.fence, ++output_data->sync_ctx.fence_value));
> +    }
> +
>       DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value));
>   
>       ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value);
> diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c
> index 6507cf69c1..328827b040 100644
> --- a/libavutil/hwcontext_d3d12va.c
> +++ b/libavutil/hwcontext_d3d12va.c
> @@ -49,6 +49,24 @@ typedef struct D3D12VAFramesContext {
>       ID3D12GraphicsCommandList *command_list;
>       AVD3D12VASyncContext       sync_ctx;
>       UINT                       luma_component_size;
> +
> +    /**
> +     * The Reference-Only feature in DirectX 12 is a memory optimization
> +     * technique designed for video decoding/encoding scenarios.
> +     * This feature requires that reference resources must be allocated
> +     * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` resource flag.
> +     * Reference textures must also be separated from output textures.
> +     * To correctly support the Reference-Only feature, two parallel resource
> +     * pools must be configured and managed:
> +     * 1. General Resource Pool:
> +     *   - Contains resources used for outputting decoded frames.
> +     *   - Defined in `AVHWFramesContext` and manages the final decoded textures.
> +     * 2. Reference-Only Resource Pool:
> +     *   - Dedicated to storing reference frame resources.
> +     *   - Resources created with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY`
> +     *     flag are allocated to this pool.
> +     */
> +    AVBufferPool              *pool_reference_only;
>   } D3D12VAFramesContext;
>   
>   typedef struct D3D12VADevicePriv {
> @@ -174,7 +192,8 @@ fail:
>   
>   static void d3d12va_frames_uninit(AVHWFramesContext *ctx)
>   {
> -    D3D12VAFramesContext *s = ctx->hwctx;
> +    D3D12VAFramesContext   *s            = ctx->hwctx;
> +    AVD3D12VAFramesContext *frames_hwctx = &s->p;
>   
>       D3D12_OBJECT_RELEASE(s->sync_ctx.fence);
>       if (s->sync_ctx.event)
> @@ -185,6 +204,11 @@ static void d3d12va_frames_uninit(AVHWFramesContext *ctx)
>       D3D12_OBJECT_RELEASE(s->command_allocator);
>       D3D12_OBJECT_RELEASE(s->command_list);
>       D3D12_OBJECT_RELEASE(s->command_queue);
> +
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        if (s->pool_reference_only)
> +            av_buffer_pool_uninit(&s->pool_reference_only);
> +    }
>   }
>   
>   static int d3d12va_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints)
> @@ -281,6 +305,7 @@ fail:
>   static int d3d12va_frames_init(AVHWFramesContext *ctx)
>   {
>       AVD3D12VAFramesContext *hwctx = ctx->hwctx;
> +    D3D12VAFramesContext   *s     = ctx->hwctx;
>       int i;
>   
>       for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
> @@ -304,16 +329,43 @@ static int d3d12va_frames_init(AVHWFramesContext *ctx)
>       if (!ffhwframesctx(ctx)->pool_internal)
>           return AVERROR(ENOMEM);
>   
> +    s->pool_reference_only = NULL;
> +
>       return 0;
>   }
>   
>   static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>   {
>       int ret;
> +    D3D12VAFramesContext   *s            = ctx->hwctx;
> +    AVD3D12VAFramesContext *frames_hwctx = &s->p;
>   
> -    frame->buf[0] = av_buffer_pool_get(ctx->pool);
> -    if (!frame->buf[0])
> -        return AVERROR(ENOMEM);
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        /*
> +         * for the output texture, temporarily unset D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY
> +         * and D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
> +        */
> +        D3D12_RESOURCE_FLAGS temp_flags = frames_hwctx->flags;
> +        frames_hwctx->flags &= ~(D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
> +
> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
> +        if (!frame->buf[0])
> +            return AVERROR(ENOMEM);
> +
> +        if (s->pool_reference_only == NULL) {
> +            s->pool_reference_only = av_buffer_pool_init2(sizeof(AVD3D12VAFrame),
> +                ctx, d3d12va_pool_alloc, NULL);
> +        }
> +
> +        frames_hwctx->flags = temp_flags;
> +        frame->buf[1] = av_buffer_pool_get(s->pool_reference_only);
> +        if (!frame->buf[1])
> +            return AVERROR(ENOMEM);
> +    } else {
> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
> +        if (!frame->buf[0])
> +            return AVERROR(ENOMEM);
> +    }
>   
>       ret = av_image_fill_arrays(frame->data, frame->linesize, NULL,
>                                  ctx->sw_format, ctx->width, ctx->height,
> @@ -322,6 +374,11 @@ static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>           return ret;
>   
>       frame->data[0] = frame->buf[0]->data;
> +
> +    if (frames_hwctx->flags & D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
> +        frame->data[1] = frame->buf[1]->data;
> +    }
> +
>       frame->format  = AV_PIX_FMT_D3D12;
>       frame->width   = ctx->width;
>       frame->height  = ctx->height;

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".