Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH, v3] avcodec/d3d12va_encode: texture array support for HEVC
@ 2025-05-24  9:35 Araz Iusubov
  2025-06-02  0:33 ` Tong Wu
  0 siblings, 1 reply; 2+ messages in thread
From: Araz Iusubov @ 2025-05-24  9:35 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Araz Iusubov

This patch adds support for the texture array feature
used by AMD boards in the D3D12 HEVC encoder.
In texture array mode, a single texture array is shared for all
reference and reconstructed pictures using different subresources.
The implementation ensures compatibility
and has been successfully tested on AMD, Intel, and NVIDIA GPUs.

v2 updates: 
1. The reference to MaxL1ReferencesForB for the H.264 codec 
was updated to use the corresponding H.264 field instead of the HEVC one.
2. Max_subresource_array_size calculation was adjusted 
by removing the D3D12VA_VIDEO_ENC_ASYNC_DEPTH offset.

v3 updates:
1. Fixed a type mismatch by explicitly casting AVD3D12VAFrame* to 
(uint8_t*) when assigning to data[0].
2. Adjusted logging format specifier for HRESULT to use `%lx`.

---
 libavcodec/d3d12va_encode.c      | 241 +++++++++++++++++++++++++------
 libavcodec/d3d12va_encode.h      |  29 ++++
 libavcodec/d3d12va_encode_hevc.c |   5 +-
 3 files changed, 231 insertions(+), 44 deletions(-)

diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index 4d738200fe..85e79b2e14 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -264,6 +264,11 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
 
     av_log(avctx, AV_LOG_DEBUG, "Input surface is %p.\n", pic->input_surface->texture);
 
+    if (ctx->is_texture_array) {
+        base_pic->recon_image->data[0] = (uint8_t *)ctx->texture_array_frame;
+        pic->subresource_index = (ctx->subresource_used_index++) % ctx->max_subresource_array_size;
+    }
+
     pic->recon_surface = (AVD3D12VAFrame *)base_pic->recon_image->data[0];
     av_log(avctx, AV_LOG_DEBUG, "Recon surface is %p.\n",
            pic->recon_surface->texture);
@@ -325,11 +330,28 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
             goto fail;
         }
 
+        if (ctx->is_texture_array) {
+            d3d12_refs.pSubresources = av_calloc(d3d12_refs.NumTexture2Ds,
+                                                sizeof(*d3d12_refs.pSubresources));
+            if (!d3d12_refs.pSubresources) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+        }
+
         i = 0;
-        for (j = 0; j < base_pic->nb_refs[0]; j++)
-            d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic->refs[0][j]->priv)->recon_surface->texture;
-        for (j = 0; j < base_pic->nb_refs[1]; j++)
-            d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic->refs[1][j]->priv)->recon_surface->texture;
+        for (j = 0; j < base_pic->nb_refs[0]; j++) {
+            d3d12_refs.ppTexture2Ds[i]  = ((D3D12VAEncodePicture *)base_pic->refs[0][j]->priv)->recon_surface->texture;
+            if (ctx->is_texture_array)
+                d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture *)base_pic->refs[0][j]->priv)->subresource_index;
+            i++;
+        }
+        for (j = 0; j < base_pic->nb_refs[1]; j++) {
+            d3d12_refs.ppTexture2Ds[i]  = ((D3D12VAEncodePicture *)base_pic->refs[1][j]->priv)->recon_surface->texture;
+            if (ctx->is_texture_array)
+                d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture *)base_pic->refs[1][j]->priv)->subresource_index;
+            i++;
+        }
     }
 
     input_args.PictureControlDesc.IntraRefreshFrameIndex  = 0;
@@ -343,7 +365,11 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
     output_args.Bitstream.pBuffer                                    = pic->output_buffer;
     output_args.Bitstream.FrameStartOffset                           = pic->aligned_header_size;
     output_args.ReconstructedPicture.pReconstructedPicture           = pic->recon_surface->texture;
-    output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0;
+    if (ctx->is_texture_array) {
+        output_args.ReconstructedPicture.ReconstructedPictureSubresource = pic->subresource_index;
+    } else {
+        output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0;
+    }
     output_args.EncoderOutputMetadata.pBuffer                        = pic->encoded_metadata;
     output_args.EncoderOutputMetadata.Offset                         = 0;
 
@@ -381,35 +407,87 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
         },                                                          \
     }
 
+#define TRANSITION_BARRIER_SUBRESOURCE(res, subres,before, after)   \
+    (D3D12_RESOURCE_BARRIER) {                                      \
+        .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,            \
+        .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,                  \
+        .Transition = {                                             \
+            .pResource   = res,                                     \
+            .Subresource = subres,                                  \
+            .StateBefore = before,                                  \
+            .StateAfter  = after,                                   \
+        },                                                          \
+    }
+
     barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture,
                                      D3D12_RESOURCE_STATE_COMMON,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
     barriers[1] = TRANSITION_BARRIER(pic->output_buffer,
                                      D3D12_RESOURCE_STATE_COMMON,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
-    barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture,
+    barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata,
                                      D3D12_RESOURCE_STATE_COMMON,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
-    barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata,
+    barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata,
                                      D3D12_RESOURCE_STATE_COMMON,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
-    barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata,
-                                     D3D12_RESOURCE_STATE_COMMON,
-                                     D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
-
-    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers);
-
-    if (d3d12_refs.NumTexture2Ds) {
-        D3D12_RESOURCE_BARRIER refs_barriers[3];
-
-        for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
-            refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
-                                                  D3D12_RESOURCE_STATE_COMMON,
-                                                  D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
-
-        ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, d3d12_refs.NumTexture2Ds,
-                                                      refs_barriers);
+    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4, barriers);
+
+    //set transit barriers for reference pic and recon pic
+    int barriers_ref_index = 0;
+    D3D12_RESOURCE_BARRIER *barriers_ref = NULL;
+    if (ctx->is_texture_array) {
+        barriers_ref = av_calloc(ctx->max_subresource_array_size * ctx->plane_count,
+            sizeof(D3D12_RESOURCE_BARRIER));
+    } else {
+        barriers_ref = av_calloc(MAX_DPB_SIZE,sizeof(D3D12_RESOURCE_BARRIER));
+    }
+
+    if (ctx->is_texture_array) {
+         // In Texture array mode, the D3D12 uses the same texture array for all the input
+         // reference pics in ppTexture2Ds and also for the pReconstructedPicture output allocations,
+         //just different subresources.
+        D3D12_RESOURCE_DESC references_tex_array_desc = { 0 };
+        pic->recon_surface->texture->lpVtbl->GetDesc(pic->recon_surface->texture, &references_tex_array_desc);
+
+        for (uint32_t reference_subresource = 0; reference_subresource < references_tex_array_desc.DepthOrArraySize;
+            reference_subresource++) {
+
+            //D3D12 DecomposeSubresource
+            uint32_t mip_slice, plane_slice, array_slice, array_size;
+            array_size = references_tex_array_desc.DepthOrArraySize;
+            mip_slice = reference_subresource % references_tex_array_desc.MipLevels;
+            array_slice = (reference_subresource / references_tex_array_desc.MipLevels) % array_size;
+
+            for (plane_slice = 0; plane_slice < ctx->plane_count; plane_slice++) {
+                //Calculate the subresource index
+                uint32_t planeOutputSubresource = mip_slice + array_slice * references_tex_array_desc.MipLevels +
+                                        plane_slice * references_tex_array_desc.MipLevels * array_size;
+
+                if (reference_subresource == pic->subresource_index) {
+                    barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture, planeOutputSubresource,
+                                        D3D12_RESOURCE_STATE_COMMON,
+                                        D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
+                } else {
+                    barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture, planeOutputSubresource,
+                                        D3D12_RESOURCE_STATE_COMMON,
+                                        D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
+                }
+            }
+        }
+    } else {
+        barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER(pic->recon_surface->texture,
+                                        D3D12_RESOURCE_STATE_COMMON,
+                                        D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
+
+        if (d3d12_refs.NumTexture2Ds) {
+            for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
+                barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
+                                                    D3D12_RESOURCE_STATE_COMMON,
+                                                    D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
+        }
     }
+    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, barriers_ref_index, barriers_ref);
 
     ID3D12VideoEncodeCommandList2_EncodeFrame(cmd_list, ctx->encoder, ctx->encoder_heap,
                                               &input_args, &output_args);
@@ -422,16 +500,15 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
 
     ID3D12VideoEncodeCommandList2_ResolveEncoderOutputMetadata(cmd_list, &input_metadata, &output_metadata);
 
-    if (d3d12_refs.NumTexture2Ds) {
-        D3D12_RESOURCE_BARRIER refs_barriers[3];
-
-        for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
-            refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
-                                                  D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ,
-                                                  D3D12_RESOURCE_STATE_COMMON);
-
-        ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, d3d12_refs.NumTexture2Ds,
-                                                      refs_barriers);
+    //swap the barriers_ref transition state
+    if (barriers_ref_index > 0) {
+        for (i = 0; i < barriers_ref_index; i++) {
+            D3D12_RESOURCE_STATES temp_statue = barriers_ref[i].Transition.StateBefore;
+            barriers_ref[i].Transition.StateBefore = barriers_ref[i].Transition.StateAfter;
+            barriers_ref[i].Transition.StateAfter = temp_statue;
+        }
+        ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, barriers_ref_index,
+                                                      barriers_ref);
     }
 
     barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture,
@@ -440,17 +517,14 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
     barriers[1] = TRANSITION_BARRIER(pic->output_buffer,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
                                      D3D12_RESOURCE_STATE_COMMON);
-    barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture,
-                                     D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
-                                     D3D12_RESOURCE_STATE_COMMON);
-    barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata,
+    barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ,
                                      D3D12_RESOURCE_STATE_COMMON);
-    barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata,
+    barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata,
                                      D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
                                      D3D12_RESOURCE_STATE_COMMON);
 
-    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers);
+    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4, barriers);
 
     hr = ID3D12VideoEncodeCommandList2_Close(cmd_list);
     if (FAILED(hr)) {
@@ -489,6 +563,14 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
     if (d3d12_refs.ppTexture2Ds)
         av_freep(&d3d12_refs.ppTexture2Ds);
 
+    if (ctx->is_texture_array) {
+        if (d3d12_refs.pSubresources)
+            av_freep(&d3d12_refs.pSubresources);
+    }
+
+    if (barriers_ref)
+        av_freep(&barriers_ref);
+
     return 0;
 
 fail:
@@ -498,6 +580,14 @@ fail:
     if (d3d12_refs.ppTexture2Ds)
         av_freep(&d3d12_refs.ppTexture2Ds);
 
+    if (ctx->is_texture_array) {
+        if (d3d12_refs.pSubresources)
+            av_freep(&d3d12_refs.pSubresources);
+    }
+
+    if (barriers_ref)
+        av_freep(&barriers_ref);
+
     if (ctx->codec->free_picture_params)
         ctx->codec->free_picture_params(pic);
 
@@ -1088,13 +1178,15 @@ static int d3d12va_encode_init_gop_structure(AVCodecContext *avctx)
         switch (ctx->codec->d3d12_codec) {
             case D3D12_VIDEO_ENCODER_CODEC_H264:
                 ref_l0 = FFMIN(support.PictureSupport.pH264Support->MaxL0ReferencesForP,
-                               support.PictureSupport.pH264Support->MaxL1ReferencesForB);
+                               support.PictureSupport.pH264Support->MaxL1ReferencesForB ?
+                               support.PictureSupport.pH264Support->MaxL1ReferencesForB : UINT_MAX);
                 ref_l1 = support.PictureSupport.pH264Support->MaxL1ReferencesForB;
                 break;
 
             case D3D12_VIDEO_ENCODER_CODEC_HEVC:
                 ref_l0 = FFMIN(support.PictureSupport.pHEVCSupport->MaxL0ReferencesForP,
-                               support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB);
+                               support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB ?
+                               support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB : UINT_MAX);
                 ref_l1 = support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB;
                 break;
 
@@ -1336,6 +1428,47 @@ fail:
     return err;
 }
 
+static int d3d12va_create_texture_array(AVHWFramesContext *ctx, D3D12VAEncodeContext *encode_context)
+{
+    AVD3D12VAFramesContext *hwctx        = ctx->hwctx;
+    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    AVD3D12VAFrame *frame;
+    D3D12_HEAP_PROPERTIES props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
+
+    encode_context->max_subresource_array_size = MAX_DPB_SIZE + 1;
+
+    D3D12_RESOURCE_DESC desc = {
+        .Dimension        = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
+        .Alignment        = 0,
+        .Width            = ctx->width,
+        .Height           = ctx->height,
+        .DepthOrArraySize = encode_context->max_subresource_array_size,
+        .MipLevels        = 1,
+        .Format           = hwctx->format,
+        .SampleDesc       = {.Count = 1, .Quality = 0 },
+        .Layout           = D3D12_TEXTURE_LAYOUT_UNKNOWN,
+        .Flags            = hwctx->flags,
+    };
+
+    frame = av_mallocz(sizeof(AVD3D12VAFrame));
+    if (!frame)
+        return AVERROR(ENOMEM);
+
+    if (FAILED(ID3D12Device_CreateCommittedResource(device_hwctx->device, &props, D3D12_HEAP_FLAG_NONE, &desc,
+        D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&frame->texture))) {
+        av_log(ctx, AV_LOG_ERROR, "Could not create the texture\n");
+        return AVERROR(EINVAL);
+    }
+
+    ID3D12Device_CreateFence(device_hwctx->device, 0, D3D12_FENCE_FLAG_NONE,
+                                      &IID_ID3D12Fence, (void **)&frame->sync_ctx.fence);
+
+    frame->sync_ctx.event = CreateEvent(NULL, FALSE, FALSE, NULL);
+    encode_context->texture_array_frame = frame;
+    return 0;
+}
+
 static int d3d12va_encode_create_recon_frames(AVCodecContext *avctx)
 {
     FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
@@ -1394,6 +1527,7 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
     FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
     D3D12VAEncodeContext       *ctx = avctx->priv_data;
     D3D12_FEATURE_DATA_VIDEO_FEATURE_AREA_SUPPORT support = { 0 };
+    D3D12_FEATURE_DATA_FORMAT_INFO format_info = {0};
     int err;
     HRESULT hr;
 
@@ -1429,6 +1563,15 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
         goto fail;
     }
 
+    format_info.Format = ((AVD3D12VAFramesContext*)base_ctx->input_frames->hwctx)->format;
+    if (FAILED(ID3D12VideoDevice_CheckFeatureSupport(ctx->hwctx->device, D3D12_FEATURE_FORMAT_INFO,
+        &format_info, sizeof(format_info)))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query format plane count: 0x%lx\n", hr);
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+    ctx->plane_count = format_info.PlaneCount;
+
     err = d3d12va_encode_set_profile(avctx);
     if (err < 0)
         goto fail;
@@ -1485,6 +1628,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
             goto fail;
     }
 
+    if (ctx->is_texture_array) {
+        d3d12va_create_texture_array(base_ctx->recon_frames, avctx->priv_data);
+    }
+
     base_ctx->output_delay = base_ctx->b_per_p;
     base_ctx->decode_delay = base_ctx->max_b_depth;
 
@@ -1528,6 +1675,18 @@ int ff_d3d12va_encode_close(AVCodecContext *avctx)
 
     av_buffer_pool_uninit(&ctx->output_buffer_pool);
 
+    if (ctx->is_texture_array) {
+        ID3D12Resource *pResource = ctx->texture_array_frame->texture;
+        if (pResource) {
+            D3D12_OBJECT_RELEASE(pResource);
+            ctx->texture_array_frame->texture = NULL;
+        }
+        D3D12_OBJECT_RELEASE(ctx->texture_array_frame->sync_ctx.fence);
+        if (ctx->texture_array_frame->sync_ctx.event)
+            CloseHandle(ctx->texture_array_frame->sync_ctx.event);
+        av_free(ctx->texture_array_frame);
+    }
+
     D3D12_OBJECT_RELEASE(ctx->command_list);
     D3D12_OBJECT_RELEASE(ctx->command_queue);
 
diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h
index 3b0b8153d5..fc31857f1a 100644
--- a/libavcodec/d3d12va_encode.h
+++ b/libavcodec/d3d12va_encode.h
@@ -52,6 +52,8 @@ typedef struct D3D12VAEncodePicture {
     ID3D12Resource *encoded_metadata;
     ID3D12Resource *resolved_metadata;
 
+    int            subresource_index;
+
     D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
 
     int             fence_value;
@@ -189,6 +191,33 @@ typedef struct D3D12VAEncodeContext {
      */
     AVBufferPool *output_buffer_pool;
 
+   /**
+    * Flag indicates if the HW is texture array mode.
+    */
+   int is_texture_array;
+
+   /**
+    * In texture array mode, the D3D12 uses the same texture array for all the input
+    * reference pics in ppTexture2Ds and also for the pReconstructedPicture output
+    * allocations, just different subresources.
+    */
+   AVD3D12VAFrame *texture_array_frame;
+
+   /**
+    * The max number of subresources in the texture array.
+    */
+   int max_subresource_array_size;
+
+   /**
+    * The used subresource index for pic in the texture array.
+    */
+   int subresource_used_index;
+
+   /**
+    * The number of planes in the input DXGI FORMAT .
+    */
+   int plane_count;
+
     /**
      * D3D12 video encoder.
      */
diff --git a/libavcodec/d3d12va_encode_hevc.c b/libavcodec/d3d12va_encode_hevc.c
index 938ba01f54..7e1d973f7e 100644
--- a/libavcodec/d3d12va_encode_hevc.c
+++ b/libavcodec/d3d12va_encode_hevc.c
@@ -280,9 +280,8 @@ static int d3d12va_encode_hevc_init_sequence_params(AVCodecContext *avctx)
     }
 
     if (support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RECONSTRUCTED_FRAMES_REQUIRE_TEXTURE_ARRAYS) {
-        av_log(avctx, AV_LOG_ERROR, "D3D12 video encode on this device requires texture array support, "
-               "but it's not implemented.\n");
-        return AVERROR_PATCHWELCOME;
+        ctx->is_texture_array = 1;
+        av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses texture array mode.\n");
     }
 
     desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
-- 
2.45.2.windows.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [FFmpeg-devel] [PATCH, v3] avcodec/d3d12va_encode: texture array support for HEVC
  2025-05-24  9:35 [FFmpeg-devel] [PATCH, v3] avcodec/d3d12va_encode: texture array support for HEVC Araz Iusubov
@ 2025-06-02  0:33 ` Tong Wu
  0 siblings, 0 replies; 2+ messages in thread
From: Tong Wu @ 2025-06-02  0:33 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Araz Iusubov

Araz Iusubov:
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Araz
> Iusubov
> Sent: Saturday, May 24, 2025 5:35 PM
> To: ffmpeg-devel@ffmpeg.org
> Cc: Araz Iusubov <Primeadvice@gmail.com>
> Subject: [FFmpeg-devel] [PATCH, v3] avcodec/d3d12va_encode: texture array
> support for HEVC
> 
> This patch adds support for the texture array feature used by AMD boards in the
> D3D12 HEVC encoder.
> In texture array mode, a single texture array is shared for all reference and
> reconstructed pictures using different subresources.
> The implementation ensures compatibility and has been successfully tested on
> AMD, Intel, and NVIDIA GPUs.
> 
> v2 updates:
> 1. The reference to MaxL1ReferencesForB for the H.264 codec was updated to
> use the corresponding H.264 field instead of the HEVC one.
> 2. Max_subresource_array_size calculation was adjusted by removing the
> D3D12VA_VIDEO_ENC_ASYNC_DEPTH offset.
> 
> v3 updates:
> 1. Fixed a type mismatch by explicitly casting AVD3D12VAFrame* to
> (uint8_t*) when assigning to data[0].
> 2. Adjusted logging format specifier for HRESULT to use `%lx`.
> 
> ---
>  libavcodec/d3d12va_encode.c      | 241 +++++++++++++++++++++++++------
>  libavcodec/d3d12va_encode.h      |  29 ++++
>  libavcodec/d3d12va_encode_hevc.c |   5 +-
>  3 files changed, 231 insertions(+), 44 deletions(-)
> 
> diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c index
> 4d738200fe..85e79b2e14 100644
> --- a/libavcodec/d3d12va_encode.c
> +++ b/libavcodec/d3d12va_encode.c
> @@ -264,6 +264,11 @@ static int d3d12va_encode_issue(AVCodecContext
> *avctx,
> 
>      av_log(avctx, AV_LOG_DEBUG, "Input surface is %p.\n", pic->input_surface-
> >texture);
> 
> +    if (ctx->is_texture_array) {
> +        base_pic->recon_image->data[0] = (uint8_t *)ctx->texture_array_frame;

This does not look right. base_pic->recon_image is maintained by the frame pool. If you just overwrite it, the original memory will be lost.

> +        pic->subresource_index = (ctx->subresource_used_index++) % ctx-
> >max_subresource_array_size;
> +    }
> +
>      pic->recon_surface = (AVD3D12VAFrame *)base_pic->recon_image->data[0];
>      av_log(avctx, AV_LOG_DEBUG, "Recon surface is %p.\n",
>             pic->recon_surface->texture); @@ -325,11 +330,28 @@ static int
> d3d12va_encode_issue(AVCodecContext *avctx,
>              goto fail;
>          }
> 
> +        if (ctx->is_texture_array) {
> +            d3d12_refs.pSubresources = av_calloc(d3d12_refs.NumTexture2Ds,
> +                                                sizeof(*d3d12_refs.pSubresources));
> +            if (!d3d12_refs.pSubresources) {
> +                err = AVERROR(ENOMEM);
> +                goto fail;
> +            }
> +        }
> +
>          i = 0;
> -        for (j = 0; j < base_pic->nb_refs[0]; j++)
> -            d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic-
> >refs[0][j]->priv)->recon_surface->texture;
> -        for (j = 0; j < base_pic->nb_refs[1]; j++)
> -            d3d12_refs.ppTexture2Ds[i++] = ((D3D12VAEncodePicture *)base_pic-
> >refs[1][j]->priv)->recon_surface->texture;
> +        for (j = 0; j < base_pic->nb_refs[0]; j++) {
> +            d3d12_refs.ppTexture2Ds[i]  = ((D3D12VAEncodePicture *)base_pic-
> >refs[0][j]->priv)->recon_surface->texture;
> +            if (ctx->is_texture_array)
> +                d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture *)base_pic-
> >refs[0][j]->priv)->subresource_index;
> +            i++;
> +        }
> +        for (j = 0; j < base_pic->nb_refs[1]; j++) {
> +            d3d12_refs.ppTexture2Ds[i]  = ((D3D12VAEncodePicture *)base_pic-
> >refs[1][j]->priv)->recon_surface->texture;
> +            if (ctx->is_texture_array)
> +                d3d12_refs.pSubresources[i] = ((D3D12VAEncodePicture *)base_pic-
> >refs[1][j]->priv)->subresource_index;
> +            i++;
> +        }
>      }
> 
>      input_args.PictureControlDesc.IntraRefreshFrameIndex  = 0; @@ -343,7
> +365,11 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
>      output_args.Bitstream.pBuffer                                    = pic->output_buffer;
>      output_args.Bitstream.FrameStartOffset                           = pic-
> >aligned_header_size;
>      output_args.ReconstructedPicture.pReconstructedPicture           = pic-
> >recon_surface->texture;
> -    output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0;
> +    if (ctx->is_texture_array) {
> +        output_args.ReconstructedPicture.ReconstructedPictureSubresource = pic-
> >subresource_index;
> +    } else {
> +        output_args.ReconstructedPicture.ReconstructedPictureSubresource = 0;
> +    }
>      output_args.EncoderOutputMetadata.pBuffer                        = pic-
> >encoded_metadata;
>      output_args.EncoderOutputMetadata.Offset                         = 0;
> 
> @@ -381,35 +407,87 @@ static int d3d12va_encode_issue(AVCodecContext
> *avctx,
>          },                                                          \
>      }
> 
> +#define TRANSITION_BARRIER_SUBRESOURCE(res, subres,before, after)   \
> +    (D3D12_RESOURCE_BARRIER) {                                      \
> +        .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,            \
> +        .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,                  \
> +        .Transition = {                                             \
> +            .pResource   = res,                                     \
> +            .Subresource = subres,                                  \
> +            .StateBefore = before,                                  \
> +            .StateAfter  = after,                                   \
> +        },                                                          \
> +    }
> +

Looks like you could rewrite the original TRANSITION_BARRIER instead of adding a new one.

>      barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture,
>                                       D3D12_RESOURCE_STATE_COMMON,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
>      barriers[1] = TRANSITION_BARRIER(pic->output_buffer,
>                                       D3D12_RESOURCE_STATE_COMMON,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
> -    barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture,
> +    barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata,
>                                       D3D12_RESOURCE_STATE_COMMON,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
> -    barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata,
> +    barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata,
>                                       D3D12_RESOURCE_STATE_COMMON,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
> -    barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata,
> -                                     D3D12_RESOURCE_STATE_COMMON,
> -                                     D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
> -
> -    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers);
> -
> -    if (d3d12_refs.NumTexture2Ds) {
> -        D3D12_RESOURCE_BARRIER refs_barriers[3];
> -
> -        for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
> -            refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
> -                                                  D3D12_RESOURCE_STATE_COMMON,
> -                                                  D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
> -
> -        ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list,
> d3d12_refs.NumTexture2Ds,
> -                                                      refs_barriers);
> +    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4,
> + barriers);
> +
> +    //set transit barriers for reference pic and recon pic
> +    int barriers_ref_index = 0;
> +    D3D12_RESOURCE_BARRIER *barriers_ref = NULL;
> +    if (ctx->is_texture_array) {
> +        barriers_ref = av_calloc(ctx->max_subresource_array_size * ctx-
> >plane_count,
> +            sizeof(D3D12_RESOURCE_BARRIER));
> +    } else {
> +        barriers_ref =
> av_calloc(MAX_DPB_SIZE,sizeof(D3D12_RESOURCE_BARRIER));
> +    }
> +
> +    if (ctx->is_texture_array) {
> +         // In Texture array mode, the D3D12 uses the same texture array for all the
> input
> +         // reference pics in ppTexture2Ds and also for the pReconstructedPicture
> output allocations,
> +         //just different subresources.
> +        D3D12_RESOURCE_DESC references_tex_array_desc = { 0 };
> +
> + pic->recon_surface->texture->lpVtbl->GetDesc(pic->recon_surface->textu
> + re, &references_tex_array_desc);
> +
> +        for (uint32_t reference_subresource = 0; reference_subresource <
> references_tex_array_desc.DepthOrArraySize;
> +            reference_subresource++) {
> +
> +            //D3D12 DecomposeSubresource
> +            uint32_t mip_slice, plane_slice, array_slice, array_size;
> +            array_size = references_tex_array_desc.DepthOrArraySize;
> +            mip_slice = reference_subresource %
> references_tex_array_desc.MipLevels;
> +            array_slice = (reference_subresource /
> + references_tex_array_desc.MipLevels) % array_size;
> +
> +            for (plane_slice = 0; plane_slice < ctx->plane_count; plane_slice++) {
> +                //Calculate the subresource index
> +                uint32_t planeOutputSubresource = mip_slice + array_slice *
> references_tex_array_desc.MipLevels +
> +                                        plane_slice *
> + references_tex_array_desc.MipLevels * array_size;
> +
> +                if (reference_subresource == pic->subresource_index) {
> +                    barriers_ref[barriers_ref_index++] =
> TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture,
> planeOutputSubresource,
> +                                        D3D12_RESOURCE_STATE_COMMON,
> +                                        D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
> +                } else {
> +                    barriers_ref[barriers_ref_index++] =
> TRANSITION_BARRIER_SUBRESOURCE(pic->recon_surface->texture,
> planeOutputSubresource,
> +                                        D3D12_RESOURCE_STATE_COMMON,
> +                                        D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
> +                }
> +            }
> +        }
> +    } else {
> +        barriers_ref[barriers_ref_index++] = TRANSITION_BARRIER(pic-
> >recon_surface->texture,
> +                                        D3D12_RESOURCE_STATE_COMMON,
> +
> + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE);
> +
> +        if (d3d12_refs.NumTexture2Ds) {
> +            for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
> +                barriers_ref[barriers_ref_index++] =
> TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
> +                                                    D3D12_RESOURCE_STATE_COMMON,
> +
> D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
> +        }
>      }
> +    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list,
> + barriers_ref_index, barriers_ref);
> 
>      ID3D12VideoEncodeCommandList2_EncodeFrame(cmd_list, ctx->encoder, ctx-
> >encoder_heap,
>                                                &input_args, &output_args); @@ -422,16 +500,15 @@
> static int d3d12va_encode_issue(AVCodecContext *avctx,
> 
> 
> ID3D12VideoEncodeCommandList2_ResolveEncoderOutputMetadata(cmd_list,
> &input_metadata, &output_metadata);
> 
> -    if (d3d12_refs.NumTexture2Ds) {
> -        D3D12_RESOURCE_BARRIER refs_barriers[3];
> -
> -        for (i = 0; i < d3d12_refs.NumTexture2Ds; i++)
> -            refs_barriers[i] = TRANSITION_BARRIER(d3d12_refs.ppTexture2Ds[i],
> -                                                  D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ,
> -                                                  D3D12_RESOURCE_STATE_COMMON);
> -
> -        ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list,
> d3d12_refs.NumTexture2Ds,
> -                                                      refs_barriers);
> +    //swap the barriers_ref transition state
> +    if (barriers_ref_index > 0) {
> +        for (i = 0; i < barriers_ref_index; i++) {
> +            D3D12_RESOURCE_STATES temp_statue =
> barriers_ref[i].Transition.StateBefore;
> +            barriers_ref[i].Transition.StateBefore =
> barriers_ref[i].Transition.StateAfter;
> +            barriers_ref[i].Transition.StateAfter = temp_statue;
> +        }
> +        ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list,
> barriers_ref_index,
> +                                                      barriers_ref);
>      }
> 
>      barriers[0] = TRANSITION_BARRIER(pic->input_surface->texture,
> @@ -440,17 +517,14 @@ static int d3d12va_encode_issue(AVCodecContext
> *avctx,
>      barriers[1] = TRANSITION_BARRIER(pic->output_buffer,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
>                                       D3D12_RESOURCE_STATE_COMMON);
> -    barriers[2] = TRANSITION_BARRIER(pic->recon_surface->texture,
> -                                     D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
> -                                     D3D12_RESOURCE_STATE_COMMON);
> -    barriers[3] = TRANSITION_BARRIER(pic->encoded_metadata,
> +    barriers[2] = TRANSITION_BARRIER(pic->encoded_metadata,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ,
>                                       D3D12_RESOURCE_STATE_COMMON);
> -    barriers[4] = TRANSITION_BARRIER(pic->resolved_metadata,
> +    barriers[3] = TRANSITION_BARRIER(pic->resolved_metadata,
>                                       D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE,
>                                       D3D12_RESOURCE_STATE_COMMON);
> 
> -    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 5, barriers);
> +    ID3D12VideoEncodeCommandList2_ResourceBarrier(cmd_list, 4,
> + barriers);
> 
>      hr = ID3D12VideoEncodeCommandList2_Close(cmd_list);
>      if (FAILED(hr)) {
> @@ -489,6 +563,14 @@ static int d3d12va_encode_issue(AVCodecContext
> *avctx,
>      if (d3d12_refs.ppTexture2Ds)
>          av_freep(&d3d12_refs.ppTexture2Ds);
> 
> +    if (ctx->is_texture_array) {
> +        if (d3d12_refs.pSubresources)
> +            av_freep(&d3d12_refs.pSubresources);
> +    }
> +
> +    if (barriers_ref)
> +        av_freep(&barriers_ref);
> +
>      return 0;
> 
>  fail:
> @@ -498,6 +580,14 @@ fail:
>      if (d3d12_refs.ppTexture2Ds)
>          av_freep(&d3d12_refs.ppTexture2Ds);
> 
> +    if (ctx->is_texture_array) {
> +        if (d3d12_refs.pSubresources)
> +            av_freep(&d3d12_refs.pSubresources);
> +    }
> +
> +    if (barriers_ref)
> +        av_freep(&barriers_ref);
> +
>      if (ctx->codec->free_picture_params)
>          ctx->codec->free_picture_params(pic);
> 
> @@ -1088,13 +1178,15 @@ static int
> d3d12va_encode_init_gop_structure(AVCodecContext *avctx)
>          switch (ctx->codec->d3d12_codec) {
>              case D3D12_VIDEO_ENCODER_CODEC_H264:
>                  ref_l0 = FFMIN(support.PictureSupport.pH264Support-
> >MaxL0ReferencesForP,
> -                               support.PictureSupport.pH264Support-
> >MaxL1ReferencesForB);
> +                               support.PictureSupport.pH264Support-
> >MaxL1ReferencesForB ?
> +
> + support.PictureSupport.pH264Support->MaxL1ReferencesForB : UINT_MAX);
>                  ref_l1 = support.PictureSupport.pH264Support->MaxL1ReferencesForB;
>                  break;
> 
>              case D3D12_VIDEO_ENCODER_CODEC_HEVC:
>                  ref_l0 = FFMIN(support.PictureSupport.pHEVCSupport-
> >MaxL0ReferencesForP,
> -                               support.PictureSupport.pHEVCSupport-
> >MaxL1ReferencesForB);
> +                               support.PictureSupport.pHEVCSupport-
> >MaxL1ReferencesForB ?
> +
> + support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB : UINT_MAX);
>                  ref_l1 = support.PictureSupport.pHEVCSupport->MaxL1ReferencesForB;
>                  break;
> 

It seems to be another bugfix. If so, please add it as a separate patch.

> @@ -1336,6 +1428,47 @@ fail:
>      return err;
>  }
> 
> +static int d3d12va_create_texture_array(AVHWFramesContext *ctx,
> +D3D12VAEncodeContext *encode_context) {
> +    AVD3D12VAFramesContext *hwctx        = ctx->hwctx;
> +    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
> +
> +    AVD3D12VAFrame *frame;
> +    D3D12_HEAP_PROPERTIES props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
> +
> +    encode_context->max_subresource_array_size = MAX_DPB_SIZE + 1;
> +
> +    D3D12_RESOURCE_DESC desc = {
> +        .Dimension        = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
> +        .Alignment        = 0,
> +        .Width            = ctx->width,
> +        .Height           = ctx->height,
> +        .DepthOrArraySize = encode_context->max_subresource_array_size,
> +        .MipLevels        = 1,
> +        .Format           = hwctx->format,
> +        .SampleDesc       = {.Count = 1, .Quality = 0 },
> +        .Layout           = D3D12_TEXTURE_LAYOUT_UNKNOWN,
> +        .Flags            = hwctx->flags,
> +    };
> +
> +    frame = av_mallocz(sizeof(AVD3D12VAFrame));
> +    if (!frame)
> +        return AVERROR(ENOMEM);
> +
> +    if (FAILED(ID3D12Device_CreateCommittedResource(device_hwctx->device,
> &props, D3D12_HEAP_FLAG_NONE, &desc,
> +        D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void
> **)&frame->texture))) {
> +        av_log(ctx, AV_LOG_ERROR, "Could not create the texture\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    ID3D12Device_CreateFence(device_hwctx->device, 0,
> D3D12_FENCE_FLAG_NONE,
> +                                      &IID_ID3D12Fence, (void
> + **)&frame->sync_ctx.fence);
> +
> +    frame->sync_ctx.event = CreateEvent(NULL, FALSE, FALSE, NULL);
> +    encode_context->texture_array_frame = frame;
> +    return 0;
> +}
> +
>  static int d3d12va_encode_create_recon_frames(AVCodecContext *avctx)  {
>      FFHWBaseEncodeContext *base_ctx = avctx->priv_data; @@ -1394,6 +1527,7
> @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
>      FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
>      D3D12VAEncodeContext       *ctx = avctx->priv_data;
>      D3D12_FEATURE_DATA_VIDEO_FEATURE_AREA_SUPPORT support = { 0 };
> +    D3D12_FEATURE_DATA_FORMAT_INFO format_info = {0};
>      int err;
>      HRESULT hr;
> 
> @@ -1429,6 +1563,15 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
>          goto fail;
>      }
> 
> +    format_info.Format = ((AVD3D12VAFramesContext*)base_ctx->input_frames-
> >hwctx)->format;
> +    if (FAILED(ID3D12VideoDevice_CheckFeatureSupport(ctx->hwctx->device,
> D3D12_FEATURE_FORMAT_INFO,
> +        &format_info, sizeof(format_info)))) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to query format plane count:
> 0x%lx\n", hr);
> +        err = AVERROR_EXTERNAL;
> +        goto fail;
> +    }
> +    ctx->plane_count = format_info.PlaneCount;
> +
>      err = d3d12va_encode_set_profile(avctx);
>      if (err < 0)
>          goto fail;
> @@ -1485,6 +1628,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
>              goto fail;
>      }
> 
> +    if (ctx->is_texture_array) {
> +        d3d12va_create_texture_array(base_ctx->recon_frames, avctx-
> >priv_data);
> +    }
> +

You don't need this many arguments for this function. You could just pass avctx, like the other functions here do.

>      base_ctx->output_delay = base_ctx->b_per_p;
>      base_ctx->decode_delay = base_ctx->max_b_depth;
> 
> @@ -1528,6 +1675,18 @@ int ff_d3d12va_encode_close(AVCodecContext
> *avctx)
> 
>      av_buffer_pool_uninit(&ctx->output_buffer_pool);
> 
> +    if (ctx->is_texture_array) {
> +        ID3D12Resource *pResource = ctx->texture_array_frame->texture;
> +        if (pResource) {
> +            D3D12_OBJECT_RELEASE(pResource);
> +            ctx->texture_array_frame->texture = NULL;
> +        }
> +        D3D12_OBJECT_RELEASE(ctx->texture_array_frame->sync_ctx.fence);
> +        if (ctx->texture_array_frame->sync_ctx.event)
> +            CloseHandle(ctx->texture_array_frame->sync_ctx.event);
> +        av_free(ctx->texture_array_frame);
> +    }
> +
>      D3D12_OBJECT_RELEASE(ctx->command_list);
>      D3D12_OBJECT_RELEASE(ctx->command_queue);
> 
> diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h index
> 3b0b8153d5..fc31857f1a 100644
> --- a/libavcodec/d3d12va_encode.h
> +++ b/libavcodec/d3d12va_encode.h
> @@ -52,6 +52,8 @@ typedef struct D3D12VAEncodePicture {
>      ID3D12Resource *encoded_metadata;
>      ID3D12Resource *resolved_metadata;
> 
> +    int            subresource_index;
> +
>      D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
> 
>      int             fence_value;
> @@ -189,6 +191,33 @@ typedef struct D3D12VAEncodeContext {
>       */
>      AVBufferPool *output_buffer_pool;
> 
> +   /**
> +    * Flag indicates if the HW is texture array mode.
> +    */
> +   int is_texture_array;
> +
> +   /**
> +    * In texture array mode, the D3D12 uses the same texture array for all the
> input
> +    * reference pics in ppTexture2Ds and also for the pReconstructedPicture
> output
> +    * allocations, just different subresources.
> +    */
> +   AVD3D12VAFrame *texture_array_frame;
> +
> +   /**
> +    * The max number of subresources in the texture array.
> +    */
> +   int max_subresource_array_size;
> +
> +   /**
> +    * The used subresource index for pic in the texture array.
> +    */
> +   int subresource_used_index;
> +
> +   /**
> +    * The number of planes in the input DXGI FORMAT .
> +    */
> +   int plane_count;
> +
>      /**
>       * D3D12 video encoder.
>       */
> diff --git a/libavcodec/d3d12va_encode_hevc.c
> b/libavcodec/d3d12va_encode_hevc.c
> index 938ba01f54..7e1d973f7e 100644
> --- a/libavcodec/d3d12va_encode_hevc.c
> +++ b/libavcodec/d3d12va_encode_hevc.c
> @@ -280,9 +280,8 @@ static int
> d3d12va_encode_hevc_init_sequence_params(AVCodecContext *avctx)
>      }
> 
>      if (support.SupportFlags &
> D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RECONSTRUCTED_FRAMES_REQUIR
> E_TEXTURE_ARRAYS) {
> -        av_log(avctx, AV_LOG_ERROR, "D3D12 video encode on this device requires
> texture array support, "
> -               "but it's not implemented.\n");
> -        return AVERROR_PATCHWELCOME;
> +        ctx->is_texture_array = 1;
> +        av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device
> + uses texture array mode.\n");
>      }
> 
>      desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
> --

Again, thanks for the patch that enables texture arrays. However, I don't think it's a good idea to create the texture here and overwrite the original memory, which is allocated and maintained by the frame pool. Could you please take a look at hwcontext_d3d11va to see how it handles both texture arrays and single textures? There's already an initial_pool_size variable to handle the situation where the pool does not support dynamic allocation. In my opinion, we could add the same features to hwcontext_d3d12va to make it better.

Regards,
Tong

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-06-02  0:34 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-24  9:35 [FFmpeg-devel] [PATCH, v3] avcodec/d3d12va_encode: texture array support for HEVC Araz Iusubov
2025-06-02  0:33 ` Tong Wu

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git