* [FFmpeg-devel] [PATCH] WIP: libavcodec/vulkan_encode_av1: fix non-monotonic DTS (PR #20713)
@ 2025-10-17  5:37 my4ng via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: my4ng via ffmpeg-devel @ 2025-10-17  5:37 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: my4ng
PR #20713 opened by my4ng
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20713
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20713.patch
This fixes issues #20540 and #20657 using VAAPI's approach.
It combines a P-frame with the following B-frame into a single packet that takes the latter's order, and emits a tail packet with a show_existing_frame header to show the P-frame at the correct PTS, as shown below:
```
I ---> B1 ---> B2 ---> P                  Display Order
I ---> P ---> B1 ---> B2                  Encode Order
(I) ---> (P, B1) ---> (B2) ---> (P')      VAAPI Packets
0/0      1/1          2/2       3/3       PTS/DTS
(I) ---> (P) ---> (B1) ---> (B2)          Vulkan Packets (current)
0/0      3/3      1/1       2/2           PTS/DTS
```
This is still WIP, as it has to bypass a few CBS checks, though it does work as intended. Since I am not familiar with the CBS system, comments and contributions to remove the hacks are more than welcome.
>From 794072bf808c1f7811900c21c903d24349711df8 Mon Sep 17 00:00:00 2001
From: Michael Yang <michaely4ng@proton.me>
Date: Wed, 15 Oct 2025 17:43:00 +1100
Subject: [PATCH] libavcodec/vulkan_encode_av1: fix non-monotonic DTS
Combine a P-frame with the following B-frame into a single packet that
takes the latter's order. Emit a tail packet with a show_existing_frame
header to show the P-frame at the correct PTS.
---
 libavcodec/cbs_av1_syntax_template.c | 22 +++++------
 libavcodec/vulkan_encode.c           | 55 ++++++++++++++++++++++++----
 libavcodec/vulkan_encode.h           |  9 ++++-
 libavcodec/vulkan_encode_av1.c       | 42 +++++++++++++++++++++
 4 files changed, 108 insertions(+), 20 deletions(-)
diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c
index 5518544a4d..47224a2f27 100644
--- a/libavcodec/cbs_av1_syntax_template.c
+++ b/libavcodec/cbs_av1_syntax_template.c
@@ -1345,12 +1345,12 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
             fb(3, frame_to_show_map_idx);
             ref = &priv->ref[current->frame_to_show_map_idx];
 
-            if (!ref->valid) {
-                av_log(ctx->log_ctx, AV_LOG_ERROR, "Missing reference frame needed for "
-                       "show_existing_frame (frame_to_show_map_idx = %d).\n",
-                       current->frame_to_show_map_idx);
-                return AVERROR_INVALIDDATA;
-            }
+            // if (!ref->valid) {
+            //     av_log(ctx->log_ctx, AV_LOG_ERROR, "Missing reference frame needed for "
+            //            "show_existing_frame (frame_to_show_map_idx = %d).\n",
+            //            current->frame_to_show_map_idx);
+            //     return AVERROR_INVALIDDATA;
+            // }
 
             if (seq->decoder_model_info_present_flag &&
                 !seq->timing_info.equal_picture_interval) {
@@ -1361,7 +1361,7 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
             if (seq->frame_id_numbers_present_flag)
                 fb(id_len, display_frame_id);
 
-            infer(frame_type, ref->frame_type);
+            // infer(frame_type, ref->frame_type);
             if (current->frame_type == AV1_FRAME_KEY) {
                 infer(refresh_frame_flags, all_frames);
 
@@ -1386,10 +1386,10 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
             } else
                 infer(refresh_frame_flags, 0);
 
-            infer(frame_width_minus_1,   ref->upscaled_width - 1);
-            infer(frame_height_minus_1,  ref->frame_height - 1);
-            infer(render_width_minus_1,  ref->render_width - 1);
-            infer(render_height_minus_1, ref->render_height - 1);
+            // infer(frame_width_minus_1,   ref->upscaled_width - 1);
+            // infer(frame_height_minus_1,  ref->frame_height - 1);
+            // infer(render_width_minus_1,  ref->render_width - 1);
+            // infer(render_height_minus_1, ref->render_height - 1);
 
             // Section 7.20
             goto update_refs;
diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
index e5c0496f1c..fdaff3fff7 100644
--- a/libavcodec/vulkan_encode.c
+++ b/libavcodec/vulkan_encode.c
@@ -463,7 +463,10 @@ static int vulkan_encode_output(AVCodecContext *avctx,
 {
     VkResult ret;
     FFVulkanEncodePicture *vp = base_pic->priv;
+    FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
     FFVulkanEncodeContext *ctx = avctx->priv_data;
+    AVPacket *pkt_ptr = pkt;
+
     FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
     uint32_t *query_data;
 
@@ -513,20 +516,56 @@ static int vulkan_encode_output(AVCodecContext *avctx,
         vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
     }
 
-    pkt->data = sd_buf->mapped_mem;
-    pkt->size = vp->slices_offset + /* base offset */
-                query_data[0]       /* secondary offset */ +
-                query_data[1]       /* size */;
+    if (vp->non_independent_frame) {
+        av_assert0(!ctx->prev_buf_ref);
+        size_t prev_buf_size = vp->slices_offset + query_data[0] + query_data[1];
+        ctx->prev_buf_ref = vp->pkt_buf;
+        ctx->prev_buf_size = prev_buf_size;
+        vp->pkt_buf = NULL;
 
-    /* Move reference */
-    pkt->buf = vp->pkt_buf;
-    vp->pkt_buf = NULL;
+        if (vp->tail_size) {
+            if (base_ctx->tail_pkt->size)
+                return AVERROR_BUG;
+
+            ret = ff_get_encode_buffer(avctx, base_ctx->tail_pkt, vp->tail_size, 0);
+            if (ret < 0)
+                return ret;
+
+            memcpy(base_ctx->tail_pkt->data, vp->tail_data, vp->tail_size);
+            pkt_ptr = base_ctx->tail_pkt;
+        }
+    } else {
+        if (ctx->prev_buf_ref) {
+            FFVkBuffer *prev_sd_buf = (FFVkBuffer *)ctx->prev_buf_ref->data;
+            size_t prev_size = ctx->prev_buf_size;
+            size_t size = (vp->slices_offset + query_data[0] + query_data[1]);
+
+            ret = ff_get_encode_buffer(avctx, pkt, prev_size + size, 0);
+            if (ret < 0)
+                return ret;
+
+            memcpy(pkt->data, prev_sd_buf->mapped_mem, prev_size);
+            memcpy(pkt->data + prev_size, sd_buf->mapped_mem, size);
+
+            av_buffer_unref(&ctx->prev_buf_ref);
+            av_buffer_unref(&vp->pkt_buf);
+        } else {
+            pkt->data = sd_buf->mapped_mem;
+            pkt->size = vp->slices_offset + /* base offset */
+                        query_data[0]       /* secondary offset */ +
+                        query_data[1]       /* size */;
+
+            /* Move reference */
+            pkt->buf = vp->pkt_buf;
+            vp->pkt_buf = NULL;
+        }
+    }
 
     av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
            base_pic->display_order, base_pic->encode_order);
 
     return ff_hw_base_encode_set_output_property(&ctx->base, avctx,
-                                                 base_pic, pkt,
+                                                 base_pic, pkt_ptr,
                                                  ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
 }
 
diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h
index 3df06e11d0..d40e94fd67 100644
--- a/libavcodec/vulkan_encode.h
+++ b/libavcodec/vulkan_encode.h
@@ -57,6 +57,10 @@ typedef struct FFVulkanEncodePicture {
     FFVkExecContext       *exec;
     AVBufferRef           *pkt_buf;
     int                    slices_offset;
+
+    int non_independent_frame;
+    char tail_data[16];
+    size_t tail_size;
 } FFVulkanEncodePicture;
 
 /**
@@ -163,9 +167,9 @@ typedef struct FFVkEncodeCommonOptions {
 } FFVkEncodeCommonOptions;
 
 typedef struct FFVulkanEncodeContext {
+    FFHWBaseEncodeContext base;
     FFVulkanContext s;
     FFVkVideoCommon common;
-    FFHWBaseEncodeContext base;
     const FFVulkanCodec *codec;
 
     int explicit_qp;
@@ -192,6 +196,9 @@ typedef struct FFVulkanEncodeContext {
     FFVkExecPool enc_pool;
 
     FFHWBaseEncodePicture *slots[32];
+
+    AVBufferRef *prev_buf_ref;
+    size_t prev_buf_size;
 } FFVulkanEncodeContext;
 
 #define VULKAN_ENCODE_COMMON_OPTIONS \
diff --git a/libavcodec/vulkan_encode_av1.c b/libavcodec/vulkan_encode_av1.c
index bb47ddd7f1..e94f9b1b21 100644
--- a/libavcodec/vulkan_encode_av1.c
+++ b/libavcodec/vulkan_encode_av1.c
@@ -80,6 +80,7 @@ typedef struct VulkanEncodeAV1Context {
     AV1RawOBU seq_hdr_obu;
     AV1RawOBU meta_cll_obu;
     AV1RawOBU meta_mastering_obu;
+    AV1RawOBU show_existing_obu;
 
     VkVideoEncodeAV1ProfileInfoKHR profile;
 
@@ -172,6 +173,12 @@ static void set_name_slot(int slot, int *slot_indices, uint32_t allowed_idx, int
     av_assert0(0);
 }
 
+static int vulkan_encode_av1_add_obu(AVCodecContext *, CodedBitstreamFragment *,
+                                     uint8_t, void *);
+
+static int vulkan_encode_av1_write_obu(AVCodecContext *,
+                                       uint8_t *, size_t *,
+                                       CodedBitstreamFragment *);
 
 static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
                            VkVideoEncodeInfoKHR *encode_info)
@@ -542,6 +549,41 @@ static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
         }
     }
 
+    FFVulkanEncodePicture *vp = pic->priv;
+    vp->tail_size = 0;
+    vp->non_independent_frame = pic->encode_order < pic->display_order;
+    int ret = 0;
+
+    if (vp->non_independent_frame) {
+        CodedBitstreamFragment *current_obu = &enc->current_access_unit;
+        AV1RawOBU *fh_obu = &enc->show_existing_obu;
+        AV1RawFrameHeader *fh = &fh_obu->obu.frame_header;
+
+        memset(fh_obu, 0, sizeof(*fh_obu));
+        fh_obu->header.obu_type = AV1_OBU_FRAME_HEADER;
+        fh_obu->header.obu_has_size_field = 1;
+
+        fh->show_existing_frame   = 1;
+        fh->frame_to_show_map_idx = ap->slot != 0;
+        fh->frame_type            = AV1_FRAME_INTER;
+        fh->frame_width_minus_1   = avctx->width - 1;
+        fh->frame_height_minus_1  = avctx->height - 1;
+        fh->render_width_minus_1  = fh->frame_width_minus_1;
+        fh->render_height_minus_1 = fh->frame_height_minus_1;
+
+        ((CodedBitstreamAV1Context *)enc->cbs->priv_data)->seen_frame_header = 0;
+
+        ret = vulkan_encode_av1_add_obu(avctx, current_obu, AV1_OBU_FRAME_HEADER, fh_obu);
+        if (ret < 0)
+            goto end;
+
+        ret = vulkan_encode_av1_write_obu(avctx, vp->tail_data, &vp->tail_size, current_obu);
+
+end:
+        ff_cbs_fragment_reset(current_obu);
+        return ret;
+    }
+
     return 0;
 }
 
-- 
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply	[flat|nested] only message in thread
only message in thread, other threads:[~2025-10-17  5:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-17  5:37 [FFmpeg-devel] [PATCH] WIP: libavcodec/vulkan_encode_av1: fix non-monotonic DTS (PR #20713) my4ng via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git