* [FFmpeg-devel] [PATCH] avcodec/d3d12va_encode: add Region of Interest (ROI) support (PR #21295)
@ 2025-12-25 4:59 Steven Xiao via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: Steven Xiao via ffmpeg-devel @ 2025-12-25 4:59 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Steven Xiao
PR #21295 opened by Steven Xiao (younengxiao)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21295
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21295.patch
This pull request implements ROI (Region of Interest) encoding support for D3D12VA hardware encoders, enabling spatially-adaptive quality control for H.264, HEVC, and AV1 encoders.
Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support during initialization to check whether the hardware supports delta QP. If delta QP is supported, process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data and generate a delta QP map for each frame.
Sample command line:
```
ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4
```
>From d0a539d9717a778b2654d32fc32e554ae557a4d5 Mon Sep 17 00:00:00 2001
From: stevxiao <steven.xiao@amd.com>
Date: Wed, 24 Dec 2025 23:43:26 -0500
Subject: [PATCH] avcodec/d3d12va_encode: add Region of Interest (ROI) support
This commit implements ROI (Region of Interest) encoding support for D3D12VA hardware encoders, enabling spatially-adaptive quality control for H.264, HEVC, and AV1 encoders.
Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support during initialization to check whether the hardware supports delta QP. If delta QP is supported, process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data and generate a delta QP map for each frame.
Sample command line:
ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4
---
libavcodec/d3d12va_encode.c | 234 ++++++++++++++++++++++++++++++++++++
libavcodec/d3d12va_encode.h | 10 ++
2 files changed, 244 insertions(+)
diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index de95518be5..e070a0b650 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -140,6 +140,93 @@ static int d3d12va_encode_wait(AVCodecContext *avctx,
return 0;
}
+/**
+ * Build the delta QP map for one picture from ROI side data.
+ *
+ * The frame is divided into square blocks of ctx->qp_map_region_size pixels
+ * and every block gets one signed delta QP entry (0 outside of any ROI).
+ * Where regions overlap, the region that comes first in the side data wins.
+ *
+ * Entries are stored as int16_t for AV1, because the map is handed to the
+ * AV1 picture control data as an INT16 array, and as int8_t for H.264/HEVC.
+ *
+ * @param avctx codec context
+ * @param pic   picture that receives the map in pic->qp_map and the number
+ *              of map entries in pic->qp_map_size; the map is released
+ *              together with the picture
+ * @param data  ROI side data, a packed array of AVRegionOfInterest
+ * @param size  size of data in bytes
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int d3d12va_encode_setup_roi(AVCodecContext *avctx,
+                                    D3D12VAEncodePicture *pic,
+                                    const uint8_t *data, size_t size)
+{
+    D3D12VAEncodeContext *ctx = avctx->priv_data;
+    const AVRegionOfInterest *roi;
+    uint32_t roi_size;
+    int nb_roi, i;
+    int block_width, block_height, nb_blocks;
+    int block_size, qp_range;
+    size_t entry_size;
+    void *qp_map;
+
+    // Use the QP map region size reported by the driver
+    block_size = ctx->qp_map_region_size;
+    if (block_size <= 0) {
+        // Would otherwise be used as a divisor below
+        av_log(avctx, AV_LOG_ERROR, "Invalid QP map region size %d.\n",
+               block_size);
+        return AVERROR(EINVAL);
+    }
+
+    // Determine QP range and map entry width based on codec
+    switch (ctx->codec->d3d12_codec) {
+    case D3D12_VIDEO_ENCODER_CODEC_H264:
+    case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+        qp_range   = 51;
+        entry_size = sizeof(int8_t);
+        break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+    case D3D12_VIDEO_ENCODER_CODEC_AV1:
+        qp_range   = 255;
+        entry_size = sizeof(int16_t);
+        break;
+#endif
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec for ROI.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!data || size < sizeof(roi->self_size)) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid ROI side data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    // Validate the side data layout before allocating anything
+    roi      = (const AVRegionOfInterest *)data;
+    roi_size = roi->self_size;
+    av_assert0(roi_size && size % roi_size == 0);
+    nb_roi   = size / roi_size;
+
+    // Calculate map dimensions using ceil division as required by D3D12
+    block_width  = (avctx->width  + block_size - 1) / block_size;
+    block_height = (avctx->height + block_size - 1) / block_size;
+    nb_blocks    = block_width * block_height;
+
+    // Allocate QP map (initialized to 0 for non-ROI areas)
+    qp_map = av_calloc(nb_blocks, entry_size);
+    if (!qp_map)
+        return AVERROR(ENOMEM);
+
+    // Iterate in reverse for priority (first region in array takes priority on overlap)
+    for (i = nb_roi - 1; i >= 0; i--) {
+        int startx, endx, starty, endy;
+        int delta_qp;
+        int x, y;
+
+        roi = (const AVRegionOfInterest *)(data + roi_size * i);
+
+        if (roi->qoffset.den == 0) {
+            av_free(qp_map);
+            av_log(avctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n");
+            return AVERROR(EINVAL);
+        }
+
+        // Convert pixel coordinates to block coordinates. Both ends are
+        // clamped to [0, map dimension] so that regions reaching outside
+        // the frame (including negative coordinates) never index outside
+        // the map. bottom/right are limited to the frame size first so the
+        // round-up addition cannot overflow.
+        starty = av_clip(roi->top / block_size, 0, block_height);
+        endy   = av_clip((FFMIN(roi->bottom, avctx->height) + block_size - 1) / block_size,
+                         0, block_height);
+        startx = av_clip(roi->left / block_size, 0, block_width);
+        endx   = av_clip((FFMIN(roi->right, avctx->width) + block_size - 1) / block_size,
+                         0, block_width);
+
+        // Convert qoffset to delta QP, limited to the codec's QP range
+        delta_qp = roi->qoffset.num * qp_range / roi->qoffset.den;
+        delta_qp = av_clip(delta_qp, -qp_range, qp_range);
+
+        av_log(avctx, AV_LOG_DEBUG, "ROI: (%d,%d)-(%d,%d) -> %+d.\n",
+               roi->top, roi->left, roi->bottom, roi->right, delta_qp);
+
+        // Fill QP map for this ROI region
+        for (y = starty; y < endy; y++) {
+            for (x = startx; x < endx; x++) {
+                if (entry_size == sizeof(int16_t))
+                    ((int16_t *)qp_map)[x + y * block_width] = delta_qp;
+                else
+                    ((int8_t *)qp_map)[x + y * block_width] = delta_qp;
+            }
+        }
+    }
+
+    pic->qp_map      = qp_map;
+    pic->qp_map_size = nb_blocks;
+
+    return 0;
+}
+
+
static int d3d12va_encode_create_metadata_buffers(AVCodecContext *avctx,
D3D12VAEncodePicture *pic)
{
@@ -366,6 +453,49 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
}
}
+ // Process ROI side data if present and supported
+
+ AVFrameSideData *sd = av_frame_get_side_data(base_pic->input_image,
+ AV_FRAME_DATA_REGIONS_OF_INTEREST);
+ if (sd && base_ctx->roi_allowed) {
+ err = d3d12va_encode_setup_roi(avctx, pic, sd->data, sd->size);
+ if (err < 0)
+ goto fail;
+
+ // Enable delta QP flag in rate control only if supported
+ input_args.SequenceControlDesc.RateControl.Flags |= D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP;
+
+ // Set QP map in codec-specific picture control data
+ switch (ctx->codec->d3d12_codec) {
+ case D3D12_VIDEO_ENCODER_CODEC_H264:
+ if (pic->pic_ctl.pH264PicData) {
+ pic->pic_ctl.pH264PicData->QPMapValuesCount = pic->qp_map_size;
+ pic->pic_ctl.pH264PicData->pRateControlQPMap = pic->qp_map;
+ }
+ break;
+ case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+ if (pic->pic_ctl.pHEVCPicData) {
+ pic->pic_ctl.pHEVCPicData->QPMapValuesCount = pic->qp_map_size;
+ pic->pic_ctl.pHEVCPicData->pRateControlQPMap = pic->qp_map;
+ }
+ break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+ case D3D12_VIDEO_ENCODER_CODEC_AV1:
+ if (pic->pic_ctl.pAV1PicData) {
+ pic->pic_ctl.pAV1PicData->QPMapValuesCount = pic->qp_map_size;
+ pic->pic_ctl.pAV1PicData->pRateControlQPMap = (INT16 *)pic->qp_map;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+
+ av_log(avctx, AV_LOG_DEBUG, "ROI delta QP map created with %d blocks (region size: %d pixels).\n",
+ pic->qp_map_size, ctx->qp_map_region_size);
+ }
+
+
input_args.PictureControlDesc.IntraRefreshFrameIndex = ctx->intra_refresh_frame_index;
if (base_pic->is_reference)
input_args.PictureControlDesc.Flags |= D3D12_VIDEO_ENCODER_PICTURE_CONTROL_FLAG_USED_AS_REFERENCE_PICTURE;
@@ -669,6 +799,9 @@ static int d3d12va_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic
if (ctx->codec->free_picture_params)
ctx->codec->free_picture_params(priv);
+ // Free ROI QP map if allocated
+ av_freep(&priv->qp_map);
+
return 0;
}
@@ -1318,6 +1451,103 @@ static int d3d12va_encode_init_gop_structure(AVCodecContext *avctx)
return 0;
}
+/**
+ * Probe whether ROI encoding through a delta QP map can be used.
+ *
+ * Queries D3D12_FEATURE_VIDEO_ENCODER_SUPPORT1 for the current codec, rate
+ * control, GOP and resolution settings. ROI is enabled
+ * (base_ctx->roi_allowed = 1, ctx->qp_map_region_size set) only when the
+ * driver reports general support, delta QP availability and a non-zero
+ * QP map region size. In every other case ROI stays disabled and
+ * initialization continues.
+ *
+ * @param avctx codec context
+ * @return 0; a missing ROI capability is not treated as an error
+ */
+static int d3d12va_encode_init_roi(AVCodecContext *avctx)
+{
+    FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
+    D3D12VAEncodeContext *ctx = avctx->priv_data;
+    AVD3D12VAFramesContext *frames_hwctx = base_ctx->input_frames->hwctx;
+    HRESULT hr;
+
+    D3D12_VIDEO_ENCODER_PROFILE_DESC profile = { 0 };
+    D3D12_VIDEO_ENCODER_PROFILE_H264 h264_profile = D3D12_VIDEO_ENCODER_PROFILE_H264_MAIN;
+    D3D12_VIDEO_ENCODER_PROFILE_HEVC hevc_profile = D3D12_VIDEO_ENCODER_PROFILE_HEVC_MAIN;
+#if CONFIG_AV1_D3D12VA_ENCODER
+    D3D12_VIDEO_ENCODER_AV1_PROFILE av1_profile = D3D12_VIDEO_ENCODER_AV1_PROFILE_MAIN;
+#endif
+
+    D3D12_VIDEO_ENCODER_LEVEL_SETTING level = { 0 };
+    D3D12_VIDEO_ENCODER_LEVELS_H264 h264_level = { 0 };
+    D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC hevc_level = { 0 };
+#if CONFIG_AV1_D3D12VA_ENCODER
+    D3D12_VIDEO_ENCODER_AV1_LEVEL_TIER_CONSTRAINTS av1_level = { 0 };
+#endif
+
+    // Initialize to defaults
+    ctx->qp_map_region_size = 0;
+    base_ctx->roi_allowed = 0;
+
+    // Point the suggested profile/level descriptors at codec-specific storage
+    switch (ctx->codec->d3d12_codec) {
+    case D3D12_VIDEO_ENCODER_CODEC_H264:
+        profile.DataSize = sizeof(D3D12_VIDEO_ENCODER_PROFILE_H264);
+        profile.pH264Profile = &h264_profile;
+        level.DataSize = sizeof(D3D12_VIDEO_ENCODER_LEVELS_H264);
+        level.pH264LevelSetting = &h264_level;
+        break;
+    case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+        profile.DataSize = sizeof(D3D12_VIDEO_ENCODER_PROFILE_HEVC);
+        profile.pHEVCProfile = &hevc_profile;
+        level.DataSize = sizeof(D3D12_VIDEO_ENCODER_LEVEL_TIER_CONSTRAINTS_HEVC);
+        level.pHEVCLevelSetting = &hevc_level;
+        break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+    case D3D12_VIDEO_ENCODER_CODEC_AV1:
+        profile.DataSize = sizeof(D3D12_VIDEO_ENCODER_AV1_PROFILE);
+        profile.pAV1Profile = &av1_profile;
+        level.DataSize = sizeof(D3D12_VIDEO_ENCODER_AV1_LEVEL_TIER_CONSTRAINTS);
+        level.pAV1LevelSetting = &av1_level;
+        break;
+#endif
+    default:
+        av_assert0(0);
+    }
+
+    // Query encoder support to check if delta QP works with current configuration
+    D3D12_FEATURE_DATA_VIDEO_ENCODER_SUPPORT1 support = {
+        .NodeIndex                   = 0,
+        .Codec                       = ctx->codec->d3d12_codec,
+        .InputFormat                 = frames_hwctx->format,
+        .RateControl                 = ctx->rc,
+        .IntraRefresh                = ctx->intra_refresh.Mode,
+        .SubregionFrameEncoding      = D3D12_VIDEO_ENCODER_FRAME_SUBREGION_LAYOUT_MODE_FULL_FRAME,
+        .ResolutionsListCount        = 1,
+        .pResolutionList             = &ctx->resolution,
+        .CodecGopSequence            = ctx->gop,
+        .MaxReferenceFramesInDPB     = MAX_DPB_SIZE - 1,
+        .CodecConfiguration          = ctx->codec_conf,
+        .SuggestedProfile            = profile,
+        .SuggestedLevel              = level,
+        // The driver writes the per-resolution limits into ctx->res_limits
+        .pResolutionDependentSupport = &ctx->res_limits,
+#if CONFIG_AV1_D3D12VA_ENCODER
+        .SubregionFrameEncodingData.pTilesPartition_AV1 = ctx->subregions_layout.pTilesPartition_AV1,
+#endif
+    };
+
+    hr = ID3D12VideoDevice3_CheckFeatureSupport(ctx->video_device3,
+                                                D3D12_FEATURE_VIDEO_ENCODER_SUPPORT1,
+                                                &support, sizeof(support));
+    if (FAILED(hr)) {
+        av_log(avctx, AV_LOG_WARNING, "Failed to query encoder support for ROI, disabling ROI.\n");
+        return 0;
+    }
+
+    // Check if the configuration with DELTA_QP is supported
+    if (!(support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_GENERAL_SUPPORT_OK) ||
+        !(support.SupportFlags & D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE)) {
+        av_log(avctx, AV_LOG_VERBOSE, "ROI encoding not supported by hardware for current rate control mode "
+               "(SupportFlags: 0x%x, ValidationFlags: 0x%x).\n",
+               support.SupportFlags, support.ValidationFlags);
+        return 0;
+    }
+
+    // The region size is later used as a divisor when building the per-frame
+    // QP map, so a driver reporting 0 must not enable ROI.
+    if (!ctx->res_limits.QPMapRegionPixelsSize) {
+        av_log(avctx, AV_LOG_WARNING, "Driver reports delta QP support but a QP map "
+               "region size of 0, disabling ROI.\n");
+        return 0;
+    }
+
+    base_ctx->roi_allowed = 1;
+    // Store the QP map region size from resolution limits
+    ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+
+    av_log(avctx, AV_LOG_VERBOSE, "ROI encoding is supported via delta QP "
+           "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+
+    return 0;
+}
+
+
static int d3d12va_encode_init_intra_refresh(AVCodecContext *avctx)
{
FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
@@ -1770,6 +2000,10 @@ int ff_d3d12va_encode_init(AVCodecContext *avctx)
if (err < 0)
goto fail;
+ err = d3d12va_encode_init_roi(avctx);
+ if (err < 0)
+ goto fail;
+
if (ctx->codec->init_sequence_params) {
err = ctx->codec->init_sequence_params(avctx);
if (err < 0) {
diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h
index fcb97210b3..aec1abdc4f 100644
--- a/libavcodec/d3d12va_encode.h
+++ b/libavcodec/d3d12va_encode.h
@@ -57,6 +57,10 @@ typedef struct D3D12VAEncodePicture {
D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
int fence_value;
+
+ // ROI delta QP map
+ int8_t *qp_map;
+ int qp_map_size;
} D3D12VAEncodePicture;
typedef struct D3D12VAEncodeProfile {
@@ -282,6 +286,12 @@ typedef struct D3D12VAEncodeContext {
*/
D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE me_precision;
+
+ /**
+ * QP map region pixel size (block size for QP map)
+ */
+ int qp_map_region_size;
+
} D3D12VAEncodeContext;
typedef struct D3D12VAEncodeType {
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-12-25 5:03 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-25 4:59 [FFmpeg-devel] [PATCH] avcodec/d3d12va_encode: add Region of Interest (ROI) support (PR #21295) Steven Xiao via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git