This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new 21a3e44fbe avcodec/d3d12va_encode: add Region of Interest (ROI) support
21a3e44fbe is described below
commit 21a3e44fbe12dd6e461db6e2b0f78231a4c9a1ae
Author: stevxiao <[email protected]>
AuthorDate: Wed Dec 24 23:43:26 2025 -0500
Commit: Tong Wu <[email protected]>
CommitDate: Mon Jan 12 02:45:48 2026 +0000
avcodec/d3d12va_encode: add Region of Interest (ROI) support
This commit implements ROI (Region of Interest) encoding support for
D3D12VA hardware encoders, enabling spatially-adaptive quality control for
H.264, HEVC, and AV1 encoders.
Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support
during initialization to check whether the hardware supports delta QP. If
delta QP is supported, process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data
and generate a delta QP map for each frame.
Sample command line:
ffmpeg.exe -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf
addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4
---
libavcodec/d3d12va_encode.c | 116 +++++++++++++++++++++++++++++++++++++++
libavcodec/d3d12va_encode.h | 8 +++
libavcodec/d3d12va_encode_av1.c | 18 ++++++
libavcodec/d3d12va_encode_h264.c | 19 +++++++
libavcodec/d3d12va_encode_hevc.c | 19 +++++++
5 files changed, 180 insertions(+)
diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index de95518be5..1fba31117d 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -140,6 +140,105 @@ static int d3d12va_encode_wait(AVCodecContext *avctx,
return 0;
}
+/**
+ * Build the per-picture delta QP map from ROI side data.
+ *
+ * The frame is divided into square blocks of ctx->qp_map_region_size
+ * pixels and each block gets one delta QP entry (int8_t for H.264 and
+ * HEVC, int16_t for AV1). Blocks not covered by any region keep 0.
+ *
+ * @param avctx  codec context, priv_data is a D3D12VAEncodeContext
+ * @param pic    picture receiving the map in qp_map / qp_map_size
+ * @param data   array of AVRegionOfInterest entries
+ * @param size   size of data in bytes
+ * @return 0 on success, AVERROR(EINVAL) for an unsupported codec, an
+ *         unusable block size or a zero qoffset denominator,
+ *         AVERROR(ENOMEM) if the map cannot be allocated. A map
+ *         allocated here is freed again before an error is returned.
+ */
+static int d3d12va_encode_setup_roi(AVCodecContext *avctx,
+                                    D3D12VAEncodePicture *pic,
+                                    const uint8_t *data, size_t size)
+{
+    D3D12VAEncodeContext *ctx = avctx->priv_data;
+    const AVRegionOfInterest *roi;
+    uint32_t roi_size;
+    int nb_roi, i;
+    int block_width, block_height;
+    int block_size, qp_range;
+    int is_av1 = 0;
+
+    // Use the QP map region size reported by the driver. It is used as
+    // a divisor below, so reject non-positive values up front.
+    block_size = ctx->qp_map_region_size;
+    if (block_size <= 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Invalid QP map region size %d.\n", block_size);
+        return AVERROR(EINVAL);
+    }
+
+    // Determine QP range and element size based on codec
+    switch (ctx->codec->d3d12_codec) {
+    case D3D12_VIDEO_ENCODER_CODEC_H264:
+    case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+        qp_range = 51;
+        is_av1   = 0;
+        break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+    case D3D12_VIDEO_ENCODER_CODEC_AV1:
+        qp_range = 255;
+        is_av1   = 1;
+        break;
+#endif
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec for ROI.\n");
+        return AVERROR(EINVAL);
+    }
+
+    // Calculate map dimensions using ceil division as required by D3D12
+    block_width  = (avctx->width  + block_size - 1) / block_size;
+    block_height = (avctx->height + block_size - 1) / block_size;
+
+    // Allocate a zeroed QP map with the element type the codec expects
+    pic->qp_map = av_calloc(block_width * block_height,
+                            is_av1 ? sizeof(int16_t) : sizeof(int8_t));
+    if (!pic->qp_map)
+        return AVERROR(ENOMEM);
+
+    // Process ROI regions
+    roi      = (const AVRegionOfInterest *)data;
+    roi_size = roi->self_size;
+    av_assert0(roi_size && size % roi_size == 0);
+    nb_roi   = size / roi_size;
+
+    // Iterate in reverse for priority (first region in array takes
+    // priority on overlap)
+    for (i = nb_roi - 1; i >= 0; i--) {
+        int startx, endx, starty, endy;
+        int top, bottom, left, right;
+        int64_t scaled_qp;
+        int delta_qp;
+        int x, y;
+
+        roi = (const AVRegionOfInterest *)(data + roi_size * i);
+
+        if (roi->qoffset.den == 0) {
+            av_freep(&pic->qp_map);
+            av_log(avctx, AV_LOG_ERROR,
+                   "AVRegionOfInterest.qoffset.den must "
+                   "not be zero.\n");
+            return AVERROR(EINVAL);
+        }
+
+        // Clip the rectangle to the frame first. Without the lower
+        // bound a negative coordinate would yield a negative block
+        // index and a write in front of the map.
+        top    = av_clip(roi->top,    0, avctx->height);
+        bottom = av_clip(roi->bottom, 0, avctx->height);
+        left   = av_clip(roi->left,   0, avctx->width);
+        right  = av_clip(roi->right,  0, avctx->width);
+
+        // Convert pixel coordinates to block coordinates; the end
+        // edges round up so partially covered blocks are included.
+        starty = top  / block_size;
+        endy   = (bottom + block_size - 1) / block_size;
+        startx = left / block_size;
+        endx   = (right  + block_size - 1) / block_size;
+
+        // Convert qoffset to delta QP. Scale in 64 bits so that a large
+        // numerator cannot overflow, then clamp to the element type.
+        scaled_qp = (int64_t)roi->qoffset.num * qp_range /
+                    roi->qoffset.den;
+        delta_qp  = av_clip64(scaled_qp,
+                              is_av1 ? INT16_MIN : INT8_MIN,
+                              is_av1 ? INT16_MAX : INT8_MAX);
+
+        av_log(avctx, AV_LOG_DEBUG, "ROI: (%d,%d)-(%d,%d) -> %+d.\n",
+               roi->top, roi->left, roi->bottom, roi->right, delta_qp);
+
+        // Fill QP map for this ROI region with correct type
+        if (is_av1) {
+            int16_t *qp_map_int16 = (int16_t *)pic->qp_map;
+            for (y = starty; y < endy; y++)
+                for (x = startx; x < endx; x++)
+                    qp_map_int16[x + y * block_width] = delta_qp;
+        } else {
+            int8_t *qp_map_int8 = (int8_t *)pic->qp_map;
+            for (y = starty; y < endy; y++)
+                for (x = startx; x < endx; x++)
+                    qp_map_int8[x + y * block_width] = delta_qp;
+        }
+    }
+
+    pic->qp_map_size = block_width * block_height;
+
+    return 0;
+}
+
static int d3d12va_encode_create_metadata_buffers(AVCodecContext *avctx,
D3D12VAEncodePicture *pic)
{
@@ -299,6 +398,20 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
if (err < 0)
goto fail;
+ // Process ROI side data if present and supported
+ AVFrameSideData *sd = av_frame_get_side_data(base_pic->input_image,
+
AV_FRAME_DATA_REGIONS_OF_INTEREST);
+ if (sd && base_ctx->roi_allowed) {
+ err = d3d12va_encode_setup_roi(avctx, pic, sd->data, sd->size);
+ if (err < 0)
+ goto fail;
+
+ // Enable delta QP flag in rate control only if supported
+ input_args.SequenceControlDesc.RateControl.Flags |=
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP;
+ av_log(avctx, AV_LOG_DEBUG, "ROI delta QP map created with %d blocks
(region size: %d pixels).\n",
+ pic->qp_map_size, ctx->qp_map_region_size);
+ }
+
if (ctx->codec->init_picture_params) {
err = ctx->codec->init_picture_params(avctx, base_pic);
if (err < 0) {
@@ -669,6 +782,9 @@ static int d3d12va_encode_free(AVCodecContext *avctx,
FFHWBaseEncodePicture *pic
if (ctx->codec->free_picture_params)
ctx->codec->free_picture_params(priv);
+ // Free ROI QP map if allocated
+ av_freep(&priv->qp_map);
+
return 0;
}
diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h
index fcb97210b3..7eecdb6eb8 100644
--- a/libavcodec/d3d12va_encode.h
+++ b/libavcodec/d3d12va_encode.h
@@ -57,6 +57,10 @@ typedef struct D3D12VAEncodePicture {
D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
int fence_value;
+
+ // ROI delta QP map (void* to support both INT8 for H.264/HEVC and INT16
for AV1)
+ void *qp_map;
+ int qp_map_size;
} D3D12VAEncodePicture;
typedef struct D3D12VAEncodeProfile {
@@ -282,6 +286,10 @@ typedef struct D3D12VAEncodeContext {
*/
D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE me_precision;
+ /**
+ * QP map region pixel size (block size for QP map)
+ */
+ int qp_map_region_size;
} D3D12VAEncodeContext;
typedef struct D3D12VAEncodeType {
diff --git a/libavcodec/d3d12va_encode_av1.c b/libavcodec/d3d12va_encode_av1.c
index cf19597f0d..5ead741141 100644
--- a/libavcodec/d3d12va_encode_av1.c
+++ b/libavcodec/d3d12va_encode_av1.c
@@ -590,6 +590,18 @@ static int
d3d12va_encode_av1_init_sequence_params(AVCodecContext *avctx)
av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses
texture array mode.\n");
}
+ // Check if the configuration with DELTA_QP is supported
+ if (support.SupportFlags &
D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) {
+ base_ctx->roi_allowed = 1;
+ // Store the QP map region size from resolution limits
+ ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+ av_log(avctx, AV_LOG_DEBUG, "ROI encoding is supported via delta QP "
+ "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+ } else {
+ base_ctx->roi_allowed = 0;
+ av_log(avctx, AV_LOG_DEBUG, "ROI encoding not supported by hardware
for current rate control mode \n");
+ }
+
memset(seqheader_obu, 0, sizeof(*seqheader_obu));
seq->seq_profile = profile;
seq->seq_level_idx[0] = level.Level;
@@ -992,6 +1004,12 @@ static int
d3d12va_encode_av1_init_picture_params(AVCodecContext *avctx,
d3d12va_pic->pic_ctl.pAV1PicData->ReferenceIndices[i] =
fh->ref_frame_idx[i];
}
+ // Process ROI side data if present and supported
+ if (base_ctx->roi_allowed && d3d12va_pic->qp_map &&
d3d12va_pic->qp_map_size > 0) {
+ d3d12va_pic->pic_ctl.pAV1PicData->QPMapValuesCount =
d3d12va_pic->qp_map_size;
+ d3d12va_pic->pic_ctl.pAV1PicData->pRateControlQPMap = (INT16
*)d3d12va_pic->qp_map;
+ }
+
return av_fifo_write(priv->picture_header_list,
&priv->units.raw_frame_header, 1);
}
diff --git a/libavcodec/d3d12va_encode_h264.c b/libavcodec/d3d12va_encode_h264.c
index bcf5a326e5..e03269722c 100644
--- a/libavcodec/d3d12va_encode_h264.c
+++ b/libavcodec/d3d12va_encode_h264.c
@@ -211,6 +211,18 @@ static int
d3d12va_encode_h264_init_sequence_params(AVCodecContext *avctx)
av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses
texture array mode.\n");
}
+ // Check if the configuration with DELTA_QP is supported
+ if (support.SupportFlags &
D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) {
+ base_ctx->roi_allowed = 1;
+ // Store the QP map region size from resolution limits
+ ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+ av_log(avctx, AV_LOG_DEBUG, "ROI encoding is supported via delta QP "
+ "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+ } else {
+ base_ctx->roi_allowed = 0;
+ av_log(avctx, AV_LOG_DEBUG, "ROI encoding not supported by hardware
for current rate control mode \n");
+ }
+
desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
av_assert0(desc);
@@ -376,6 +388,7 @@ static void
d3d12va_encode_h264_free_picture_params(D3D12VAEncodePicture *pic)
static int d3d12va_encode_h264_init_picture_params(AVCodecContext *avctx,
FFHWBaseEncodePicture
*base_pic)
{
+ FFHWBaseEncodeContext *base_ctx = avctx->priv_data;
D3D12VAEncodeH264Context *ctx = avctx->priv_data;
D3D12VAEncodePicture *pic = base_pic->priv;
D3D12VAEncodeH264Picture *hpic = base_pic->codec_priv;
@@ -470,6 +483,12 @@ static int
d3d12va_encode_h264_init_picture_params(AVCodecContext *avctx,
pic->pic_ctl.pH264PicData->ReferenceFramesReconPictureDescriptorsCount =
idx;
pic->pic_ctl.pH264PicData->pReferenceFramesReconPictureDescriptors = pd;
+ // Process ROI side data if present and supported
+ if (base_ctx->roi_allowed && pic->qp_map && pic->qp_map_size > 0) {
+ pic->pic_ctl.pH264PicData->QPMapValuesCount = pic->qp_map_size;
+ pic->pic_ctl.pH264PicData->pRateControlQPMap = (INT8 *)pic->qp_map;
+ }
+
return 0;
}
diff --git a/libavcodec/d3d12va_encode_hevc.c b/libavcodec/d3d12va_encode_hevc.c
index e00ecbb4de..921324b5d2 100644
--- a/libavcodec/d3d12va_encode_hevc.c
+++ b/libavcodec/d3d12va_encode_hevc.c
@@ -283,6 +283,18 @@ static int
d3d12va_encode_hevc_init_sequence_params(AVCodecContext *avctx)
av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses
texture array mode.\n");
}
+ // Check if the configuration with DELTA_QP is supported
+ if (support.SupportFlags &
D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) {
+ base_ctx->roi_allowed = 1;
+ // Store the QP map region size from resolution limits
+ ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+ av_log(avctx, AV_LOG_DEBUG, "ROI encoding is supported via delta QP "
+ "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+ } else {
+ base_ctx->roi_allowed = 0;
+ av_log(avctx, AV_LOG_DEBUG, "ROI encoding not supported by hardware
for current rate control mode \n");
+ }
+
desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
av_assert0(desc);
@@ -538,6 +550,7 @@ static void
d3d12va_encode_hevc_free_picture_params(D3D12VAEncodePicture *pic)
static int d3d12va_encode_hevc_init_picture_params(AVCodecContext *avctx,
FFHWBaseEncodePicture
*base_pic)
{
+ FFHWBaseEncodeContext *base_ctx =
avctx->priv_data;
D3D12VAEncodePicture *pic = base_pic->priv;
D3D12VAEncodeHEVCPicture *hpic =
base_pic->codec_priv;
FFHWBaseEncodePicture *prev = base_pic->prev;
@@ -631,6 +644,12 @@ static int
d3d12va_encode_hevc_init_picture_params(AVCodecContext *avctx,
pic->pic_ctl.pHEVCPicData->ReferenceFramesReconPictureDescriptorsCount =
idx;
pic->pic_ctl.pHEVCPicData->pReferenceFramesReconPictureDescriptors = pd;
+ // Process ROI side data if present and supported
+ if (base_ctx->roi_allowed && pic->qp_map && pic->qp_map_size > 0) {
+ pic->pic_ctl.pHEVCPicData->QPMapValuesCount = pic->qp_map_size;
+ pic->pic_ctl.pHEVCPicData->pRateControlQPMap = (INT8 *)pic->qp_map;
+ }
+
return 0;
}
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]