This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

The following commit(s) were added to refs/heads/master by this push:
     new 21a3e44fbe avcodec/d3d12va_encode: add Region of Interest (ROI) support
21a3e44fbe is described below

commit 21a3e44fbe12dd6e461db6e2b0f78231a4c9a1ae
Author:     stevxiao <[email protected]>
AuthorDate: Wed Dec 24 23:43:26 2025 -0500
Commit:     Tong Wu <[email protected]>
CommitDate: Mon Jan 12 02:45:48 2026 +0000

    avcodec/d3d12va_encode: add Region of Interest (ROI) support
    
    This commit implements ROI (Region of Interest) encoding support for 
D3D12VA hardware encoders, enabling spatially-adaptive quality control for 
H.264, HEVC, and AV1 encoders.
    
    Query for `D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP` support 
during initialization to check whether the hardware supports delta QP. If delta 
QP is supported, then process `AV_FRAME_DATA_REGIONS_OF_INTEREST` side data and 
generate delta QP maps for each frame.
    
    Sample command line:
    ffmpeg.exe -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -vf 
addroi=x=480:y=270:w=960:h=540:qoffset=-1/5 -c:v hevc_d3d12va output.mp4
---
 libavcodec/d3d12va_encode.c      | 116 +++++++++++++++++++++++++++++++++++++++
 libavcodec/d3d12va_encode.h      |   8 +++
 libavcodec/d3d12va_encode_av1.c  |  18 ++++++
 libavcodec/d3d12va_encode_h264.c |  19 +++++++
 libavcodec/d3d12va_encode_hevc.c |  19 +++++++
 5 files changed, 180 insertions(+)

diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
index de95518be5..1fba31117d 100644
--- a/libavcodec/d3d12va_encode.c
+++ b/libavcodec/d3d12va_encode.c
@@ -140,6 +140,105 @@ static int d3d12va_encode_wait(AVCodecContext *avctx,
     return 0;
 }
 
+static int d3d12va_encode_setup_roi(AVCodecContext *avctx,
+                                    D3D12VAEncodePicture *pic,
+                                    const uint8_t *data, size_t size)
+{
+    D3D12VAEncodeContext *ctx = avctx->priv_data;
+    const AVRegionOfInterest *roi;
+    uint32_t roi_size;
+    int nb_roi, i;
+    int block_width, block_height;
+    int block_size, qp_range;
+    int is_av1 = 0;
+
+    // Use the QP map region size reported by the driver
+    block_size = ctx->qp_map_region_size;
+
+    // Determine QP range and element size based on codec
+    switch (ctx->codec->d3d12_codec) {
+        case D3D12_VIDEO_ENCODER_CODEC_H264:
+        case D3D12_VIDEO_ENCODER_CODEC_HEVC:
+            qp_range = 51;
+            is_av1 = 0;
+            break;
+#if CONFIG_AV1_D3D12VA_ENCODER
+        case D3D12_VIDEO_ENCODER_CODEC_AV1:
+            qp_range = 255;
+            is_av1 = 1;
+            break;
+#endif
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported codec for ROI.\n");
+            return AVERROR(EINVAL);
+    }
+
+    // Calculate map dimensions using ceil division as required by D3D12
+    block_width  = (avctx->width + block_size - 1) / block_size;
+    block_height = (avctx->height + block_size - 1) / block_size;
+
+    // Allocate QP map with correct type based on codec
+    if (is_av1) {
+        pic->qp_map = av_calloc(block_width * block_height, sizeof(int16_t));
+    } else {
+        pic->qp_map = av_calloc(block_width * block_height, sizeof(int8_t));
+    }
+    if (!pic->qp_map)
+        return AVERROR(ENOMEM);
+
+    // Process ROI regions
+    roi = (const AVRegionOfInterest *)data;
+    roi_size = roi->self_size;
+    av_assert0(roi_size && size % roi_size == 0);
+    nb_roi = size / roi_size;
+
+    // Iterate in reverse for priority (first region in array takes priority 
on overlap)
+    for (i = nb_roi - 1; i >= 0; i--) {
+        int startx, endx, starty, endy;
+        int delta_qp;
+        int x, y;
+
+        roi = (const AVRegionOfInterest *)(data + roi_size * i);
+
+        // Convert pixel coordinates to block coordinates
+        starty = FFMIN(block_height, roi->top / block_size);
+        endy   = FFMIN(block_height, (roi->bottom + block_size - 1) / 
block_size);
+        startx = FFMIN(block_width, roi->left / block_size);
+        endx   = FFMIN(block_width, (roi->right + block_size - 1) / 
block_size);
+
+        if (roi->qoffset.den == 0) {
+            av_freep(&pic->qp_map);
+            av_log(avctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must 
not be zero.\n");
+            return AVERROR(EINVAL);
+        }
+
+        // Convert qoffset to delta QP
+        delta_qp = roi->qoffset.num * qp_range / roi->qoffset.den;
+
+        av_log(avctx, AV_LOG_DEBUG, "ROI: (%d,%d)-(%d,%d) -> %+d.\n",
+               roi->top, roi->left, roi->bottom, roi->right, delta_qp);
+
+        // Fill QP map for this ROI region with correct type
+        if (is_av1) {
+            int16_t *qp_map_int16 = (int16_t *)pic->qp_map;
+            delta_qp = av_clip_int16(delta_qp);
+            for (y = starty; y < endy; y++)
+                for (x = startx; x < endx; x++)
+                    qp_map_int16[x + y * block_width] = delta_qp;
+        } else {
+            int8_t *qp_map_int8 = (int8_t *)pic->qp_map;
+            delta_qp = av_clip_int8(delta_qp);
+            for (y = starty; y < endy; y++)
+                for (x = startx; x < endx; x++)
+                    qp_map_int8[x + y * block_width] = delta_qp;
+        }
+    }
+
+    pic->qp_map_size = block_width * block_height;
+
+    return 0;
+}
+
 static int d3d12va_encode_create_metadata_buffers(AVCodecContext *avctx,
                                                   D3D12VAEncodePicture *pic)
 {
@@ -299,6 +398,20 @@ static int d3d12va_encode_issue(AVCodecContext *avctx,
     if (err < 0)
         goto fail;
 
+    // Process ROI side data if present and supported
+    AVFrameSideData *sd = av_frame_get_side_data(base_pic->input_image,
+                                                    
AV_FRAME_DATA_REGIONS_OF_INTEREST);
+    if (sd && base_ctx->roi_allowed) {
+        err = d3d12va_encode_setup_roi(avctx, pic, sd->data, sd->size);
+        if (err < 0)
+            goto fail;
+
+        // Enable delta QP flag in rate control only if supported
+        input_args.SequenceControlDesc.RateControl.Flags |= 
D3D12_VIDEO_ENCODER_RATE_CONTROL_FLAG_ENABLE_DELTA_QP;
+        av_log(avctx, AV_LOG_DEBUG, "ROI delta QP map created with %d blocks 
(region size: %d pixels).\n",
+                pic->qp_map_size, ctx->qp_map_region_size);
+    }
+
     if (ctx->codec->init_picture_params) {
         err = ctx->codec->init_picture_params(avctx, base_pic);
         if (err < 0) {
@@ -669,6 +782,9 @@ static int d3d12va_encode_free(AVCodecContext *avctx, 
FFHWBaseEncodePicture *pic
     if (ctx->codec->free_picture_params)
         ctx->codec->free_picture_params(priv);
 
+    // Free ROI QP map if allocated
+    av_freep(&priv->qp_map);
+
     return 0;
 }
 
diff --git a/libavcodec/d3d12va_encode.h b/libavcodec/d3d12va_encode.h
index fcb97210b3..7eecdb6eb8 100644
--- a/libavcodec/d3d12va_encode.h
+++ b/libavcodec/d3d12va_encode.h
@@ -57,6 +57,10 @@ typedef struct D3D12VAEncodePicture {
     D3D12_VIDEO_ENCODER_PICTURE_CONTROL_CODEC_DATA pic_ctl;
 
     int             fence_value;
+
+    // ROI delta QP map (void* to support both INT8 for H.264/HEVC and INT16 
for AV1)
+    void           *qp_map;
+    int             qp_map_size;
 } D3D12VAEncodePicture;
 
 typedef struct D3D12VAEncodeProfile {
@@ -282,6 +286,10 @@ typedef struct D3D12VAEncodeContext {
      */
     D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE me_precision;
 
+    /**
+     * QP map region pixel size (block size for QP map)
+     */
+    int qp_map_region_size;
 } D3D12VAEncodeContext;
 
 typedef struct D3D12VAEncodeType {
diff --git a/libavcodec/d3d12va_encode_av1.c b/libavcodec/d3d12va_encode_av1.c
index cf19597f0d..5ead741141 100644
--- a/libavcodec/d3d12va_encode_av1.c
+++ b/libavcodec/d3d12va_encode_av1.c
@@ -590,6 +590,18 @@ static int 
d3d12va_encode_av1_init_sequence_params(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses 
texture array mode.\n");
     }
 
+    // Check if the configuration with DELTA_QP is supported
+    if (support.SupportFlags & 
D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) {
+        base_ctx->roi_allowed = 1;
+        // Store the QP map region size from resolution limits
+        ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+        av_log(avctx, AV_LOG_DEBUG, "ROI encoding is supported via delta QP "
+               "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+    } else {
+        base_ctx->roi_allowed = 0;
+        av_log(avctx, AV_LOG_DEBUG, "ROI encoding not supported by hardware 
for current rate control mode \n");
+    }
+
     memset(seqheader_obu, 0, sizeof(*seqheader_obu));
     seq->seq_profile = profile;
     seq->seq_level_idx[0] = level.Level;
@@ -992,6 +1004,12 @@ static int 
d3d12va_encode_av1_init_picture_params(AVCodecContext *avctx,
             d3d12va_pic->pic_ctl.pAV1PicData->ReferenceIndices[i] = 
fh->ref_frame_idx[i];
     }
 
+    // Process ROI side data if present and supported
+    if (base_ctx->roi_allowed && d3d12va_pic->qp_map && 
d3d12va_pic->qp_map_size > 0) {
+        d3d12va_pic->pic_ctl.pAV1PicData->QPMapValuesCount  = 
d3d12va_pic->qp_map_size;
+        d3d12va_pic->pic_ctl.pAV1PicData->pRateControlQPMap = (INT16 
*)d3d12va_pic->qp_map;
+    }
+
     return av_fifo_write(priv->picture_header_list, 
&priv->units.raw_frame_header, 1);
 }
 
diff --git a/libavcodec/d3d12va_encode_h264.c b/libavcodec/d3d12va_encode_h264.c
index bcf5a326e5..e03269722c 100644
--- a/libavcodec/d3d12va_encode_h264.c
+++ b/libavcodec/d3d12va_encode_h264.c
@@ -211,6 +211,18 @@ static int 
d3d12va_encode_h264_init_sequence_params(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses 
texture array mode.\n");
     }
 
+    // Check if the configuration with DELTA_QP is supported
+    if (support.SupportFlags & 
D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) {
+        base_ctx->roi_allowed = 1;
+        // Store the QP map region size from resolution limits
+        ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+        av_log(avctx, AV_LOG_DEBUG, "ROI encoding is supported via delta QP "
+               "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+    } else {
+        base_ctx->roi_allowed = 0;
+        av_log(avctx, AV_LOG_DEBUG, "ROI encoding not supported by hardware 
for current rate control mode \n");
+    }
+
     desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
     av_assert0(desc);
 
@@ -376,6 +388,7 @@ static void 
d3d12va_encode_h264_free_picture_params(D3D12VAEncodePicture *pic)
 static int d3d12va_encode_h264_init_picture_params(AVCodecContext *avctx,
                                                    FFHWBaseEncodePicture 
*base_pic)
 {
+    FFHWBaseEncodeContext  *base_ctx = avctx->priv_data;
     D3D12VAEncodeH264Context    *ctx = avctx->priv_data;
     D3D12VAEncodePicture        *pic = base_pic->priv;
     D3D12VAEncodeH264Picture    *hpic = base_pic->codec_priv;
@@ -470,6 +483,12 @@ static int 
d3d12va_encode_h264_init_picture_params(AVCodecContext *avctx,
     pic->pic_ctl.pH264PicData->ReferenceFramesReconPictureDescriptorsCount = 
idx;
     pic->pic_ctl.pH264PicData->pReferenceFramesReconPictureDescriptors = pd;
 
+    // Process ROI side data if present and supported
+    if (base_ctx->roi_allowed && pic->qp_map && pic->qp_map_size > 0) {
+        pic->pic_ctl.pH264PicData->QPMapValuesCount  = pic->qp_map_size;
+        pic->pic_ctl.pH264PicData->pRateControlQPMap = (INT8 *)pic->qp_map;
+    }
+
     return 0;
 }
 
diff --git a/libavcodec/d3d12va_encode_hevc.c b/libavcodec/d3d12va_encode_hevc.c
index e00ecbb4de..921324b5d2 100644
--- a/libavcodec/d3d12va_encode_hevc.c
+++ b/libavcodec/d3d12va_encode_hevc.c
@@ -283,6 +283,18 @@ static int 
d3d12va_encode_hevc_init_sequence_params(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_DEBUG, "D3D12 video encode on this device uses 
texture array mode.\n");
     }
 
+    // Check if the configuration with DELTA_QP is supported
+    if (support.SupportFlags & 
D3D12_VIDEO_ENCODER_SUPPORT_FLAG_RATE_CONTROL_DELTA_QP_AVAILABLE) {
+        base_ctx->roi_allowed = 1;
+        // Store the QP map region size from resolution limits
+        ctx->qp_map_region_size = ctx->res_limits.QPMapRegionPixelsSize;
+        av_log(avctx, AV_LOG_DEBUG, "ROI encoding is supported via delta QP "
+               "(QP map region size: %d pixels).\n", ctx->qp_map_region_size);
+    } else {
+        base_ctx->roi_allowed = 0;
+        av_log(avctx, AV_LOG_DEBUG, "ROI encoding not supported by hardware 
for current rate control mode \n");
+    }
+
     desc = av_pix_fmt_desc_get(base_ctx->input_frames->sw_format);
     av_assert0(desc);
 
@@ -538,6 +550,7 @@ static void 
d3d12va_encode_hevc_free_picture_params(D3D12VAEncodePicture *pic)
 static int d3d12va_encode_hevc_init_picture_params(AVCodecContext *avctx,
                                                    FFHWBaseEncodePicture 
*base_pic)
 {
+    FFHWBaseEncodeContext                           *base_ctx = 
avctx->priv_data;
     D3D12VAEncodePicture                                 *pic = base_pic->priv;
     D3D12VAEncodeHEVCPicture                            *hpic = 
base_pic->codec_priv;
     FFHWBaseEncodePicture                               *prev = base_pic->prev;
@@ -631,6 +644,12 @@ static int 
d3d12va_encode_hevc_init_picture_params(AVCodecContext *avctx,
     pic->pic_ctl.pHEVCPicData->ReferenceFramesReconPictureDescriptorsCount = 
idx;
     pic->pic_ctl.pHEVCPicData->pReferenceFramesReconPictureDescriptors = pd;
 
+    // Process ROI side data if present and supported
+    if (base_ctx->roi_allowed && pic->qp_map && pic->qp_map_size > 0) {
+        pic->pic_ctl.pHEVCPicData->QPMapValuesCount  = pic->qp_map_size;
+        pic->pic_ctl.pHEVCPicData->pRateControlQPMap = (INT8 *)pic->qp_map;
+    }
+
     return 0;
 }
 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to