This patch enables HEVC 10-bit encoding on KBL+. v1: remove the P010->NV12 conversion for the ref/reconstructed frame and the enc frame from this patch; remove the call to i965_SyncSurface to work around the GPU hang when converting 10-bit->8-bit (P010->NV12) from this patch.
Signed-off-by: Pengfei Qu <pengfei...@intel.com> --- src/gen6_mfc_common.c | 20 ++++++++++++++++++++ src/gen9_mfc_hevc.c | 37 ++++++++++++++++++++++++++----------- src/gen9_vme.c | 25 ++++++++++++++++++++++--- src/i965_device_info.c | 1 + src/i965_drv_video.c | 20 +++++++++++++++----- src/i965_drv_video.h | 1 + src/i965_encoder.c | 36 ++++++++++++++++++++++++++++-------- 7 files changed, 113 insertions(+), 27 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 9f041d8..9bbc69b 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -2089,12 +2089,19 @@ intel_hevc_vme_reference_state(VADriverContextP ctx, struct object_surface *obj_surface = NULL; struct i965_driver_data *i965 = i965_driver_data(ctx); VASurfaceID ref_surface_id; + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; int max_num_references; VAPictureHEVC *curr_pic; VAPictureHEVC *ref_list; int ref_idx; + int frame_index,i; + unsigned int is_hevc10 = 0; + + if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) + || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) + is_hevc10 = 1; if (list_index == 0) { max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1; @@ -2141,6 +2148,19 @@ intel_hevc_vme_reference_state(VADriverContextP ctx, obj_surface->bo) { assert(ref_idx >= 0); vme_context->used_reference_objects[list_index] = obj_surface; + + if(is_hevc10){ + + frame_index = -1; + for (i = 0; i < 16; i++) { + if (obj_surface == encode_state->reference_objects[i]) { + frame_index = i; + break; + } + } + + obj_surface = vme_context->reference_objects_internal[frame_index]; + } vme_source_surface_state(ctx, 
surface_index, obj_surface, encoder_context); vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 | ref_idx << 16 | diff --git a/src/gen9_mfc_hevc.c b/src/gen9_mfc_hevc.c index b3ee327..6021a7e 100644 --- a/src/gen9_mfc_hevc.c +++ b/src/gen9_mfc_hevc.c @@ -193,12 +193,21 @@ gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state, struct intel_batchbuffer *batch = encoder_context->base.batch; struct object_surface *obj_surface = encode_state->reconstructed_object; struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; + unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8; /* to do */ unsigned int y_cb_offset; assert(obj_surface); + if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) + || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) + { + assert(obj_surface->fourcc == VA_FOURCC_P010); + surface_format = SURFACE_FORMAT_P010; + } + y_cb_offset = obj_surface->y_cb_offset; BEGIN_BCS_BATCH(batch, 3); @@ -207,7 +216,7 @@ gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state, (1 << 28) | /* surface id */ (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */ OUT_BCS_BATCH(batch, - (SURFACE_FORMAT_PLANAR_420_8 << 28) | + surface_format << 28 | y_cb_offset); ADVANCE_BCS_BATCH(batch); @@ -217,7 +226,7 @@ gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state, (0 << 28) | /* surface id */ (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */ OUT_BCS_BATCH(batch, - (SURFACE_FORMAT_PLANAR_420_8 << 28) | + surface_format << 28 | y_cb_offset); ADVANCE_BCS_BATCH(batch); } @@ -588,8 +597,8 @@ gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */ 0); OUT_BCS_BATCH(batch, - 0 << 27 | /* 8 bit only 
for encoder */ - 0 << 24 | /* 8 bit only for encoder */ + seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 | /* 10 bit for KBL+*/ + seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 | /* 10 bit for KBL+ */ pcm_sample_bit_depth_luma_minus1 << 20 | pcm_sample_bit_depth_chroma_minus1 << 16 | seq_param->max_transform_hierarchy_depth_inter << 13 | /* for encoder */ @@ -913,6 +922,11 @@ static void gen9_hcpe_init(VADriverContextP ctx, int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16; int num_cu_record = 64; + int size_shift = 3; + + if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) + || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) + size_shift = 2; if (log2_ctb_size == 5) num_cu_record = 16; else if (log2_ctb_size == 4) num_cu_record = 4; @@ -991,12 +1005,12 @@ static void gen9_hcpe_init(VADriverContextP ctx, /* Current internal buffer for HCP */ - size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> 3; + size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift; size <<= 6; ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size); ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size); - size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> 3; + size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift; size <<= 6; ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size); @@ -1026,15 +1040,15 @@ static void gen9_hcpe_init(VADriverContextP ctx, ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size); } - size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> 3; + size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift; size <<= 
6; ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size); - size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> 3; + size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift; size <<= 6; ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size); - size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> 3; + size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift; size <<= 6; ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size); @@ -1707,8 +1721,8 @@ gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx, } /* only support for 8-bit pixel bit-depth */ - assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 == 0); - assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 == 0); + assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2); + assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2); assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); @@ -2733,6 +2747,7 @@ VAStatus gen9_hcpe_pipeline(VADriverContextP ctx, switch (profile) { case VAProfileHEVCMain: + case VAProfileHEVCMain10: vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context); break; diff --git a/src/gen9_vme.c b/src/gen9_vme.c index 1625c2b..8fcfba9 100644 --- a/src/gen9_vme.c +++ b/src/gen9_vme.c @@ -329,11 +329,13 @@ gen9_vme_surface_setup(VADriverContextP ctx, int is_intra, struct intel_encoder_context *encoder_context) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct object_surface *obj_surface; /*Setup surfaces state*/ /* 
current picture for encoding */ - obj_surface = encode_state->input_yuv_object; + obj_surface = vme_context->input_yuv_object_internal; + assert(obj_surface); gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context); gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); @@ -1417,11 +1419,17 @@ gen9_vme_hevc_surface_setup(VADriverContextP ctx, int is_intra, struct intel_encoder_context *encoder_context) { + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct object_surface *obj_surface; + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; /*Setup surfaces state*/ /* current picture for encoding */ - obj_surface = encode_state->input_yuv_object; + if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) + || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) + obj_surface = vme_context->input_yuv_object_internal; + else + obj_surface = encode_state->input_yuv_object; gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context); gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context); @@ -1819,7 +1827,7 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_vme_context *vme_context = calloc(1, sizeof(struct gen6_vme_context)); struct i965_kernel *vme_kernel_list = NULL; - int i965_kernel_num; + int i965_kernel_num, i; switch (encoder_context->codec) { case CODEC_H264: @@ -1885,5 +1893,16 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int)); + vme_context->driver_context = ctx; + + // HEVC 10bit + vme_context->input_yuv_surface_internal = VA_INVALID_SURFACE; + 
vme_context->input_yuv_object_internal = NULL; + for(i = 0 ; i < 16 ; i++) { + vme_context->reference_surface_internal[i] = VA_INVALID_SURFACE; + vme_context->reference_objects_internal[i] = NULL; + } + + return True; } diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 47fd50a..1633924 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -469,6 +469,7 @@ static struct hw_codec_info kbl_hw_codec_info = { .has_h264_mvc_encoding = 1, .has_hevc_decoding = 1, .has_hevc_encoding = 1, + .has_hevc10_encoding = 1, .has_hevc10_decoding = 1, .has_vp9_decoding = 1, .has_vpp_p010 = 1, diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 77b9f37..9e9393e 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -120,6 +120,8 @@ #define HAS_HEVC10_DECODING(ctx) ((ctx)->codec_info->has_hevc10_decoding && \ (ctx)->intel.has_bsd) +#define HAS_HEVC10_ENCODING(ctx) ((ctx)->codec_info->has_hevc10_encoding && \ + (ctx)->intel.has_bsd) #define HAS_VPP_P010(ctx) ((ctx)->codec_info->has_vpp_p010 && \ (ctx)->intel.has_bsd) @@ -605,7 +607,8 @@ i965_QueryConfigProfiles(VADriverContextP ctx, profile_list[i++] = VAProfileHEVCMain; } - if (HAS_HEVC10_DECODING(i965)) { + if (HAS_HEVC10_DECODING(i965)|| + HAS_HEVC10_ENCODING(i965)) { profile_list[i++] = VAProfileHEVCMain10; } @@ -728,6 +731,9 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, if (HAS_HEVC10_DECODING(i965)) entrypoint_list[n++] = VAEntrypointVLD; + if (HAS_HEVC10_ENCODING(i965)) + entrypoint_list[n++] = VAEntrypointEncSlice; + break; case VAProfileVP9Profile0: @@ -852,7 +858,8 @@ i965_validate_config(VADriverContextP ctx, VAProfile profile, break; case VAProfileHEVCMain10: - if (HAS_HEVC10_DECODING(i965) && (entrypoint == VAEntrypointVLD)) + if ((HAS_HEVC10_DECODING(i965) && (entrypoint == VAEntrypointVLD))|| + (HAS_HEVC10_ENCODING(i965) && (entrypoint == VAEntrypointEncSlice))) va_status = VA_STATUS_SUCCESS; else va_status = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT; @@ -988,7 +995,8 
@@ i965_GetConfigAttributes(VADriverContextP ctx, profile == VAProfileH264High || profile == VAProfileH264StereoHigh || profile == VAProfileH264MultiviewHigh || - profile == VAProfileHEVCMain) { + profile == VAProfileHEVCMain || + profile == VAProfileHEVCMain10) { attrib_list[i].value |= (VA_ENC_PACKED_HEADER_RAW_DATA | VA_ENC_PACKED_HEADER_SLICE); } @@ -3270,7 +3278,8 @@ i965_encoder_render_picture(VADriverContextP ctx, if ((param->type == VAEncPackedHeaderRawData) || (param->type == VAEncPackedHeaderSlice)) { vaStatus = I965_RENDER_ENCODE_BUFFER(packed_header_params_ext); - } else if((obj_config->profile == VAProfileHEVCMain) && + } else if((obj_config->profile == VAProfileHEVCMain || + obj_config->profile == VAProfileHEVCMain10) && (encode->last_packed_header_type == VAEncPackedHeaderSequence)) { vaStatus = i965_encoder_render_packed_header_parameter_buffer(ctx, obj_context, @@ -3368,7 +3377,8 @@ i965_encoder_render_picture(VADriverContextP ctx, ((encode->last_packed_header_type & (~VAEncPackedHeaderMiscMask)) != 0)), VA_STATUS_ERROR_ENCODING_ERROR); - if((obj_config->profile == VAProfileHEVCMain) && + if((obj_config->profile == VAProfileHEVCMain || + obj_config->profile == VAProfileHEVCMain10) && (encode->last_packed_header_type == VAEncPackedHeaderSequence)) { vaStatus = i965_encoder_render_packed_header_data_buffer(ctx, diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 9fe042f..d9644a7 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -404,6 +404,7 @@ struct hw_codec_info unsigned int has_h264_mvc_encoding:1; unsigned int has_hevc_decoding:1; unsigned int has_hevc_encoding:1; + unsigned int has_hevc10_encoding:1; unsigned int has_hevc10_decoding:1; unsigned int has_vp9_decoding:1; unsigned int has_vpp_p010:1; diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 47368fb..509797c 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -112,6 +112,8 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, struct 
object_surface *obj_surface; VAStatus status; VARectangle rect; + unsigned int rt_format = VA_RT_FORMAT_YUV420; + unsigned int fourcc = VA_FOURCC_NV12; /* releae the temporary surface */ if (encoder_context->is_tmp_id) { @@ -126,9 +128,15 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, if (!obj_surface || !obj_surface->bo) return VA_STATUS_ERROR_INVALID_PARAMETER; - if (obj_surface->fourcc == VA_FOURCC_NV12) { - unsigned int tiling = 0, swizzle = 0; + if (VAProfileHEVCMain10 == profile && + obj_surface->fourcc != VA_FOURCC_P010) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (obj_surface->fourcc == VA_FOURCC_NV12 || + (VAProfileHEVCMain10 == profile && + obj_surface->fourcc == VA_FOURCC_P010)) { + unsigned int tiling = 0, swizzle = 0; dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); if (tiling == I915_TILING_Y) { @@ -138,6 +146,11 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, } } + if(VAProfileHEVCMain10 == profile){ + rt_format = VA_RT_FORMAT_YUV420_10BPP; + fourcc = VA_FOURCC_P010; + } + rect.x = 0; rect.y = 0; rect.width = obj_surface->orig_width; @@ -150,7 +163,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, status = i965_CreateSurfaces(ctx, obj_surface->orig_width, obj_surface->orig_height, - VA_RT_FORMAT_YUV420, + rt_format, 1, &encoder_context->input_yuv_surface); ASSERT_RET(status == VA_STATUS_SUCCESS, status); @@ -158,7 +171,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, obj_surface = SURFACE(encoder_context->input_yuv_surface); encode_state->input_yuv_object = obj_surface; assert(obj_surface); - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); + i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, SUBSAMPLE_YUV420); dst_surface.base = (struct object_base *)obj_surface; dst_surface.type = I965_SURFACE_TYPE_SURFACE; @@ -173,7 +186,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, encoder_context->is_tmp_id = 1; - return clear_border(obj_surface); + return 
VA_STATUS_SUCCESS; } @@ -493,7 +506,7 @@ intel_encoder_check_vp8_parameter(VADriverContextP ctx, if (!obj_surface) goto error; - encode_state->reconstructed_object = obj_surface; + encode_state->reconstructed_object = obj_surface; obj_buffer = BUFFER(pic_param->coded_buf); assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo); @@ -546,12 +559,15 @@ intel_encoder_check_hevc_parameter(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct object_surface *obj_surface; struct object_buffer *obj_buffer; VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferHEVC *slice_param; int i; + assert(vme_context); + assert(!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID)); if (pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID) @@ -591,7 +607,9 @@ intel_encoder_check_hevc_parameter(VADriverContextP ctx, } for ( ; i < 15; i++) + { encode_state->reference_objects[i] = NULL; + } for (i = 0; i < encode_state->num_slice_params_ext; i++) { slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer; @@ -725,7 +743,8 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, break; } - case VAProfileHEVCMain: { + case VAProfileHEVCMain: + case VAProfileHEVCMain10: { vaStatus = intel_encoder_check_hevc_parameter(ctx, encode_state, encoder_context); if (vaStatus != VA_STATUS_SUCCESS) goto out; @@ -867,6 +886,7 @@ intel_enc_hw_context_init(VADriverContextP ctx, break; case VAProfileHEVCMain: + case VAProfileHEVCMain10: encoder_context->codec = CODEC_HEVC; break; @@ -947,7 +967,7 @@ gen9_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) if (obj_config->entrypoint == VAEntrypointEncSliceLP) { return intel_enc_hw_context_init(ctx, obj_config, NULL, 
gen9_vdenc_context_init); } else { - if (obj_config->profile == VAProfileHEVCMain) { + if ((obj_config->profile == VAProfileHEVCMain) || (obj_config->profile == VAProfileHEVCMain10)) { return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_hcpe_context_init); } else if (obj_config->profile == VAProfileJPEGBaseline) return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init); -- 2.7.4 _______________________________________________ Libva mailing list Libva@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libva