From: Zhao Yakui <yakui.z...@intel.com> v3: remove the warning according to haihao's comments
v2: merge three mbmv cost table related patches together. Encoding: Abstract the calculation of mbmv cost for qp as one function. Encoding: Add one function that initializes the mbmv cost table for the supported Qp range. Encoding: Set up one cost_table surface state for the VME shader. According to haihao's comments, free the pointer directly. v1: format/style alignment to avoid the warning. Currently the length of VME MEDIA_OBJECT command on Ivy can't exceed 8 dwords. If more parameters need to be passed, the buffer length should be enlarged. Pass the Qp parameter into the VME shader. Signed-off-by: Zhao Yakui <yakui.z...@intel.com> Signed-off-by: pjl <cecilia.p...@intel.com> Signed-off-by: Pengfei Qu <pengfei...@intel.com> --- src/gen6_mfc_common.c | 160 ++++++++++++++++++++++++++++++++++++++++++-------- src/gen6_vme.h | 19 ++++++ src/gen75_vme.c | 35 +++++++++-- src/gen7_vme.c | 24 ++++++-- src/gen8_vme.c | 21 +++++-- 5 files changed, 219 insertions(+), 40 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 59f7785..30cf7e5 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -764,30 +764,14 @@ static float intel_lambda_qp(int qp) return lambdaf; } - -void intel_vme_update_mbmv_cost(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) +static +void intel_h264_calc_mbmvcost_qp(int qp, + int slice_type, + uint8_t *vme_state_message) { - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct gen6_vme_context *vme_context = encoder_context->vme_context; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - int qp, m_cost, j, mv_count; - uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message); + int m_cost, j, mv_count; float lambda, m_costf; - int slice_type = 
intel_avc_enc_slice_type_fixup(slice_param->slice_type); - - - if (encoder_context->rate_control_mode == VA_RC_CQP) - qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; - else - qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - - if (vme_state_message == NULL) - return; - assert(qp <= QP_MAX); lambda = intel_lambda_qp(qp); @@ -880,6 +864,31 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); } } + return; +} + +void intel_vme_update_mbmv_cost(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message); + + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + + if (vme_state_message == NULL) + return; + + intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message); } void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx, @@ -1023,6 +1032,16 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, int mb_row; int s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 
*)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; #define USE_SCOREBOARD (1 << 21) @@ -1062,7 +1081,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, } } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = USE_SCOREBOARD; /* Indirect data */ @@ -1073,6 +1092,8 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* QP occupies one byte */ + *command_ptr++ = qp; x_inner -= 2; y_inner += 1; } @@ -1106,7 +1127,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, } } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = USE_SCOREBOARD; /* Indirect data */ @@ -1117,6 +1138,8 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + *command_ptr++ = qp; x_inner -= 2; y_inner += 1; @@ -1649,6 +1672,97 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx, return; } +void +intel_h264_initialize_mbmv_cost(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 
*)encode_state->slice_params_ext[0]->buffer; + int qp; + dri_bo *bo; + uint8_t *cost_table; + + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + + if (slice_type == SLICE_TYPE_I) { + if (vme_context->i_qp_cost_table) + return; + } else if (slice_type == SLICE_TYPE_P) { + if (vme_context->p_qp_cost_table) + return; + } else { + if (vme_context->b_qp_cost_table) + return; + } + + /* It is enough to allocate 32 bytes for each qp. */ + bo = dri_bo_alloc(i965->intel.bufmgr, + "cost_table ", + QP_MAX * 32, + 64); + + dri_bo_map(bo, 1); + + cost_table = (uint8_t *)(bo->virtual); + for (qp = 0; qp < QP_MAX; qp++) { + intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table); + cost_table += 32; + } + + dri_bo_unmap(bo); + + if (slice_type == SLICE_TYPE_I) { + vme_context->i_qp_cost_table = bo; + } else if (slice_type == SLICE_TYPE_P) { + vme_context->p_qp_cost_table = bo; + } else { + vme_context->b_qp_cost_table = bo; + } + + vme_context->cost_table_size = QP_MAX * 32; + return; +} + +extern void +intel_h264_setup_cost_surface(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + dri_bo *bo; + + + struct i965_buffer_surface cost_table; + + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + + if (slice_type == SLICE_TYPE_I) { + bo = vme_context->i_qp_cost_table; + } else if (slice_type == SLICE_TYPE_P) { + bo = vme_context->p_qp_cost_table; + } else { + bo = vme_context->b_qp_cost_table; + } + + cost_table.bo = bo; + cost_table.num_blocks = QP_MAX; + cost_table.pitch = 16; + cost_table.size_block = 32; + + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &cost_table, + 
binding_table_offset, + surface_state_offset); +} + /* HEVC */ static int hevc_temporal_find_surface(VAPictureHEVC *curr_pic, diff --git a/src/gen6_vme.h b/src/gen6_vme.h index dc568ac..5031339 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -46,6 +46,8 @@ #define GEN6_VME_KERNEL_NUMBER 3 +#define INTEL_COST_TABLE_OFFSET 8 + struct encode_state; struct intel_encoder_context; @@ -91,6 +93,11 @@ struct gen6_vme_context struct object_surface *used_reference_objects[2]; void *used_references[2]; unsigned int ref_index_in_mb[2]; + + dri_bo *i_qp_cost_table; + dri_bo *p_qp_cost_table; + dri_bo *b_qp_cost_table; + int cost_table_size; }; #define MPEG2_PIC_WIDTH_HEIGHT 30 @@ -200,4 +207,16 @@ void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx, extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); + +extern void +intel_h264_initialize_mbmv_cost(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); + +extern void +intel_h264_setup_cost_surface(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned long binding_table_offset, + unsigned long surface_state_offset); #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_vme.c b/src/gen75_vme.c index a85d6b3..dcf170e 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -280,6 +280,9 @@ gen75_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen75_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen75_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -488,6 +491,16 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP 
ctx, int mb_x = 0, mb_y = 0; int i, s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -525,7 +538,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, if ((i == mb_width) && slice_mb_x) { mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -535,6 +548,8 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + *command_ptr++ = qp; i += 1; } @@ -647,7 +662,8 @@ static VAStatus gen75_vme_prepare(VADriverContextP ctx, } intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); - + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + /*Setup all the memory object*/ gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen75_vme_interface_setup(ctx, encode_state, encoder_context); @@ -1002,10 +1018,17 @@ gen75_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - 
free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; free(vme_context); } diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 9da44d1..fb6358f 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -227,7 +227,7 @@ gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, int height_in_mbs = pSequenceParameter->picture_height_in_mbs; vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; - vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */ + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ vme_context->vme_batchbuffer.pitch = 16; vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, "VME batchbuffer", @@ -270,6 +270,9 @@ gen7_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -669,8 +672,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx, (vme_context->h264_level != pSequenceParameter->level_idc)) { vme_context->h264_level = pSequenceParameter->level_idc; } - + intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + /*Setup all the memory object*/ gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen7_vme_interface_setup(ctx, encode_state, encoder_context); @@ -1018,10 +1023,17 
@@ gen7_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; free(vme_context); } diff --git a/src/gen8_vme.c b/src/gen8_vme.c index edf6060..998f7d6 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -314,6 +314,9 @@ gen8_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -721,7 +724,8 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx, } intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); - + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + /*Setup all the memory object*/ gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen8_vme_interface_setup(ctx, encode_state, encoder_context); @@ -1287,10 +1291,17 @@ gen8_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + 
vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; free(vme_context); } -- 2.7.4 _______________________________________________ Libva mailing list Libva@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libva