v1: Enable ROI on gen8 and gen9. Enable the GPU to construct the GPU commands in the ROI scenario.
Signed-off-by: Zhao Yakui <yakui.z...@intel.com> Signed-off-by: Pengfei Qu <pengfei...@intel.com> --- src/gen6_mfc_common.c | 20 ++-- src/gen8_mfc.c | 55 +++++++--- src/gen8_vme.c | 23 +++- src/gen9_mfc.c | 35 +++--- src/gen9_vme.c | 42 +++++++- src/i965_drv_video.c | 12 ++- src/shaders/vme/Makefile.am | 10 +- src/shaders/vme/intra_frame_gen8.asm | 21 +++- src/shaders/vme/intra_frame_gen8.g8b | 7 +- src/shaders/vme/intra_frame_gen9.g9b | 7 +- src/shaders/vme/mpeg2_intra_gen8.asm | 201 +++++++++++++++++++++++++++++++++++ src/shaders/vme/mpeg2_intra_gen8.g8a | 2 + src/shaders/vme/mpeg2_intra_gen8.g8b | 72 +++++++++++++ src/shaders/vme/mpeg2_intra_gen9.g9a | 2 + src/shaders/vme/mpeg2_intra_gen9.g9b | 72 +++++++++++++ src/shaders/vme/vme8.inc | 3 + 16 files changed, 527 insertions(+), 57 deletions(-) create mode 100644 src/shaders/vme/mpeg2_intra_gen8.asm create mode 100644 src/shaders/vme/mpeg2_intra_gen8.g8a create mode 100644 src/shaders/vme/mpeg2_intra_gen8.g8b create mode 100644 src/shaders/vme/mpeg2_intra_gen9.g9a create mode 100644 src/shaders/vme/mpeg2_intra_gen9.g9b diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 876ba41..45b0e8b 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1857,6 +1857,7 @@ intel_h264_enc_roi_cbr(VADriverContextP ctx, * qp_delta set by user is added to base_qp, which is then clapped by * [base_qp-min_delta, base_qp+max_delta]. 
*/ + pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta = 1; assert (pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta); sum_roi = 0.0f; @@ -1937,6 +1938,7 @@ intel_h264_enc_roi_config(VADriverContextP ctx, { char *qp_ptr; int i, j; + struct i965_driver_data *i965 = i965_driver_data(ctx); VAEncMiscParameterBuffer* pMiscParamROI; VAEncMiscParameterBufferROI *pParamROI; VAEncROI *region_roi; @@ -1946,12 +1948,13 @@ intel_h264_enc_roi_config(VADriverContextP ctx, int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - int row_start, row_end, col_start, col_end; - int num_roi; + int row_start, row_end, col_start, col_end; + int num_roi; vme_context->roi_enabled = 0; encoder_context->soft_batch_force = 0; - /* Restriction: Disable ROI when multi-slice is enabled */ + + /* Restriction: Disable ROI when multi-slice is enabled */ if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1)) return; @@ -1965,7 +1968,8 @@ intel_h264_enc_roi_config(VADriverContextP ctx, /* check whether number of ROI is correct */ num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? 
I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi; - vme_context->roi_enabled = 1; + if(num_roi > 0) + vme_context->roi_enabled = 1; if ((vme_context->saved_width_mbs != width_in_mbs) || (vme_context->saved_height_mbs != height_in_mbs)) { @@ -2031,9 +2035,13 @@ intel_h264_enc_roi_config(VADriverContextP ctx, */ vme_context->roi_enabled = 0; } - if (vme_context->roi_enabled) + if (vme_context->roi_enabled) encoder_context->soft_batch_force = 1; - return; + + if( IS_GEN8(i965->intel.device_info) || IS_GEN9(i965->intel.device_info) ) + encoder_context->soft_batch_force = 0; + + return; } /* HEVC */ diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 1f8e57b..186bb9b 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -939,8 +939,6 @@ gen8_mfc_avc_slice_state(VADriverContextP ctx, #define AVC_INTER_MV_OFFSET 48 #define AVC_RDO_MASK 0xFFFF -#if MFC_SOFTWARE_BATCH - static int gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg, @@ -1114,6 +1112,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; int qp_slice; + int qp_mb; qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { @@ -1156,20 +1155,24 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, x = i % width_in_mbs; y = i / width_in_mbs; msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + if (vme_context->roi_enabled) { + qp_mb = *(vme_context->qp_per_mb + i); + } else + qp_mb = qp; if (is_intra) { assert(msg); - gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { int inter_rdo, intra_rdo; inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; offset = i * vme_context->vme_output.size_block + 
AVC_INTER_MV_OFFSET; if (intra_rdo < inter_rdo) { - gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { msg += AVC_INTER_MSG_OFFSET; - gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); + gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); } } } @@ -1217,7 +1220,6 @@ gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx, return batch_bo; } -#else static void gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, @@ -1374,6 +1376,8 @@ gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, int last_mb, slice_end_x, slice_end_y; int remaining_mb = total_mbs; uint32_t fwd_ref , bwd_ref, mb_flag; + char tmp_qp; + int number_roi_mbs, max_mb_cmds, i; last_mb = slice_param->macroblock_address + total_mbs - 1; slice_end_x = last_mb % width_in_mbs; @@ -1401,13 +1405,32 @@ gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, number_mb_cmds = width_in_mbs; } + max_mb_cmds = number_mb_cmds; + do { - if (number_mb_cmds >= remaining_mb) { - number_mb_cmds = remaining_mb; - } mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs; mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs; + number_mb_cmds = max_mb_cmds; + if (vme_context->roi_enabled) { + + number_roi_mbs = 1; + tmp_qp = *(vme_context->qp_per_mb + starting_offset); + for (i = 1; i < max_mb_cmds; i++) { + if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i)) + break; + + number_roi_mbs++; + } + + number_mb_cmds = number_roi_mbs; + qp = tmp_qp; + } + + if (number_mb_cmds >= remaining_mb) { + number_mb_cmds = remaining_mb; + } + gen8_mfc_batchbuffer_emit_object_command(batch, mb_flag, head_offset, @@ -1585,8 +1608,6 @@ 
gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, return mfc_context->aux_batchbuffer_surface.bo; } -#endif - static void gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, struct encode_state *encode_state, @@ -1601,11 +1622,11 @@ gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#if MFC_SOFTWARE_BATCH - slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); -#else - slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); -#endif + if (encoder_context->soft_batch_force) + slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); + else + slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); + // begin programing intel_batchbuffer_start_atomic_bcs(batch, 0x4000); @@ -1639,6 +1660,8 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx, unsigned int rate_control_mode = encoder_context->rate_control_mode; int current_frame_bits_size; int sts; + + encoder_context->soft_batch_force = 0; for (;;) { gen8_mfc_init(ctx, encode_state, encoder_context); diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 07dc462..c529a5b 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -96,7 +96,7 @@ static struct i965_kernel gen8_vme_kernels[] = { }; static const uint32_t gen8_vme_mpeg2_intra_frame[][4] = { -#include "shaders/vme/intra_frame_gen8.g8b" +#include "shaders/vme/mpeg2_intra_gen8.g8b" }; static const uint32_t gen8_vme_mpeg2_inter_frame[][4] = { @@ -562,6 +562,17 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, int mb_x = 0, mb_y = 0; int i, s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = 
intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int qp_mb, qp_index; + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -599,7 +610,7 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, if ((i == mb_width) && slice_mb_x) { mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -609,6 +620,13 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + if (vme_context->roi_enabled) { + qp_index = mb_y * mb_width + mb_x; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; *command_ptr++ = CMD_MEDIA_STATE_FLUSH; *command_ptr++ = 0; @@ -725,6 +743,7 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx, intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_enc_roi_config(ctx, encode_state, encoder_context); /*Setup all the memory object*/ gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c index 6cca579..109e697 100644 --- a/src/gen9_mfc.c +++ b/src/gen9_mfc.c @@ -47,8 +47,6 @@ #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) -#define MFC_SOFTWARE_HASWELL 1 - #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) 
((i965->intel.revision) >= B0_STEP_REV) @@ -856,8 +854,6 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx, } -#ifdef MFC_SOFTWARE_HASWELL - static int gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg, @@ -1037,6 +1033,7 @@ gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; int qp_slice; + int qp_mb; qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { @@ -1080,19 +1077,24 @@ gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, y = i / width_in_mbs; msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + if (vme_context->roi_enabled) { + qp_mb = *(vme_context->qp_per_mb + i); + } else + qp_mb = qp; + if (is_intra) { assert(msg); - gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { int inter_rdo, intra_rdo; inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; if (intra_rdo < inter_rdo) { - gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { msg += AVC_INTER_MSG_OFFSET; - gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); + gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); } } } @@ -1142,8 +1144,6 @@ gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx, return batch_bo; } -#else - static void gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, struct 
encode_state *encode_state, @@ -1537,7 +1537,6 @@ gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, return mfc_context->mfc_batchbuffer_surface.bo; } -#endif static void gen9_mfc_avc_pipeline_programing(VADriverContextP ctx, @@ -1553,11 +1552,11 @@ gen9_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#ifdef MFC_SOFTWARE_HASWELL - slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); -#else - slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); -#endif + if (encoder_context->soft_batch_force) + slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); + else + slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); + // begin programing intel_batchbuffer_start_atomic_bcs(batch, 0x4000); @@ -1705,12 +1704,12 @@ Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_mfc_context *mfc_context = NULL; -#if MFC_SOFTWARE_HASWELL + if ((encoder_context->codec == CODEC_H264) || (encoder_context->codec == CODEC_H264_MVC)) { - return gen8_mfc_context_init(ctx, encoder_context); + return gen8_mfc_context_init(ctx, encoder_context); } -#endif + if ((encoder_context->codec == CODEC_VP8) || (encoder_context->codec == CODEC_MPEG2)) diff --git a/src/gen9_vme.c b/src/gen9_vme.c index 5f9b796..6fa41dd 100644 --- a/src/gen9_vme.c +++ b/src/gen9_vme.c @@ -96,7 +96,7 @@ static struct i965_kernel gen9_vme_kernels[] = { }; static const uint32_t gen9_vme_mpeg2_intra_frame[][4] = { -#include "shaders/vme/intra_frame_gen9.g9b" +#include "shaders/vme/mpeg2_intra_gen9.g9b" }; static const uint32_t gen9_vme_mpeg2_inter_frame[][4] = { @@ -352,6 +352,9 @@ gen9_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, 
encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -603,6 +606,17 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx, int mb_x = 0, mb_y = 0; int i, s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int qp_mb, qp_index; + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -640,7 +654,7 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx, if ((i == mb_width) && slice_mb_x) { mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -650,6 +664,13 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + if (vme_context->roi_enabled) { + qp_index = mb_y * mb_width + mb_x; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; *command_ptr++ = CMD_MEDIA_STATE_FLUSH; *command_ptr++ = 0; @@ -767,6 +788,8 @@ static VAStatus gen9_vme_prepare(VADriverContextP ctx, } intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); + 
intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_enc_roi_config(ctx, encode_state, encoder_context); /*Setup all the memory object*/ gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); @@ -1776,6 +1799,21 @@ gen9_vme_context_destroy(void *context) vme_context->vme_state_message = NULL; } + if (vme_context->i_qp_cost_table) { + dri_bo_unreference(vme_context->i_qp_cost_table); + vme_context->i_qp_cost_table = NULL; + } + + if (vme_context->p_qp_cost_table) { + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + } + + if (vme_context->b_qp_cost_table) { + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; + } + free(vme_context); } diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 9812dff..90ff146 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1053,11 +1053,13 @@ i965_GetConfigAttributes(VADriverContextP ctx, (profile == VAProfileH264ConstrainedBaseline || profile == VAProfileH264Main || profile == VAProfileH264High ) && - (IS_GEN7(i965->intel.device_info))) { - VAConfigAttribValEncROI *roi_config = (VAConfigAttribValEncROI *)&(attrib_list[i].value); - roi_config->bits.num_roi_regions = I965_MAX_NUM_ROI_REGIONS; - roi_config->bits.roi_rc_priority_support = 0; - roi_config->bits.roi_rc_qp_delat_support = 1; + (IS_GEN7(i965->intel.device_info)|| + IS_GEN8(i965->intel.device_info)|| + IS_GEN9(i965->intel.device_info))){ + VAConfigAttribValEncROI *roi_config = (VAConfigAttribValEncROI *)&(attrib_list[i].value); + roi_config->bits.num_roi_regions = I965_MAX_NUM_ROI_REGIONS; + roi_config->bits.roi_rc_priority_support = 0; + roi_config->bits.roi_rc_qp_delat_support = 1; } break; diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am index 83d337a..71c93de 100644 --- a/src/shaders/vme/Makefile.am +++ b/src/shaders/vme/Makefile.am @@ -1,7 +1,7 @@ VME_CORE = batchbuffer.asm intra_frame.asm inter_frame.asm 
VME7_CORE = batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm VME75_CORE = batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm -VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm mpeg2_inter_gen8.asm vp8_intra_frame_gen8.asm vp8_inter_frame_gen8.asm +VME8_CORE = intra_frame_gen8.asm inter_frame_gen8.asm inter_bframe_gen8.asm mpeg2_intra_gen8.asm mpeg2_inter_gen8.asm vp8_intra_frame_gen8.asm vp8_inter_frame_gen8.asm VME9_CORE = $(VME8_CORE) INTEL_G6B = batchbuffer.g6b intra_frame.g6b inter_frame.g6b @@ -20,14 +20,14 @@ INTEL_GEN75_INC = batchbuffer.inc vme75.inc vme75_mpeg2.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) -INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_inter_gen8.g8b vp8_intra_frame_gen8.g8b vp8_inter_frame_gen8.g8b -INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_inter_gen8.g8a vp8_intra_frame_gen8.g8a vp8_inter_frame_gen8.g8a +INTEL_G8B = intra_frame_gen8.g8b inter_frame_gen8.g8b inter_bframe_gen8.g8b mpeg2_intra_gen8.g8b mpeg2_inter_gen8.g8b vp8_intra_frame_gen8.g8b vp8_inter_frame_gen8.g8b +INTEL_G8A = intra_frame_gen8.g8a inter_frame_gen8.g8a inter_bframe_gen8.g8a mpeg2_intra_gen8.g8a mpeg2_inter_gen8.g8a vp8_intra_frame_gen8.g8a vp8_inter_frame_gen8.g8a INTEL_GEN8_INC = vme8.inc vme75_mpeg2.inc INTEL_GEN8_ASM = $(INTEL_G8A:%.g8a=%.gen8.asm) -INTEL_G9B = intra_frame_gen9.g9b inter_frame_gen9.g9b inter_bframe_gen9.g9b mpeg2_inter_gen9.g9b vp8_intra_frame_gen9.g9b vp8_inter_frame_gen9.g9b -INTEL_G9A = intra_frame_gen9.g9a inter_frame_gen9.g9a inter_bframe_gen9.g9a mpeg2_inter_gen9.g9a vp8_intra_frame_gen9.g9a vp8_inter_frame_gen9.g9a +INTEL_G9B = intra_frame_gen9.g9b inter_frame_gen9.g9b inter_bframe_gen9.g9b mpeg2_intra_gen9.g9b mpeg2_inter_gen9.g9b vp8_intra_frame_gen9.g9b vp8_inter_frame_gen9.g9b +INTEL_G9A = intra_frame_gen9.g9a 
inter_frame_gen9.g9a inter_bframe_gen9.g9a mpeg2_intra_gen9.g9a mpeg2_inter_gen9.g9a vp8_intra_frame_gen9.g9a vp8_inter_frame_gen9.g9a INTEL_GEN9_INC = $(INTEL_GEN8_INC) INTEL_GEN9_ASM = $(INTEL_G9A:%.g9a=%.gen9.asm) diff --git a/src/shaders/vme/intra_frame_gen8.asm b/src/shaders/vme/intra_frame_gen8.asm index 15b260e..ee40ec9 100644 --- a/src/shaders/vme/intra_frame_gen8.asm +++ b/src/shaders/vme/intra_frame_gen8.asm @@ -89,9 +89,28 @@ mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; +mov (8) msg_reg0.0<1>:ud 0:ud {align1}; +mov (1) msg_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) tmp_reg0.0<1>:UD qp_ub<0,1,0>:ub {align1}; +mul (1) msg_reg0.8<1>:ud tmp_reg0.0<0,1,0>:ud 2:ud {align1}; +send (16) + msg_ind + vme_cost_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + BIND_IDX_COST, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; /* m2, get the MV/Mb cost passed by constant buffer when creating EU thread by MEDIA_OBJECT */ -mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD vme_cost_wb<8,8,1>:UD {align1}; /* m3. 
This is changed for FWD/BWD cost center */ mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; diff --git a/src/shaders/vme/intra_frame_gen8.g8b b/src/shaders/vme/intra_frame_gen8.g8b index 56c7283..f028f2d 100644 --- a/src/shaders/vme/intra_frame_gen8.g8b +++ b/src/shaders/vme/intra_frame_gen8.g8b @@ -33,7 +33,12 @@ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, - { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28000608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28142288, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24002208, 0x000000a8, 0x00000000 }, + { 0x00000041, 0x28080208, 0x06000400, 0x00000002 }, + { 0x0a800031, 0x25a00a08, 0x0e000800, 0x02180208 }, + { 0x00600001, 0x28400208, 0x008d05a0, 0x00000000 }, { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/intra_frame_gen9.g9b b/src/shaders/vme/intra_frame_gen9.g9b index 63d7455..23b10ad 100644 --- a/src/shaders/vme/intra_frame_gen9.g9b +++ b/src/shaders/vme/intra_frame_gen9.g9b @@ -33,7 +33,12 @@ { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, { 0x0c600031, 0x26200a88, 0x06000800, 0x02190006 }, - { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28000608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28142288, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24002208, 0x000000a8, 0x00000000 }, + { 0x00000041, 0x28080208, 0x06000400, 0x00000002 }, + { 0x0a800031, 0x25a00a08, 0x06000800, 0x02180208 }, + { 0x00600001, 0x28400208, 0x008d05a0, 0x00000000 }, { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/mpeg2_intra_gen8.asm 
b/src/shaders/vme/mpeg2_intra_gen8.asm new file mode 100644 index 0000000..fbf030b --- /dev/null +++ b/src/shaders/vme/mpeg2_intra_gen8.asm @@ -0,0 +1,201 @@ +/* + * Copyright (c) <2010>, Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +// Module name: IntraFrame_gen8.asm +// +// Make intra prediction estimation for Intra frame on Gen8 +// + +// +// Now, begin source code....
+// + +/* + * __START + */ +__INTRA_START: +mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1}; +mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ; +mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ; + +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1}; +mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1}; +mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */ +mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1}; +add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1}; +mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1}; +mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ + +/* + * Media Read Message -- fetch Luma neighbor edge pixels + */ +/* ROW */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1}; + +/* + * Media Read Message -- fetch Chroma neighbor edge pixels + */ +/* ROW */ +shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16 , y * 8 */ +mul (1) read0_header.0<1>:D read0_header.0<0,1,0>:D 2:W {align1}; +add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W 
{align1}; /* X offset */ +add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */ +mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* COL */ +shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 3:UW {align1}; /* x * 16, y * 8 */ +mul (1) read1_header.0<1>:D read1_header.0<0,1,0>:D 2:W {align1}; +add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */ +mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; +mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; +send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; + +/* m2, get the MV/Mb cost passed by constant buffer +when creating EU thread by MEDIA_OBJECT */ +mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1}; + +/* m3. This is changed for FWD/BWD cost center */ +mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; + +/* m4.*/ +mov (8) vme_msg_4<1>:ud 0x0:ud {align1}; + +/* m5 */ +mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1}; +and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1}; +mov (8) vme_msg_5<1>:UD INEP_ROW.0<8,8,1>:UD {align1}; + +mov (1) tmp_reg0.0<1>:UW LUMA_CHROMA_MODE:UW {align1}; +/* Use the Luma mode */ +mov (1) vme_msg_5.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +/* m6 */ +mov (8) vme_msg_6<1>:UD 0x0:UD {align1}; +mov (16) vme_msg_6.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; +mov (1) vme_msg_6.16<1>:UD INTRA_PREDICTORE_MODE {align1}; + +/* the penalty for Intra mode */ +mov (1) vme_msg_6.28<1>:UD 0x010101:UD {align1}; +mov (1) vme_msg_6.20<1>:UW CHROMA_ROW.6<0,1,0>:UW {align1}; + + +/* m7 */ + +mov (4) vme_msg_7.16<1>:UD CHROMA_ROW.8<4,4,1>:UD {align1}; +mov (8) vme_msg_7.0<1>:UW CHROMA_COL.2<16,8,2>:UW {align1}; + +/* + * VME message + */ + +/* m1 */ +mov (1) intra_flag<1>:UW 0x0:UW {align1}; +and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1}; +(f0.0) mov (1) intra_part_mask_ub<1>:UB 
LUMA_INTRA_8x8_DISABLE {align1}; + +/* assign MB intra struct from the thread payload*/ +mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; + +/* Disable DC HAAR component when calculating HARR SATD block */ +mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1}; +mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1}; + +mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1}; +/* m0 */ +/* 16x16 Source, Intra_harr */ +add (1) vme_m0.12<1>:UD vme_m0.12<0,1,0>:ud INTRA_SAD_HAAR:UD {align1}; +mov (8) vme_msg_0<1>:UD vme_m0.0<8,8,1>:UD {align1}; + +/* after verification it will be passed by using payload */ +send (8) + vme_msg_ind + vme_wb<1>:UD + null + cre( + BIND_IDX_VME, + VME_SIC_MESSAGE_TYPE + ) + mlen sic_vme_msg_length + rlen vme_wb_length + {align1}; +/* + * Oword Block Write message + */ +mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1}; + +mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1}; +mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1}; +mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1}; +mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1}; + +/* Distortion, Intra (17-16), */ +mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1}; + +mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1}; +/* VME clock counts */ +mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1}; + +mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1}; + +/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */ +send (16) + msg_ind + obw_wb + null + data_port( + OBW_CACHE_TYPE, + OBW_MESSAGE_TYPE, + OBW_CONTROL_2, + OBW_BIND_IDX, + OBW_WRITE_COMMIT_CATEGORY, + OBW_HEADER_PRESENT + ) + mlen 2 + rlen obw_wb_length + {align1}; + +__EXIT: +/* + * kill thread + */ +mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1}; +send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT}; diff --git a/src/shaders/vme/mpeg2_intra_gen8.g8a b/src/shaders/vme/mpeg2_intra_gen8.g8a new file mode 100644 index 0000000..7abd8b9 --- 
/dev/null +++ b/src/shaders/vme/mpeg2_intra_gen8.g8a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "mpeg2_intra_gen8.asm" diff --git a/src/shaders/vme/mpeg2_intra_gen8.g8b b/src/shaders/vme/mpeg2_intra_gen8.g8b new file mode 100644 index 0000000..56c7283 --- /dev/null +++ b/src/shaders/vme/mpeg2_intra_gen8.g8b @@ -0,0 +1,72 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x23800a88, 0x0e000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x23a00a88, 0x0e000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x04600031, 0x26000a88, 0x0e000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 
0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x04600031, 0x26200a88, 0x0e000800, 0x02190006 }, + { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 
0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0203 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x0e000e00, 0x82000010 }, diff --git a/src/shaders/vme/mpeg2_intra_gen9.g9a b/src/shaders/vme/mpeg2_intra_gen9.g9a new file mode 100644 index 0000000..7abd8b9 --- /dev/null +++ b/src/shaders/vme/mpeg2_intra_gen9.g9a @@ -0,0 +1,2 @@ +#include "vme8.inc" +#include "mpeg2_intra_gen8.asm" diff --git a/src/shaders/vme/mpeg2_intra_gen9.g9b b/src/shaders/vme/mpeg2_intra_gen9.g9b new file mode 100644 index 0000000..63d7455 --- /dev/null +++ b/src/shaders/vme/mpeg2_intra_gen9.g9b @@ -0,0 +1,72 @@ + { 0x00800001, 0x24000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24400608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24800608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f }, + { 0x00000001, 0x24142288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 }, + { 0x00000001, 0x24342288, 0x00000014, 0x00000000 }, + { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 }, + { 0x00000001, 0x24542288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 }, + { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 }, + { 0x00000041, 0x24880208, 0x06000488, 0x00000002 }, + { 0x00000001, 0x24942288, 0x00000014, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x0c600031, 0x23800a88, 0x06000800, 0x02190004 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x0c600031, 0x23a00a88, 0x06000800, 0x02290004 }, + { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 }, + { 
0x00000041, 0x24000a28, 0x1e000400, 0x00020002 }, + { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 }, + { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff }, + { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 }, + { 0x0c600031, 0x26000a88, 0x06000800, 0x02190006 }, + { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 }, + { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 }, + { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc }, + { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 }, + { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 }, + { 0x0c600031, 0x26200a88, 0x06000800, 0x02190006 }, + { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28600608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x28800608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23800608, 0x00000000, 0x00000000 }, + { 0x00000005, 0x23840208, 0x06000384, 0xff000000 }, + { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00000000 }, + { 0x00000001, 0x28a52288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 }, + { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 }, + { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 }, + { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 }, + { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 }, + { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 }, + { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x247c0e88, 0x08000000, 0x00000002 }, + { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 }, + { 0x00000001, 0x24001648, 0x10000000, 0x00200020 }, + { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 }, + { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, + { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, + { 0x0d600031, 0x21800a08, 0x06000800, 0x10782000 }, + { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, + { 
0x00000001, 0x28200208, 0x00000180, 0x00000000 }, + { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, + { 0x00000001, 0x28280208, 0x00000194, 0x00000000 }, + { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 }, + { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 }, + { 0x00000001, 0x28340208, 0x00000188, 0x00000000 }, + { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 }, + { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0203 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07800031, 0x24000a40, 0x06000e00, 0x82000010 }, diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc index 5f05ef2..17b0fce 100644 --- a/src/shaders/vme/vme8.inc +++ b/src/shaders/vme/vme8.inc @@ -59,6 +59,7 @@ define(`BIND_IDX_VME_REF0', `1') define(`BIND_IDX_VME_REF1', `2') define(`BIND_IDX_OUTPUT', `3') define(`BIND_IDX_INEP', `4') +define(`BIND_IDX_COST', `8') define(`SUB_PEL_MODE_INTEGER', `0x00000000') define(`SUB_PEL_MODE_HALF', `0x00001000') @@ -158,6 +159,7 @@ define(`transform_8x8_ub', `inline_reg0.4') define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') define(`quality_level_ub', `inline_reg0.7') +define(`qp_ub', `inline_reg0.8') /* * GRF 6~11 -- reserved @@ -242,6 +244,7 @@ define(`tmp_uw7', `tmp_rega.14') define(`vme_m2', `r43') define(`vme_m3', `r44') +define(`vme_cost_wb', `r45') /* * MRF registers */ -- 2.7.4
_______________________________________________ Libva mailing list Libva@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libva